# Mining testimonial fragments of the Holocaust

**Experience domain:** cry

### Load the necessary libraries

In [57]:
import sys; sys.path.insert(0, '..')
import itertools

In [58]:
import get_topic_model_concordance as topic_concordancer
from utils import blacklab, db, text
mongo = db.get_db()

In [59]:
%config Completer.use_jedi = False
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import random

### Helper functions

In [60]:
def create_contextual_query(lemmas, context_length=50):
    """Build a corpus query that matches the given lemmas in any order.

    Every ordering of `lemmas` becomes one parenthesised alternative in which
    consecutive lemmas are separated by a gap of 0 to `context_length`
    arbitrary tokens; the alternatives are joined with '|'.

    Args:
        lemmas: sequence of lemma strings.
        context_length: maximum number of tokens allowed between two lemmas.

    Returns:
        The query as a single string.
    """
    gap = '[]{0,' + str(context_length) + '}'
    alternatives = []
    for ordering in itertools.permutations(lemmas, len(lemmas)):
        terms = ['[lemma="' + lemma + '"]' for lemma in ordering]
        alternatives.append('(' + gap.join(terms) + ')')
    return '|'.join(alternatives)
        
        
            

In [61]:
from utils import blacklab, db, text
import requests
import json
def find_sentence_id(label):
    """Locate a quoted testimony fragment in the corpus.

    The text is tokenized by the annotation server listening on
    localhost:9000 (presumably a Stanford CoreNLP server -- confirm), turned
    into a token-level query for `blacklab.search_blacklab`, and the first hit
    is mapped to sentence indices through the `tokens` collection in MongoDB.

    Args:
        label: the fragment text to look up. An ellipsis ("...") inside the
            text stands for a gap of up to 50 tokens; an ellipsis as the first
            or last token is ignored.

    Returns:
        A tuple (start_sentence_index, end_sentence_index, testimony_id) for
        the first hit, or None when the search or the database lookup fails
        (the query and the error are printed in that case).
    """
    props = {'annotators': 'tokenize'}
    response = requests.post('http://localhost:9000/',
                             params={'properties': json.dumps(props)},
                             data=label.encode('utf-8'))
    # Decode the reply as UTF-8 regardless of the charset the server declares;
    # assigning to `requests.encoding` (the module) had no effect.
    response.encoding = 'utf-8'
    # json.loads() no longer accepts an `encoding` argument (removed in 3.9).
    tokens = json.loads(response.text)['tokens']

    last = len(tokens) - 1
    terms = []
    for position, token in enumerate(tokens):
        term = _token_to_query_term(token['word'], position in (0, last))
        if term is not None:
            terms.append(term)
    query = ' '.join(terms)

    try:
        hit = blacklab.search_blacklab(query, window=0,
                                       lemma=False,
                                       include_match=True)[0]
        print (hit)
        document = db.get_db().tokens.find_one(
            {'testimony_id': hit['testimony_id']}, {'_id': 0})
        corpus_tokens = document['tokens']
        return (corpus_tokens[hit['token_start']]['sentence_index'],
                corpus_tokens[hit['token_end']]['sentence_index'],
                hit['testimony_id'])
    except Exception as error:
        # Best effort: report the failing query instead of aborting the cell,
        # but let KeyboardInterrupt / SystemExit through.
        print("The following query returned a null result")
        print(query)
        print(repr(error))
        return None


# Substrings marking tokens (contractions, punctuation) that are matched by a
# single wildcard token instead of their literal form.
_WILDCARD_MARKERS = ("n't", "'re", "?", ".", "'s", ",")


def _token_to_query_term(word, at_edge):
    """Return the query term for one token, or None to leave the token out."""
    if '...' in word:
        # A leading/trailing ellipsis is dropped; an inner one is a long gap.
        return None if at_edge else '[]{0,50}'
    if '-' in word:
        return '[]{0,3}'
    if any(marker in word for marker in _WILDCARD_MARKERS):
        return '[]'
    return '["' + word + '"]'
        
            


In [62]:
def create_parent_node(label):
    """Build the document for a new root node labelled `label`.

    The result has the keys 'label', 'essay_id' (a random integer from 1 to
    20) and 'tree' (a parent node produced by get_node, carrying the same
    label).
    """
    # The random draws happen in the same order as before: testimony id
    # first, then essay id, then the draws made inside get_node.
    placeholder_testimony_id = random.randint(1, 20)
    essay_id = random.randint(1, 20)
    tree = get_node(placeholder_testimony_id, {'label': label}, is_parent=True)
    tree['label'] = label
    return {'label': label, 'essay_id': essay_id, 'tree': tree}

In [63]:
def get_node(testimony_id, node, is_parent=False):
    """Build a parent or leaf node dictionary for the fragment tree.

    Args:
        testimony_id: accepted for the callers' sake; not read by this function.
        node: mapping with at least 'label'. For a leaf it must also provide
            'testimony_id', 'media_index', 'media_offset',
            'start_sentence_index' and 'end_sentence_index'.
        is_parent: when true, the positional fields are filled with random
            integers from 1 to 20 instead of being copied from `node`.

    Returns:
        A dict with the label, the positional fields and an empty 'children' list.
    """
    if not is_parent:
        # Leaf: copy the identifiers and store the positions as floats.
        leaf = {'label': node['label'], 'testimony_id': node['testimony_id']}
        for field in ('media_index', 'media_offset',
                      'start_sentence_index', 'end_sentence_index'):
            leaf[field] = float(node[field])
        leaf['children'] = []
        return leaf

    # Parent: only the label is taken from `node`; the rest are random values.
    parent = {'label': node['label']}
    for field in ('testimony_id', 'media_index', 'media_offset',
                  'start_sentence_index', 'end_sentence_index'):
        parent[field] = random.randint(1, 20)
    parent['children'] = []
    return parent

In [64]:
def check_if_main_node_exist(node):
    """Tell whether a fragment document labelled `node` is stored.

    Args:
        node: label of the main node to look for in `mongo.fragments`.

    Returns:
        True if a non-empty matching document exists, False otherwise.
    """
    # find_one() returns None when nothing matches. Indexing the cursor from
    # find() raised IndexError in that case, so False was never returned.
    document = mongo.fragments.find_one({'label': node}, {'_id': 0})
    return bool(document)

In [65]:
def add_main_node(label):
    """Store a new root node document for `label` in `mongo.fragments`.

    No check for an existing document with the same label is made here.
    """
    # insert_one() replaces the deprecated Collection.insert(), matching the
    # delete_one()/replace_one() calls used elsewhere in this notebook.
    mongo.fragments.insert_one(create_parent_node(label))

In [66]:
def delete_main_node(label):
    """Delete one document from `mongo.fragments` whose label equals `label`."""
    selector = {'label': label}
    mongo.fragments.delete_one(selector)

In [67]:
def add_testimonial_fragments(fragments):
    """Attach a mid node with its leaf fragments to a stored main node.

    Args:
        fragments: dict with the keys 'main_node' (label of the stored root),
            'mid_node' (label of the node to add) and 'fragments' (list of
            leaf dicts accepted by get_node).

    Nothing is written when the main node is not found, or when a child with
    the mid-node label already exists (a message is printed in that case).
    """
    main_label = fragments['main_node']
    if not check_if_main_node_exist(main_label):
        return

    stored = mongo.fragments.find({'label': main_label}, {'_id': 0})[0]
    existing_labels = [child['label'] for child in stored['tree']['children']]
    if fragments['mid_node'] in existing_labels:
        print ("mid node exists cannot be added")
        return

    # The first argument of get_node is not used for parent nodes.
    mid_node = get_node('r', {'label': fragments['mid_node']}, is_parent=True)
    mid_node['children'].extend(
        get_node(item['testimony_id'], item) for item in fragments['fragments'])
    stored['tree']['children'].append(mid_node)
    mongo.fragments.replace_one({'label': main_label}, stored)

### Add the main node

In [68]:
# Label of the main (root) node under which this notebook stores its fragments.
main_node = "cry"
# Kept for manual cleanup: uncomment to remove a previously stored main node.
#delete_main_node("numbness")
# Insert a new root document for `main_node` into the fragments collection.
# NOTE(review): add_main_node does not check for an existing document, so
# re-running this cell inserts another one with the same label.
add_main_node(main_node)

  


### Set up the query

# Matches a token other than "[" or "(" followed by a token with lemma "cry"
# (presumably to skip bracketed transcript annotations -- confirm).
# Raw string: "\[" and "\(" are escapes for the query engine, not for Python,
# and are invalid escape sequences in a normal string literal.
query = r'[word !="\[" & word!="\("][lemma="cry"]'

result = topic_concordancer.main(query,window=25,topicn=25)

### Print the key topics

# List every topic with its index so that one can be picked for inspection.
for i,element in enumerate(result['topic_documents']):
    print (i)
    # presumably index 1 of 'topic_words' holds the words of the topic -- confirm
    topic_words =  element['topic_words'][1]
    print (topic_words)
    # Blank lines between topics.
    print ('\n')

### Analyze documents

# Index of the topic whose documents are shown.
i=0
# Print the matched words of the first 25 documents of that topic. The loop
# variable is not named `text`, because that would overwrite the `text`
# module imported from utils.
for document in result['topic_documents'][i]['texts'][0:25]:
    print (document['matched_text_words'])
    print ('\n')

## Testimonial fragments

### 1.  

In [69]:
lemmas = ["lose","cry"]

In [70]:
query = create_contextual_query(lemmas,context_length=25)
print (query)

([lemma="lose"][]{0,25}[lemma="cry"])|([lemma="cry"][]{0,25}[lemma="lose"])


In [71]:
domain_term = "lose"

In [72]:
# Payload for add_testimonial_fragments: the main node to extend, the label
# of the mid node to create, and the leaf fragments collected below.
fragments = {}
fragments['main_node'] = main_node
fragments['mid_node'] = domain_term
fragments['fragments'] = []

In [73]:
# First leaf fragment for the current mid node.
fragment_1 = {}
# Text used to look the passage up in the corpus.
fragment_1['original_sentence'] = "The boy's name was Eli, and the little girl was named Dina. And she knew she lost them. And she started to cry terribly."
# Text stored as the node label (here identical to the lookup text).
fragment_1['label']="The boy's name was Eli, and the little girl was named Dina. And she knew she lost them. And she started to cry terribly."
# indices is (start sentence index, end sentence index, testimony id).
# NOTE(review): find_sentence_id returns None when the lookup fails, in which
# case the indexing below raises TypeError.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1['start_sentence_index']=indices[0]
fragment_1['end_sentence_index']=indices[1]
# The media position is not resolved in this cell; both values are set to 0.
fragment_1['media_offset'] = 0
fragment_1['media_index'] = 0
fragment_1['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22The%22%5D+%5B%22boy%22%5D+%5B%5D+%5B%22name%22%5D+%5B%22was%22%5D+%5B%22Eli%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22the%22%5D+%5B%22little%22%5D+%5B%22girl%22%5D+%5B%22was%22%5D+%5B%22named%22%5D+%5B%22Dina%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22she%22%5D+%5B%22knew%22%5D+%5B%22she%22%5D+%5B%22lost%22%5D+%5B%22them%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22she%22%5D+%5B%22started%22%5D+%5B%22to%22%5D+%5B%22cry%22%5D+%5B%22terribly%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "The boy 's name was Eli , and the little girl was named Dina . And she knew she lost them . And she started to cry terribly . ", 'right': '', 'complete_match': "The boy 's name was Eli , and the little girl was named Dina . And she knew she lost them . And she started to cry terribly . ", 'testimony_id': 'HVT-107', 'shelfmark': ['Fortunoff HVT-107'], 'token_start': 8039, 'token_end': 8068}


In [74]:
fragment_2 = {}
fragment_2['original_sentence'] = "Everybody lost someone and maybe it was at this particular time easier to share the grief because everybody cried."
fragment_2['label']="Everybody lost someone and maybe it was at this particular time easier to share the grief because everybody cried."
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2['start_sentence_index']=indices[0]
fragment_2['end_sentence_index']=indices[1]
fragment_2['media_offset'] = 0
fragment_2['media_index'] = 0
fragment_2['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Everybody%22%5D+%5B%22lost%22%5D+%5B%22someone%22%5D+%5B%22and%22%5D+%5B%22maybe%22%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22at%22%5D+%5B%22this%22%5D+%5B%22particular%22%5D+%5B%22time%22%5D+%5B%22easier%22%5D+%5B%22to%22%5D+%5B%22share%22%5D+%5B%22the%22%5D+%5B%22grief%22%5D+%5B%22because%22%5D+%5B%22everybody%22%5D+%5B%22cried%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Everybody lost someone and maybe it was at this particular time easier to share the grief because everybody cried . ', 'right': '', 'complete_match': 'Everybody lost someone and maybe it was at this particular time easier to share the grief because everybody cried . ', 'testimony_id': 'irn504795', 'shelfmark': ['USHMM RG-50.030*0300'], 'token_start': 6107, 'token_end': 6127}


In [75]:
fragment_3 = {}
fragment_3['original_sentence'] = "And she wasn't there, and she blame right now in five minutes I lost a family, and cries all, all the time."
fragment_3['label']="And she wasn't there, and she blame right now in five minutes I lost a family, and cries all, all the time."
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3['start_sentence_index']=indices[0]
fragment_3['end_sentence_index']=indices[1]
fragment_3['media_offset'] = 0
fragment_3['media_index'] = 0
fragment_3['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22she%22%5D+%5B%22was%22%5D+%5B%5D+%5B%22there%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22she%22%5D+%5B%22blame%22%5D+%5B%22right%22%5D+%5B%22now%22%5D+%5B%22in%22%5D+%5B%22five%22%5D+%5B%22minutes%22%5D+%5B%22I%22%5D+%5B%22lost%22%5D+%5B%22a%22%5D+%5B%22family%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22cries%22%5D+%5B%22all%22%5D+%5B%5D+%5B%22all%22%5D+%5B%22the%22%5D+%5B%22time%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "And she was n't there , and she blame right now in five minutes I lost a family , and cries all , all the time . ", 'right': '', 'complete_match': "And she was n't there , and she blame right now in five minutes I lost a family , and cries all , all the time . ", 'testimony_id': 'irn504850', 'shelfmark': ['USHMM RG-50.030*0357'], 'token_start': 25049, 'token_end': 25076}


In [76]:
fragment_4 = {}
fragment_4['original_sentence'] = "She was crying not one night, my mother, remembering that she lost her husband"
fragment_4['label']= "She was crying not one night, my mother, remembering that she lost her husband (..)"
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4['start_sentence_index']=indices[0]
fragment_4['end_sentence_index']=indices[1]
fragment_4['media_offset'] = 0
fragment_4['media_index'] = 0
fragment_4['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22She%22%5D+%5B%22was%22%5D+%5B%22crying%22%5D+%5B%22not%22%5D+%5B%22one%22%5D+%5B%22night%22%5D+%5B%5D+%5B%22my%22%5D+%5B%22mother%22%5D+%5B%5D+%5B%22remembering%22%5D+%5B%22that%22%5D+%5B%22she%22%5D+%5B%22lost%22%5D+%5B%22her%22%5D+%5B%22husband%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'She was crying not one night , my mother , remembering that she lost her husband ', 'right': '', 'complete_match': 'She was crying not one night , my mother , remembering that she lost her husband ', 'testimony_id': 'irn509677', 'shelfmark': ['USHMM RG-50.030*0416'], 'token_start': 3335, 'token_end': 3351}


In [77]:
fragment_5 = {}
fragment_5['original_sentence'] = "And she started crying to him that she had lost her family."
fragment_5['label']= "And she started crying to him that she had lost her family."
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5['start_sentence_index']=indices[0]
fragment_5['end_sentence_index']=indices[1]
fragment_5['media_offset'] = 0
fragment_5['media_index'] = 0
fragment_5['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22she%22%5D+%5B%22started%22%5D+%5B%22crying%22%5D+%5B%22to%22%5D+%5B%22him%22%5D+%5B%22that%22%5D+%5B%22she%22%5D+%5B%22had%22%5D+%5B%22lost%22%5D+%5B%22her%22%5D+%5B%22family%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And she started crying to him that she had lost her family . ', 'right': '', 'complete_match': 'And she started crying to him that she had lost her family . ', 'testimony_id': 'usc_shoah_4113', 'shelfmark': ['USC 4113'], 'token_start': 20577, 'token_end': 20590}


In [78]:
add_testimonial_fragments(fragments)

### 2.  

In [79]:
lemmas = ["beg","cry"]

In [80]:
query = create_contextual_query(lemmas,context_length=25)
print (query)

([lemma="beg"][]{0,25}[lemma="cry"])|([lemma="cry"][]{0,25}[lemma="beg"])


In [81]:
# NOTE(review): the mid-node label is empty here, and this section contains no
# add_testimonial_fragments(fragments) call, so the fragments collected below
# are not stored -- confirm whether that is intended.
domain_term = ""

In [82]:
fragments = {}
fragments['main_node'] = main_node
fragments['mid_node'] = domain_term
fragments['fragments'] = []

In [83]:
fragment_1 = {}
fragment_1['original_sentence'] = "And my father started crying him, begging them."
fragment_1['label']="And my father started crying him, begging them."
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1['start_sentence_index']=indices[0]
fragment_1['end_sentence_index']=indices[1]
fragment_1['media_offset'] = 0
fragment_1['media_index'] = 0
fragment_1['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22my%22%5D+%5B%22father%22%5D+%5B%22started%22%5D+%5B%22crying%22%5D+%5B%22him%22%5D+%5B%5D+%5B%22begging%22%5D+%5B%22them%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And my father started crying him , begging them . ', 'right': '', 'complete_match': 'And my father started crying him , begging them . ', 'testimony_id': 'HVT-72', 'shelfmark': ['Fortunoff HVT-72'], 'token_start': 3026, 'token_end': 3036}


In [84]:
fragment_2 = {}
fragment_2['original_sentence'] = "And they said, we're going to kill you. And one guy was standing with a gun. And we were crying and begging."
fragment_2['label']="And they said, we're going to kill you. And one guy was standing with a gun. And we were crying and begging."
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2['start_sentence_index']=indices[0]
fragment_2['end_sentence_index']=indices[1]
fragment_2['media_offset'] = 0
fragment_2['media_index'] = 0
fragment_2['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22they%22%5D+%5B%22said%22%5D+%5B%5D+%5B%22we%22%5D+%5B%5D+%5B%22going%22%5D+%5B%22to%22%5D+%5B%22kill%22%5D+%5B%22you%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22one%22%5D+%5B%22guy%22%5D+%5B%22was%22%5D+%5B%22standing%22%5D+%5B%22with%22%5D+%5B%22a%22%5D+%5B%22gun%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22crying%22%5D+%5B%22and%22%5D+%5B%22begging%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "And they said , we 're going to kill you . And one guy was standing with a gun . And we were crying and begging . ", 'right': '', 'complete_match': "And they said , we 're going to kill you . And one guy was standing with a gun . And we were crying and begging . ", 'testimony_id': 'usc_shoah_13213', 'shelfmark': ['USC 13213'], 'token_start': 10309, 'token_end': 10336}


In [85]:
fragment_3 = {}
fragment_3['original_sentence'] = "So we all started to cry, you know? My mother is on her knees, begging. The kids are crying."
fragment_3['label']="So we all started to cry, you know? My mother is on her knees, begging. The kids are crying."
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3['start_sentence_index']=indices[0]
fragment_3['end_sentence_index']=indices[1]
fragment_3['media_offset'] = 0
fragment_3['media_index'] = 0
fragment_3['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22So%22%5D+%5B%22we%22%5D+%5B%22all%22%5D+%5B%22started%22%5D+%5B%22to%22%5D+%5B%22cry%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D+%5B%22My%22%5D+%5B%22mother%22%5D+%5B%22is%22%5D+%5B%22on%22%5D+%5B%22her%22%5D+%5B%22knees%22%5D+%5B%5D+%5B%22begging%22%5D+%5B%5D+%5B%22The%22%5D+%5B%22kids%22%5D+%5B%22are%22%5D+%5B%22crying%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'So we all started to cry , you know ? My mother is on her knees , begging . The kids are crying . ', 'right': '', 'complete_match': 'So we all started to cry , you know ? My mother is on her knees , begging . The kids are crying . ', 'testimony_id': 'irn517612', 'shelfmark': ['USHMM RG-50.030*0498'], 'token_start': 24642, 'token_end': 24666}


In [86]:
fragment_4 = {}
fragment_4['original_sentence'] = "they picked out six men from our ranks from the work, and they were crying and begging for their life."
fragment_4['label']= "(..) they picked out six men from our ranks from the work, and they were crying and begging for their life."
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4['start_sentence_index']=indices[0]
fragment_4['end_sentence_index']=indices[1]
fragment_4['media_offset'] = 0
fragment_4['media_index'] = 0
fragment_4['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22they%22%5D+%5B%22picked%22%5D+%5B%22out%22%5D+%5B%22six%22%5D+%5B%22men%22%5D+%5B%22from%22%5D+%5B%22our%22%5D+%5B%22ranks%22%5D+%5B%22from%22%5D+%5B%22the%22%5D+%5B%22work%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22were%22%5D+%5B%22crying%22%5D+%5B%22and%22%5D+%5B%22begging%22%5D+%5B%22for%22%5D+%5B%22their%22%5D+%5B%22life%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'they picked out six men from our ranks from the work , and they were crying and begging for their life . ', 'right': '', 'complete_match': 'they picked out six men from our ranks from the work , and they were crying and begging for their life . ', 'testimony_id': 'irn504760', 'shelfmark': ['USHMM RG-50.030*0276'], 'token_start': 6925, 'token_end': 6947}


### 3.

In [87]:
lemmas = ["day","cry"]

In [88]:
query = create_contextual_query(lemmas,context_length=25)
print (query)

([lemma="day"][]{0,25}[lemma="cry"])|([lemma="cry"][]{0,25}[lemma="day"])


In [89]:
domain_term = "day and night"

In [90]:
fragments = {}
fragments['main_node'] = main_node
fragments['mid_node'] = domain_term
fragments['fragments'] = []

In [91]:
fragment_1 = {}
fragment_1['original_sentence'] = "I was crying day and night because I saw myself finish"
fragment_1['label']="(..) I was crying day and night because I saw myself finish (..)"
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1['start_sentence_index']=indices[0]
fragment_1['end_sentence_index']=indices[1]
fragment_1['media_offset'] = 0
fragment_1['media_index'] = 0
fragment_1['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22was%22%5D+%5B%22crying%22%5D+%5B%22day%22%5D+%5B%22and%22%5D+%5B%22night%22%5D+%5B%22because%22%5D+%5B%22I%22%5D+%5B%22saw%22%5D+%5B%22myself%22%5D+%5B%22finish%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I was crying day and night because I saw myself finish ', 'right': '', 'complete_match': 'I was crying day and night because I saw myself finish ', 'testimony_id': 'usc_shoah_13096', 'shelfmark': ['USC 13096'], 'token_start': 3528, 'token_end': 3539}


In [92]:
fragment_2 = {}
fragment_2['original_sentence'] = "All day and night, day and night she was crying why she left them."
fragment_2['label']="All day and night, day and night she was crying why she left them."
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2['start_sentence_index']=indices[0]
fragment_2['end_sentence_index']=indices[1]
fragment_2['media_offset'] = 0
fragment_2['media_index'] = 0
fragment_2['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22All%22%5D+%5B%22day%22%5D+%5B%22and%22%5D+%5B%22night%22%5D+%5B%5D+%5B%22day%22%5D+%5B%22and%22%5D+%5B%22night%22%5D+%5B%22she%22%5D+%5B%22was%22%5D+%5B%22crying%22%5D+%5B%22why%22%5D+%5B%22she%22%5D+%5B%22left%22%5D+%5B%22them%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'All day and night , day and night she was crying why she left them . ', 'right': '', 'complete_match': 'All day and night , day and night she was crying why she left them . ', 'testimony_id': 'HVT-156', 'shelfmark': ['Fortunoff HVT-156'], 'token_start': 8756, 'token_end': 8772}


In [93]:
fragment_3 = {}
fragment_3['original_sentence'] = "We just cried day and night. wh-- where my son is. I wasn't sure if he's alive or not."
fragment_3['label']="We just cried day and night. wh-- where my son is. I wasn't sure if he's alive or not."
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3['start_sentence_index']=indices[0]
fragment_3['end_sentence_index']=indices[1]
fragment_3['media_offset'] = 0
fragment_3['media_index'] = 0
fragment_3['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22just%22%5D+%5B%22cried%22%5D+%5B%22day%22%5D+%5B%22and%22%5D+%5B%22night%22%5D+%5B%5D+%5B%22wh%22%5D+%5B%5D%7B0%2C3%7D+%5B%22where%22%5D+%5B%22my%22%5D+%5B%22son%22%5D+%5B%22is%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22was%22%5D+%5B%5D+%5B%22sure%22%5D+%5B%22if%22%5D+%5B%22he%22%5D+%5B%5D+%5B%22alive%22%5D+%5B%22or%22%5D+%5B%22not%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "We just cried day and night . wh -- where my son is . I was n't sure if he 's alive or not . ", 'right': '', 'complete_match': "We just cried day and night . wh -- where my son is . I was n't sure if he 's alive or not . ", 'testimony_id': 'HVT-34', 'shelfmark': ['Fortunoff HVT-34'], 'token_start': 4417, 'token_end': 4442}


In [94]:
fragment_4 = {}
fragment_4['original_sentence'] = "I got my period 10 days before I should have. And then did not have it anymore for two years. And I cried for two weeks, constantly, day and night."
fragment_4['label']= "I got my period 10 days before I should have. And then did not have it anymore for two years. And I cried for two weeks, constantly, day and night."
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4['start_sentence_index']=indices[0]
fragment_4['end_sentence_index']=indices[1]
fragment_4['media_offset'] = 0
fragment_4['media_index'] = 0
fragment_4['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22got%22%5D+%5B%22my%22%5D+%5B%22period%22%5D+%5B%2210%22%5D+%5B%22days%22%5D+%5B%22before%22%5D+%5B%22I%22%5D+%5B%22should%22%5D+%5B%22have%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22then%22%5D+%5B%22did%22%5D+%5B%22not%22%5D+%5B%22have%22%5D+%5B%22it%22%5D+%5B%22anymore%22%5D+%5B%22for%22%5D+%5B%22two%22%5D+%5B%22years%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22I%22%5D+%5B%22cried%22%5D+%5B%22for%22%5D+%5B%22two%22%5D+%5B%22weeks%22%5D+%5B%5D+%5B%22constantly%22%5D+%5B%5D+%5B%22day%22%5D+%5B%22and%22%5D+%5B%22night%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I got my period 10 days before I should have . And then did not have it anymore for two years . And I cried for two weeks , constantly , day and night . ', 'right': '', 'complete_match': 'I got my period 10 days before I should have . And then did not have it anymore for two years . And I cried for two week

In [95]:
fragment_5 = {}
fragment_5['original_sentence'] = "And I was crying, day and night. Little bugs everywhere"
fragment_5['label']= "And I was crying, day and night. Little bugs everywhere"
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5['start_sentence_index']=indices[0]
fragment_5['end_sentence_index']=indices[1]
fragment_5['media_offset'] = 0
fragment_5['media_index'] = 0
fragment_5['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22I%22%5D+%5B%22was%22%5D+%5B%22crying%22%5D+%5B%5D+%5B%22day%22%5D+%5B%22and%22%5D+%5B%22night%22%5D+%5B%5D+%5B%22Little%22%5D+%5B%22bugs%22%5D+%5B%22everywhere%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And I was crying , day and night . Little bugs everywhere ', 'right': '', 'complete_match': 'And I was crying , day and night . Little bugs everywhere ', 'testimony_id': 'usc_shoah_26888', 'shelfmark': ['USC 26888'], 'token_start': 23714, 'token_end': 23726}


In [96]:
add_testimonial_fragments(fragments)

### 4.

In [97]:
lemmas = ["cry","God"]

In [98]:
query = create_contextual_query(lemmas,context_length=25)
print (query)

([lemma="cry"][]{0,25}[lemma="God"])|([lemma="God"][]{0,25}[lemma="cry"])


In [99]:
domain_term = "God"

In [100]:
fragments = {}
fragments['main_node'] = main_node
fragments['mid_node'] = domain_term
fragments['fragments'] = []

In [101]:
fragment_1 = {}
fragment_1['original_sentence'] = "The cries, the screams to God, the um, the mothers walked around with the pillows, with the blankets of the children"
fragment_1['label']="The cries, the screams to God, the um, the mothers walked around with the pillows, with the blankets of the children"
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1['start_sentence_index']=indices[0]
fragment_1['end_sentence_index']=indices[1]
fragment_1['media_offset'] = 0
fragment_1['media_index'] = 0
fragment_1['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22The%22%5D+%5B%22cries%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22screams%22%5D+%5B%22to%22%5D+%5B%22God%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22um%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22mothers%22%5D+%5B%22walked%22%5D+%5B%22around%22%5D+%5B%22with%22%5D+%5B%22the%22%5D+%5B%22pillows%22%5D+%5B%5D+%5B%22with%22%5D+%5B%22the%22%5D+%5B%22blankets%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22children%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'The cries , the screams to God , the um , the mothers walked around with the pillows , with the blankets of the children ', 'right': '', 'complete_match': 'The cries , the screams to God , the um , the mothers walked around with the pillows , with the blankets of the children ', 'testimony_id': 'irn505572', 'shelfmark': ['USHMM RG-50.042*0019'], 'token_start': 4282, 'token_end': 4307}


In [102]:
fragment_2 = {}
fragment_2['original_sentence'] = "They knew that...those crying, those praying, praying and scream to God."
fragment_2['label']="They knew that...those crying, those praying, praying and scream to God."
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2['start_sentence_index']=indices[0]
fragment_2['end_sentence_index']=indices[1]
fragment_2['media_offset'] = 0
fragment_2['media_index'] = 0
fragment_2['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22They%22%5D+%5B%22knew%22%5D+%5B%22that%22%5D+%5B%5D%7B0%2C50%7D+%5B%22those%22%5D+%5B%22crying%22%5D+%5B%5D+%5B%22those%22%5D+%5B%22praying%22%5D+%5B%5D+%5B%22praying%22%5D+%5B%22and%22%5D+%5B%22scream%22%5D+%5B%22to%22%5D+%5B%22God%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'They knew that ... those crying , those praying , praying and scream to God . ', 'right': '', 'complete_match': 'They knew that ... those crying , those praying , praying and scream to God . ', 'testimony_id': 'irn504592', 'shelfmark': ['USHMM RG-50.030*0098'], 'token_start': 2938, 'token_end': 2954}


In [103]:
fragment_3 = {}
fragment_3['original_sentence'] = "I remember it was the first time in my life I seen him cry and talk to God."
fragment_3['label']="I remember it was the first time in my life I seen him cry and talk to God."
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3['start_sentence_index']=indices[0]
fragment_3['end_sentence_index']=indices[1]
fragment_3['media_offset'] = 0
fragment_3['media_index'] = 0
fragment_3['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22remember%22%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22the%22%5D+%5B%22first%22%5D+%5B%22time%22%5D+%5B%22in%22%5D+%5B%22my%22%5D+%5B%22life%22%5D+%5B%22I%22%5D+%5B%22seen%22%5D+%5B%22him%22%5D+%5B%22cry%22%5D+%5B%22and%22%5D+%5B%22talk%22%5D+%5B%22to%22%5D+%5B%22God%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I remember it was the first time in my life I seen him cry and talk to God . ', 'right': '', 'complete_match': 'I remember it was the first time in my life I seen him cry and talk to God . ', 'testimony_id': 'irn508686', 'shelfmark': ['USHMM RG-50.462*0062'], 'token_start': 17692, 'token_end': 17711}


In [104]:
fragment_4 = {}
fragment_4['original_sentence'] = " There was this guy, he was screaming and crying and"
fragment_4['label']= " There was this guy, he was screaming and crying and, ah, you know? God save me and all that."
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4['start_sentence_index']=indices[0]
fragment_4['end_sentence_index']=indices[1]
fragment_4['media_offset'] = 0
fragment_4['media_index'] = 0
fragment_4['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22There%22%5D+%5B%22was%22%5D+%5B%22this%22%5D+%5B%22guy%22%5D+%5B%5D+%5B%22he%22%5D+%5B%22was%22%5D+%5B%22screaming%22%5D+%5B%22and%22%5D+%5B%22crying%22%5D+%5B%22and%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'There was this guy , he was screaming and crying and ', 'right': '', 'complete_match': 'There was this guy , he was screaming and crying and ', 'testimony_id': 'usc_shoah_2391', 'shelfmark': ['USC 2391'], 'token_start': 9879, 'token_end': 9890}


In [105]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_5 = {
    'original_sentence': "a lot of the people was injured, crying. So what are you going to do? Crying. Crying to God.",
    'label': "(..) a lot of the people was injured, crying. So what are you going to do? Crying. Crying to God.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22a%22%5D+%5B%22lot%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22people%22%5D+%5B%22was%22%5D+%5B%22injured%22%5D+%5B%5D+%5B%22crying%22%5D+%5B%5D+%5B%22So%22%5D+%5B%22what%22%5D+%5B%22are%22%5D+%5B%22you%22%5D+%5B%22going%22%5D+%5B%22to%22%5D+%5B%22do%22%5D+%5B%5D+%5B%22Crying%22%5D+%5B%5D+%5B%22Crying%22%5D+%5B%22to%22%5D+%5B%22God%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'a lot of the people was injured , crying . So what are you going to do ? Crying . Crying to God . ', 'right': '', 'complete_match': 'a lot of the people was injured , crying . So what are you going to do ? Crying . Crying to God . ', 'testimony_id': 'usc_shoah_3653', 'shelfmark': ['USC 3653'], 'token_start': 5084, 'token_end': 5108}


In [106]:
# Hand the fragments collected for the current domain term to
# add_testimonial_fragments (defined outside this section; presumably it
# persists them -- confirm against its definition).
add_testimonial_fragments(fragments)

### 7.  

In [107]:
# Lemma pair used to build the contextual query in the next cell.
lemmas = ["pray", "cry"]

In [108]:
# Build the pattern covering every ordering of the lemmas, separated by a
# `[]{0,25}` gap, and display it.
query = create_contextual_query(lemmas, context_length=25)
print(query)

([lemma="pray"][]{0,25}[lemma="cry"])|([lemma="cry"][]{0,25}[lemma="pray"])


In [109]:
# Domain term for this section; stored as fragments['mid_node'] in the next cell.
domain_term = "pray"

In [110]:
# Fresh container for this section: the main node, the current domain term,
# and an (initially empty) list that the fragment cells append to.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [111]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_1 = {
    'original_sentence': "We cried. All of us cried. And then this night, we als-- made a minion. We prayed and we said Kaddish.",
    'label': "We cried. All of us cried. (..) We prayed and we said Kaddish.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22cried%22%5D+%5B%5D+%5B%22All%22%5D+%5B%22of%22%5D+%5B%22us%22%5D+%5B%22cried%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22then%22%5D+%5B%22this%22%5D+%5B%22night%22%5D+%5B%5D+%5B%22we%22%5D+%5B%22als%22%5D+%5B%5D%7B0%2C3%7D+%5B%22made%22%5D+%5B%22a%22%5D+%5B%22minion%22%5D+%5B%5D+%5B%22We%22%5D+%5B%22prayed%22%5D+%5B%22and%22%5D+%5B%22we%22%5D+%5B%22said%22%5D+%5B%22Kaddish%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We cried . All of us cried . And then this night , we als -- made a minion . We prayed and we said Kaddish . ', 'right': '', 'complete_match': 'We cried . All of us cried . And then this night , we als -- made a minion . We prayed and we said Kaddish . ', 'testimony_id': 'HVT-172', 'shelfmark': ['Fortunoff HVT-172'], 'token_start': 7472, 'token_end': 7499}


In [112]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_2 = {
    'original_sentence': "There was only one, two small windows on the top, everything else was closed. They were crying. They were praying.",
    'label': " There was only one, two small windows on the top, everything else was closed. They were crying. They were praying.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22There%22%5D+%5B%22was%22%5D+%5B%22only%22%5D+%5B%22one%22%5D+%5B%5D+%5B%22two%22%5D+%5B%22small%22%5D+%5B%22windows%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22top%22%5D+%5B%5D+%5B%22everything%22%5D+%5B%22else%22%5D+%5B%22was%22%5D+%5B%22closed%22%5D+%5B%5D+%5B%22They%22%5D+%5B%22were%22%5D+%5B%22crying%22%5D+%5B%5D+%5B%22They%22%5D+%5B%22were%22%5D+%5B%22praying%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'There was only one , two small windows on the top , everything else was closed . They were crying . They were praying . ', 'right': '', 'complete_match': 'There was only one , two small windows on the top , everything else was closed . They were crying . They were praying . ', 'testimony_id': 'HVT-93', 'shelfmark': ['Fortunoff HVT-93'], 'token_start': 12348, 'token_end': 12373}


In [113]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_3 = {
    'original_sentence': "I laid down on the bunk and cried, you know. And I only prayed one thing. I prayed now-- I prayed to God to take my life.",
    'label': "I laid down on the bunk and cried, you know. And I only prayed one thing. (..) I prayed to God to take my life.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22laid%22%5D+%5B%22down%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22bunk%22%5D+%5B%22and%22%5D+%5B%22cried%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22I%22%5D+%5B%22only%22%5D+%5B%22prayed%22%5D+%5B%22one%22%5D+%5B%22thing%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22prayed%22%5D+%5B%22now%22%5D+%5B%5D%7B0%2C3%7D+%5B%22I%22%5D+%5B%22prayed%22%5D+%5B%22to%22%5D+%5B%22God%22%5D+%5B%22to%22%5D+%5B%22take%22%5D+%5B%22my%22%5D+%5B%22life%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I laid down on the bunk and cried , you know . And I only prayed one thing . I prayed now -- I prayed to God to take my life . ', 'right': '', 'complete_match': 'I laid down on the bunk and cried , you know . And I only prayed one thing . I prayed now -- I prayed to God to take my life . ', 'testimony_id': 'usc_shoah_11797', 'shelfmark': ['USC 11797'], 'toke

In [114]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_4 = {
    'original_sentence': "I remember the seder table was nice, and my father was praying, and we was crying with my mother",
    'label': "(..)I remember the seder table was nice, and my father was praying, and we was crying with my mother.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22remember%22%5D+%5B%22the%22%5D+%5B%22seder%22%5D+%5B%22table%22%5D+%5B%22was%22%5D+%5B%22nice%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22my%22%5D+%5B%22father%22%5D+%5B%22was%22%5D+%5B%22praying%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22we%22%5D+%5B%22was%22%5D+%5B%22crying%22%5D+%5B%22with%22%5D+%5B%22my%22%5D+%5B%22mother%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I remember the seder table was nice , and my father was praying , and we was crying with my mother ', 'right': '', 'complete_match': 'I remember the seder table was nice , and my father was praying , and we was crying with my mother ', 'testimony_id': 'irn510676', 'shelfmark': ['USHMM RG-50.156*0022'], 'token_start': 1287, 'token_end': 1308}


In [115]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_5 = {
    'original_sentence': "And pray, and – oh, and cry. We cry. One cry, the other starts.",
    'label': "And pray, and – oh, and cry. We cry. One cry, the other starts.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22pray%22%5D+%5B%5D+%5B%22and%22%5D+%5B%5D%7B0%2C3%7D+%5B%22oh%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22cry%22%5D+%5B%5D+%5B%22We%22%5D+%5B%22cry%22%5D+%5B%5D+%5B%22One%22%5D+%5B%22cry%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22other%22%5D+%5B%22starts%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And pray , and – oh , and cry . We cry . One cry , the other starts . ', 'right': '', 'complete_match': 'And pray , and – oh , and cry . We cry . One cry , the other starts . ', 'testimony_id': 'irn96024', 'shelfmark': ['USHMM RG-50.030*0790'], 'token_start': 22789, 'token_end': 22809}


In [116]:
# Hand the fragments collected for the current domain term to
# add_testimonial_fragments (defined outside this section; presumably it
# persists them -- confirm against its definition).
add_testimonial_fragments(fragments)

### 8.  

In [117]:
# Lemma pair used to build the contextual query in the next cell.
lemmas = ["separate", "cry"]

In [118]:
# Build the pattern covering every ordering of the lemmas, separated by a
# `[]{0,25}` gap, and display it.
query = create_contextual_query(lemmas, context_length=25)
print(query)

([lemma="separate"][]{0,25}[lemma="cry"])|([lemma="cry"][]{0,25}[lemma="separate"])


In [119]:
# Domain term for this section; stored as fragments['mid_node'] in the next cell.
# Note it is the noun "separation", while the query lemma above is "separate".
domain_term = "separation"

In [120]:
# Fresh container for this section: the main node, the current domain term,
# and an (initially empty) list that the fragment cells append to.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [121]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_1 = {
    'original_sentence': "Or sometimes the even mothers with children were separated from their husbands. And the crying and despair was, it was just devastating.",
    'label': "Or sometimes the even mothers with children were separated from their husbands. And the crying and despair was, it was just devastating.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Or%22%5D+%5B%22sometimes%22%5D+%5B%22the%22%5D+%5B%22even%22%5D+%5B%22mothers%22%5D+%5B%22with%22%5D+%5B%22children%22%5D+%5B%22were%22%5D+%5B%22separated%22%5D+%5B%22from%22%5D+%5B%22their%22%5D+%5B%22husbands%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22the%22%5D+%5B%22crying%22%5D+%5B%22and%22%5D+%5B%22despair%22%5D+%5B%22was%22%5D+%5B%5D+%5B%22it%22%5D+%5B%22was%22%5D+%5B%22just%22%5D+%5B%22devastating%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Or sometimes the even mothers with children were separated from their husbands . And the crying and despair was , it was just devastating . ', 'right': '', 'complete_match': 'Or sometimes the even mothers with children were separated from their husbands . And the crying and despair was , it was just devastating . ', 'testimony_id': 'irn511020', 'shelfmark': ['USHMM RG-50.471*0003'], 'token_start': 4278, 'token_end': 4303}


In [122]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_2 = {
    'original_sentence': "and the little babies and even grown up women and mothers cried as they were separated,",
    'label': "(..) and the little babies and even grown up women and mothers cried as they were separated (..)",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22and%22%5D+%5B%22the%22%5D+%5B%22little%22%5D+%5B%22babies%22%5D+%5B%22and%22%5D+%5B%22even%22%5D+%5B%22grown%22%5D+%5B%22up%22%5D+%5B%22women%22%5D+%5B%22and%22%5D+%5B%22mothers%22%5D+%5B%22cried%22%5D+%5B%22as%22%5D+%5B%22they%22%5D+%5B%22were%22%5D+%5B%22separated%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'and the little babies and even grown up women and mothers cried as they were separated , ', 'right': '', 'complete_match': 'and the little babies and even grown up women and mothers cried as they were separated , ', 'testimony_id': 'irn515647', 'shelfmark': ['USHMM RG-50.462*0122'], 'token_start': 277, 'token_end': 294}


In [123]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_3 = {
    'original_sentence': "And this is also where I've seen mothers with children being separated. And again the chaos was terrible, the cries and the lamenting.",
    'label': "And this is also where I've seen mothers with children being separated. And again the chaos was terrible, the cries and the lamenting.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22this%22%5D+%5B%22is%22%5D+%5B%22also%22%5D+%5B%22where%22%5D+%5B%22I%22%5D+%5B%22%27ve%22%5D+%5B%22seen%22%5D+%5B%22mothers%22%5D+%5B%22with%22%5D+%5B%22children%22%5D+%5B%22being%22%5D+%5B%22separated%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22again%22%5D+%5B%22the%22%5D+%5B%22chaos%22%5D+%5B%22was%22%5D+%5B%22terrible%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22cries%22%5D+%5B%22and%22%5D+%5B%22the%22%5D+%5B%22lamenting%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "And this is also where I 've seen mothers with children being separated . And again the chaos was terrible , the cries and the lamenting . ", 'right': '', 'complete_match': "And this is also where I 've seen mothers with children being separated . And again the chaos was terrible , the cries and the lamenting . ", 'testimony_id': 'usc_shoah_12003', 'shelfmark': ['USC 12003'], 'token_start': 6880, 't

In [124]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_4 = {
    'original_sentence': "And [DRINKS] I was crying because now the last person that I had was taken away from me.",
    'label': "And [DRINKS] I was crying because now the last person that I had was taken away from me.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%5D%7B0%2C3%7D+%5B%22DRINKS%22%5D+%5B%5D%7B0%2C3%7D+%5B%22I%22%5D+%5B%22was%22%5D+%5B%22crying%22%5D+%5B%22because%22%5D+%5B%22now%22%5D+%5B%22the%22%5D+%5B%22last%22%5D+%5B%22person%22%5D+%5B%22that%22%5D+%5B%22I%22%5D+%5B%22had%22%5D+%5B%22was%22%5D+%5B%22taken%22%5D+%5B%22away%22%5D+%5B%22from%22%5D+%5B%22me%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And [ DRINKS ] I was crying because now the last person that I had was taken away from me . ', 'right': '', 'complete_match': 'And [ DRINKS ] I was crying because now the last person that I had was taken away from me . ', 'testimony_id': 'usc_shoah_8423', 'shelfmark': ['USC 8423'], 'token_start': 21260, 'token_end': 21281}


In [125]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_5 = {
    'original_sentence': "She said that she had only cried once before and actually was a very controlled person. And that was when we were separated from the children.",
    'label': "(..)she had only cried once before (..). And that was when we were separated from the children.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22She%22%5D+%5B%22said%22%5D+%5B%22that%22%5D+%5B%22she%22%5D+%5B%22had%22%5D+%5B%22only%22%5D+%5B%22cried%22%5D+%5B%22once%22%5D+%5B%22before%22%5D+%5B%22and%22%5D+%5B%22actually%22%5D+%5B%22was%22%5D+%5B%22a%22%5D+%5B%22very%22%5D+%5B%22controlled%22%5D+%5B%22person%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22that%22%5D+%5B%22was%22%5D+%5B%22when%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22separated%22%5D+%5B%22from%22%5D+%5B%22the%22%5D+%5B%22children%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'She said that she had only cried once before and actually was a very controlled person . And that was when we were separated from the children . ', 'right': '', 'complete_match': 'She said that she had only cried once before and actually was a very controlled person . And that was when we were separated from the children . ', 'testimony_id': 'usc_shoah_7598', 'shelfmark': ['US

In [126]:
# Hand the fragments collected for the current domain term to
# add_testimonial_fragments (defined outside this section; presumably it
# persists them -- confirm against its definition).
add_testimonial_fragments(fragments)

### 9.  

In [127]:
# Lemma pair used to build the contextual query in the next cell.
lemmas = ["cry", "bread"]

In [128]:
# Build the pattern covering every ordering of the lemmas, separated by a
# `[]{0,25}` gap, and display it.
query = create_contextual_query(lemmas, context_length=25)
print(query)

([lemma="cry"][]{0,25}[lemma="bread"])|([lemma="bread"][]{0,25}[lemma="cry"])


In [129]:
# Domain term for this section; stored as fragments['mid_node'] in the next cell.
domain_term = "bread"

In [130]:
# Fresh container for this section: the main node, the current domain term,
# and an (initially empty) list that the fragment cells append to.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [131]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_1 = {
    'original_sentence': "what was left of that loaf of bread, chewing on it, and crying as hard as he could cry.",
    'label': "(..) what was left of that loaf of bread, chewing on it, and crying as hard as he could cry.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22what%22%5D+%5B%22was%22%5D+%5B%22left%22%5D+%5B%22of%22%5D+%5B%22that%22%5D+%5B%22loaf%22%5D+%5B%22of%22%5D+%5B%22bread%22%5D+%5B%5D+%5B%22chewing%22%5D+%5B%22on%22%5D+%5B%22it%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22crying%22%5D+%5B%22as%22%5D+%5B%22hard%22%5D+%5B%22as%22%5D+%5B%22he%22%5D+%5B%22could%22%5D+%5B%22cry%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'what was left of that loaf of bread , chewing on it , and crying as hard as he could cry . ', 'right': '', 'complete_match': 'what was left of that loaf of bread , chewing on it , and crying as hard as he could cry . ', 'testimony_id': 'usc_shoah_24814', 'shelfmark': ['USC 24814'], 'token_start': 14790, 'token_end': 14812}


In [132]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_2 = {
    'original_sentence': "So we kept tearing the loaves of bread in half and giving one to each person. They kept cramming it in their mouths, crying and crying.",
    'label': "So we kept tearing the loaves of bread in half and giving one to each person. They kept cramming it in their mouths, crying and crying.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22So%22%5D+%5B%22we%22%5D+%5B%22kept%22%5D+%5B%22tearing%22%5D+%5B%22the%22%5D+%5B%22loaves%22%5D+%5B%22of%22%5D+%5B%22bread%22%5D+%5B%22in%22%5D+%5B%22half%22%5D+%5B%22and%22%5D+%5B%22giving%22%5D+%5B%22one%22%5D+%5B%22to%22%5D+%5B%22each%22%5D+%5B%22person%22%5D+%5B%5D+%5B%22They%22%5D+%5B%22kept%22%5D+%5B%22cramming%22%5D+%5B%22it%22%5D+%5B%22in%22%5D+%5B%22their%22%5D+%5B%22mouths%22%5D+%5B%5D+%5B%22crying%22%5D+%5B%22and%22%5D+%5B%22crying%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'So we kept tearing the loaves of bread in half and giving one to each person . They kept cramming it in their mouths , crying and crying . ', 'right': '', 'complete_match': 'So we kept tearing the loaves of bread in half and giving one to each person . They kept cramming it in their mouths , crying and crying . ', 'testimony_id': 'usc_shoah_24814', 'shelfmark': ['USC 24814'], '

In [133]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_3 = {
    'original_sentence': "And what I would do is, I would hold the bread for my sisters and myself to make sure that, because the little ones would eat up their bread right away and then they would cry they were hungry.",
    'label': "(..) the little ones would eat up their bread right away and then they would cry they were hungry.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22what%22%5D+%5B%22I%22%5D+%5B%22would%22%5D+%5B%22do%22%5D+%5B%22is%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22would%22%5D+%5B%22hold%22%5D+%5B%22the%22%5D+%5B%22bread%22%5D+%5B%22for%22%5D+%5B%22my%22%5D+%5B%22sisters%22%5D+%5B%22and%22%5D+%5B%22myself%22%5D+%5B%22to%22%5D+%5B%22make%22%5D+%5B%22sure%22%5D+%5B%22that%22%5D+%5B%5D+%5B%22because%22%5D+%5B%22the%22%5D+%5B%22little%22%5D+%5B%22ones%22%5D+%5B%22would%22%5D+%5B%22eat%22%5D+%5B%22up%22%5D+%5B%22their%22%5D+%5B%22bread%22%5D+%5B%22right%22%5D+%5B%22away%22%5D+%5B%22and%22%5D+%5B%22then%22%5D+%5B%22they%22%5D+%5B%22would%22%5D+%5B%22cry%22%5D+%5B%22they%22%5D+%5B%22were%22%5D+%5B%22hungry%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And what I would do is , I would hold the bread for my sisters and myself to make sure that , because the little ones would eat up their bread right away and then the

In [134]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_4 = {
    'original_sentence': "I looked around and people were on the street covered with papers, bodies, near bodies, were crying for bread.",
    'label': "I looked around and people were on the street covered with papers, bodies, near bodies, were crying for bread.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22looked%22%5D+%5B%22around%22%5D+%5B%22and%22%5D+%5B%22people%22%5D+%5B%22were%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22street%22%5D+%5B%22covered%22%5D+%5B%22with%22%5D+%5B%22papers%22%5D+%5B%5D+%5B%22bodies%22%5D+%5B%5D+%5B%22near%22%5D+%5B%22bodies%22%5D+%5B%5D+%5B%22were%22%5D+%5B%22crying%22%5D+%5B%22for%22%5D+%5B%22bread%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I looked around and people were on the street covered with papers , bodies , near bodies , were crying for bread . ', 'right': '', 'complete_match': 'I looked around and people were on the street covered with papers , bodies , near bodies , were crying for bread . ', 'testimony_id': 'irn504581', 'shelfmark': ['USHMM RG-50.030*0086'], 'token_start': 1269, 'token_end': 1292}


In [135]:
# Hand the fragments collected for the current domain term to
# add_testimonial_fragments (defined outside this section; presumably it
# persists them -- confirm against its definition).
add_testimonial_fragments(fragments)

### 10.  

In [136]:
# Lemma pair used to build the contextual query in the next cell.
lemmas = ["shoot", "cry"]

In [137]:
# Build the pattern covering every ordering of the lemmas, separated by a
# `[]{0,25}` gap, and display it.
query = create_contextual_query(lemmas, context_length=25)
print(query)

([lemma="shoot"][]{0,25}[lemma="cry"])|([lemma="cry"][]{0,25}[lemma="shoot"])


In [138]:
# Domain term for this section; stored as fragments['mid_node'] in the next cell.
domain_term = "shoot"

In [139]:
# Fresh container for this section: the main node, the current domain term,
# and an (initially empty) list that the fragment cells append to.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [140]:
# Fragment record: the sentence to locate plus the label text stored with it.
# NOTE(review): the label opens with text ("And the little baby was crying ...")
# that is not part of original_sentence, so the sentence span looked up below
# may not cover the start of the label -- confirm against the transcript.
fragment_1 = {
    'original_sentence': " and he killed the baby and the mother on the spot. Shot them on the spot.",
    'label': "And the little baby was crying, and the German heard...heard the baby crying (..) Shot them on the spot.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22and%22%5D+%5B%22he%22%5D+%5B%22killed%22%5D+%5B%22the%22%5D+%5B%22baby%22%5D+%5B%22and%22%5D+%5B%22the%22%5D+%5B%22mother%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22spot%22%5D+%5B%5D+%5B%22Shot%22%5D+%5B%22them%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22spot%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'and he killed the baby and the mother on the spot . Shot them on the spot . ', 'right': '', 'complete_match': 'and he killed the baby and the mother on the spot . Shot them on the spot . ', 'testimony_id': 'irn504663', 'shelfmark': ['USHMM RG-50.030*0174'], 'token_start': 6296, 'token_end': 6314}


In [141]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_2 = {
    'original_sentence': "You know babies was crying. Somebody picked up a baby, but it's shot right away.",
    'label': "You know babies was crying. Somebody picked up a baby, but it's shot right away.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22You%22%5D+%5B%22know%22%5D+%5B%22babies%22%5D+%5B%22was%22%5D+%5B%22crying%22%5D+%5B%5D+%5B%22Somebody%22%5D+%5B%22picked%22%5D+%5B%22up%22%5D+%5B%22a%22%5D+%5B%22baby%22%5D+%5B%5D+%5B%22but%22%5D+%5B%22it%22%5D+%5B%5D+%5B%22shot%22%5D+%5B%22right%22%5D+%5B%22away%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "You know babies was crying . Somebody picked up a baby , but it 's shot right away . ", 'right': '', 'complete_match': "You know babies was crying . Somebody picked up a baby , but it 's shot right away . ", 'testimony_id': 'irn504669', 'shelfmark': ['USHMM RG-50.030*0166'], 'token_start': 1192, 'token_end': 1211}


In [142]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_3 = {
    'original_sentence': "The doctors that came right away, they took the crying people, they took guns and they shoot them and they fell in, right on the fire.",
    'label': "The doctors that came right away, they took the crying people, they took guns and they shoot them and they fell in(..)",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22The%22%5D+%5B%22doctors%22%5D+%5B%22that%22%5D+%5B%22came%22%5D+%5B%22right%22%5D+%5B%22away%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22took%22%5D+%5B%22the%22%5D+%5B%22crying%22%5D+%5B%22people%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22took%22%5D+%5B%22guns%22%5D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22shoot%22%5D+%5B%22them%22%5D+%5B%22and%22%5D+%5B%22they%22%5D+%5B%22fell%22%5D+%5B%22in%22%5D+%5B%5D+%5B%22right%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22fire%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'The doctors that came right away , they took the crying people , they took guns and they shoot them and they fell in , right on the fire . ', 'right': '', 'complete_match': 'The doctors that came right away , they took the crying people , they took guns and they shoot them and they fell in , right on the fire . ', 'testimony_id': 'irn504771', 'shelfmark': ['USHMM RG-50.030

In [143]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_4 = {
    'original_sentence': "when the mother started pleading with him and crying, he shot the mother on the spot.",
    'label': "(..) when the mother started pleading with him and crying, he shot the mother on the spot",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22when%22%5D+%5B%22the%22%5D+%5B%22mother%22%5D+%5B%22started%22%5D+%5B%22pleading%22%5D+%5B%22with%22%5D+%5B%22him%22%5D+%5B%22and%22%5D+%5B%22crying%22%5D+%5B%5D+%5B%22he%22%5D+%5B%22shot%22%5D+%5B%22the%22%5D+%5B%22mother%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22spot%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'when the mother started pleading with him and crying , he shot the mother on the spot . ', 'right': '', 'complete_match': 'when the mother started pleading with him and crying , he shot the mother on the spot . ', 'testimony_id': 'irn504712', 'shelfmark': ['USHMM RG-50.030*0220'], 'token_start': 2844, 'token_end': 2862}


In [144]:
# Hand the fragments collected for the current domain term to
# add_testimonial_fragments (defined outside this section; presumably it
# persists them -- confirm against its definition).
add_testimonial_fragments(fragments)

### 11.  

In [145]:
# Lemma pair used to build the contextual query in the next cell.
lemmas = ["pain", "cry"]

In [146]:
# Build the pattern covering every ordering of the lemmas, separated by a
# `[]{0,25}` gap, and display it.
query = create_contextual_query(lemmas, context_length=25)
print(query)

([lemma="pain"][]{0,25}[lemma="cry"])|([lemma="cry"][]{0,25}[lemma="pain"])


In [147]:
# Domain term for this section; stored as fragments['mid_node'] in the next cell.
domain_term = "pain"

In [148]:
# Fresh container for this section: the main node, the current domain term,
# and an (initially empty) list that the fragment cells append to.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [149]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_1 = {
    'original_sentence': "the pain was just tearing my heart. As I was sitting in the worst circumstances and crying, and all of a sudden I could give up.",
    'label': "(..) the pain was just tearing my heart. As I was sitting in the worst circumstances and crying (..)",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22the%22%5D+%5B%22pain%22%5D+%5B%22was%22%5D+%5B%22just%22%5D+%5B%22tearing%22%5D+%5B%22my%22%5D+%5B%22heart%22%5D+%5B%5D+%5B%22As%22%5D+%5B%22I%22%5D+%5B%22was%22%5D+%5B%22sitting%22%5D+%5B%22in%22%5D+%5B%22the%22%5D+%5B%22worst%22%5D+%5B%22circumstances%22%5D+%5B%22and%22%5D+%5B%22crying%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22all%22%5D+%5B%22of%22%5D+%5B%22a%22%5D+%5B%22sudden%22%5D+%5B%22I%22%5D+%5B%22could%22%5D+%5B%22give%22%5D+%5B%22up%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'the pain was just tearing my heart . As I was sitting in the worst circumstances and crying , and all of a sudden I could give up . ', 'right': '', 'complete_match': 'the pain was just tearing my heart . As I was sitting in the worst circumstances and crying , and all of a sudden I could give up . ', 'testimony_id': 'usc_shoah_10588', 'shelfmark': ['USC 10588'], 'token_start': 24836, 

In [150]:
# Fragment record: the sentence to locate plus the label text stored with it.
fragment_2 = {
    'original_sentence': "My brother started to cry because he got his own pain.",
    'label': "My brother started to cry because he got his own pain.",
}
# find_sentence_id's result is indexed positionally: [0] and [1] are stored as
# the start/end sentence indices, [2] as the testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22My%22%5D+%5B%22brother%22%5D+%5B%22started%22%5D+%5B%22to%22%5D+%5B%22cry%22%5D+%5B%22because%22%5D+%5B%22he%22%5D+%5B%22got%22%5D+%5B%22his%22%5D+%5B%22own%22%5D+%5B%22pain%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'My brother started to cry because he got his own pain . ', 'right': '', 'complete_match': 'My brother started to cry because he got his own pain . ', 'testimony_id': 'usc_shoah_2031', 'shelfmark': ['USC 2031'], 'token_start': 3288, 'token_end': 3300}


In [151]:
# Third fragment: the label marks the omitted beginning with "(..)", while the
# sentence used for the lookup starts with a plain leading space instead.
fragment_3 = {
    'original_sentence': " his face was bloody and black and after a few days the man just couldn’t stop crying so, uh we told him we understand he must be in terrible pain from the blows he got.",
    'label': "(..) his face was bloody and black and after a few days the man just couldn’t stop crying so, uh we told him we understand he must be in terrible pain from the blows he got.",
}
# The lookup result is read positionally: [0] start sentence index,
# [1] end sentence index, [2] testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22his%22%5D+%5B%22face%22%5D+%5B%22was%22%5D+%5B%22bloody%22%5D+%5B%22and%22%5D+%5B%22black%22%5D+%5B%22and%22%5D+%5B%22after%22%5D+%5B%22a%22%5D+%5B%22few%22%5D+%5B%22days%22%5D+%5B%22the%22%5D+%5B%22man%22%5D+%5B%22just%22%5D+%5B%22could%22%5D+%5B%5D+%5B%22stop%22%5D+%5B%22crying%22%5D+%5B%22so%22%5D+%5B%5D+%5B%22uh%22%5D+%5B%22we%22%5D+%5B%22told%22%5D+%5B%22him%22%5D+%5B%22we%22%5D+%5B%22understand%22%5D+%5B%22he%22%5D+%5B%22must%22%5D+%5B%22be%22%5D+%5B%22in%22%5D+%5B%22terrible%22%5D+%5B%22pain%22%5D+%5B%22from%22%5D+%5B%22the%22%5D+%5B%22blows%22%5D+%5B%22he%22%5D+%5B%22got%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'his face was bloody and black and after a few days the man just could n’t stop crying so , uh we told him we understand he must be in terrible pain from the blows he got . ', 'right': '', 'complete_match': 'his face was bloody and black and 

In [152]:
# Fourth fragment: the label wraps the excerpt in "(..)" on both sides; the
# sentence used for the lookup keeps the trailing comma of the transcript.
fragment_4 = {
    'original_sentence': " I was in such pain that I would just start crying and I'd just walk out of the room,",
    'label': "(..) I was in such pain that I would just start crying and I'd just walk out of the room (..)",
}
# The lookup result is read positionally: [0] start sentence index,
# [1] end sentence index, [2] testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22was%22%5D+%5B%22in%22%5D+%5B%22such%22%5D+%5B%22pain%22%5D+%5B%22that%22%5D+%5B%22I%22%5D+%5B%22would%22%5D+%5B%22just%22%5D+%5B%22start%22%5D+%5B%22crying%22%5D+%5B%22and%22%5D+%5B%22I%22%5D+%5B%22%27d%22%5D+%5B%22just%22%5D+%5B%22walk%22%5D+%5B%22out%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22room%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "I was in such pain that I would just start crying and I 'd just walk out of the room , ", 'right': '', 'complete_match': "I was in such pain that I would just start crying and I 'd just walk out of the room , ", 'testimony_id': 'irn504568', 'shelfmark': ['USHMM RG-50.030*0072'], 'token_start': 6800, 'token_end': 6821}


In [153]:
# Pass the fragment collection built in the preceding cells to
# add_testimonial_fragments (defined outside this section; presumably it
# stores the fragments -- TODO confirm against its definition).
add_testimonial_fragments(fragments)

### 1.  

In [154]:
# Lemmas that must co-occur; fed to create_contextual_query in the next cell.
lemmas = ["embrace", "cry"]

In [155]:
# Build the co-occurrence query for the lemmas, allowing at most 25
# arbitrary tokens between them, in either order.
query = create_contextual_query(lemmas, context_length=25)
# Show the generated pattern so it can be inspected or reused.
print(query)

([lemma="embrace"][]{0,25}[lemma="cry"])|([lemma="cry"][]{0,25}[lemma="embrace"])


In [156]:
# Term recorded as the 'mid_node' of the fragment collection created in the
# following cell.
domain_term = "embrace"

In [157]:
# Start a fresh fragment collection for the current domain term; the
# individual fragments are appended to the 'fragments' list by later cells.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [158]:
# First fragment of the current collection: the displayed label is the
# sentence itself, unabridged.
fragment_1 = {
    'original_sentence': "And he was waiting, and he embraced us and he was crying and he took us back.",
    'label': "And he was waiting, and he embraced us and he was crying and he took us back.",
}
# The lookup result is read positionally: [0] start sentence index,
# [1] end sentence index, [2] testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22he%22%5D+%5B%22was%22%5D+%5B%22waiting%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22he%22%5D+%5B%22embraced%22%5D+%5B%22us%22%5D+%5B%22and%22%5D+%5B%22he%22%5D+%5B%22was%22%5D+%5B%22crying%22%5D+%5B%22and%22%5D+%5B%22he%22%5D+%5B%22took%22%5D+%5B%22us%22%5D+%5B%22back%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And he was waiting , and he embraced us and he was crying and he took us back . ', 'right': '', 'complete_match': 'And he was waiting , and he embraced us and he was crying and he took us back . ', 'testimony_id': 'usc_shoah_13690', 'shelfmark': ['USC 13690'], 'token_start': 9622, 'token_end': 9641}


In [159]:
# Second fragment: the label replaces the trailing comma of the looked-up
# sentence with "(..)" to signal that the passage continues.
fragment_2 = {
    'original_sentence': "So she grabbed her and embraced her. And she started to cry,",
    'label': "So she grabbed her and embraced her. And she started to cry (..)",
}
# The lookup result is read positionally: [0] start sentence index,
# [1] end sentence index, [2] testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22So%22%5D+%5B%22she%22%5D+%5B%22grabbed%22%5D+%5B%22her%22%5D+%5B%22and%22%5D+%5B%22embraced%22%5D+%5B%22her%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22she%22%5D+%5B%22started%22%5D+%5B%22to%22%5D+%5B%22cry%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'So she grabbed her and embraced her . And she started to cry , ', 'right': '', 'complete_match': 'So she grabbed her and embraced her . And she started to cry , ', 'testimony_id': 'usc_shoah_14190', 'shelfmark': ['USC 14190'], 'token_start': 13286, 'token_end': 13300}


In [160]:
# Third fragment: the "..." inside the looked-up sentence is turned into a
# []{0,50} gap by find_sentence_id; the label shows "(..)" at that position
# and leaves out "He was close to me.".
fragment_3 = {
    'original_sentence': "He look at me, look at me, he start crying. ... He was close to me. He put his arms around me. He embraced me, hugged me, kissed me.",
    'label': "He look at me, look at me, he start crying. (..) He put his arms around me. He embraced me, hugged me, kissed me.",
}
# The lookup result is read positionally: [0] start sentence index,
# [1] end sentence index, [2] testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22He%22%5D+%5B%22look%22%5D+%5B%22at%22%5D+%5B%22me%22%5D+%5B%5D+%5B%22look%22%5D+%5B%22at%22%5D+%5B%22me%22%5D+%5B%5D+%5B%22he%22%5D+%5B%22start%22%5D+%5B%22crying%22%5D+%5B%5D+%5B%5D%7B0%2C50%7D+%5B%22He%22%5D+%5B%22was%22%5D+%5B%22close%22%5D+%5B%22to%22%5D+%5B%22me%22%5D+%5B%5D+%5B%22He%22%5D+%5B%22put%22%5D+%5B%22his%22%5D+%5B%22arms%22%5D+%5B%22around%22%5D+%5B%22me%22%5D+%5B%5D+%5B%22He%22%5D+%5B%22embraced%22%5D+%5B%22me%22%5D+%5B%5D+%5B%22hugged%22%5D+%5B%22me%22%5D+%5B%5D+%5B%22kissed%22%5D+%5B%22me%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'He look at me , look at me , he start crying . ... He was close to me . He put his arms around me . He embraced me , hugged me , kissed me . ', 'right': '', 'complete_match': 'He look at me , look at me , he start crying . ... He was close to me . He put his arms around me . He embraced me , hugged me , kissed me

In [161]:
# Fourth fragment of the current collection: the displayed label is the
# sentence itself, unabridged.
fragment_4 = {
    'original_sentence': "He embraces me, and we both cry.",
    'label': "He embraces me, and we both cry.",
}
# The lookup result is read positionally: [0] start sentence index,
# [1] end sentence index, [2] testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22He%22%5D+%5B%22embraces%22%5D+%5B%22me%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22we%22%5D+%5B%22both%22%5D+%5B%22cry%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'He embraces me , and we both cry . ', 'right': '', 'complete_match': 'He embraces me , and we both cry . ', 'testimony_id': 'usc_shoah_268', 'shelfmark': ['USC 268'], 'token_start': 42331, 'token_end': 42340}


In [162]:
# Pass the "embrace" fragment collection built in the preceding cells to
# add_testimonial_fragments (defined outside this section; presumably it
# stores the fragments -- TODO confirm against its definition).
add_testimonial_fragments(fragments)