# Mining testimonial fragments of the Holocaust

**Experience domain:** hope

### Load the necessary libraries

In [540]:
import sys; sys.path.insert(0, '..')
import itertools

In [541]:
import get_topic_model_concordance as topic_concordancer
from utils import blacklab, db, text
mongo = db.get_db()

In [542]:
%config Completer.use_jedi = False
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import random

### Helper functions

In [543]:
def create_contextual_query(lemmas,context_length=50):
    """Build a token query that matches the lemmas in any order.

    Each ordering of ``lemmas`` becomes one parenthesised alternative in
    which consecutive ``[lemma="..."]`` tokens are separated by a gap of
    zero to ``context_length`` arbitrary tokens (``[]{0,N}``).  The
    alternatives are joined with ``|`` and returned as a single string.
    """
    gap = '[]{0,' + str(context_length) + '}'
    alternatives = (
        '(' + gap.join('[lemma="' + lemma + '"]' for lemma in ordering) + ')'
        for ordering in itertools.permutations(lemmas, len(lemmas))
    )
    return '|'.join(alternatives)
        
        
            

In [544]:
from utils import blacklab, db, text
import requests
import json
def _token_to_pattern(word):
    """Translate one tokenizer word into its query pattern."""
    # Order matters: '...' must be tested before the single '.' below.
    if '...' in word:
        return '[]{0,50}'
    if '-' in word:
        return '[]{0,3}'
    # Clitics and punctuation are matched as "any single token".
    if any(mark in word for mark in ("n't", "'re", "?", ".", "'s", ",")):
        return '[]'
    return '["' + word + '"]'


def find_sentence_id(label):
    """Locate ``label`` in the corpus and return its sentence span.

    The text is tokenized by the HTTP service at ``localhost:9000``
    (presumably a Stanford CoreNLP server -- confirm), turned into a
    token-level query and searched with ``blacklab.search_blacklab``.
    The first hit's token offsets are mapped to sentence indices using
    the ``tokens`` collection of the project database.

    Args:
        label: the sentence (or sentence fragment) to look up.

    Returns:
        ``(start_sentence_index, end_sentence_index, testimony_id)`` for
        the first hit, or ``None`` when the search or the lookup fails
        (the failing query is printed in that case).
    """
    props = {'annotators': 'tokenize'}

    # make a request
    r = requests.post('http://localhost:9000/', params={'properties':
                      json.dumps(props)},
                      data=label.encode('utf-8'))
    # json.loads() no longer accepts an `encoding` keyword (removed in
    # Python 3.9); r.text is already a decoded str.
    result = json.loads(r.text)
    tokens = result['tokens']
    last = len(tokens) - 1
    query = []
    for i, token in enumerate(tokens):
        word = token['word']
        # An ellipsis at either end of the label adds nothing to the query.
        if '...' in word and i in (0, last):
            continue
        query.append(_token_to_pattern(word))

    query = ' '.join(query)
    try:
        sentence = blacklab.search_blacklab(query, window=0,
                                            lemma=False,
                                            include_match=True)
        token_end = sentence[0]['token_end']
        token_start = sentence[0]['token_start']
        print (sentence[0])
        mongo = db.get_db()
        results = mongo.tokens.find({'testimony_id':
                                    sentence[0]['testimony_id']},
                                    {'_id': 0})
        tokens = list(results)[0]['tokens']
        sentenceStart = tokens[token_start]['sentence_index']
        # NOTE(review): in the outputs recorded in this notebook,
        # token_end - token_start equals the number of matched tokens,
        # i.e. token_end looks exclusive.  tokens[token_end] may then
        # belong to the sentence after the match -- confirm.
        sentenceEnd = tokens[token_end]['sentence_index']
        return (sentenceStart,sentenceEnd,sentence[0]['testimony_id'])
    except Exception:
        # Best-effort lookup: report the query and let the caller see None.
        # `Exception` (not a bare except) keeps Ctrl-C / SystemExit working.
        print("The following query returned a null result")
        print(query)
        return None
        
            


In [545]:
def create_parent_node(label):
    """Build the document for a new top-level fragment tree.

    The result holds ``label``, a random ``essay_id`` between 1 and 20
    and a ``tree`` root produced by ``get_node(..., is_parent=True)``.
    """
    # The draws happen in this order so the random sequence is unchanged.
    testimony_id = random.randint(1, 20)
    essay_id = random.randint(1, 20)
    tree = get_node(testimony_id, {'label': label}, is_parent=True)
    tree['label'] = label
    return {'label': label, 'essay_id': essay_id, 'tree': tree}

In [546]:
def get_node(testimony_id, node, is_parent=False):
    """Return a tree node dict built from ``node``.

    With ``is_parent=True`` only ``node['label']`` is read and every
    other field is filled with a random integer between 1 and 20.
    Otherwise the node is a leaf: ``testimony_id`` is copied from
    ``node`` and the media/sentence fields are converted to ``float``.
    Both shapes start with an empty ``children`` list.

    The ``testimony_id`` argument is accepted but not used.
    """
    positional_fields = ('media_index', 'media_offset',
                         'start_sentence_index', 'end_sentence_index')

    if is_parent:
        parent = {'label': node['label'],
                  'testimony_id': random.randint(1, 20)}
        for field in positional_fields:
            parent[field] = random.randint(1, 20)
        parent['children'] = []
        return parent

    leaf = {'label': node['label'],
            'testimony_id': node['testimony_id']}
    for field in positional_fields:
        leaf[field] = float(node[field])
    leaf['children'] = []
    return leaf

In [547]:
def check_if_main_node_exist(node):
    """Return True when a fragment document labelled ``node`` exists.

    Uses ``find_one`` rather than indexing a cursor: indexing an empty
    cursor raises ``IndexError``, so a missing node could never be
    reported as ``False``.
    """
    return mongo.fragments.find_one({'label': node}, {'_id': 0}) is not None

In [548]:
def add_main_node(label):
    """Insert a fresh parent document for ``label`` into ``fragments``.

    ``insert_one`` replaces the deprecated ``Collection.insert`` (removed
    in PyMongo 4) and matches the ``delete_one`` / ``replace_one`` calls
    used elsewhere in this notebook.
    """
    mongo.fragments.insert_one(create_parent_node(label))

In [549]:
def delete_main_node(label):
    """Delete one ``fragments`` document whose label equals ``label``."""
    selector = {'label': label}
    mongo.fragments.delete_one(selector)

In [550]:
def add_testimonial_fragments(fragments):
    """Attach a new mid-level node with its leaf fragments to a main node.

    Args:
        fragments: dict with
            ``'main_node'`` -- label of an existing document in the
            ``fragments`` collection,
            ``'mid_node'`` -- label of the node to add under its tree,
            ``'fragments'`` -- list of leaf dicts accepted by ``get_node``.

    Nothing is written when the main node is missing or when it already
    has a child labelled ``fragments['mid_node']``; a message is printed
    in both cases.
    """
    main_label = fragments['main_node']
    # One lookup both checks existence and fetches the document.
    results = mongo.fragments.find_one({'label': main_label}, {'_id': 0})
    if results is None:
        print ("main node does not exist, fragments cannot be added")
        return

    children = results['tree']['children']
    if any(child['label'] == fragments['mid_node'] for child in children):
        print ("mid node exists cannot be added")
        return

    # The first argument of get_node is unused; 'r' is a placeholder.
    mid_node = get_node('r', {'label': fragments['mid_node']}, is_parent=True)
    mid_node['children'].extend(
        get_node(fragment['testimony_id'], fragment)
        for fragment in fragments['fragments']
    )
    children.append(mid_node)
    mongo.fragments.replace_one({'label': main_label}, results)

### Add the main node

In [551]:
# Label of the top-level node that every section below adds to.
main_node = "hope"
# Delete any existing document with this label before inserting a fresh one,
# so re-running the notebook starts from an empty tree.
delete_main_node(main_node)
add_main_node(main_node)

  


### Set up the query

In [552]:
# Seed query: any token whose lemma is "hope".
query = '[lemma="hope"]'

# Run the topic-model concordancer on the query.
# NOTE(review): presumably `window` is the context width in tokens and
# `topicn` the number of topics -- confirm in get_topic_model_concordance.
result = topic_concordancer.main(query,window=15,topicn=25)

### Print the key topics

# Print each topic's index, then the second item of its 'topic_words'
# entry, then a blank spacer.
for i,element in enumerate(result['topic_documents']):
    print (i)
    topic_words =  element['topic_words'][1]
    print (topic_words)
    print ('\n')

### Analyze documents

# Index of the topic to inspect (see the topic listing printed earlier).
i=16
# The loop variable is deliberately not named `text`: that name is the
# `text` module imported from `utils`, and rebinding it here would break
# any later use of the module.
for document in result['topic_documents'][i]['texts'][0:25]:
    print (document['matched_text_words'])
    print ('\n')

## Testimonial fragments

### 1. No hope 

In [553]:
lemmas = ["hope","stop"]

In [554]:
# Match the current `lemmas` in either order, with at most 25 tokens
# between them.
query = create_contextual_query(lemmas,context_length=25)
print (query)

([lemma="hope"][]{0,25}[lemma="stop"])|([lemma="stop"][]{0,25}[lemma="hope"])


In [555]:
domain_term = "no hope"

In [556]:
# Payload for add_testimonial_fragments(): target main node, the label
# of the mid node to create, and the (initially empty) list of leaves.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [557]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_1 = {
    'original_sentence': "You don't hope, you give up, and yet you think of tomorrow.",
    'label': "You don't hope, you give up, and yet you think of tomorrow.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22You%22%5D+%5B%22do%22%5D+%5B%5D+%5B%22hope%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22give%22%5D+%5B%22up%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22yet%22%5D+%5B%22you%22%5D+%5B%22think%22%5D+%5B%22of%22%5D+%5B%22tomorrow%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "You do n't hope , you give up , and yet you think of tomorrow . ", 'right': '', 'complete_match': "You do n't hope , you give up , and yet you think of tomorrow . ", 'testimony_id': 'irn505577', 'shelfmark': ['USHMM RG-50.042*0024'], 'token_start': 12845, 'token_end': 12861}


In [558]:
# Leaf fragment: the search text is a prefix of the displayed label.
fragment_2 = {
    'original_sentence': "In the meantime, I don’t know, we had",
    'label': "In the meantime, I don’t know, we had -- not some hope.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22In%22%5D+%5B%22the%22%5D+%5B%22meantime%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22do%22%5D+%5B%5D+%5B%22know%22%5D+%5B%5D+%5B%22we%22%5D+%5B%22had%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'In the meantime , I do n’t know , we had ', 'right': '', 'complete_match': 'In the meantime , I do n’t know , we had ', 'testimony_id': 'irn509676', 'shelfmark': ['USHMM RG-50.030*0415'], 'token_start': 16812, 'token_end': 16823}


In [559]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
# NOTE(review): the label begins with "ur hope", which looks like a
# truncated word -- confirm the intended display text.
fragment_3 = {
    'original_sentence': "hope, we had lived by hope so long that we didn’t even hope anymore.",
    'label': "ur hope, we had lived by hope so long that we didn’t even hope anymore.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22hope%22%5D+%5B%5D+%5B%22we%22%5D+%5B%22had%22%5D+%5B%22lived%22%5D+%5B%22by%22%5D+%5B%22hope%22%5D+%5B%22so%22%5D+%5B%22long%22%5D+%5B%22that%22%5D+%5B%22we%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22even%22%5D+%5B%22hope%22%5D+%5B%22anymore%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'hope , we had lived by hope so long that we did n’t even hope anymore . ', 'right': '', 'complete_match': 'hope , we had lived by hope so long that we did n’t even hope anymore . ', 'testimony_id': 'irn510468', 'shelfmark': ['USHMM RG-50.322*0014'], 'token_start': 59957, 'token_end': 59974}


In [560]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_4 = {
    'original_sentence': "There's not much hope left.",
    'label': "There's not much hope left.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22There%22%5D+%5B%5D+%5B%22not%22%5D+%5B%22much%22%5D+%5B%22hope%22%5D+%5B%22left%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "There 's not much hope left . ", 'right': '', 'complete_match': "There 's not much hope left . ", 'testimony_id': 'usc_shoah_17330', 'shelfmark': ['USC 17330'], 'token_start': 15990, 'token_end': 15997}


In [561]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_5 = {
    'original_sentence': "And we didn't hope too much.",
    'label': "And we didn't hope too much.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22we%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22hope%22%5D+%5B%22too%22%5D+%5B%22much%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "And we did n't hope too much . ", 'right': '', 'complete_match': "And we did n't hope too much . ", 'testimony_id': 'usc_shoah_39', 'shelfmark': ['USC 39'], 'token_start': 4789, 'token_end': 4797}


In [562]:
add_testimonial_fragments(fragments)

### 2.  see each other

In [563]:
query = '([lemma="hope"] []{0,2} [lemma="see"|"meet"])'

In [564]:
# NOTE(review): `lemmas` has not been reassigned since section 1, so this
# rebuilds the "hope"/"stop" query (see the printed output) and replaces
# the hand-written hope ... see|meet query assigned just before -- confirm
# which query was intended for this section.
query = create_contextual_query(lemmas,context_length=50)
print (query)

([lemma="hope"][]{0,50}[lemma="stop"])|([lemma="stop"][]{0,50}[lemma="hope"])


In [565]:
domain_term = "to see"

In [566]:
# Start a fresh payload for this section's mid node.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [567]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_1 = {
    'original_sentence': "We said our good-byes and hoped to see them again.",
    'label': "We said our good-byes and hoped to see them again.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22We%22%5D+%5B%22said%22%5D+%5B%22our%22%5D+%5B%5D%7B0%2C3%7D+%5B%22and%22%5D+%5B%22hoped%22%5D+%5B%22to%22%5D+%5B%22see%22%5D+%5B%22them%22%5D+%5B%22again%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'We said our good-byes and hoped to see them again . ', 'right': '', 'complete_match': 'We said our good-byes and hoped to see them again . ', 'testimony_id': 'irn510474', 'shelfmark': ['USHMM RG-50.322*0020'], 'token_start': 26520, 'token_end': 26531}


In [568]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_2 = {
    'original_sentence': "Anyway, I left that bed, and I ran out, hoping I might see my mother.",
    'label': "Anyway, I left that bed, and I ran out, hoping I might see my mother.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Anyway%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22left%22%5D+%5B%22that%22%5D+%5B%22bed%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22I%22%5D+%5B%22ran%22%5D+%5B%22out%22%5D+%5B%5D+%5B%22hoping%22%5D+%5B%22I%22%5D+%5B%22might%22%5D+%5B%22see%22%5D+%5B%22my%22%5D+%5B%22mother%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Anyway , I left that bed , and I ran out , hoping I might see my mother . ', 'right': '', 'complete_match': 'Anyway , I left that bed , and I ran out , hoping I might see my mother . ', 'testimony_id': 'irn510705', 'shelfmark': ['USHMM RG-50.156*0051'], 'token_start': 4166, 'token_end': 4185}


In [569]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_3 = {
    'original_sentence': "The hope of seeing my family again, that they would be still alive, somehow they would have escaped.",
    'label': "The hope of seeing my family again, that they would be still alive, somehow they would have escaped.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22The%22%5D+%5B%22hope%22%5D+%5B%22of%22%5D+%5B%22seeing%22%5D+%5B%22my%22%5D+%5B%22family%22%5D+%5B%22again%22%5D+%5B%5D+%5B%22that%22%5D+%5B%22they%22%5D+%5B%22would%22%5D+%5B%22be%22%5D+%5B%22still%22%5D+%5B%22alive%22%5D+%5B%5D+%5B%22somehow%22%5D+%5B%22they%22%5D+%5B%22would%22%5D+%5B%22have%22%5D+%5B%22escaped%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'The hope of seeing my family again , that they would be still alive , somehow they would have escaped . ', 'right': '', 'complete_match': 'The hope of seeing my family again , that they would be still alive , somehow they would have escaped . ', 'testimony_id': 'irn505558', 'shelfmark': ['USHMM RG-50.042*0004'], 'token_start': 16003, 'token_end': 16024}


In [570]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_4 = {
    'original_sentence': "I was always hoping to see my family.",
    'label': "I was always hoping to see my family.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22was%22%5D+%5B%22always%22%5D+%5B%22hoping%22%5D+%5B%22to%22%5D+%5B%22see%22%5D+%5B%22my%22%5D+%5B%22family%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I was always hoping to see my family . ', 'right': '', 'complete_match': 'I was always hoping to see my family . ', 'testimony_id': 'usc_shoah_5496', 'shelfmark': ['USC 5496'], 'token_start': 9198, 'token_end': 9207}


In [571]:
# Leaf fragment spanning two sentences: look it up, then record where it sits.
fragment_5 = {
    'original_sentence': "That we hope we see each other again. It was not so easy.",
    'label': "That we hope we see each other again. It was not so easy.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22That%22%5D+%5B%22we%22%5D+%5B%22hope%22%5D+%5B%22we%22%5D+%5B%22see%22%5D+%5B%22each%22%5D+%5B%22other%22%5D+%5B%22again%22%5D+%5B%5D+%5B%22It%22%5D+%5B%22was%22%5D+%5B%22not%22%5D+%5B%22so%22%5D+%5B%22easy%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'That we hope we see each other again . It was not so easy . ', 'right': '', 'complete_match': 'That we hope we see each other again . It was not so easy . ', 'testimony_id': 'usc_shoah_23540', 'shelfmark': ['USC 23540'], 'token_start': 9939, 'token_end': 9954}


In [572]:
add_testimonial_fragments(fragments)

### 3.  they are alive

In [573]:
lemmas = []

In [574]:
# NOTE(review): `lemmas` is empty at this point, so the generated query
# is the degenerate "()" shown in the output; fill in `lemmas` if a real
# contextual query is wanted for this section.
query = create_contextual_query(lemmas,context_length=50)
print (query)

()


In [575]:
domain_term = "they are alive"

In [576]:
# Start a fresh payload for this section's mid node.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [577]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_1 = {
    'original_sentence': "I just hoped that Eli would be alive so I could bring him here.",
    'label': "I just hoped that Eli would be alive so I could bring him here.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22just%22%5D+%5B%22hoped%22%5D+%5B%22that%22%5D+%5B%22Eli%22%5D+%5B%22would%22%5D+%5B%22be%22%5D+%5B%22alive%22%5D+%5B%22so%22%5D+%5B%22I%22%5D+%5B%22could%22%5D+%5B%22bring%22%5D+%5B%22him%22%5D+%5B%22here%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I just hoped that Eli would be alive so I could bring him here . ', 'right': '', 'complete_match': 'I just hoped that Eli would be alive so I could bring him here . ', 'testimony_id': 'irn504802', 'shelfmark': ['USHMM RG-50.030*0307'], 'token_start': 8720, 'token_end': 8735}


In [578]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_2 = {
    'original_sentence': "I hoped that my oldest brother remained alive, but he didn't",
    'label': " I hoped that my oldest brother remained alive, but he didn't",
}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22hoped%22%5D+%5B%22that%22%5D+%5B%22my%22%5D+%5B%22oldest%22%5D+%5B%22brother%22%5D+%5B%22remained%22%5D+%5B%22alive%22%5D+%5B%5D+%5B%22but%22%5D+%5B%22he%22%5D+%5B%22did%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "I hoped that my oldest brother remained alive , but he did n't ", 'right': '', 'complete_match': "I hoped that my oldest brother remained alive , but he did n't ", 'testimony_id': 'HVT-18', 'shelfmark': ['Fortunoff HVT-18'], 'token_start': 13169, 'token_end': 13182}


In [579]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_3 = {
    'original_sentence': "Somebody told me that it's not time to tell the particulars, but I still hoped that they are alive.",
    'label': "Somebody told me that it's not time to tell the particulars, but I still hoped that they are alive.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Somebody%22%5D+%5B%22told%22%5D+%5B%22me%22%5D+%5B%22that%22%5D+%5B%22it%22%5D+%5B%5D+%5B%22not%22%5D+%5B%22time%22%5D+%5B%22to%22%5D+%5B%22tell%22%5D+%5B%22the%22%5D+%5B%22particulars%22%5D+%5B%5D+%5B%22but%22%5D+%5B%22I%22%5D+%5B%22still%22%5D+%5B%22hoped%22%5D+%5B%22that%22%5D+%5B%22they%22%5D+%5B%22are%22%5D+%5B%22alive%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "Somebody told me that it 's not time to tell the particulars , but I still hoped that they are alive . ", 'right': '', 'complete_match': "Somebody told me that it 's not time to tell the particulars , but I still hoped that they are alive . ", 'testimony_id': 'irn504724', 'shelfmark': ['USHMM RG-50.030*0235'], 'token_start': 888, 'token_end': 910}


In [580]:
# Leaf fragment: the label is an elided excerpt of the searched sentence.
fragment_4 = {
    'original_sentence': "And they return after the war to Lithuania, to Wilna because I hoped that somebody of the family will stay alive",
    'label': "(..) I hoped that somebody of the family will stay alive (..)",
}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22they%22%5D+%5B%22return%22%5D+%5B%22after%22%5D+%5B%22the%22%5D+%5B%22war%22%5D+%5B%22to%22%5D+%5B%22Lithuania%22%5D+%5B%5D+%5B%22to%22%5D+%5B%22Wilna%22%5D+%5B%22because%22%5D+%5B%22I%22%5D+%5B%22hoped%22%5D+%5B%22that%22%5D+%5B%22somebody%22%5D+%5B%22of%22%5D+%5B%22the%22%5D+%5B%22family%22%5D+%5B%22will%22%5D+%5B%22stay%22%5D+%5B%22alive%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And they return after the war to Lithuania , to Wilna because I hoped that somebody of the family will stay alive ', 'right': '', 'complete_match': 'And they return after the war to Lithuania , to Wilna because I hoped that somebody of the family will stay alive ', 'testimony_id': 'irn507289', 'shelfmark': ['USHMM RG-50.030*0400'], 'token_start': 30847, 'token_end': 30869}


In [581]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_5 = {
    'original_sentence': "And I hoped that my son is alive.",
    'label': "And I hoped that my son is alive.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22I%22%5D+%5B%22hoped%22%5D+%5B%22that%22%5D+%5B%22my%22%5D+%5B%22son%22%5D+%5B%22is%22%5D+%5B%22alive%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And I hoped that my son is alive . ', 'right': '', 'complete_match': 'And I hoped that my son is alive . ', 'testimony_id': 'usc_shoah_11641', 'shelfmark': ['USC 11641'], 'token_start': 16062, 'token_end': 16071}


In [582]:
add_testimonial_fragments(fragments)

### 4.  come back

In [583]:
lemmas = []

In [584]:
# NOTE(review): `lemmas` is empty at this point, so the generated query
# is the degenerate "()" shown in the output; fill in `lemmas` if a real
# contextual query is wanted for this section.
query = create_contextual_query(lemmas,context_length=50)
print (query)

()


In [585]:
domain_term = "come back"

In [586]:
# Start a fresh payload for this section's mid node.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [587]:
# Leaf fragment: the label wraps the searched text in "(..)" elision marks.
fragment_3 = {
    'original_sentence': "well, we were always hoping that he's coming back, because that's what my mother said",
    'label': "(..) well, we were always hoping that he's coming back, because that's what my mother said (..)",
}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22well%22%5D+%5B%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22always%22%5D+%5B%22hoping%22%5D+%5B%22that%22%5D+%5B%22he%22%5D+%5B%5D+%5B%22coming%22%5D+%5B%22back%22%5D+%5B%5D+%5B%22because%22%5D+%5B%22that%22%5D+%5B%5D+%5B%22what%22%5D+%5B%22my%22%5D+%5B%22mother%22%5D+%5B%22said%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "well , we were always hoping that he 's coming back , because that 's what my mother said ", 'right': '', 'complete_match': "well , we were always hoping that he 's coming back , because that 's what my mother said ", 'testimony_id': 'usc_shoah_1160', 'shelfmark': ['USC 1160'], 'token_start': 3532, 'token_end': 3551}


In [588]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_4 = {
    'original_sentence': " I was hoping my mother maybe come back.",
    'label': " I was hoping my mother maybe come back.",
}
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22was%22%5D+%5B%22hoping%22%5D+%5B%22my%22%5D+%5B%22mother%22%5D+%5B%22maybe%22%5D+%5B%22come%22%5D+%5B%22back%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I was hoping my mother maybe come back . ', 'right': '', 'complete_match': 'I was hoping my mother maybe come back . ', 'testimony_id': 'usc_shoah_12810', 'shelfmark': ['USC 12810'], 'token_start': 16646, 'token_end': 16655}


In [589]:
# Leaf fragment: the label prefixes the searched text with a "(..)" elision mark.
fragment_5 = {
    'original_sentence': " my mother was always fighting to survive and hoping that, by some miracle, she will come back.",
    'label': "(..) my mother was always fighting to survive and hoping that, by some miracle, she will come back.",
}
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22my%22%5D+%5B%22mother%22%5D+%5B%22was%22%5D+%5B%22always%22%5D+%5B%22fighting%22%5D+%5B%22to%22%5D+%5B%22survive%22%5D+%5B%22and%22%5D+%5B%22hoping%22%5D+%5B%22that%22%5D+%5B%5D+%5B%22by%22%5D+%5B%22some%22%5D+%5B%22miracle%22%5D+%5B%5D+%5B%22she%22%5D+%5B%22will%22%5D+%5B%22come%22%5D+%5B%22back%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'my mother was always fighting to survive and hoping that , by some miracle , she will come back . ', 'right': '', 'complete_match': 'my mother was always fighting to survive and hoping that , by some miracle , she will come back . ', 'testimony_id': 'usc_shoah_15962', 'shelfmark': ['USC 15962'], 'token_start': 4142, 'token_end': 4162}


In [590]:
add_testimonial_fragments(fragments)

### 5. pray

In [591]:
lemmas = ["pray","hope"]

In [592]:
# Match the current `lemmas` in either order, with at most 25 tokens
# between them.
query = create_contextual_query(lemmas,context_length=25)
print (query)

([lemma="pray"][]{0,25}[lemma="hope"])|([lemma="hope"][]{0,25}[lemma="pray"])


In [593]:
domain_term = "pray"

In [594]:
# Start a fresh payload for this section's mid node.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [595]:
# Leaf fragment spanning two sentences: look it up, then record where it sits.
fragment_1 = {
    'original_sentence': "That’s what I prayed for. And so I never lost hope.",
    'label': "That’s what I prayed for. And so I never lost hope.",
}
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22That%22%5D+%5B%5D+%5B%22what%22%5D+%5B%22I%22%5D+%5B%22prayed%22%5D+%5B%22for%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22so%22%5D+%5B%22I%22%5D+%5B%22never%22%5D+%5B%22lost%22%5D+%5B%22hope%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'That ’s what I prayed for . And so I never lost hope . ', 'right': '', 'complete_match': 'That ’s what I prayed for . And so I never lost hope . ', 'testimony_id': 'irn35973', 'shelfmark': ['USHMM RG-50.106*0173'], 'token_start': 26657, 'token_end': 26671}


In [596]:
# Leaf fragment: the label prefixes the searched text with a "(..)" elision mark.
fragment_2 = {
    'original_sentence': "my father still prayed and hoped that God will help.",
    'label': "(..) my father still prayed and hoped that God will help.",
}
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22my%22%5D+%5B%22father%22%5D+%5B%22still%22%5D+%5B%22prayed%22%5D+%5B%22and%22%5D+%5B%22hoped%22%5D+%5B%22that%22%5D+%5B%22God%22%5D+%5B%22will%22%5D+%5B%22help%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'my father still prayed and hoped that God will help . ', 'right': '', 'complete_match': 'my father still prayed and hoped that God will help . ', 'testimony_id': 'HVT-125', 'shelfmark': ['Fortunoff HVT-125'], 'token_start': 6647, 'token_end': 6658}


In [597]:
# Leaf fragment: look the sentence up in the corpus, then record where it sits.
fragment_3 = {
    'original_sentence': "But we prayed, and we hoped that they would show up.",
    'label': "But we prayed, and we hoped that they would show up.",
}
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update({
    'start_sentence_index': indices[0],
    'end_sentence_index': indices[1],
    'media_offset': 0,
    'media_index': 0,
    'testimony_id': indices[2],
})
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22But%22%5D+%5B%22we%22%5D+%5B%22prayed%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22we%22%5D+%5B%22hoped%22%5D+%5B%22that%22%5D+%5B%22they%22%5D+%5B%22would%22%5D+%5B%22show%22%5D+%5B%22up%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'But we prayed , and we hoped that they would show up . ', 'right': '', 'complete_match': 'But we prayed , and we hoped that they would show up . ', 'testimony_id': 'HVT-131', 'shelfmark': ['Fortunoff HVT-131'], 'token_start': 4003, 'token_end': 4016}


In [598]:
# Fragment 4: label and corpus sentence are identical.
fragment_4 = {
    'original_sentence': "Everybody prayed without hope.",
    'label': "Everybody prayed without hope.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22Everybody%22%5D+%5B%22prayed%22%5D+%5B%22without%22%5D+%5B%22hope%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'Everybody prayed without hope . ', 'right': '', 'complete_match': 'Everybody prayed without hope . ', 'testimony_id': 'irn518937', 'shelfmark': ['USHMM RG-50.030*0515'], 'token_start': 14232, 'token_end': 14237}


In [599]:
# Fragment 5: the label replaces the trailing comma of the corpus sentence
# with an elision marker.
fragment_5 = {
    'original_sentence': " I prayed to God, I hope I'm not going to be next,",
    'label': " I prayed to God, I hope I'm not going to be next (..)",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22prayed%22%5D+%5B%22to%22%5D+%5B%22God%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22hope%22%5D+%5B%22I%22%5D+%5B%22%27m%22%5D+%5B%22not%22%5D+%5B%22going%22%5D+%5B%22to%22%5D+%5B%22be%22%5D+%5B%22next%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "I prayed to God , I hope I 'm not going to be next , ", 'right': '', 'complete_match': "I prayed to God , I hope I 'm not going to be next , ", 'testimony_id': 'usc_shoah_15193', 'shelfmark': ['USC 15193'], 'token_start': 7696, 'token_end': 7711}


In [600]:
# Hand the fragments collected for this domain term to add_testimonial_fragments
# (defined outside this section; presumably it stores them -- confirm).
add_testimonial_fragments(fragments)

### 6.  Cry

In [601]:
# Lemmas that must co-occur in the contextual query built in the next cell.
lemmas = ["cry", "hope"]

In [602]:
# Build the corpus query: both lemmas in either order, with at most 10 tokens
# allowed between them.
query = create_contextual_query(lemmas, context_length=10)
print(query)

([lemma="cry"][]{0,10}[lemma="hope"])|([lemma="hope"][]{0,10}[lemma="cry"])


In [603]:
# Domain term used as the 'mid_node' of the fragment collection below.
domain_term = 'cry'

In [604]:
# Start a fresh fragment collection for this domain term; the individual
# fragments are appended to the 'fragments' list by the following cells.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [605]:
# Fragment 1: label and corpus sentence are identical.
fragment_1 = {
    'original_sentence': "And she had tears, crying, hoping.",
    'label': "And she had tears, crying, hoping.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22she%22%5D+%5B%22had%22%5D+%5B%22tears%22%5D+%5B%5D+%5B%22crying%22%5D+%5B%5D+%5B%22hoping%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And she had tears , crying , hoping . ', 'right': '', 'complete_match': 'And she had tears , crying , hoping . ', 'testimony_id': 'usc_shoah_7455', 'shelfmark': ['USC 7455'], 'token_start': 21056, 'token_end': 21065}


In [606]:
# Fragment 2: the label trims the start and end of the corpus sentence and
# marks both cuts with elision markers.
fragment_2 = {
    'original_sentence': " I said, I hope mother is not cold. We were both crying and saying,",
    'label': "(..) I hope mother is not cold. We were both crying (..)",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22said%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22hope%22%5D+%5B%22mother%22%5D+%5B%22is%22%5D+%5B%22not%22%5D+%5B%22cold%22%5D+%5B%5D+%5B%22We%22%5D+%5B%22were%22%5D+%5B%22both%22%5D+%5B%22crying%22%5D+%5B%22and%22%5D+%5B%22saying%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I said , I hope mother is not cold . We were both crying and saying , ', 'right': '', 'complete_match': 'I said , I hope mother is not cold . We were both crying and saying , ', 'testimony_id': 'usc_shoah_1354', 'shelfmark': ['USC 1354'], 'token_start': 7812, 'token_end': 7829}


In [607]:
# Fragment 3: label and corpus sentence are identical.
fragment_3 = {
    'original_sentence': "And she started to cry. She said, I am young. I hope my, my husband will make it.",
    'label': "And she started to cry. She said, I am young. I hope my, my husband will make it.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22she%22%5D+%5B%22started%22%5D+%5B%22to%22%5D+%5B%22cry%22%5D+%5B%5D+%5B%22She%22%5D+%5B%22said%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22am%22%5D+%5B%22young%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22hope%22%5D+%5B%22my%22%5D+%5B%5D+%5B%22my%22%5D+%5B%22husband%22%5D+%5B%22will%22%5D+%5B%22make%22%5D+%5B%22it%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And she started to cry . She said , I am young . I hope my , my husband will make it . ', 'right': '', 'complete_match': 'And she started to cry . She said , I am young . I hope my , my husband will make it . ', 'testimony_id': 'usc_shoah_747', 'shelfmark': ['USC 747'], 'token_start': 15273, 'token_end': 15296}


In [608]:
# NOTE(review): this sentence is the same one used for fragment_1 of this
# section, and the recorded lookup output reports the same testimony and token
# span for both. Appending it unconditionally stores the same fragment twice
# under this domain term.
# TODO: replace with the intended sentence if a different one was meant.
fragment_4 = {
    'original_sentence': "And she had tears, crying, hoping.",
    'label': "And she had tears, crying, hoping.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
# Only append when the same sentence from the same testimony has not been
# collected yet; the same wording in a different testimony is still accepted.
already_collected = any(
    existing['original_sentence'] == fragment_4['original_sentence']
    and existing['testimony_id'] == fragment_4['testimony_id']
    for existing in fragments['fragments']
)
if not already_collected:
    fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22she%22%5D+%5B%22had%22%5D+%5B%22tears%22%5D+%5B%5D+%5B%22crying%22%5D+%5B%5D+%5B%22hoping%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And she had tears , crying , hoping . ', 'right': '', 'complete_match': 'And she had tears , crying , hoping . ', 'testimony_id': 'usc_shoah_7455', 'shelfmark': ['USC 7455'], 'token_start': 21056, 'token_end': 21065}


In [609]:
# Fragment 5: the label drops the leading " And" of the corpus sentence.
fragment_5 = {
    'original_sentence': " And I was crying, putting myself to sleep with the hope-- I was always saying, maybe my mother-- maybe she's sick.",
    'label': "I was crying, putting myself to sleep with the hope-- I was always saying, maybe my mother-- maybe she's sick.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22I%22%5D+%5B%22was%22%5D+%5B%22crying%22%5D+%5B%5D+%5B%22putting%22%5D+%5B%22myself%22%5D+%5B%22to%22%5D+%5B%22sleep%22%5D+%5B%22with%22%5D+%5B%22the%22%5D+%5B%22hope%22%5D+%5B%5D%7B0%2C3%7D+%5B%22I%22%5D+%5B%22was%22%5D+%5B%22always%22%5D+%5B%22saying%22%5D+%5B%5D+%5B%22maybe%22%5D+%5B%22my%22%5D+%5B%22mother%22%5D+%5B%5D%7B0%2C3%7D+%5B%22maybe%22%5D+%5B%22she%22%5D+%5B%5D+%5B%22sick%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "And I was crying , putting myself to sleep with the hope -- I was always saying , maybe my mother -- maybe she 's sick . ", 'right': '', 'complete_match': "And I was crying , putting myself to sleep with the hope -- I was always saying , maybe my mother -- maybe she 's sick . ", 'testimony_id': 'usc_shoah_19982', 'shelfmark': ['USC 19982'], 'token_start': 10782, 'token_end': 10809}


In [610]:
# Hand the fragments collected for this domain term to add_testimonial_fragments
# (defined outside this section; presumably it stores them -- confirm).
add_testimonial_fragments(fragments)

### 7.  Die

In [611]:
# Lemmas that must co-occur in the contextual query built in the next cell.
lemmas = ["die", "hope"]

In [612]:
# Build the corpus query: both lemmas in either order, with at most 25 tokens
# allowed between them.
query = create_contextual_query(lemmas, context_length=25)
print(query)

([lemma="die"][]{0,25}[lemma="hope"])|([lemma="hope"][]{0,25}[lemma="die"])


In [613]:
# Domain term used as the 'mid_node' of the fragment collection below.
domain_term = 'to die'

In [614]:
# Start a fresh fragment collection for this domain term; the individual
# fragments are appended to the 'fragments' list by the following cells.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [615]:
# Fragment 1: the label wraps the corpus sentence in elision markers and drops
# its trailing comma.
fragment_1 = {
    'original_sentence': "I screamed and I said I didn’t care if I died, and I hope we all die today, ",
    'label': "(..) I screamed and I said I didn’t care if I died, and I hope we all die today (..)",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22screamed%22%5D+%5B%22and%22%5D+%5B%22I%22%5D+%5B%22said%22%5D+%5B%22I%22%5D+%5B%22did%22%5D+%5B%5D+%5B%22care%22%5D+%5B%22if%22%5D+%5B%22I%22%5D+%5B%22died%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22I%22%5D+%5B%22hope%22%5D+%5B%22we%22%5D+%5B%22all%22%5D+%5B%22die%22%5D+%5B%22today%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I screamed and I said I did n’t care if I died , and I hope we all die today , ', 'right': '', 'complete_match': 'I screamed and I said I did n’t care if I died , and I hope we all die today , ', 'testimony_id': 'irn39792', 'shelfmark': ['USHMM RG-50.030*0543'], 'token_start': 11135, 'token_end': 11156}


In [616]:
# Fragment 2: the label wraps the corpus sentence in elision markers.
fragment_2 = {
    'original_sentence': "that hope that somebody will come and kill us, maybe that kept me alive",
    'label': "(..)that hope that somebody will come and kill us, maybe that kept me alive (..)",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22that%22%5D+%5B%22hope%22%5D+%5B%22that%22%5D+%5B%22somebody%22%5D+%5B%22will%22%5D+%5B%22come%22%5D+%5B%22and%22%5D+%5B%22kill%22%5D+%5B%22us%22%5D+%5B%5D+%5B%22maybe%22%5D+%5B%22that%22%5D+%5B%22kept%22%5D+%5B%22me%22%5D+%5B%22alive%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'that hope that somebody will come and kill us , maybe that kept me alive ', 'right': '', 'complete_match': 'that hope that somebody will come and kill us , maybe that kept me alive ', 'testimony_id': 'irn506715', 'shelfmark': ['USHMM RG-50.549.02*0058'], 'token_start': 1247, 'token_end': 1262}


In [617]:
# Fragment 3: label and corpus sentence are identical.
fragment_3 = {
    'original_sentence': "And I just hope that she died on the way.",
    'label': "And I just hope that she died on the way.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22I%22%5D+%5B%22just%22%5D+%5B%22hope%22%5D+%5B%22that%22%5D+%5B%22she%22%5D+%5B%22died%22%5D+%5B%22on%22%5D+%5B%22the%22%5D+%5B%22way%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And I just hope that she died on the way . ', 'right': '', 'complete_match': 'And I just hope that she died on the way . ', 'testimony_id': 'usc_shoah_21013', 'shelfmark': ['USC 21013'], 'token_start': 10479, 'token_end': 10490}


In [618]:
# Fragment 4: label and corpus sentence are identical (two sentences).
fragment_4 = {
    'original_sentence': " You hope you die. I was always hoping I'd die fast, you know.",
    'label': " You hope you die. I was always hoping I'd die fast, you know.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22You%22%5D+%5B%22hope%22%5D+%5B%22you%22%5D+%5B%22die%22%5D+%5B%5D+%5B%22I%22%5D+%5B%22was%22%5D+%5B%22always%22%5D+%5B%22hoping%22%5D+%5B%22I%22%5D+%5B%22%27d%22%5D+%5B%22die%22%5D+%5B%22fast%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "You hope you die . I was always hoping I 'd die fast , you know . ", 'right': '', 'complete_match': "You hope you die . I was always hoping I 'd die fast , you know . ", 'testimony_id': 'usc_shoah_543', 'shelfmark': ['USC 543'], 'token_start': 13848, 'token_end': 13865}


In [619]:
# Fragment 5: only the first clause is looked up in the corpus; the label
# continues past it after an elision marker.
# NOTE(review): the stored sentence span therefore covers only the first clause
# of the label -- confirm this is intended.
fragment_5 = {
    'original_sentence': "I hope that my grandmother died",
    'label': "I hope that my grandmother died (..) I hope she died on the train.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22hope%22%5D+%5B%22that%22%5D+%5B%22my%22%5D+%5B%22grandmother%22%5D+%5B%22died%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I hope that my grandmother died ', 'right': '', 'complete_match': 'I hope that my grandmother died ', 'testimony_id': 'usc_shoah_7094', 'shelfmark': ['USC 7094'], 'token_start': 29255, 'token_end': 29261}


In [620]:
# Hand the fragments collected for this domain term to add_testimonial_fragments
# (defined outside this section; presumably it stores them -- confirm).
add_testimonial_fragments(fragments)

### 8.  Stay together

In [621]:
# Lemmas that must co-occur in the contextual query built in the next cell.
lemmas = ["hope", "together"]

In [622]:
# Build the corpus query: both lemmas in either order, with at most 50 tokens
# allowed between them.
query = create_contextual_query(lemmas, context_length=50)
print(query)

([lemma="hope"][]{0,50}[lemma="together"])|([lemma="together"][]{0,50}[lemma="hope"])


In [623]:
# Domain term used as the 'mid_node' of the fragment collection below.
domain_term = 'stay together'

In [624]:
# Start a fresh fragment collection for this domain term; the individual
# fragments are appended to the 'fragments' list by the following cells.
fragments = {
    'main_node': main_node,
    'mid_node': domain_term,
    'fragments': [],
}

In [625]:
# Fragment 1: label and corpus sentence are identical.
fragment_1 = {
    'original_sentence': "And we hoped that we could stay together.",
    'label': "And we hoped that we could stay together.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22we%22%5D+%5B%22hoped%22%5D+%5B%22that%22%5D+%5B%22we%22%5D+%5B%22could%22%5D+%5B%22stay%22%5D+%5B%22together%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And we hoped that we could stay together . ', 'right': '', 'complete_match': 'And we hoped that we could stay together . ', 'testimony_id': 'usc_shoah_543', 'shelfmark': ['USC 543'], 'token_start': 9238, 'token_end': 9247}


In [626]:
# Fragment 2: label and corpus sentence are identical.
fragment_2 = {
    'original_sentence': "I was just hoping that I can stay together with my mother.",
    'label': "I was just hoping that I can stay together with my mother.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22was%22%5D+%5B%22just%22%5D+%5B%22hoping%22%5D+%5B%22that%22%5D+%5B%22I%22%5D+%5B%22can%22%5D+%5B%22stay%22%5D+%5B%22together%22%5D+%5B%22with%22%5D+%5B%22my%22%5D+%5B%22mother%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I was just hoping that I can stay together with my mother . ', 'right': '', 'complete_match': 'I was just hoping that I can stay together with my mother . ', 'testimony_id': 'usc_shoah_8775', 'shelfmark': ['USC 8775'], 'token_start': 8874, 'token_end': 8887}


In [627]:
# Fragment 3: the label is an abridged version of the corpus passage, with
# elision markers where text was cut and an added lead-in.
fragment_3 = {
    'original_sentence': "to put her arms around and embrace us all. It's like the saying Yiddish, [YIDDISH], like cuddle up all to the parents and just hoping to stay together.",
    'label': "(..)my mother was trying (.. )to put her arms around and embrace us all (..) just hoping to stay together.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22to%22%5D+%5B%22put%22%5D+%5B%22her%22%5D+%5B%22arms%22%5D+%5B%22around%22%5D+%5B%22and%22%5D+%5B%22embrace%22%5D+%5B%22us%22%5D+%5B%22all%22%5D+%5B%5D+%5B%22It%22%5D+%5B%5D+%5B%22like%22%5D+%5B%22the%22%5D+%5B%22saying%22%5D+%5B%22Yiddish%22%5D+%5B%5D+%5B%5D%7B0%2C3%7D+%5B%22YIDDISH%22%5D+%5B%5D%7B0%2C3%7D+%5B%5D+%5B%22like%22%5D+%5B%22cuddle%22%5D+%5B%22up%22%5D+%5B%22all%22%5D+%5B%22to%22%5D+%5B%22the%22%5D+%5B%22parents%22%5D+%5B%22and%22%5D+%5B%22just%22%5D+%5B%22hoping%22%5D+%5B%22to%22%5D+%5B%22stay%22%5D+%5B%22together%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': "to put her arms around and embrace us all . It 's like the saying Yiddish , [ YIDDISH ] , like cuddle up all to the parents and just hoping to stay together . ", 'right': '', 'complete_match': "to put her arms around and embrace us all . It 's like the saying Yiddish , [ YIDDISH ] , like cuddle

In [628]:
# Fragment 4: the label cuts the opening words of the corpus sentence and marks
# the cut with an elision marker.
fragment_4 = {
    'original_sentence': "So the only thing really what we were hoping for and what it matters that the families just to remain together.",
    'label': "(..)what we were hoping for and what it matters that the families just to remain together.",
}
# Items 0/1 of the lookup result become the sentence span, item 2 the testimony id.
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4.update(
    start_sentence_index=indices[0],
    end_sentence_index=indices[1],
    media_offset=0,
    media_index=0,
    testimony_id=indices[2],
)
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22So%22%5D+%5B%22the%22%5D+%5B%22only%22%5D+%5B%22thing%22%5D+%5B%22really%22%5D+%5B%22what%22%5D+%5B%22we%22%5D+%5B%22were%22%5D+%5B%22hoping%22%5D+%5B%22for%22%5D+%5B%22and%22%5D+%5B%22what%22%5D+%5B%22it%22%5D+%5B%22matters%22%5D+%5B%22that%22%5D+%5B%22the%22%5D+%5B%22families%22%5D+%5B%22just%22%5D+%5B%22to%22%5D+%5B%22remain%22%5D+%5B%22together%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'So the only thing really what we were hoping for and what it matters that the families just to remain together . ', 'right': '', 'complete_match': 'So the only thing really what we were hoping for and what it matters that the families just to remain together . ', 'testimony_id': 'usc_shoah_39', 'shelfmark': ['USC 39'], 'token_start': 3550, 'token_end': 3572}


In [630]:
# Hand the fragments collected for this domain term to add_testimonial_fragments
# (defined outside this section; presumably it stores them -- confirm).
# NOTE(review): the recorded output of this cell reports that the mid node
# already exists and cannot be added, so this run appears to have stored nothing.
add_testimonial_fragments(fragments)

mid node exists cannot be added
