# Mining testimonial fragments of the Holocaust

**Experience domain:**

### Load the necessary libraries

In [1]:
import sys; sys.path.insert(0, '..')
import itertools

In [2]:
import get_topic_model_concordance as topic_concordancer
from utils import blacklab, db, text
# Module-level database handle; the fragment helper functions in this notebook read and write through it.
mongo = db.get_db()

In [3]:
%config Completer.use_jedi = False
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import random

### Helper functions

In [4]:
def create_contextual_query(lemmas,context_length=50):
    """Build a corpus query that matches the given lemmas in any order.

    Each ordering of ``lemmas`` becomes one parenthesised alternative in
    which neighbouring ``[lemma="..."]`` tokens are separated by a gap of
    zero to ``context_length`` arbitrary tokens.  The alternatives are
    joined with ``|`` and returned as a single string.
    """
    gap = '[]{0,' + str(context_length) + '}'
    alternatives = [
        '(' + gap.join('[lemma="' + lemma + '"]' for lemma in ordering) + ')'
        for ordering in itertools.permutations(lemmas, len(lemmas))
    ]
    return '|'.join(alternatives)
        
        
            

In [5]:
from utils import blacklab, db, text
import requests
import json
def _token_to_query_part(word, index, last_index):
    """Translate one tokenizer word into a query fragment.

    Returns None for an ellipsis at the very start or end of the passage,
    which is left out of the query altogether.
    """
    if '...' in word:
        if index == 0 or index == last_index:
            return None
        # An ellipsis inside the passage is matched by up to 50 tokens.
        return '[]{0,50}'
    if '-' in word:
        # Words containing a dash are matched by zero to three arbitrary
        # tokens (presumably because the index may split them differently).
        return '[]{0,3}'
    # Clitics and punctuation are matched by exactly one arbitrary token.
    if any(marker in word for marker in ("n't", "'re", "?", ".", "'s", ",")):
        return '[]'
    return '["' + word + '"]'


def find_sentence_id(label):
    """Locate a quoted passage in the corpus and return its sentence span.

    ``label`` is sent to the tokenizer service at ``http://localhost:9000/``
    (``tokenize`` annotator only).  The resulting words are turned into a
    token-level query, which is run through ``blacklab.search_blacklab``.
    The first hit's start and end token positions are then mapped to
    sentence indices using that testimony's entry in ``mongo.tokens``.

    Args:
        label: The passage to look up, as a string.

    Returns:
        A tuple ``(start_sentence_index, end_sentence_index, testimony_id)``
        for the first hit, or ``None`` when the search or the index lookup
        fails (the query and the error are printed in that case).

    Raises:
        Errors from the tokenizer request itself (connection problems,
        malformed JSON, missing ``tokens`` key) are not caught.
    """
    props = {'annotators': 'tokenize'}

    r = requests.post('http://localhost:9000/',
                      params={'properties': json.dumps(props)},
                      data=label.encode('utf-8'))
    # Decode the response body as UTF-8.  Setting ``encoding`` on the
    # response (rather than on the ``requests`` module) is what affects
    # ``r.text``.
    r.encoding = 'utf-8'
    # ``json.loads`` no longer accepts an ``encoding`` keyword on
    # Python 3.9+, so it is not passed.
    result = json.loads(r.text)

    words = result['tokens']
    last_index = len(words) - 1
    parts = (_token_to_query_part(token['word'], i, last_index)
             for i, token in enumerate(words))
    query = ' '.join(part for part in parts if part is not None)

    try:
        sentence = blacklab.search_blacklab(query, window=0,
                                            lemma=False,
                                            include_match=True)
        token_end = sentence[0]['token_end']
        token_start = sentence[0]['token_start']
        print (sentence[0])
        database = db.get_db()
        results = database.tokens.find({'testimony_id':
                                        sentence[0]['testimony_id']},
                                       {'_id': 0})
        tokens = list(results)[0]['tokens']
        sentenceStart = tokens[token_start]['sentence_index']
        sentenceEnd = tokens[token_end]['sentence_index']
        return (sentenceStart, sentenceEnd, sentence[0]['testimony_id'])
    except Exception as error:
        # Best-effort lookup: report the failing query and its cause
        # instead of aborting, but let KeyboardInterrupt/SystemExit through.
        print("The following query returned a null result")
        print(query)
        print(repr(error))
        return None
        
            


In [6]:
def create_parent_node(label):
    """Build the top-level fragment document for ``label``.

    The document holds the label, a random ``essay_id`` between 1 and 20,
    and a ``tree`` made of a single parent node (see ``get_node``) that
    carries the same label and has no children yet.
    """
    # Drawn first to keep the sequence of random draws unchanged; get_node
    # does not read this argument.
    placeholder_testimony_id = random.randint(1, 20)
    essay_id = random.randint(1, 20)
    root = get_node(placeholder_testimony_id, {'label': label},
                    is_parent=True)
    root['label'] = label
    return {'label': label, 'essay_id': essay_id, 'tree': root}

In [7]:
def get_node(testimony_id, node, is_parent=False):
    """Build the dictionary for one node of a fragment tree.

    A parent node keeps only ``node['label']``; its testimony id, media and
    sentence fields are filled with random integers from 1 to 20.  A leaf
    node copies ``testimony_id`` from ``node`` and converts the media and
    sentence fields of ``node`` to ``float``.  Every node starts with an
    empty ``children`` list.  The ``testimony_id`` argument is not read.
    """
    numeric_fields = ('media_index', 'media_offset',
                      'start_sentence_index', 'end_sentence_index')
    built = {'label': node['label']}
    if is_parent:
        built['testimony_id'] = random.randint(1, 20)
        for field in numeric_fields:
            built[field] = random.randint(1, 20)
    else:
        built['testimony_id'] = node['testimony_id']
        for field in numeric_fields:
            built[field] = float(node[field])
    built['children'] = []
    return built

In [8]:
def check_if_main_node_exist(node):
    """Return True when a fragment document labelled ``node`` is stored.

    Uses ``find_one`` so that a missing node yields ``False``; indexing the
    cursor returned by ``find`` raises ``IndexError`` when nothing matches.
    An empty matching document also counts as missing.
    """
    document = mongo.fragments.find_one({'label': node}, {'_id': 0})
    return bool(document)

In [9]:
def add_main_node(label):
    """Store a freshly generated parent-node document for ``label``.

    ``insert_one`` is used instead of the deprecated ``Collection.insert``
    (removed in PyMongo 4), in line with the ``delete_one`` and
    ``replace_one`` calls used by the other helpers.
    """
    mongo.fragments.insert_one(create_parent_node(label))

In [10]:
def delete_main_node(label):
    """Delete one stored fragment document whose label equals ``label``."""
    selector = {'label': label}
    mongo.fragments.delete_one(selector)

In [11]:
def add_testimonial_fragments(fragments):
    """Attach a new mid node with its leaf fragments to a stored main node.

    ``fragments`` is a dict with the keys ``main_node`` (label of the stored
    document), ``mid_node`` (label of the node to add) and ``fragments``
    (list of leaf descriptions passed to ``get_node``).  Nothing happens
    when the main node is not stored; when a child with the mid-node label
    already exists a message is printed and the document is left untouched.
    """
    main_label = fragments['main_node']
    if not check_if_main_node_exist(main_label):
        return

    stored = mongo.fragments.find({'label': main_label}, {'_id': 0})[0]
    children = stored['tree']['children']
    existing_labels = [child['label'] for child in children]
    if fragments['mid_node'] in existing_labels:
        print ("mid node exists cannot be added")
        return

    mid_node = get_node('r', {'label': fragments['mid_node']},
                        is_parent=True)
    mid_node['children'].extend(
        get_node(fragment['testimony_id'], fragment)
        for fragment in fragments['fragments'])
    children.append(mid_node)
    mongo.fragments.replace_one({'label': main_label}, stored)

### Add the main node

In [12]:
main_node = "rip"
# Delete any stored document with this label before inserting a new one,
# so re-running the cell starts again from a parent node without children.
delete_main_node(main_node)
add_main_node(main_node)

  


### Set up the query

# Corpus query selecting tokens by lemma "rip".
query = '[lemma="rip"]'

# Run the topic-model concordance for the query.
# NOTE(review): window / topicn look like context size and topic count — confirm in get_topic_model_concordance.
result = topic_concordancer.main(query,window=25,topicn=25)

### Print the key topics

# For every entry of result['topic_documents'], print its position and the
# item at index 1 of its 'topic_words', followed by a blank separator.
for i,element in enumerate(result['topic_documents']):
    print (i)
    topic_words =  element['topic_words'][1]
    print (topic_words)
    print ('\n')

### Analyze documents

# Print the matched words of at most the first 25 texts of topic ``i``.
i=0
# The loop variable is deliberately not named ``text``: that name is the
# module imported from ``utils`` and would be rebound by the loop.
for document in result['topic_documents'][i]['texts'][0:25]:
    print (document['matched_text_words'])
    print ('\n')

## Testimonial fragments

### 1. "rip" and "child"

In [13]:
# Lemmas that have to co-occur in a hit.
lemmas = ["rip","child"]

In [14]:
# Build the any-order co-occurrence query with a gap of at most 10 tokens and show it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="rip"][]{0,10}[lemma="child"])|([lemma="child"][]{0,10}[lemma="rip"])


In [15]:
# Label used for the mid node that groups the fragments collected below.
domain_term = "child"

In [16]:
# Input for add_testimonial_fragments: the main node to extend, the label of
# the new mid node, and the (initially empty) list of leaf fragments.
fragments = {}
fragments['main_node'] = main_node
fragments['mid_node'] = domain_term
fragments['fragments'] = []

In [17]:
# 'original_sentence' is the text searched for in the corpus; 'label' is the
# text stored on the leaf node.
# NOTE: find_sentence_id returns None when the lookup fails, in which case
# the indexing of ``indices`` below raises TypeError.
fragment_1 = {}
fragment_1['original_sentence'] = "one German taking a child, and taking him by his legs, and ripping him up to here and throwing against the wall."
fragment_1['label']="(..) one German taking a child, and taking him by his legs, and ripping him up to here and throwing against the wall."
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1['start_sentence_index']=indices[0]
fragment_1['end_sentence_index']=indices[1]
fragment_1['media_offset'] = 0
fragment_1['media_index'] = 0
fragment_1['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22one%22%5D+%5B%22German%22%5D+%5B%22taking%22%5D+%5B%22a%22%5D+%5B%22child%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22taking%22%5D+%5B%22him%22%5D+%5B%22by%22%5D+%5B%22his%22%5D+%5B%22legs%22%5D+%5B%5D+%5B%22and%22%5D+%5B%22ripping%22%5D+%5B%22him%22%5D+%5B%22up%22%5D+%5B%22to%22%5D+%5B%22here%22%5D+%5B%22and%22%5D+%5B%22throwing%22%5D+%5B%22against%22%5D+%5B%22the%22%5D+%5B%22wall%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'one German taking a child , and taking him by his legs , and ripping him up to here and throwing against the wall . ', 'right': '', 'complete_match': 'one German taking a child , and taking him by his legs , and ripping him up to here and throwing against the wall . ', 'testimony_id': 'usc_shoah_9995', 'shelfmark': ['USC Shoah Foundation 9995'], 'token_start': 11159, 'token_end': 11184}


In [18]:
# Resolve the passage to (start sentence, end sentence, testimony id) and
# queue it as a leaf; the media fields are set to 0.
fragment_2 = {}
fragment_2['original_sentence'] = "soldiers came to a house, they could take a child. Rip off in two"
fragment_2['label']="(..) soldiers came to a house, they could take a child. Rip off in two (..)"
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2['start_sentence_index']=indices[0]
fragment_2['end_sentence_index']=indices[1]
fragment_2['media_offset'] = 0
fragment_2['media_index'] = 0
fragment_2['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22soldiers%22%5D+%5B%22came%22%5D+%5B%22to%22%5D+%5B%22a%22%5D+%5B%22house%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22could%22%5D+%5B%22take%22%5D+%5B%22a%22%5D+%5B%22child%22%5D+%5B%5D+%5B%22Rip%22%5D+%5B%22off%22%5D+%5B%22in%22%5D+%5B%22two%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'soldiers came to a house , they could take a child . Rip off in two ', 'right': '', 'complete_match': 'soldiers came to a house , they could take a child . Rip off in two ', 'testimony_id': 'HVT-81', 'shelfmark': ['Fortunoff Archive HVT-81'], 'token_start': 8562, 'token_end': 8578}


In [19]:
# Resolve the passage to (start sentence, end sentence, testimony id) and
# queue it as a leaf; the media fields are set to 0.
fragment_3 = {}
fragment_3['original_sentence'] = "I see people lying dead with open mouths. Children, blood, flesh, pieces ripped apart."
fragment_3['label']=" I see people lying dead with open mouths. Children, blood, flesh, pieces ripped apart."
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3['start_sentence_index']=indices[0]
fragment_3['end_sentence_index']=indices[1]
fragment_3['media_offset'] = 0
fragment_3['media_index'] = 0
fragment_3['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22see%22%5D+%5B%22people%22%5D+%5B%22lying%22%5D+%5B%22dead%22%5D+%5B%22with%22%5D+%5B%22open%22%5D+%5B%22mouths%22%5D+%5B%5D+%5B%22Children%22%5D+%5B%5D+%5B%22blood%22%5D+%5B%5D+%5B%22flesh%22%5D+%5B%5D+%5B%22pieces%22%5D+%5B%22ripped%22%5D+%5B%22apart%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I see people lying dead with open mouths . Children , blood , flesh , pieces ripped apart . ', 'right': '', 'complete_match': 'I see people lying dead with open mouths . Children , blood , flesh , pieces ripped apart . ', 'testimony_id': 'irn504926', 'shelfmark': ['USHMM RG-50.549.01*0022'], 'token_start': 3557, 'token_end': 3576}


In [20]:
# Resolve the passage to (start sentence, end sentence, testimony id) and
# queue it as a leaf; the media fields are set to 0.
fragment_4 = {}
fragment_4['original_sentence'] = "That she has a little baby, he should let her go or whatever. And he took that baby and ripped it apart. "
fragment_4['label']= "That she has a little baby, he should let her go or whatever. And he took that baby and ripped it apart. "
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4['start_sentence_index']=indices[0]
fragment_4['end_sentence_index']=indices[1]
fragment_4['media_offset'] = 0
fragment_4['media_index'] = 0
fragment_4['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22That%22%5D+%5B%22she%22%5D+%5B%22has%22%5D+%5B%22a%22%5D+%5B%22little%22%5D+%5B%22baby%22%5D+%5B%5D+%5B%22he%22%5D+%5B%22should%22%5D+%5B%22let%22%5D+%5B%22her%22%5D+%5B%22go%22%5D+%5B%22or%22%5D+%5B%22whatever%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22he%22%5D+%5B%22took%22%5D+%5B%22that%22%5D+%5B%22baby%22%5D+%5B%22and%22%5D+%5B%22ripped%22%5D+%5B%22it%22%5D+%5B%22apart%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'That she has a little baby , he should let her go or whatever . And he took that baby and ripped it apart . ', 'right': '', 'complete_match': 'That she has a little baby , he should let her go or whatever . And he took that baby and ripped it apart . ', 'testimony_id': 'HVT-81', 'shelfmark': ['Fortunoff Archive HVT-81'], 'token_start': 8644, 'token_end': 8669}


In [21]:
# Resolve the passage to (start sentence, end sentence, testimony id) and
# queue it as a leaf; the media fields are set to 0.
fragment_5 = {}
fragment_5['original_sentence'] = "if there is a God, how could he allow a little tiny six-month-old baby being ripped apart?"
fragment_5['label']= "(..) if there is a God, how could he allow a little tiny six-month-old baby being ripped apart?"
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5['start_sentence_index']=indices[0]
fragment_5['end_sentence_index']=indices[1]
fragment_5['media_offset'] = 0
fragment_5['media_index'] = 0
fragment_5['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22if%22%5D+%5B%22there%22%5D+%5B%22is%22%5D+%5B%22a%22%5D+%5B%22God%22%5D+%5B%5D+%5B%22how%22%5D+%5B%22could%22%5D+%5B%22he%22%5D+%5B%22allow%22%5D+%5B%22a%22%5D+%5B%22little%22%5D+%5B%22tiny%22%5D+%5B%5D%7B0%2C3%7D+%5B%22baby%22%5D+%5B%22being%22%5D+%5B%22ripped%22%5D+%5B%22apart%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'if there is a God , how could he allow a little tiny six-month-old baby being ripped apart ? ', 'right': '', 'complete_match': 'if there is a God , how could he allow a little tiny six-month-old baby being ripped apart ? ', 'testimony_id': 'usc_shoah_17867', 'shelfmark': ['USC Shoah Foundation 17867'], 'token_start': 14704, 'token_end': 14723}


In [22]:
# Attach the mid node with its queued leaves to the stored main node
# (a message is printed instead if a child with that label already exists).
add_testimonial_fragments(fragments)

### 2. "clothes" and "rip"

In [23]:
# Lemmas that have to co-occur in a hit.
lemmas = ["clothes","rip"]

In [24]:
# Build the any-order co-occurrence query with a gap of at most 10 tokens and show it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="clothes"][]{0,10}[lemma="rip"])|([lemma="rip"][]{0,10}[lemma="clothes"])


In [25]:
# Label used for the mid node that groups the fragments collected below.
domain_term = "clothes"

In [26]:
# Input for add_testimonial_fragments: the main node to extend, the label of
# the new mid node, and the (initially empty) list of leaf fragments.
fragments = {}
fragments['main_node'] = main_node
fragments['mid_node'] = domain_term
fragments['fragments'] = []

In [27]:
# 'original_sentence' is the text searched for in the corpus; 'label' is the
# text stored on the leaf node.
# NOTE: find_sentence_id returns None when the lookup fails, in which case
# the indexing of ``indices`` below raises TypeError.
fragment_1 = {}
fragment_1['original_sentence'] = "They took the clothes off of you. They ripped the clothes off of you."
fragment_1['label']="They took the clothes off of you. They ripped the clothes off of you."
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1['start_sentence_index']=indices[0]
fragment_1['end_sentence_index']=indices[1]
fragment_1['media_offset'] = 0
fragment_1['media_index'] = 0
fragment_1['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22They%22%5D+%5B%22took%22%5D+%5B%22the%22%5D+%5B%22clothes%22%5D+%5B%22off%22%5D+%5B%22of%22%5D+%5B%22you%22%5D+%5B%5D+%5B%22They%22%5D+%5B%22ripped%22%5D+%5B%22the%22%5D+%5B%22clothes%22%5D+%5B%22off%22%5D+%5B%22of%22%5D+%5B%22you%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'They took the clothes off of you . They ripped the clothes off of you . ', 'right': '', 'complete_match': 'They took the clothes off of you . They ripped the clothes off of you . ', 'testimony_id': 'usc_shoah_7188', 'shelfmark': ['USC Shoah Foundation 7188'], 'token_start': 6684, 'token_end': 6700}


In [28]:
# Resolve the passage to (start sentence, end sentence, testimony id) and
# queue it as a leaf; the media fields are set to 0.
fragment_2 = {}
fragment_2['original_sentence'] = "So they dragged me out and ripped my clothes off and thought that that was very funny."
fragment_2['label']="So they dragged me out and ripped my clothes off and thought that that was very funny."
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2['start_sentence_index']=indices[0]
fragment_2['end_sentence_index']=indices[1]
fragment_2['media_offset'] = 0
fragment_2['media_index'] = 0
fragment_2['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22So%22%5D+%5B%22they%22%5D+%5B%22dragged%22%5D+%5B%22me%22%5D+%5B%22out%22%5D+%5B%22and%22%5D+%5B%22ripped%22%5D+%5B%22my%22%5D+%5B%22clothes%22%5D+%5B%22off%22%5D+%5B%22and%22%5D+%5B%22thought%22%5D+%5B%22that%22%5D+%5B%22that%22%5D+%5B%22was%22%5D+%5B%22very%22%5D+%5B%22funny%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'So they dragged me out and ripped my clothes off and thought that that was very funny . ', 'right': '', 'complete_match': 'So they dragged me out and ripped my clothes off and thought that that was very funny . ', 'testimony_id': 'usc_shoah_4284', 'shelfmark': ['USC Shoah Foundation 4284'], 'token_start': 6173, 'token_end': 6191}


In [29]:
# Resolve the passage to (start sentence, end sentence, testimony id) and
# queue it as a leaf; the media fields are set to 0.
fragment_3 = {}
fragment_3['original_sentence'] = "My mother was attacked, too, when she picked me up. They ripped the clothes off, you know?"
fragment_3['label']="My mother was attacked, too, when she picked me up. They ripped the clothes off, you know?"
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3['start_sentence_index']=indices[0]
fragment_3['end_sentence_index']=indices[1]
fragment_3['media_offset'] = 0
fragment_3['media_index'] = 0
fragment_3['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22My%22%5D+%5B%22mother%22%5D+%5B%22was%22%5D+%5B%22attacked%22%5D+%5B%5D+%5B%22too%22%5D+%5B%5D+%5B%22when%22%5D+%5B%22she%22%5D+%5B%22picked%22%5D+%5B%22me%22%5D+%5B%22up%22%5D+%5B%5D+%5B%22They%22%5D+%5B%22ripped%22%5D+%5B%22the%22%5D+%5B%22clothes%22%5D+%5B%22off%22%5D+%5B%5D+%5B%22you%22%5D+%5B%22know%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'My mother was attacked , too , when she picked me up . They ripped the clothes off , you know ? ', 'right': '', 'complete_match': 'My mother was attacked , too , when she picked me up . They ripped the clothes off , you know ? ', 'testimony_id': 'usc_shoah_7188', 'shelfmark': ['USC Shoah Foundation 7188'], 'token_start': 3974, 'token_end': 3996}


In [30]:
# Resolve the passage to (start sentence, end sentence, testimony id) and
# queue it as a leaf; the media fields are set to 0.
fragment_4 = {}
fragment_4['original_sentence'] = "It was a -- he got them help, they ripped their clothes and he shot them."
fragment_4['label']= "It was a -- he got them help, they ripped their clothes and he shot them."
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4['start_sentence_index']=indices[0]
fragment_4['end_sentence_index']=indices[1]
fragment_4['media_offset'] = 0
fragment_4['media_index'] = 0
fragment_4['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22It%22%5D+%5B%22was%22%5D+%5B%22a%22%5D+%5B%5D%7B0%2C3%7D+%5B%22he%22%5D+%5B%22got%22%5D+%5B%22them%22%5D+%5B%22help%22%5D+%5B%5D+%5B%22they%22%5D+%5B%22ripped%22%5D+%5B%22their%22%5D+%5B%22clothes%22%5D+%5B%22and%22%5D+%5B%22he%22%5D+%5B%22shot%22%5D+%5B%22them%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'It was a -- he got them help , they ripped their clothes and he shot them . ', 'right': '', 'complete_match': 'It was a -- he got them help , they ripped their clothes and he shot them . ', 'testimony_id': 'irn504860', 'shelfmark': ['USHMM RG-50.030*0367'], 'token_start': 14284, 'token_end': 14302}


In [31]:
# Resolve the passage to (start sentence, end sentence, testimony id) and
# queue it as a leaf; the media fields are set to 0.
fragment_5 = {}
fragment_5['original_sentence'] = "The dog came and ripped up the clothes."
fragment_5['label']= "The dog came and ripped up the clothes."
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5['start_sentence_index']=indices[0]
fragment_5['end_sentence_index']=indices[1]
fragment_5['media_offset'] = 0
fragment_5['media_index'] = 0
fragment_5['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22The%22%5D+%5B%22dog%22%5D+%5B%22came%22%5D+%5B%22and%22%5D+%5B%22ripped%22%5D+%5B%22up%22%5D+%5B%22the%22%5D+%5B%22clothes%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'The dog came and ripped up the clothes . ', 'right': '', 'complete_match': 'The dog came and ripped up the clothes . ', 'testimony_id': 'HVT-70', 'shelfmark': ['Fortunoff Archive HVT-70'], 'token_start': 7269, 'token_end': 7278}


In [32]:
# Attach the mid node with its queued leaves to the stored main node
# (a message is printed instead if a child with that label already exists).
add_testimonial_fragments(fragments)

### 3. "dog" and "rip"

In [33]:
# Lemmas that have to co-occur in a hit.
lemmas = ["dog","rip"]

In [34]:
# Build the any-order co-occurrence query with a gap of at most 10 tokens and show it.
query = create_contextual_query(lemmas,context_length=10)
print (query)

([lemma="dog"][]{0,10}[lemma="rip"])|([lemma="rip"][]{0,10}[lemma="dog"])


In [35]:
# Label used for the mid node that groups the fragments collected below.
domain_term = "dog"

In [36]:
# Input for add_testimonial_fragments: the main node to extend, the label of
# the new mid node, and the (initially empty) list of leaf fragments.
fragments = {}
fragments['main_node'] = main_node
fragments['mid_node'] = domain_term
fragments['fragments'] = []

In [37]:
# 'original_sentence' is the text searched for in the corpus; 'label' is the
# text stored on the leaf node.
# NOTE: find_sentence_id returns None when the lookup fails, in which case
# the indexing of ``indices`` below raises TypeError.
fragment_1 = {}
fragment_1['original_sentence'] = "And everybody was very scared. The dog came and ripped up the clothes."
fragment_1['label']="And everybody was very scared. The dog came and ripped up the clothes."
indices = find_sentence_id(fragment_1['original_sentence'])
fragment_1['start_sentence_index']=indices[0]
fragment_1['end_sentence_index']=indices[1]
fragment_1['media_offset'] = 0
fragment_1['media_index'] = 0
fragment_1['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_1)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22everybody%22%5D+%5B%22was%22%5D+%5B%22very%22%5D+%5B%22scared%22%5D+%5B%5D+%5B%22The%22%5D+%5B%22dog%22%5D+%5B%22came%22%5D+%5B%22and%22%5D+%5B%22ripped%22%5D+%5B%22up%22%5D+%5B%22the%22%5D+%5B%22clothes%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And everybody was very scared . The dog came and ripped up the clothes . ', 'right': '', 'complete_match': 'And everybody was very scared . The dog came and ripped up the clothes . ', 'testimony_id': 'HVT-70', 'shelfmark': ['Fortunoff Archive HVT-70'], 'token_start': 7263, 'token_end': 7278}


In [38]:
# The search uses the verbatim passage in 'original_sentence'; the stored
# 'label' is a shortened version of it without the false starts.
fragment_2 = {}
fragment_2['original_sentence'] = "I don’t -- don’t want to remember the face of th-the people who, on our appelleplatz, the man who sends the dog to rip me apart."
fragment_2['label']="don’t want to remember the face of the people who, on our appelleplatz, the man who sends the dog to rip me apart."
indices = find_sentence_id(fragment_2['original_sentence'])
fragment_2['start_sentence_index']=indices[0]
fragment_2['end_sentence_index']=indices[1]
fragment_2['media_offset'] = 0
fragment_2['media_index'] = 0
fragment_2['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_2)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22I%22%5D+%5B%22do%22%5D+%5B%5D+%5B%5D%7B0%2C3%7D+%5B%22do%22%5D+%5B%5D+%5B%22want%22%5D+%5B%22to%22%5D+%5B%22remember%22%5D+%5B%22the%22%5D+%5B%22face%22%5D+%5B%22of%22%5D+%5B%5D%7B0%2C3%7D+%5B%22people%22%5D+%5B%22who%22%5D+%5B%5D+%5B%22on%22%5D+%5B%22our%22%5D+%5B%22appelleplatz%22%5D+%5B%5D+%5B%22the%22%5D+%5B%22man%22%5D+%5B%22who%22%5D+%5B%22sends%22%5D+%5B%22the%22%5D+%5B%22dog%22%5D+%5B%22to%22%5D+%5B%22rip%22%5D+%5B%22me%22%5D+%5B%22apart%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'I do n’t -- do n’t want to remember the face of th-the people who , on our appelleplatz , the man who sends the dog to rip me apart . ', 'right': '', 'complete_match': 'I do n’t -- do n’t want to remember the face of th-the people who , on our appelleplatz , the man who sends the dog to rip me apart . ', 'testimony_id': 'irn506730', 'shelfmark': ['USHMM RG-50.549.02*0057'], 

In [39]:
# Resolve the passage to (start sentence, end sentence, testimony id) and
# queue it as a leaf; the media fields are set to 0.
fragment_3 = {}
fragment_3['original_sentence'] = "they were thrown and ripped apart by the dogs. The dogs ripped them apart."
fragment_3['label']="(..) they were thrown and ripped apart by the dogs. The dogs ripped them apart."
indices = find_sentence_id(fragment_3['original_sentence'])
fragment_3['start_sentence_index']=indices[0]
fragment_3['end_sentence_index']=indices[1]
fragment_3['media_offset'] = 0
fragment_3['media_index'] = 0
fragment_3['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_3)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22they%22%5D+%5B%22were%22%5D+%5B%22thrown%22%5D+%5B%22and%22%5D+%5B%22ripped%22%5D+%5B%22apart%22%5D+%5B%22by%22%5D+%5B%22the%22%5D+%5B%22dogs%22%5D+%5B%5D+%5B%22The%22%5D+%5B%22dogs%22%5D+%5B%22ripped%22%5D+%5B%22them%22%5D+%5B%22apart%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'they were thrown and ripped apart by the dogs . The dogs ripped them apart . ', 'right': '', 'complete_match': 'they were thrown and ripped apart by the dogs . The dogs ripped them apart . ', 'testimony_id': 'usc_shoah_1387', 'shelfmark': ['USC Shoah Foundation 1387'], 'token_start': 6147, 'token_end': 6163}


In [40]:
# Resolve the passage to (start sentence, end sentence, testimony id) and
# queue it as a leaf; the media fields are set to 0.
fragment_4 = {}
fragment_4['original_sentence'] = "One word that Gestapo said to that dog-- he went over to her, he ripped her stomach open and ripped out the baby"
fragment_4['label']= "(..) Gestapo said to that dog-- he went over to her, he ripped her stomach open and ripped out the baby (..)."
indices = find_sentence_id(fragment_4['original_sentence'])
fragment_4['start_sentence_index']=indices[0]
fragment_4['end_sentence_index']=indices[1]
fragment_4['media_offset'] = 0
fragment_4['media_index'] = 0
fragment_4['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_4)


http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22One%22%5D+%5B%22word%22%5D+%5B%22that%22%5D+%5B%22Gestapo%22%5D+%5B%22said%22%5D+%5B%22to%22%5D+%5B%22that%22%5D+%5B%22dog%22%5D+%5B%5D%7B0%2C3%7D+%5B%22he%22%5D+%5B%22went%22%5D+%5B%22over%22%5D+%5B%22to%22%5D+%5B%22her%22%5D+%5B%5D+%5B%22he%22%5D+%5B%22ripped%22%5D+%5B%22her%22%5D+%5B%22stomach%22%5D+%5B%22open%22%5D+%5B%22and%22%5D+%5B%22ripped%22%5D+%5B%22out%22%5D+%5B%22the%22%5D+%5B%22baby%22%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'One word that Gestapo said to that dog -- he went over to her , he ripped her stomach open and ripped out the baby ', 'right': '', 'complete_match': 'One word that Gestapo said to that dog -- he went over to her , he ripped her stomach open and ripped out the baby ', 'testimony_id': 'usc_shoah_4345', 'shelfmark': ['USC Shoah Foundation 4345'], 'token_start': 9857, 'token_end': 9882}


In [41]:
# Resolve the passage to (start sentence, end sentence, testimony id) and
# queue it as a leaf; the media fields are set to 0.
fragment_5 = {}
fragment_5['original_sentence'] = "And they sic the dogs on them. And the dogs rip them apart, limb by limb."
fragment_5['label']= "And they sic the dogs on them. And the dogs rip them apart, limb by limb."
indices = find_sentence_id(fragment_5['original_sentence'])
fragment_5['start_sentence_index']=indices[0]
fragment_5['end_sentence_index']=indices[1]
fragment_5['media_offset'] = 0
fragment_5['media_index'] = 0
fragment_5['testimony_id'] = indices[2]
fragments['fragments'].append(fragment_5)

http://localhost:8080/blacklab-server-2.1.0/lts/hits?patt=%5B%22And%22%5D+%5B%22they%22%5D+%5B%22sic%22%5D+%5B%22the%22%5D+%5B%22dogs%22%5D+%5B%22on%22%5D+%5B%22them%22%5D+%5B%5D+%5B%22And%22%5D+%5B%22the%22%5D+%5B%22dogs%22%5D+%5B%22rip%22%5D+%5B%22them%22%5D+%5B%22apart%22%5D+%5B%5D+%5B%22limb%22%5D+%5B%22by%22%5D+%5B%22limb%22%5D+%5B%5D&waitfortotal=true&outputformat=json&prettyprint=no&wordsaroundhit=0
{'left': '', 'match_word': 'And they sic the dogs on them . And the dogs rip them apart , limb by limb . ', 'right': '', 'complete_match': 'And they sic the dogs on them . And the dogs rip them apart , limb by limb . ', 'testimony_id': 'usc_shoah_8002', 'shelfmark': ['USC Shoah Foundation 8002'], 'token_start': 13015, 'token_end': 13034}


In [42]:
# Attach the mid node with its queued leaves to the stored main node
# (a message is printed instead if a child with that label already exists).
add_testimonial_fragments(fragments)