Author: Omar El Malki (omar.elmalki@epfl.ch)

### ConceptNet Node extraction from first 10 ROCStories

In [172]:
import pandas as pd
import os
from tqdm import tqdm
import ast

tqdm.pandas()

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.options.mode.chained_assignment = None

In [173]:
os.chdir('../../src/')

In [174]:
# Read ROCStories into pandas DataFrame
roc_stories_path_csv = "../generated/semantic-role-labeling/ROCStories_resolved_with_knowledge_triples_first10.csv"
roc_stories_df = pd.read_csv(roc_stories_path_csv, sep='\t', header=0)

#### Turn triples into strings before applying CoCo-Ex

In [175]:
def triples_to_string(triples):
    """
    Transform list of triples into a single string, omitting the relation
    :param triples: List[List[str]]
    :return: str
    """
    s = ""
    l = ast.literal_eval(triples)
    for triple in l:
        s += f'{triple[0]}, {triple[2]}, '
    return s

In [176]:
roc_stories_triples_df = roc_stories_df[['storyid', 'srl_r1', 'srl_r2', 'srl_r3', 'srl_r4', 'srl_r5']]

In [177]:
roc_stories_triples_df

Unnamed: 0,storyid,srl_r1,srl_r2,srl_r3,srl_r4,srl_r5
0,8bbe6d11-1e2e-413c-bf81-eaea05f4f1bd,"[['David', 'noticed', 'David had put on a lot of weight recently'], ['David', 'put', 'a lot of weight'], ['David', 'put', 'recently']]","[['David', 'examined', 'David habits'], ['David', 'examined', 'to try and figure out the reason'], ['David', 'figure', 'the reason']]","[['David', 'realized', ""David'd been eating too much fast food lately""], [""David'd"", 'eating', 'too much fast food'], [""David'd"", 'eating', 'lately']]","[['David', 'stopped', 'going to burger places'], ['David', 'going', 'to burger places'], ['David', 'started', 'a vegetarian diet']]","[['to feel much better', 'started', 'After a few weeks'], ['David', 'feel', 'much better']]"
1,0beabab2-fb49-460e-a6e6-f35a202e3348,"[['Tom', 'had', 'a very short temper']]","[['a guest', 'made', 'One day'], ['a guest', 'made', 'Tom very angry']]","[['Tom', 'punched', 'a hole in the wall of Tom house']]","[[""Tom 's guest"", 'became', 'afraid'], [""Tom 's guest"", 'left', 'quickly']]","[['Tom', 'sat', 'on Tom couch'], ['Tom', 'sat', 'filled with regret about Tom actions'], ['Tom', 'filled', 'with regret about Tom actions']]"
2,87da1a22-df0b-410c-b186-439700b70ba6,"[['Marcus', 'needed', 'clothing'], ['Marcus', 'needed', 'for a business casual event']]","[['All of Marcus clothes', 'were', 'either too formal or too casual']]","[['Marcus', 'decided', 'to buy a pair of khakis'], ['Marcus', 'buy', 'a pair of khakis']]",[],"[['Marcus', 'was', 'happy to have the right clothes for the event'], ['Marcus', 'have', 'the right clothes for the event']]"
3,2d16bcd6-692a-4fc0-8e7c-4a6f81d9efa9,"[['Bobby', 'thought', 'Bill should buy a trailer and haul a trailer with Bill car'], ['Bill', 'should', 'buy a trailer'], ['Bill', 'buy', 'should'], ['Bill', 'buy', 'a trailer'], ['Bill', 'haul', 'should'], ['Bill', 'haul', 'a trailer'], ['Bill', 'haul', 'with Bill car']]","[['Bill', 'thought', 'a truck would be better for what Bill needed'], ['a truck', 'be', 'would'], ['a truck', 'be', 'better for what Bill needed'], ['Bill', 'needed', 'what']]","[['Bobby', 'pointed', 'two vehicles were much more expensive'], ['two vehicles', 'were', 'much more expensive']]","[['Bill', 'set', 'in Bill ways'], ['Bill', 'set', 'with conventional thinking']]","[['Bill', 'ended', 'buying a truck'], ['Bill', 'ended', ""despite Bobby 's advice""], ['Bill', 'buying', 'a truck']]"
4,c71bb23b-7731-4233-8298-76ba6886cee1,"[['John', 'was', 'a pastor with a very bad memory']]","[['John', 'tried', 'to memorize John sermons many days in advance'], ['John', 'tried', 'but to no avail'], ['John', 'memorize', 'John sermons'], ['John', 'memorize', 'many days in advance']]","[['John', 'decided', 'to learn to sing to overcome John handicap'], ['John', 'learn', 'to sing'], ['John', 'learn', 'to overcome John handicap'], ['John', 'overcome', 'John handicap']]","[['John', 'made', 'then'], ['John', 'made', 'all John sermons'], ['John', 'made', 'into music'], ['John', 'sang', 'then'], ['John', 'sang', 'all his sermons'], ['John', 'sang', 'on Sundays']]","[['John congregation', 'was', 'delighted'], ['John', 'was', 'so']]"
5,4d7b022e-25d2-4300-a9b0-24ab35f4045b,"[[""Melody 's parents"", 'surprised', 'Melody'], [""Melody 's parents"", 'surprised', 'with a trip to the big aquarium']]","[['Melody', 'took', 'a nap'], ['Melody', 'took', 'during the two hour car ride to the big aquarium']]","[[""Melody 's parents"", 'arrived', 'When'], ['Melody', 'was', ""When Melody 's parents arrived""], ['Melody', 'was', 'energetic and excited']]","[['Melody', 'saw', 'At Melody'], ['Melody', 'saw', 'sharks , tropical fish and many others']]","[['Melody and Melody family', 'drove', 'After five hours at the big aquarium'], ['Melody and Melody family', 'drove', 'home']]"
6,8036c905-f23e-4976-83a1-85d679b5e0c2,"[['The math teacher', 'announced', 'a pop quiz'], ['The math teacher', 'announced', 'as class began']]","[['The math teacher', 'began', 'While some students complained'], ['The math teacher', 'began', 'passing out a pop quiz'], ['The math teacher', 'passing', 'a pop quiz']]","[['I', 'took', 'my pencil'], ['I', 'began', 'to work']]","[['I', 'finished', 'About 5 minutes later']]","[['I', 'stood', 'feeling confident'], ['I', 'feeling', 'confident'], ['I', 'turned', 'my pencil'], ['I', 'turned', 'in']]"
7,77338898-07d4-4143-8451-284540c8b082,"[['i', 'met', 'My first girlfriend'], ['i', 'met', 'on the internet']]","[['My first girlfriend', 'lives', 'about 4 hours away from me']]","[['we', 'met', 'Finally'], ['we', 'met', 'after 2 years'], ['we', 'met', 'each other']]","[['My first girlfriend', 'stayed', 'with me'], ['My first girlfriend', 'stayed', 'for a week or two']]","[['we', 'decided', ""we could n't be apart so My first girlfriend moved in with me""], ['we', 'be', 'could'], ['we', 'be', ""n't""], ['we', 'be', 'apart'], ['My first girlfriend', 'moved', 'in'], ['My first girlfriend', 'moved', 'with me']]"
8,110fafd1-2bb7-4ffe-aac7-475706165d41,"[['I', 'got', 'Charlie Horse'], ['I', 'got', 'when I was four years old'], ['I', 'was', 'when'], ['I', 'was', 'four years old']]","[['Charlie Horse', ""'s"", 'a brown stuffed horse'], ['I', 'sleep', 'at 35'], ['I', 'sleep', 'still'], ['I', 'sleep', 'with Charlie Horse'], ['I', 'sleep', 'at night']]","[['Charlie Horse', 'was', 'my best friend'], ['Charlie Horse', 'laid', 'always'], ['Charlie Horse', 'laid', 'at the head of my bed']]","[['I', 'laid', 'Charlie Horse'], ['I', 'laid', 'next to me'], ['I', 'laid', 'smelling Charlie Horse soft fur every night'], ['I', 'smelling', 'Charlie Horse'], ['I', 'smelling', 'soft fur'], ['I', 'smelling', 'every night']]","[['I', 'liked', 'to listen to my radio as I fell asleep cuddling Charlie Horse'], ['I', 'listen', 'to my radio'], ['I', 'listen', 'as I fell asleep cuddling Charlie Horse'], ['I', 'fell', 'asleep'], ['I', 'fell', 'cuddling Charlie Horse'], ['I', 'cuddling', 'Charlie Horse']]"
9,13573c2e-5eed-40eb-bbe5-ed259b5c76a6,"[['Laura', 'loved', 'corn']]","[['Laura', 'decided', 'So'], ['Laura', 'decided', 'to grow some in Laura backyard'], ['Laura', 'grow', 'some'], ['Laura', 'grow', 'in Laura backyard']]","[['The whole process of growing them', 'made', 'Laura very excited']]","[['Laura', 'realized', 'But'], ['Laura', 'realized', 'that them required too much water'], ['them', 'required', 'too much water']]","[['Laura', 'abandoned', 'So'], ['Laura', 'abandoned', 'quickly'], ['Laura', 'abandoned', 'Laura corn garden idea']]"


In [178]:
for n in range(1, 6):
    roc_stories_triples_df[f'triple_str_t{n}'] = roc_stories_triples_df[f'srl_r{n}'] \
        .progress_apply(lambda t: triples_to_string(t))

100%|██████████| 10/10 [00:00<00:00, 11366.68it/s]
100%|██████████| 10/10 [00:00<00:00, 11072.61it/s]
100%|██████████| 10/10 [00:00<00:00, 13144.17it/s]
100%|██████████| 10/10 [00:00<00:00, 12021.51it/s]
100%|██████████| 10/10 [00:00<00:00, 12314.46it/s]


In [179]:
roc_stories_string_triples_df = roc_stories_triples_df[
    ['storyid', 'triple_str_t1', 'triple_str_t2', 'triple_str_t3', 'triple_str_t4', 'triple_str_t5']]

In [180]:
roc_stories_string_triples_df.to_csv(
    '../generated/conceptnet-node-extraction/ROCStories_resolved_triples_entity_extraction_input_first10.csv', sep='\t', header=None)

In [181]:
roc_stories_string_triples_df

Unnamed: 0,storyid,triple_str_t1,triple_str_t2,triple_str_t3,triple_str_t4,triple_str_t5
0,8bbe6d11-1e2e-413c-bf81-eaea05f4f1bd,"David, David had put on a lot of weight recently, David, a lot of weight, David, recently,","David, David habits, David, to try and figure out the reason, David, the reason,","David, David'd been eating too much fast food lately, David'd, too much fast food, David'd, lately,","David, going to burger places, David, to burger places, David, a vegetarian diet,","to feel much better, After a few weeks, David, much better,"
1,0beabab2-fb49-460e-a6e6-f35a202e3348,"Tom, a very short temper,","a guest, One day, a guest, Tom very angry,","Tom, a hole in the wall of Tom house,","Tom 's guest, afraid, Tom 's guest, quickly,","Tom, on Tom couch, Tom, filled with regret about Tom actions, Tom, with regret about Tom actions,"
2,87da1a22-df0b-410c-b186-439700b70ba6,"Marcus, clothing, Marcus, for a business casual event,","All of Marcus clothes, either too formal or too casual,","Marcus, to buy a pair of khakis, Marcus, a pair of khakis,",,"Marcus, happy to have the right clothes for the event, Marcus, the right clothes for the event,"
3,2d16bcd6-692a-4fc0-8e7c-4a6f81d9efa9,"Bobby, Bill should buy a trailer and haul a trailer with Bill car, Bill, buy a trailer, Bill, should, Bill, a trailer, Bill, should, Bill, a trailer, Bill, with Bill car,","Bill, a truck would be better for what Bill needed, a truck, would, a truck, better for what Bill needed, Bill, what,","Bobby, two vehicles were much more expensive, two vehicles, much more expensive,","Bill, in Bill ways, Bill, with conventional thinking,","Bill, buying a truck, Bill, despite Bobby 's advice, Bill, a truck,"
4,c71bb23b-7731-4233-8298-76ba6886cee1,"John, a pastor with a very bad memory,","John, to memorize John sermons many days in advance, John, but to no avail, John, John sermons, John, many days in advance,","John, to learn to sing to overcome John handicap, John, to sing, John, to overcome John handicap, John, John handicap,","John, then, John, all John sermons, John, into music, John, then, John, all his sermons, John, on Sundays,","John congregation, delighted, John, so,"
5,4d7b022e-25d2-4300-a9b0-24ab35f4045b,"Melody 's parents, Melody, Melody 's parents, with a trip to the big aquarium,","Melody, a nap, Melody, during the two hour car ride to the big aquarium,","Melody 's parents, When, Melody, When Melody 's parents arrived, Melody, energetic and excited,","Melody, At Melody, Melody, sharks , tropical fish and many others,","Melody and Melody family, After five hours at the big aquarium, Melody and Melody family, home,"
6,8036c905-f23e-4976-83a1-85d679b5e0c2,"The math teacher, a pop quiz, The math teacher, as class began,","The math teacher, While some students complained, The math teacher, passing out a pop quiz, The math teacher, a pop quiz,","I, my pencil, I, to work,","I, About 5 minutes later,","I, feeling confident, I, confident, I, my pencil, I, in,"
7,77338898-07d4-4143-8451-284540c8b082,"i, My first girlfriend, i, on the internet,","My first girlfriend, about 4 hours away from me,","we, Finally, we, after 2 years, we, each other,","My first girlfriend, with me, My first girlfriend, for a week or two,","we, we could n't be apart so My first girlfriend moved in with me, we, could, we, n't, we, apart, My first girlfriend, in, My first girlfriend, with me,"
8,110fafd1-2bb7-4ffe-aac7-475706165d41,"I, Charlie Horse, I, when I was four years old, I, when, I, four years old,","Charlie Horse, a brown stuffed horse, I, at 35, I, still, I, with Charlie Horse, I, at night,","Charlie Horse, my best friend, Charlie Horse, always, Charlie Horse, at the head of my bed,","I, Charlie Horse, I, next to me, I, smelling Charlie Horse soft fur every night, I, Charlie Horse, I, soft fur, I, every night,","I, to listen to my radio as I fell asleep cuddling Charlie Horse, I, to my radio, I, as I fell asleep cuddling Charlie Horse, I, asleep, I, cuddling Charlie Horse, I, Charlie Horse,"
9,13573c2e-5eed-40eb-bbe5-ed259b5c76a6,"Laura, corn,","Laura, So, Laura, to grow some in Laura backyard, Laura, some, Laura, in Laura backyard,","The whole process of growing them, Laura very excited,","Laura, But, Laura, that them required too much water, them, too much water,","Laura, So, Laura, quickly, Laura, Laura corn garden idea,"


In [182]:
os.chdir('../lib/CoCo-Ex/')

In [183]:
entity_extraction_script_name = 'CoCo-Ex_entity_extraction.py'
input_csv = "../../generated/conceptnet-node-extraction/ROCStories_resolved_triples_entity_extraction_input_first10.csv"
output_tsv = "../../generated/conceptnet-node-extraction/ROCStories_resolved_triples_entity_extraction_output_first10.tsv"
os.system(f'python3 {entity_extraction_script_name} {input_csv} {output_tsv}')

0

In [184]:
output_filtered_tsv = "../../generated/conceptnet-node-extraction/ROCStories_resolved_triples_conceptnet_nodes_filtered_first10.tsv"
overhead_filter_script_name = 'CoCo-Ex_overhead_filter.py'
len_diff_tokenlevel = 1
len_diff_charlevel = 10
dice_coefficient = 0.8

In [185]:
os.system(f'python3 {overhead_filter_script_name} '
          f'--inputfile {output_tsv} '
          f'--outputfile {output_filtered_tsv} '
          f'--len_diff_tokenlevel {len_diff_tokenlevel} '
          f'--len_diff_charlevel {len_diff_charlevel} '
          f'--dice_coefficient {dice_coefficient}')

0

In [186]:
roc_stories_extracted_nodes_df = pd.read_csv(output_filtered_tsv, sep='\t', header=None,
                                             names=['storyid', 'sentence_index', 'sentence', 'nodes'])

In [187]:
roc_stories_extracted_nodes_df

Unnamed: 0,storyid,sentence_index,sentence,nodes
0,text:0,sent:1,"david, david had put on a lot of weight recently, david, a lot of weight, david, recently,",[weight][david][lot]
1,text:0,sent:2,"david, david habits, david, to try and figure out the reason, david, the reason,",[reason][habit][david]
2,text:0,sent:3,"david, david'd been eating too much fast food lately, david'd, too much fast food, david'd, lately,",[food][david]
3,text:0,sent:4,"david, going to burger places, david, to burger places, david, a vegetarian diet,",[place][david][vegetarian][burger][diet][vegetarian diet]
4,text:0,sent:5,"to feel much better, after a few weeks, david, much better,",[week]
5,text:1,sent:1,"tom, a very short temper,",[temper][tom]
6,text:1,sent:2,"a guest, one day, a guest, tom very angry,",[day one][one day][day][guest]
7,text:1,sent:3,"tom, a hole in the wall of tom house,",[wall][house][hole][tom]
8,text:1,sent:4,"tom 's guest, afraid, tom 's guest, quickly,",[tom][guest]
9,text:1,sent:5,"tom, on tom couch, tom, filled with regret about tom actions, tom, with regret about tom actions,",[regret][tom][couch][action]


#### Apply CoCo-Ex directly on resolved sentences 

In [188]:
os.chdir('../../src/')

In [189]:
roc_stories_resolved_sentences_df = roc_stories_df[['storyid', 'resolved1', 'resolved2', 'resolved3', 'resolved4', 'resolved5']]

In [190]:
roc_stories_resolved_sentences_df.to_csv(
    '../generated/conceptnet-node-extraction/ROCStories_resolved_sentences_entity_extraction_input_first10.csv', sep='\t', header=None)

In [191]:
roc_stories_resolved_sentences_df

Unnamed: 0,storyid,resolved1,resolved2,resolved3,resolved4,resolved5
0,8bbe6d11-1e2e-413c-bf81-eaea05f4f1bd,David noticed David had put on a lot of weight recently.,David examined David habits to try and figure out the reason.,David realized David'd been eating too much fast food lately.,David stopped going to burger places and started a vegetarian diet.,"After a few weeks, David started to feel much better."
1,0beabab2-fb49-460e-a6e6-f35a202e3348,Tom had a very short temper.,One day a guest made Tom very angry.,Tom punched a hole in the wall of Tom house.,Tom's guest became afraid and left quickly.,Tom sat on Tom couch filled with regret about Tom actions.
2,87da1a22-df0b-410c-b186-439700b70ba6,Marcus needed clothing for a business casual event.,All of Marcus clothes were either too formal or too casual.,Marcus decided to buy a pair of khakis.,a pair of khakis fit Marcus perfectly.,Marcus was happy to have the right clothes for the event.
3,2d16bcd6-692a-4fc0-8e7c-4a6f81d9efa9,Bobby thought Bill should buy a trailer and haul a trailer with Bill car.,Bill thought a truck would be better for what Bill needed.,Bobby pointed out two vehicles were much more expensive.,Bill was set in Bill ways with conventional thinking.,Bill ended up buying a truck despite Bobby's advice.
4,c71bb23b-7731-4233-8298-76ba6886cee1,John was a pastor with a very bad memory.,John tried to memorize John sermons many days in advance but to no avail.,John decided to learn to sing to overcome John handicap.,John then made all John sermons into music and sang all his sermons on Sundays.,John congregation was delighted and so was John.
5,4d7b022e-25d2-4300-a9b0-24ab35f4045b,Melody's parents surprised Melody with a trip to the big aquarium.,Melody took a nap during the two hour car ride to the big aquarium.,"When Melody's parents arrived, Melody was energetic and excited.","At Melody Melody saw sharks, tropical fish and many others.","After five hours at the big aquarium, Melody and Melody family drove home."
6,8036c905-f23e-4976-83a1-85d679b5e0c2,The math teacher announced a pop quiz as class began.,"While some students complained, The math teacher began passing out a pop quiz.",I took out my pencil and began to work.,"About 5 minutes later, I finished.",I stood up feeling confident and turned my pencil in.
7,77338898-07d4-4143-8451-284540c8b082,My first girlfriend i met on the internet.,My first girlfriend lives about 4 hours away from me.,Finally after 2 years we met each other.,My first girlfriend stayed with me for a week or two.,we decided we couldn't be apart so My first girlfriend moved in with me.
8,110fafd1-2bb7-4ffe-aac7-475706165d41,I got Charlie Horse when I was four years old.,"Charlie Horse's a brown stuffed horse, and at 35 I still sleep with Charlie Horse at night.","Charlie Horse was my best friend, and always laid at the head of my bed.","I laid Charlie Horse next to me, smelling Charlie Horse soft fur every night.",I liked to listen to my radio as I fell asleep cuddling Charlie Horse.
9,13573c2e-5eed-40eb-bbe5-ed259b5c76a6,Laura loved corn.,So Laura decided to grow some in Laura backyard.,The whole process of growing them made Laura very excited.,But Laura realized that them required too much water.,So Laura quickly abandoned Laura corn garden idea.


In [192]:
os.chdir('../lib/CoCo-Ex/')

In [193]:
entity_extraction_script_name = 'CoCo-Ex_entity_extraction.py'
input_csv = "../../generated/conceptnet-node-extraction/ROCStories_resolved_sentences_entity_extraction_input_first10.csv"
output_tsv = "../../generated/conceptnet-node-extraction/ROCStories_resolved_sentences_entity_extraction_output_first10.tsv"
os.system(f'python3 {entity_extraction_script_name} {input_csv} {output_tsv}')

0

In [194]:
output_filtered_tsv = "../../generated/conceptnet-node-extraction/ROCStories_resolved_sentences_conceptnet_nodes_filtered_first10.tsv"
overhead_filter_script_name = 'CoCo-Ex_overhead_filter.py'
len_diff_tokenlevel = 1
len_diff_charlevel = 10
dice_coefficient = 0.8

In [195]:
os.system(f'python3 {overhead_filter_script_name} '
          f'--inputfile {output_tsv} '
          f'--outputfile {output_filtered_tsv} '
          f'--len_diff_tokenlevel {len_diff_tokenlevel} '
          f'--len_diff_charlevel {len_diff_charlevel} '
          f'--dice_coefficient {dice_coefficient}')

0

In [196]:
roc_stories_extracted_nodes_df = pd.read_csv(output_filtered_tsv, sep='\t', header=None,
                                             names=['storyid', 'sentence_index', 'sentence', 'nodes'])

In [197]:
roc_stories_extracted_nodes_df

Unnamed: 0,storyid,sentence_index,sentence,nodes
0,text:0,sent:1,david noticed david had put on a lot of weight recently.,[lot][weight]
1,text:0,sent:2,david examined david habits to try and figure out the reason.,[figure][habit][reason][david]
2,text:0,sent:3,david realized david'd been eating too much fast food lately.,[food][david]
3,text:0,sent:4,david stopped going to burger places and started a vegetarian diet.,[diet][vegetarian][david][place][burger][vegetarian diet]
4,text:0,sent:5,"after a few weeks, david started to feel much better.",[week][david]
5,text:1,sent:1,tom had a very short temper.,[temper][tom]
6,text:1,sent:2,one day a guest made tom very angry.,[day one][one day][guest][day][tom]
7,text:1,sent:3,tom punched a hole in the wall of tom house.,[house][wall][hole][tom]
8,text:1,sent:4,tom's guest became afraid and left quickly.,[guest][tom]
9,text:1,sent:5,tom sat on tom couch filled with regret about tom actions.,[action][couch][regret][tom]


#### Only keep entities

In [200]:
os.chdir('../../src/')

FileNotFoundError: [Errno 2] No such file or directory: '../../src/'

In [201]:
# Read ROCStories into pandas DataFrame
roc_stories_path_csv = '../generated/semantic-role-labeling/ROCStories_resolved_with_knowledge_triples_entities_only_first10.csv'
roc_stories_df = pd.read_csv(roc_stories_path_csv, sep='\t', header=0)

In [202]:
def triples_to_string(triples):
    """
    Transform list of triples into a single string, omitting the relation
    :param triples: List[List[str]]
    :return: str
    """
    s = ""
    l = ast.literal_eval(triples)
    for triple in l:
        s += f'{triple[0]}, {triple[2]}, '
    return s

In [203]:
roc_stories_triples_df = roc_stories_df[['storyid', 'srl_er1', 'srl_er2', 'srl_er3', 'srl_er4', 'srl_er5']]

In [204]:
roc_stories_triples_df

Unnamed: 0,storyid,srl_er1,srl_er2,srl_er3,srl_er4,srl_er5
0,8bbe6d11-1e2e-413c-bf81-eaea05f4f1bd,"[['David', 'noticed', 'David had put on a lot of weight recently'], ['David', 'put', 'a lot of weight']]","[['David', 'examined', 'David habits'], ['David', 'figure', 'the reason']]","[['David', 'realized', ""David'd been eating too much fast food lately""], [""David'd"", 'eating', 'too much fast food']]","[['David', 'stopped', 'going to burger places'], ['David', 'going', 'to burger places'], ['David', 'started', 'a vegetarian diet']]","[['David', 'feel', 'much better']]"
1,0beabab2-fb49-460e-a6e6-f35a202e3348,"[['Tom', 'had', 'a very short temper']]","[['a guest', 'made', 'Tom very angry']]","[['Tom', 'punched', 'a hole in the wall of Tom house']]","[[""Tom 's guest"", 'became', 'afraid']]","[['Tom', 'sat', 'on Tom couch'], ['Tom', 'filled', 'with regret about Tom actions']]"
2,87da1a22-df0b-410c-b186-439700b70ba6,"[['Marcus', 'needed', 'clothing']]","[['All of Marcus clothes', 'were', 'either too formal or too casual']]","[['Marcus', 'decided', 'to buy a pair of khakis'], ['Marcus', 'buy', 'a pair of khakis']]",[],"[['Marcus', 'was', 'happy to have the right clothes for the event'], ['Marcus', 'have', 'the right clothes for the event']]"
3,2d16bcd6-692a-4fc0-8e7c-4a6f81d9efa9,"[['Bobby', 'thought', 'Bill should buy a trailer and haul a trailer with Bill car'], ['Bill', 'should', 'buy a trailer'], ['Bill', 'buy', 'a trailer'], ['Bill', 'haul', 'a trailer']]","[['Bill', 'thought', 'a truck would be better for what Bill needed'], ['a truck', 'be', 'better for what Bill needed'], ['Bill', 'needed', 'what']]","[['Bobby', 'pointed', 'two vehicles were much more expensive'], ['two vehicles', 'were', 'much more expensive']]","[['Bill', 'set', 'in Bill ways']]","[['Bill', 'ended', 'buying a truck'], ['Bill', 'buying', 'a truck']]"
4,c71bb23b-7731-4233-8298-76ba6886cee1,"[['John', 'was', 'a pastor with a very bad memory']]","[['John', 'tried', 'to memorize John sermons many days in advance'], ['John', 'memorize', 'John sermons']]","[['John', 'decided', 'to learn to sing to overcome John handicap'], ['John', 'learn', 'to sing'], ['John', 'overcome', 'John handicap']]","[['John', 'made', 'all John sermons'], ['John', 'made', 'into music'], ['John', 'sang', 'all his sermons']]","[['John congregation', 'was', 'delighted'], ['John', 'was', 'so']]"
5,4d7b022e-25d2-4300-a9b0-24ab35f4045b,"[[""Melody 's parents"", 'surprised', 'Melody'], [""Melody 's parents"", 'surprised', 'with a trip to the big aquarium']]","[['Melody', 'took', 'a nap']]","[['Melody', 'was', 'energetic and excited']]","[['Melody', 'saw', 'sharks , tropical fish and many others']]","[['Melody and Melody family', 'drove', 'home']]"
6,8036c905-f23e-4976-83a1-85d679b5e0c2,"[['The math teacher', 'announced', 'a pop quiz']]","[['The math teacher', 'began', 'passing out a pop quiz'], ['The math teacher', 'passing', 'a pop quiz']]","[['I', 'took', 'my pencil'], ['I', 'began', 'to work']]",[],"[['I', 'feeling', 'confident'], ['I', 'turned', 'my pencil']]"
7,77338898-07d4-4143-8451-284540c8b082,"[['i', 'met', 'My first girlfriend']]",[],"[['we', 'met', 'each other']]","[['My first girlfriend', 'stayed', 'with me']]","[['we', 'decided', ""we could n't be apart so My first girlfriend moved in with me""], ['we', 'be', 'apart']]"
8,110fafd1-2bb7-4ffe-aac7-475706165d41,"[['I', 'got', 'Charlie Horse'], ['I', 'was', 'four years old']]","[['Charlie Horse', ""'s"", 'a brown stuffed horse']]","[['Charlie Horse', 'was', 'my best friend'], ['Charlie Horse', 'laid', 'at the head of my bed']]","[['I', 'laid', 'Charlie Horse'], ['I', 'laid', 'next to me'], ['I', 'smelling', 'Charlie Horse'], ['I', 'smelling', 'soft fur']]","[['I', 'liked', 'to listen to my radio as I fell asleep cuddling Charlie Horse'], ['I', 'listen', 'to my radio'], ['I', 'fell', 'asleep'], ['I', 'cuddling', 'Charlie Horse']]"
9,13573c2e-5eed-40eb-bbe5-ed259b5c76a6,"[['Laura', 'loved', 'corn']]","[['Laura', 'decided', 'to grow some in Laura backyard'], ['Laura', 'grow', 'some']]","[['The whole process of growing them', 'made', 'Laura very excited']]","[['Laura', 'realized', 'that them required too much water'], ['them', 'required', 'too much water']]","[['Laura', 'abandoned', 'Laura corn garden idea']]"


In [205]:
for n in range(1, 6):
    roc_stories_triples_df[f'triple_str_t{n}'] = roc_stories_triples_df[f'srl_er{n}'] \
        .progress_apply(lambda t: triples_to_string(t))

100%|██████████| 10/10 [00:00<00:00, 10449.19it/s]
100%|██████████| 10/10 [00:00<00:00, 13547.49it/s]
100%|██████████| 10/10 [00:00<00:00, 11844.97it/s]
100%|██████████| 10/10 [00:00<00:00, 13756.33it/s]
100%|██████████| 10/10 [00:00<00:00, 13569.41it/s]


In [206]:
roc_stories_string_triples_df = roc_stories_triples_df[
    ['storyid', 'triple_str_t1', 'triple_str_t2', 'triple_str_t3', 'triple_str_t4', 'triple_str_t5']]

In [207]:
roc_stories_string_triples_df.to_csv(
    '../generated/conceptnet-node-extraction/ROCStories_resolved_triples_entities_only_entity_extraction_input_first10.csv', sep='\t', header=None)

In [208]:
roc_stories_string_triples_df

Unnamed: 0,storyid,triple_str_t1,triple_str_t2,triple_str_t3,triple_str_t4,triple_str_t5
0,8bbe6d11-1e2e-413c-bf81-eaea05f4f1bd,"David, David had put on a lot of weight recently, David, a lot of weight,","David, David habits, David, the reason,","David, David'd been eating too much fast food lately, David'd, too much fast food,","David, going to burger places, David, to burger places, David, a vegetarian diet,","David, much better,"
1,0beabab2-fb49-460e-a6e6-f35a202e3348,"Tom, a very short temper,","a guest, Tom very angry,","Tom, a hole in the wall of Tom house,","Tom 's guest, afraid,","Tom, on Tom couch, Tom, with regret about Tom actions,"
2,87da1a22-df0b-410c-b186-439700b70ba6,"Marcus, clothing,","All of Marcus clothes, either too formal or too casual,","Marcus, to buy a pair of khakis, Marcus, a pair of khakis,",,"Marcus, happy to have the right clothes for the event, Marcus, the right clothes for the event,"
3,2d16bcd6-692a-4fc0-8e7c-4a6f81d9efa9,"Bobby, Bill should buy a trailer and haul a trailer with Bill car, Bill, buy a trailer, Bill, a trailer, Bill, a trailer,","Bill, a truck would be better for what Bill needed, a truck, better for what Bill needed, Bill, what,","Bobby, two vehicles were much more expensive, two vehicles, much more expensive,","Bill, in Bill ways,","Bill, buying a truck, Bill, a truck,"
4,c71bb23b-7731-4233-8298-76ba6886cee1,"John, a pastor with a very bad memory,","John, to memorize John sermons many days in advance, John, John sermons,","John, to learn to sing to overcome John handicap, John, to sing, John, John handicap,","John, all John sermons, John, into music, John, all his sermons,","John congregation, delighted, John, so,"
5,4d7b022e-25d2-4300-a9b0-24ab35f4045b,"Melody 's parents, Melody, Melody 's parents, with a trip to the big aquarium,","Melody, a nap,","Melody, energetic and excited,","Melody, sharks , tropical fish and many others,","Melody and Melody family, home,"
6,8036c905-f23e-4976-83a1-85d679b5e0c2,"The math teacher, a pop quiz,","The math teacher, passing out a pop quiz, The math teacher, a pop quiz,","I, my pencil, I, to work,",,"I, confident, I, my pencil,"
7,77338898-07d4-4143-8451-284540c8b082,"i, My first girlfriend,",,"we, each other,","My first girlfriend, with me,","we, we could n't be apart so My first girlfriend moved in with me, we, apart,"
8,110fafd1-2bb7-4ffe-aac7-475706165d41,"I, Charlie Horse, I, four years old,","Charlie Horse, a brown stuffed horse,","Charlie Horse, my best friend, Charlie Horse, at the head of my bed,","I, Charlie Horse, I, next to me, I, Charlie Horse, I, soft fur,","I, to listen to my radio as I fell asleep cuddling Charlie Horse, I, to my radio, I, asleep, I, Charlie Horse,"
9,13573c2e-5eed-40eb-bbe5-ed259b5c76a6,"Laura, corn,","Laura, to grow some in Laura backyard, Laura, some,","The whole process of growing them, Laura very excited,","Laura, that them required too much water, them, too much water,","Laura, Laura corn garden idea,"


In [182]:
os.chdir('../lib/CoCo-Ex/')

In [183]:
entity_extraction_script_name = 'CoCo-Ex_entity_extraction.py'
input_csv = "../../generated/conceptnet-node-extraction/ROCStories_resolved_triples_entities_only_entity_extraction_input_first10.csv"
output_tsv = "../../generated/conceptnet-node-extraction/ROCStories_resolved_triples_entities_only_entity_extraction_output_first10.tsv"
os.system(f'python3 {entity_extraction_script_name} {input_csv} {output_tsv}')

0

In [184]:
output_filtered_tsv = "../../generated/conceptnet-node-extraction/ROCStories_resolved_triples_entities_only_conceptnet_nodes_filtered_first10.tsv"
overhead_filter_script_name = 'CoCo-Ex_overhead_filter.py'
len_diff_tokenlevel = 1
len_diff_charlevel = 10
dice_coefficient = 0.8

In [185]:
os.system(f'python3 {overhead_filter_script_name} '
          f'--inputfile {output_tsv} '
          f'--outputfile {output_filtered_tsv} '
          f'--len_diff_tokenlevel {len_diff_tokenlevel} '
          f'--len_diff_charlevel {len_diff_charlevel} '
          f'--dice_coefficient {dice_coefficient}')

0

In [186]:
roc_stories_extracted_nodes_df = pd.read_csv(output_filtered_tsv, sep='\t', header=None,
                                             names=['storyid', 'sentence_index', 'sentence', 'nodes'])

In [187]:
roc_stories_extracted_nodes_df

Unnamed: 0,storyid,sentence_index,sentence,nodes
0,text:0,sent:1,"david, david had put on a lot of weight recently, david, a lot of weight, david, recently,",[weight][david][lot]
1,text:0,sent:2,"david, david habits, david, to try and figure out the reason, david, the reason,",[reason][habit][david]
2,text:0,sent:3,"david, david'd been eating too much fast food lately, david'd, too much fast food, david'd, lately,",[food][david]
3,text:0,sent:4,"david, going to burger places, david, to burger places, david, a vegetarian diet,",[place][david][vegetarian][burger][diet][vegetarian diet]
4,text:0,sent:5,"to feel much better, after a few weeks, david, much better,",[week]
5,text:1,sent:1,"tom, a very short temper,",[temper][tom]
6,text:1,sent:2,"a guest, one day, a guest, tom very angry,",[day one][one day][day][guest]
7,text:1,sent:3,"tom, a hole in the wall of tom house,",[wall][house][hole][tom]
8,text:1,sent:4,"tom 's guest, afraid, tom 's guest, quickly,",[tom][guest]
9,text:1,sent:5,"tom, on tom couch, tom, filled with regret about tom actions, tom, with regret about tom actions,",[regret][tom][couch][action]
