Constructing Features from Scratch

In [1]:
#import libraries

import os
import sys
import numpy as np
import torchtext
import pandas as pd
import re
import requests
import nltk
from nltk.corpus import wordnet as wn
nltk.download('popular')

from lib import data_utils, preprocess

#get path
src_path = os.getcwd()

datapath = src_path + '/Data/'
PLcopath = datapath + 'AnnotatedPLData/PLCoref'
PLpath = datapath + 'AnnotatedPLData/PLTexts'
CENpath = datapath + 'AnnotatedCENData/CENCoref'
ONpath = datapath + 'AnnotatedONData/ONCoref'

PLInter = src_path + '/output/IntermediateFilesPL/'

#import data

#create empty data frame
PL_data = pd.DataFrame(columns=['corpusID', 'character', 'animacy', 'coref_chain', 'chain_head', 'head_of_head', 'chain_len', 'CL'])

#append all PL texts and features into one dataframe
for n in range(1,47):
# for n in range(1,2):

    print(n)

    #get story path
    storycopath = PLcopath +'/story' + str(n) + '.txt'
    storypath = PLpath + '/story' + str(n) + '.txt'
    storyid = 'story'+ str(n)

    #read in story
    corpus = data_utils.read_story(storycopath)

    # read in from intermediate files
    # list of features
    # features = ["CN", "Dep", "NER", "SS", "Triple", "WN"]

    # for f in features:
    #     #empty list
    #     feat = []
    #     with open(PLInter + f + 'FeatureBoolean'+'/Story' + str(n) + '.txt', 'r') as doc:
    #         for line in doc:
    #             feat.append(eval(line.rstrip()))
    
    #     corpus[f] = feat

    #get ss feature
    sslist = preprocess.semantic_subj(storypath)
    #remove leading The/A's in the sematic list
    sslist = [re.sub('^(The |A )','',s, flags=re.IGNORECASE) for s in sslist]
    pattern = '|'.join(sslist)
    
    #create binary flag variable for ss feat
    corpus['SS'] = corpus['head_of_head'].str.contains(pattern)
    corpus['SS'] = corpus['SS'].replace({True:1, False:0})

    #get ner feature
    nerlist = preprocess.ner_person(storypath)
    pattern = '|'.join(nerlist)

    #create binary flag variable for ss feat
    corpus['NER'] = corpus['head_of_head'].str.contains(pattern)
    corpus['NER'] = corpus['NER'].replace({True:1, False:0})

    #create binary flag variable for wn feat
    #get wordnet synset of head of chain
    wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
   
    #fill blanks with unrelated word to person
    wn_input[0] = wn_input[0].fillna(wn.synset('strong.a.01'))
   
    # get common synonym with person
    per = wn.synset('person.n.01')
    test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hypernyms(per)))
   
    # test if head of chain related to person
    corpus['WN']= test[0]==per
    corpus['WN'] = corpus['WN'].replace({True:1, False:0})

    #get dp feat
    dplist = preprocess.dep_link(storypath)
    dplist = list(set(dplist))
    pattern='|'.join(dplist)
    #remove punct
    pattern = pattern.replace('?|','')
    pattern = pattern.replace('!|','')

    #create binary flag variable for dp feat
    corpus['DP'] = corpus['head_of_head'].str.contains(pattern)
    corpus['DP'] = corpus['DP'].replace({True:1, False:0})

    #get triple feat
    tplist = preprocess.triple(storypath)
    tplist = list(set(tplist))
    #remove leading The/A's in the sematic list
    tplist = [re.sub('^(The |A )','',s, flags=re.IGNORECASE) for s in tplist]
    pattern='|'.join(tplist)

    #create binary flag variable for ss feat
    corpus['TP'] = corpus['head_of_head'].str.contains(pattern)
    corpus['TP'] = corpus['TP'].replace({True:1, False:0})

    # get conceptnet feat
    urlreq = 'https://api.conceptnet.io/c/en/'+corpus['head_of_head']

    #default no presence of person mentioned
    corpus['CN'] = 0

    for i in range(len(urlreq)):

        #make request to concept net api
        response = requests.get(urlreq[i])
        obj = response.json()
        #get list of edges
        cnlist = [edge['@id'] for edge in obj['edges']]

        #if person is in list then flag
        if any('person' in s for s in cnlist):
            val = 1
            corpus['CN'][i]=val

    #append to dataframe
    PL_data = pd.concat([PL_data, corpus], ignore_index=True)

PL_data

[nltk_data] Downloading collection 'popular'
[nltk_data]    | 
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     /home/eileen/nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     /home/eileen/nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     /home/eileen/nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     /home/eileen/nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package inaugural to
[nltk_data]    |     /home/eileen/nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]    |     /home/eileen/nltk_data...
[nltk_data]    |   Package movie_reviews is already up-to

1


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

2


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hypernyms(per)))
A value is trying to be set on a copy of a slice from a DataFrame

See 

3


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

4


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

5


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

6


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

7


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

8


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

9


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

10


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

11


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

12


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hypernyms(per)))
A value is trying to be set on a copy of a slice from a DataFrame

See 

13


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  corpus['SS'] = corpus['head_of_head'].str.contains(pattern)
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = 

14


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

15


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

16


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

17


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

18


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

19


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

20


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

21


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

22


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

23


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

24


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

25


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

26


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

27


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

28


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

29


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

31


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

32


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

33


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

36


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

37


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

40


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

41


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

42


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

43


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

44


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

45


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

46


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  wn_input = corpus['head_of_head'].apply(lambda word: pd.Series(wn.synsets(word)))
  test = wn_input[0].apply(lambda syn: pd.Series(syn.lowest_common_hype

Unnamed: 0,corpusID,character,animacy,coref_chain,chain_head,head_of_head,chain_len,CL,SS,NER,WN,DP,TP,CN
0,story1,1,1,"[ dragon , he , he , the dragon , He , hi...",dragon,dragon,43,2.499540,1.0,0.0,0.0,1.0,1.0,1.0
1,story1,1,1,"[ princess , the tsar 's daughter , her , h...",princess,princess,23,0.990763,1.0,0.0,1.0,1.0,1.0,1.0
2,story1,1,1,"[ tsar , tsar , father , tsar , her father...",tsar,tsar,9,-0.065380,1.0,0.0,1.0,1.0,1.0,1.0
3,story1,0,1,"[ princess' dog , a little dog that had follo...",princess' dog,dog,4,-0.442574,1.0,0.0,0.0,1.0,1.0,1.0
4,story1,1,1,"[ tsarina , mother , tsarina , tsarina ]",tsarina,tsarina,4,-0.442574,1.0,0.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1907,story46,0,1,"[ the entire enemy army , the enemy army , h...",the entire enemy army,army,7,-0.308434,0.0,0.0,0.0,0.0,0.0,0.0
1908,story46,0,1,"[ this , the best solution ]",this,this,2,-0.604613,1.0,0.0,0.0,1.0,0.0,0.0
1909,story46,0,1,"[ a horse , a horse ]",a horse,horse,2,-0.604613,0.0,0.0,0.0,0.0,0.0,0.0
1910,story46,0,1,"[ an even better horse , his horse , his hor...",an even better horse,horse,4,-0.486142,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
pattern = pattern.replace('?|','')
pattern

'Everything|The|you|them|one|lady|daughter|debtors|devil|Satan|but|there|Hey|women|and|There|where|we|Pack|princess|king|,|job|It|We|How|Out|They||tavern|how|brother|That|what|thirteen|Wander|they|"|Order|hammers|spirits|wandered|No|served|soldier|head|everything|many|foreign|road|has|he|it|devils|You|kind|heart|But|that|He|spirit|I|his|Each|she|Lord|who|What|Do|\'s'

Importing Features from Intermediate File

In [9]:
#import libraries

import os
import sys
import numpy as np
import torchtext
import pandas as pd
import re

from lib import data_utils, preprocess

#get path
src_path = os.getcwd()

datapath = src_path + '/Data/'
PLcopath = datapath + 'AnnotatedPLData/PLCoref'
PLpath = datapath + 'AnnotatedPLData/PLTexts'
CENpath = datapath + 'AnnotatedCENData/CENCoref'
ONpath = datapath + 'AnnotatedONData/ONCoref'

PLInter = src_path + '/output/IntermediateFilesPL/'

#import data

#create empty data frame
PL_data = pd.DataFrame(columns=['corpusID', 'character', 'animacy', 'coref_chain', 'chain_head', 'head_of_head', 'chain_len', 'CL'])

#append all PL texts and features into one dataframe
# for n in range(1,47):
for n in range(1,2):

    print(n)

    #get story path
    storycopath = PLcopath +'/story' + str(n) + '.txt'
    storypath = PLpath + '/story' + str(n) + '.txt'
    storyid = 'story'+ str(n)

    #read in story
    corpus = data_utils.read_story(storycopath)

    #read in from intermediate files
    #list of features
    features = ["CN", "Dep", "NER", "SS", "Triple", "WN"]

    for f in features:
        #empty list
        feat = []
        with open(PLInter + f + 'FeatureBoolean'+'/Story' + str(n) + '.txt', 'r') as doc:
            for line in doc:
                feat.append(eval(line.rstrip()))
    
        corpus[f] = feat
    
    #append to dataframe
    PL_data = pd.concat([PL_data, corpus], ignore_index=True)
PL_data

1


Unnamed: 0,corpusID,character,animacy,coref_chain,chain_head,head_of_head,chain_len,CL,CN,Dep,NER,SS,Triple,WN
0,story1,1,1,"[ dragon , he , he , the dragon , He , hi...",dragon,dragon,43,2.49954,1.0,1.0,0.0,1.0,1.0,0.0
1,story1,1,1,"[ princess , the tsar 's daughter , her , h...",princess,princess,23,0.990763,1.0,1.0,0.0,1.0,0.0,1.0
2,story1,1,1,"[ tsar , tsar , father , tsar , her father...",tsar,tsar,9,-0.06538,1.0,1.0,0.0,1.0,1.0,1.0
3,story1,0,1,"[ princess' dog , a little dog that had follo...",princess' dog,dog,4,-0.442574,1.0,1.0,0.0,1.0,1.0,0.0
4,story1,1,1,"[ tsarina , mother , tsarina , tsarina ]",tsarina,tsarina,4,-0.442574,1.0,1.0,0.0,1.0,0.0,1.0
5,story1,0,1,"[ who is stronger , who in this world was str...",who is stronger,stronger,2,-0.593452,0.0,0.0,0.0,0.0,0.0,0.0
6,story1,0,1,"[ dragon's statement , a tanner in the city o...",dragon's statement,statement,3,-0.518013,0.0,0.0,0.0,0.0,0.0,0.0
7,story1,1,1,"[ Nikita , a tanner in the city of Kiev , Ni...",Nikita,Nikita,39,2.197784,0.0,1.0,1.0,1.0,0.0,0.0
8,story1,0,1,"[ tsar's coming , the tsar went in person to ...",tsar's coming,coming,3,-0.518013,0.0,0.0,0.0,0.0,0.0,0.0
9,story1,0,1,"[ his hands , his hands , his hands ]",his hands,hands,3,-0.518013,0.0,1.0,0.0,1.0,0.0,0.0


Model

In [82]:
# simple model 

from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

data_y = PL_data["character"].astype('int')
data_x = PL_data[["CL", "CN", "Dep", "NER", "SS", "Triple", "WN"]]

X_train, X_test, y_train, y_test = train_test_split(data_x, data_y, random_state=0)

rbf_svc = svm.SVC(kernel = 'rbf', C=.5, gamma=1)

cv_results = cross_validate(rbf_svc, X_train, y_train, cv = 10, scoring=('f1', 'accuracy'), return_train_score=True)

print(cv_results['train_accuracy'])
print(cv_results['test_accuracy'])
print(cv_results['train_f1'])
print(cv_results['test_f1'])

# cv = StratifiedShuffleSplit(n_splits=10, test_size=.2, random_state=42)

# C_range = np.logspace(-2,10,13)
# gamma_range = np.logspace(-9,3,13)

# grid = GridSearchCV(svm.SVC(kernel='rbf'), param_grid=dict(gamma = gamma_range, C= C_range), cv=cv)

# grid.fit(X_train,y_train)

# print(grid.best_params_, grid.best_score_)



[0.90775194 0.9124031  0.91007752 0.91007752 0.90859799 0.91402014
 0.9070488  0.91092177 0.91324555 0.91247095]
[0.92361111 0.875      0.90972222 0.90277778 0.90909091 0.8951049
 0.92307692 0.91608392 0.86713287 0.88811189]
[0.82212257 0.8320951  0.82789318 0.82738095 0.82440476 0.83308271
 0.82035928 0.82962963 0.83577713 0.83109118]
[0.86419753 0.75675676 0.82191781 0.82051282 0.82191781 0.81012658
 0.85333333 0.83783784 0.71641791 0.78947368]


In [2]:
# simple model 

from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

data_y = PL_data["character"].astype('int')
data_x = PL_data[["CL", "CN", "DP", "NER", "SS", "TP", "WN"]]

X_train, X_test, y_train, y_test = train_test_split(data_x, data_y, random_state=0)

rbf_svc = svm.SVC(kernel = 'rbf', C=.5, gamma=1)

cv_results = cross_validate(rbf_svc, X_train, y_train, cv = 10, scoring=('f1', 'accuracy'), return_train_score=True)

print(cv_results['train_accuracy'])
print(cv_results['test_accuracy'])
print(cv_results['train_f1'])
print(cv_results['test_f1'])

[0.87906977 0.88062016 0.8744186  0.87674419 0.87529047 0.87838885
 0.88071263 0.87838885 0.87993803 0.88071263]
[0.86111111 0.83333333 0.86111111 0.84027778 0.87412587 0.9020979
 0.86713287 0.86713287 0.83916084 0.86713287]
[0.76435045 0.76876877 0.75748503 0.75650842 0.76005961 0.76319759
 0.75862069 0.76390977 0.76691729 0.76807229]
[0.74358974 0.65714286 0.75       0.70886076 0.75       0.81081081
 0.73239437 0.72463768 0.66666667 0.73972603]
