In [13]:
# Set up CoreNLP Annotator

# Import client module
from stanza.server import CoreNLPClient

# Set the CORENLP_HOME environment variable to point to the installation location
import os
os.environ["CORENLP_HOME"] = r"C:\Users\effbl\stanza_corenlp\stanford-corenlp-4.2.2"

# Construct a CoreNLPClient with some basic annotators, a memory allocation of 4GB, and port number 9001
client = CoreNLPClient(
    annotators=['tokenize','ssplit','ner'], 
    memory='4G', 
    endpoint='http://localhost:9000',
    be_quiet=True)
print(client)

# Start the background server and wait for some time
# Note that in practice this is totally optional, as by default the server will be started when the first annotation is performed
client.start()
import time; time.sleep(10)

2023-04-20 18:11:32 INFO: Writing properties to tmp file: corenlp_server-4a66775638a14a95.props
2023-04-20 18:11:32 INFO: Starting server with command: java -Xmx4G -cp C:\Users\effbl\stanza_corenlp\stanford-corenlp-4.2.2\* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 60000 -threads 5 -maxCharLength 100000 -quiet True -serverProperties corenlp_server-4a66775638a14a95.props -annotators tokenize,ssplit,ner -preload -outputFormat serialized


<stanza.server.client.CoreNLPClient object at 0x000001F1A2DA66C8>


In [14]:
def get_timex_infos(document):
    # Get tokens that have TIMEX infos
    timex_infos = []
    for i, sent in enumerate(document.sentence):
        for j,t in enumerate(sent.token):
            if t.timexValue.text!='':
                timex_infos.append({
                    'sent_id': i,
                    'word_id': j,
                    'word': t.word,
                    'value': t.timexValue.value,
                    'altValue': t.timexValue.altValue,
                    'text': t.timexValue.text,
                    'type': t.timexValue.type,
                })


    # Consolidate tokens that refer to the same TIMEX
    current = None
    consolidate_timex_infos = []

    for i,d in enumerate(timex_infos):
        if current is None: # new item
            current = d
        else:
            # check if new or append to old
            if i==len(timex_infos)-1: # last
                consolidate_timex_infos.append({
                    'sent_id': current['sent_id'],
                    'word_id': str(current['word_id'])+'_'+str(d['word_id']),
                    'value': current['value'],
                    'altValue': current['altValue'],
                    'text': current['text'],
                    'type': current['type'],
                })
            elif (d['value']==current['value']) and (d['type']==current['type']):
                pass
            else:
                consolidate_timex_infos.append({
                    'sent_id': current['sent_id'],
                    'word_id': str(current['word_id'])+'_'+str(timex_infos[i-1]['word_id']),
                    'value': current['value'iu],
                    'altValue': current['altValue'],
                    'text': current['text'],
                    'type': current['type'],
                })
                current = None

    return consolidate_timex_infos


In [10]:
# Load Data
import pandas as pd

graph_df = pd.read_csv(r"D:\66 CausalMap\SciLit_CausalMap\visualization\mir_paper\graph.csv")
graph_df.head()

Unnamed: 0,cause,effect,source,support,evidence
0,a lack of demand for their products .,"prior to ford , two more american firms - gene...",f-0,1,a lack of demand for their products . --> Prio...
1,that there would be a rise in the demand for h...,foreign automobile manufacturers had set up th...,f-0,1,that there would be a rise in the demand for h...
2,the implementation of b hara t stage vi standards,car manufacturers a bomb .,f-0,1,The implementation of B hara t Stage VI standa...
3,ad d to it the rising cost of fuel,driving a car in india becomes a truly elite e...,f-0,1,Ad d to it the rising cost of fuel --> driving...
4,the increase in the price of cars over the las...,writer and analyst ra no joy mu ker ji,f-0,1,the increase in the price of cars over the las...


In [18]:
cols = ['Cause->Effect','Sources','Text']
evidence = graph_df['evidence'].iloc[100]
cause, effect = evidence.split(';')[0].split('-->')

print(cause)
document = client.annotate(cause)
get_timex_infos(document)

print(effect)
document = client.annotate(effect)
get_timex_infos(document)

A total of 23 companies in the automobile sector made the list 
 Volkswagen , Toyota , and Stella nti s Group being the top three auto makers .


[]

In [20]:
timex_ce = []

for i,row in graph_df.iterrows():
    if i>100:
        break
    
    timex_row = []
    cause, effect = row.evidence.split(';')[0].split('-->')
    
    timex_row.append(cause)
    document = client.annotate(cause)
    timex_row.append(get_timex_infos(document))

    timex_row.append(effect)
    document = client.annotate(effect)
    timex_row.append(get_timex_infos(document))
    
    timex_ce.append(timex_row)

pd.DataFrame(timex_ce, columns=['Cause_Span','Cause_TIMEX','Effect_Span','Effect_TIMEX'])

Unnamed: 0,Cause_Span,Cause_TIMEX,Effect_Span,Effect_TIMEX
0,a lack of demand for their products .,[],"Prior to Ford , two more American firms - Gen...",[]
1,that there would be a rise in the demand for h...,[],Foreign automobile manufacturers had set up t...,[]
2,The implementation of B hara t Stage VI standa...,[],car manufacturers a bomb .,[]
3,Ad d to it the rising cost of fuel,[],driving a car in India becomes a truly elite ...,[]
4,the increase in the price of cars over the las...,"[{'sent_id': 0, 'word_id': '8_11', 'word': 'th...",writer and analyst Ra no joy Mu ker ji,[]
...,...,...,...,...
96,the governor of the country,[],to increase the implementation of p rud ent m...,[]
97,to support the real economy .,[],to increase the implementation of p rud ent m...,[]
98,to support the real economy .,[],China stocks rose on Monday after the governo...,[]
99,regulator s stepped up efforts to encourage le...,[],Financial s and property developers ’ shares ...,[]


In [21]:
pd.DataFrame(timex_ce, columns=['Cause_Span','Cause_TIMEX','Effect_Span','Effect_TIMEX']).to_csv('tmp.csv')