In [None]:
import numpy as np
import pandas as pd
from src.query_helpers import *

In [None]:
## load data
## Read the narrative table and attach a `speaker` column, obtained by passing
## each row's `event_id` through the project helper `parse_speaker`.
df = pd.read_csv("./results/soteu_narrativetable.csv").assign(
    speaker=lambda table: table['event_id'].apply(parse_speaker)
)

## sorted list of the distinct speakers found in the table
speakers = sorted(df['speaker'].drop_duplicates())

In [None]:
## Normalise entity labels in every argument column via `custom_replace`:
##   "commission", "organization:commission"          -> "European Commission"
##   "organization:european union", "organization:eu" -> "European Union"
##   "continent:europe"                               -> "Europe"
## NOTE(review): `args` is not defined in this notebook; presumably it is
## provided by the star import from src.query_helpers -- confirm.
df = df.assign(**{column: df[column].apply(custom_replace) for column in args})

In [None]:
## TABLE 1: main agents, patients and predicates
## Counts come from the project helper `get_argcounts`; only the 20 most
## common :ARG1 entries are printed here.
argcounts = get_argcounts(df)
top_arg1 = argcounts[':ARG1'].most_common(20)
print(top_arg1)

In [None]:
## TABLE 2: tf-idf on ARG1 per speaker
## One single-column frame per speaker: indexed by :ARG1, holding that
## speaker's :ARG1_count under a column named after the speaker.
alist = [
    get_argdf(df[df['speaker'] == name].copy(), add_predicates=False, N_words=None)
    [[':ARG1', ':ARG1_count']]
    .set_index(':ARG1')
    .rename(columns={':ARG1_count': name})
    for name in speakers
]

## Align the per-speaker columns side by side; an :ARG1 a speaker never used counts as 0.
countmatrix = pd.concat(alist, axis=1).fillna(0)

## Discard the first two rows before scoring.
## NOTE(review): the reason for skipping exactly two rows is not visible here -- confirm.
countmatrix = countmatrix.iloc[2:]

tfidfdf = compute_tfidf(countmatrix)
top_rows = tfidfdf[:20]
print(top_rows.to_markdown())

In [None]:
## TABLE 3: tf-idf on goal predicates per speaker
## Goals are restricted to the listed collective actors by the project helper `get_goaldf`.
goaldf = get_goaldf(df,actor = ["we","European Union","Europe","European Commission"])

## Per-speaker frequency of each `predicate_va` value.
## The resulting Series is named after the speaker directly instead of renaming
## a 'count' column: value_counts() only names its result 'count' from pandas 2.0
## on, so the previous rename silently did nothing on older versions and every
## column of the matrix ended up with the same label.
glist = []
for speaker in speakers:
    speaker_counts = goaldf.loc[goaldf['speaker'] == speaker, 'predicate_va'].value_counts()
    glist.append(speaker_counts.rename(speaker))

## One column per speaker; predicates a speaker never used count as 0.
countmatrix = pd.concat(glist,axis=1).fillna(0)

## `tfidfdf` keeps the first 20 rows; only the first 10 of those are printed.
tfidfdf = compute_tfidf(countmatrix)[:20]
print(tfidfdf[:10].to_markdown())

In [None]:
## TABLE 4: goals related to a given verb category
goal_predicate = "MOUNT_ASSEMBLE_PRODUCE"

## Rows of `goaldf` whose verb category matches, without the last column of
## `goaldf`, ordered by sentence id. The `year` column is looked up through
## `tid2year` from the first two characters of `sentence_id`.
matches_category = goaldf['predicate_va'] == goal_predicate
gdf = (
    goaldf.loc[matches_category, goaldf.columns[:-1]]
    .sort_values(by='sentence_id')
    .assign(year=lambda rows: rows['sentence_id'].str[:2].map(tid2year))
)

report_columns = ['sentence_id','predicate','arg1','argx1','speaker','year']
print(gdf[report_columns].to_markdown(index=False))

In [None]:
## exploration of the narrative table
## Columns shown when inspecting a slice of the table.
cols = ['parent_predicate_pb',':ARG0','predicate_pb',':ARG1',':ARG1_adj','speaker']

## Rows where the chosen speaker uses the chosen :ARG1.
speaker = "barroso"
arg1 = "market"
by_speaker_and_arg1 = (df['speaker'] == speaker) & (df[':ARG1'] == arg1)
sdf = df.loc[by_speaker_and_arg1]
sdf.loc[:, cols]

In [None]:
## Rows with the chosen :ARG0 / :ARG1 pair, across all speakers.
## Relies on `cols` as defined in the preceding exploration cell.
arg0 = "we"
arg1 = "solidarity"
by_arg0_and_arg1 = (df[':ARG0'] == arg0) & (df[':ARG1'] == arg1)
sdf = df.loc[by_arg0_and_arg1]
sdf.loc[:, cols]

In [None]:
## graph analysis
## Annotate each row with the project helper `annotate_row` (called once per
## row), then build the edge list and graph from the annotated table.
df['verb_annotation'] = df.apply(annotate_row, axis=1)
graph_parts = build_graph(df)
edgelist, G = graph_parts

In [None]:
## filter edges by weight and score
## Ego network around the node "we", as returned by the project helper.
ego = get_ego_network(G,"we")

## Edges to delete: weight below 2, or a non-negative score.
## (change to weight < 3 and score <= 0 for positive edges)
etd = [edge for edge in ego.es if edge['weight'] < 2 or edge['score'] >= 0]
ego.delete_edges(etd)

## Continue with the giant weakly connected component only.
weak_components = ego.components(mode='weak')
ego = weak_components.giant()

## plot
plot_egonet(ego)