In [1]:
# Import relevant modules
%matplotlib notebook

import sys
sys.path.append("../")

from Models.SGNS import SourceReceiverConcatClassifier, SourceReceiverConcatModel
from Preprocessing.FullContextProcessor import FullContextProcessor

import numpy as np
import pandas as pd
import torch
import pickle

# Load Trained SRC Model

In [2]:
# Load the original data (tab-separated) through the project's context processor
fcp = FullContextProcessor(data_fpath="../Data/OConnor2013/ocon-nicepaths-extracted.txt", sep="\t")

# Columns whose values are later used as row indices into the embedding matrices
mapped_cols = ("SOURCE", "RECEIVER", "WORD")

# First build the two-way maps for every column ...
for col in mapped_cols:
    fcp.createTwoWayMap(col)

# ... then convert each column to its index representation
for col in mapped_cols:
    fcp.convertColToIdx(col)

In [3]:
# Rebuild the model with the same vocabulary sizes as the data, then restore the trained weights.
# The embedding tables are sized by the number of distinct values in each (index-converted) column.
n_sources = len(fcp.df["SOURCE"].unique())
n_receivers = len(fcp.df["RECEIVER"].unique())
n_words = len(fcp.df["WORD"].unique())

# K_s + K_r equals K_w: later cells concatenate a source and a receiver
# embedding and take its dot product with the word embeddings.
model = SourceReceiverConcatModel(
    s_cnt=n_sources,
    r_cnt=n_receivers,
    w_cnt=n_words,
    K_s=100,
    K_r=100,
    K_w=200,
)

# Load the saved parameters onto the CPU regardless of the device they were saved from
trained_state = torch.load("src-xavier.pt", map_location="cpu")
model.load_state_dict(trained_state)

In [4]:
# Extract the embeddings into numpy arrays (source, receiver, word - in that order).
# detach() drops the autograd graph so the weights can be viewed as plain arrays.
s_embeds, r_embeds, w_embeds = (
    layer.weight.detach().numpy()
    for layer in (model.s_embeds, model.r_embeds, model.w_embeds)
)

# Word and SR TensorFlow Embedding Projector file generation

In [5]:
# Write word vectors out for tensorflow projector:
# one tab-separated row per word index, in index order.
np.savetxt(fname="w_embeds.txt",
           X=w_embeds,
           fmt="%.8f",
           delimiter="\t",)

# Write the matching labels, one per line.
# Iterate over the indices in sorted order so that label line i always
# describes row i of w_embeds.txt, instead of relying on the insertion
# order of the idx_to_col dict.
w_idx_to_label = fcp.twoway_maps["WORD"]["idx_to_col"]
with open("w_labels.txt", "w") as f:
    for idx in sorted(w_idx_to_label):
        f.write(str(w_idx_to_label[idx]) + "\n")

In [6]:
# Write source vectors out for tensorflow projector:
# one tab-separated row per source index, in index order.
np.savetxt(fname="s_embeds.txt",
           X=s_embeds,
           fmt="%.8f",
           delimiter="\t",)

# Write the matching labels, one per line.
# Iterate over the indices in sorted order so that label line i always
# describes row i of s_embeds.txt, instead of relying on the insertion
# order of the idx_to_col dict.
s_idx_to_label = fcp.twoway_maps["SOURCE"]["idx_to_col"]
with open("s_labels.txt", "w") as f:
    for idx in sorted(s_idx_to_label):
        f.write(str(s_idx_to_label[idx]) + "\n")

In [7]:
# Write receiver vectors out for tensorflow projector:
# one tab-separated row per receiver index, in index order.
np.savetxt(fname="r_embeds.txt",
           X=r_embeds,
           fmt="%.8f",
           delimiter="\t",)

# Write the matching labels, one per line.
# Iterate over the indices in sorted order so that label line i always
# describes row i of r_embeds.txt, instead of relying on the insertion
# order of the idx_to_col dict.
r_idx_to_label = fcp.twoway_maps["RECEIVER"]["idx_to_col"]
with open("r_labels.txt", "w") as f:
    for idx in sorted(r_idx_to_label):
        f.write(str(r_idx_to_label[idx]) + "\n")

In [8]:
# Write one projector row per (source, receiver) pair that occurs in the data:
# the vector is the source embedding followed by the receiver embedding, and
# the label is "<source>-<receiver>". Both files are written in the same order.
s_names = fcp.twoway_maps["SOURCE"]["idx_to_col"]
r_names = fcp.twoway_maps["RECEIVER"]["idx_to_col"]

with open("sr_embeds.txt", "w") as embeds_file, open("sr_labels.txt", "w") as labels_file:
    for (s, r), pair_rows in fcp.df.groupby(["SOURCE", "RECEIVER"]):
        sr_embed = np.concatenate((s_embeds[s, :], r_embeds[r, :]))
        embeds_file.write("\t".join(map(str, sr_embed)) + "\n")
        labels_file.write("-".join((s_names[s], r_names[r])) + "\n")
        

# Predicate Path Analysis per SR pair

In [114]:
# Filter pred paths based on minimum occurrence in the data.
# A word is kept when its row count is strictly greater than `count_floor`.
count_floor = 0

# Row counts per word, ordered by word index; the resulting boolean mask is positional.
# NOTE(review): assumes word indices are contiguous from 0, so position i corresponds
# to row i of w_embeds - confirm against FullContextProcessor.
word_counts = fcp.df.groupby("WORD").size().sort_index()
valid_w_idxs = word_counts.values > count_floor

In [115]:
# Raw occurrence counts: one row per (SOURCE, RECEIVER) pair, one column per WORD,
# with 0 where a pair never appears with a word.
# Note: `sr_w_rankings` is reassigned to the ranked version of this table in the next cell.
sr_w_rankings = pd.pivot_table(
    fcp.df,
    index=["SOURCE", "RECEIVER"],
    columns="WORD",
    aggfunc="size",
    fill_value=0,
)

In [123]:
# Obtain pred path count rankings.
# Step 1: count how often each WORD occurs for every (SOURCE, RECEIVER) pair (0 if never).
sr_w_counts = pd.pivot_table(
    fcp.df,
    index=["SOURCE", "RECEIVER"],
    columns="WORD",
    aggfunc="size",
    fill_value=0,
)

# Step 2: rank the words within each pair (row-wise).
# The highest count gets the lowest numerical rank (rank 1).
# Ties share the LOWEST rank of their group (method="min"): e.g. if 3 words tie
# for the highest count they all get rank 1, and the next word gets rank 4.
# All words with a count of 0 therefore share one rank: (number of words seen for the pair) + 1.
sr_w_rankings = sr_w_counts.rank(
    axis=1,
    method="min",
    ascending=False,
    pct=False,
)

In [132]:
# For every (source, receiver) pair, print the `top_words` predicate paths whose
# word embedding has the largest dot product with the concatenated
# source+receiver embedding, next to the path's count-based rank for that pair.
# Lines are printed from the lowest to the highest score, labelled N..1.
top_words = 10

# Word indices allowed in the listing. The boolean mask is converted to integer
# positions so that argsort results on the filtered scores can be mapped back
# to real word indices; argsort on a masked array returns positions in the
# FILTERED array, which only match word indices when the mask is all True.
valid_word_positions = np.flatnonzero(valid_w_idxs)
w_names = fcp.twoway_maps["WORD"]["idx_to_col"]

for (s, r), sr_rows in fcp.df.groupby(["SOURCE", "RECEIVER"]):
    sr_embed = np.concatenate((s_embeds[s, :], r_embeds[r, :]))

    # Score of every word for this pair (one value per row of w_embeds)
    sr_word_prod = np.dot(sr_embed, w_embeds.T)

    # Valid word indices sorted by ascending score; keep only the best ones
    ascending_word_idxs = valid_word_positions[np.argsort(sr_word_prod[valid_word_positions])]
    best_word_idxs = ascending_word_idxs[-top_words:]

    # Count-based ranks of all words for this pair, looked up once per pair
    pair_ranks = sr_w_rankings.loc[(s, r)]

    print(fcp.twoway_maps["SOURCE"]["idx_to_col"][s], fcp.twoway_maps["RECEIVER"]["idx_to_col"][r], sr_rows.shape[0])
    for i, word_idx in enumerate(best_word_idxs):
        # Count down from the number of rows actually printed, so the last line is always 1
        print("{}\t {:.2f}\t{}".format(len(best_word_idxs) - i, pair_ranks.loc[word_idx], w_names[word_idx]))
    print("-"*80)

IGOUNO IRQ 4280
10	 1093.00	recommend in
9	 18.00	hold on
8	 1093.00	hold consultation on
7	 60.00	lift sanction <-partmod impose on
6	 554.00	say <-ccomp speak to president of
5	 344.00	name for
4	 35.00	urge <-xcomp act against
3	 124.00	fail <-xcomp reach <-prepc_on return to
2	 251.00	present on
1	 6.00	impose after invasion <-poss
--------------------------------------------------------------------------------
IGOUNO FRA 489
10	 232.00	say <-ccomp speak to president of
9	 232.00	indict
8	 232.00	agree
7	 232.00	vote to
6	 104.00	meet from
5	 232.00	elect
4	 232.00	hold consultation on
3	 232.00	appeal for
2	 32.00	say in
1	 232.00	leave Baghdad ahead_of
--------------------------------------------------------------------------------
IGOUNO USA 2332
10	 215.00	agree
9	 110.00	authorize
8	 31.00	adopt
7	 24.00	meet <-advcl shoot
6	 20.00	launch for
5	 34.00	grant
4	 79.00	vote to
3	 16.00	leave Baghdad ahead_of
2	 26.00	give
1	 3.00	appeal for
---------------------------------------

10	 169.00	monitor <-advmod long <-pobj zone between
9	 169.00	vote to
8	 3.00	call
7	 169.00	due <-xcomp sign in
6	 169.00	elect
5	 169.00	express for support <-poss
4	 169.00	recommend in
3	 169.00	hold consultation on
2	 169.00	say <-ccomp speak to president of
1	 169.00	extend to government of
--------------------------------------------------------------------------------
IGOUNO LBR 399
10	 206.00	approve for
9	 67.00	say in
8	 206.00	appoint in
7	 67.00	name for
6	 206.00	say <-ccomp speak to president of
5	 206.00	rule <-ccomp belong to
4	 206.00	administer
3	 206.00	lift against
2	 206.00	hold consultation on
1	 25.00	recommend in
--------------------------------------------------------------------------------
IGOUNO SDN 1056
10	 411.00	monitor <-advmod long <-pobj zone between
9	 161.00	name for
8	 8.00	pass <-xcomp give
7	 411.00	say <-ccomp speak to president of
6	 161.00	hold consultation on
5	 35.00	lift against
4	 55.00	recommend in
3	 9.00	adopt <-partmod give
2	 13.00	a

4	 57.00	regret
3	 13.00	grant
2	 11.00	donate
1	 25.00	partner after
--------------------------------------------------------------------------------
JPN IGOUNO 550
10	 270.00	oppose <-partmod argue <-ccomp disarm through
9	 270.00	tell at
8	 14.00	contributor to
7	 4.00	member of
6	 270.00	meet on sideline of
5	 270.00	call for reform of
4	 270.00	threaten <-xcomp refer to
3	 103.00	head
2	 11.00	agree <-xcomp take against Korea at
1	 103.00	say <-ccomp submit to
--------------------------------------------------------------------------------
JPN IND 254
10	 126.00	ban from
9	 6.00	dispatch to
8	 126.00	tell reporter following meeting with
7	 126.00	member of council along_with
6	 126.00	claim <-prepc_after win with
5	 126.00	report <-ccomp hit
4	 126.00	be at invitation of
3	 126.00	partner after
2	 2.00	refuel warship in
1	 126.00	due <-xcomp sign in
--------------------------------------------------------------------------------
JPN PRK 1208
10	 20.00	agree <-xcomp take against
9	

10	 154.00	say <-ccomp fire into
9	 66.00	carry from Gaza against
8	 30.00	kill with
7	 36.00	shoot by
6	 32.00	ambush
5	 16.00	clash with
4	 83.00	issue <-xcomp condemn
3	 52.00	fire <-pobj
2	 115.00	say <-ccomp fire at
1	 2.00	kill by
--------------------------------------------------------------------------------
PSE EGY 619
10	 260.00	clash with
9	 260.00	fire <-pobj
8	 17.00	give to effort <-poss
7	 26.00	say in
6	 12.00	talk to
5	 260.00	due <-xcomp sign in
4	 11.00	agree in
3	 4.00	use <-xcomp smuggle from
2	 2.00	arrive for talk with
1	 8.00	declare at summit in
--------------------------------------------------------------------------------
PSE IGOUNO 312
10	 141.00	oppose without
9	 141.00	say <-ccomp impose <-nsubjpass
8	 29.00	hold at
7	 141.00	implement
6	 141.00	call for reform of
5	 141.00	say <-ccomp accept
4	 141.00	fail <-xcomp comply with
3	 141.00	meet on sideline of
2	 141.00	tell at
1	 141.00	say <-ccomp submit to
--------------------------------------------------

10	 14.00	order to
9	 317.00	stand with people of
8	 317.00	base in
7	 51.00	urge leader <-poss
6	 317.00	tell reporter following meeting with
5	 317.00	begin <-xcomp train
4	 317.00	due <-xcomp sign in
3	 317.00	label with
2	 101.00	close in
1	 317.00	equip
--------------------------------------------------------------------------------
USA JOR 534
10	 262.00	arrive for talk with
9	 262.00	insist including
8	 9.00	hope <-ccomp attend state including
7	 262.00	identify country <-rcmod raise as
6	 21.00	arrive to
5	 262.00	stand with people of
4	 262.00	due <-xcomp sign in
3	 262.00	prepare with
2	 262.00	run from
1	 262.00	fly from
--------------------------------------------------------------------------------
USA PAK 2095
10	 769.00	tell reporter following meeting with
9	 29.00	arrest on border with
8	 33.00	arrive for talk with
7	 52.00	identify country <-rcmod raise as
6	 66.00	tread on ground with
5	 769.00	due <-xcomp sign in
4	 147.00	warn <-xcomp defer to
3	 52.00	arrive to
2	 

CHN FRA 589
10	 259.00	meet from
9	 259.00	host of talk <-rcmod involve
8	 22.00	one of member along_with States <-appos
7	 259.00	meet with delegation from
6	 47.00	overtake
5	 259.00	partner after
4	 259.00	host of forum <-rcmod involve
3	 100.00	willing <-xcomp work with
2	 100.00	side with
1	 8.00	leave to
--------------------------------------------------------------------------------
CHN USA 4677
10	 181.00	broker round of talk with
9	 64.00	overtake
8	 147.00	host of forum <-rcmod involve
7	 28.00	earn
6	 31.00	country after
5	 101.00	host of talk <-rcmod involve
4	 116.00	partner after
3	 48.00	talk with
2	 88.00	side with
1	 10.00	donate
--------------------------------------------------------------------------------
CHN GBR 521
10	 228.00	host of talk <-rcmod involve
9	 228.00	meet from
8	 228.00	country after
7	 228.00	say <-ccomp support <-tmod
6	 51.00	overtake
5	 228.00	meet with delegation from
4	 90.00	willing <-xcomp work with
3	 4.00	member along_with
2	 19.00	one of 

RUS IND 362
10	 21.00	supplier for
9	 191.00	due <-xcomp sign in
8	 21.00	have all <-rcmod appeal to
7	 191.00	one <-rcmod include
6	 191.00	anxious <-xcomp secure <-xcomp prevent
5	 191.00	prompt from
4	 191.00	drive into
3	 191.00	ban import of
2	 59.00	sign to
1	 1.00	supplier <-poss
--------------------------------------------------------------------------------
RUS PRK 535
10	 250.00	supplier <-poss
9	 20.00	be among nation <-rcmod hold with
8	 20.00	oppose enforcement for
7	 20.00	express over
6	 250.00	sign to
5	 30.00	call for talk between
4	 14.00	state willingness <-infmod provide to
3	 9.00	join <-prepc_in talk to
2	 20.00	try <-xcomp convene with
1	 20.00	engage
--------------------------------------------------------------------------------
RUS IGOEEC 683
10	 295.00	oppose campaign <-poss
9	 295.00	hold presidency <-poss
8	 26.00	reject including
7	 12.00	sponsor along_with
6	 7.00	withdraw with
5	 7.00	member of
4	 2.00	continue despite collapse of
3	 12.00	push despite c

10	 152.00	target of
9	 407.00	burn
8	 25.00	speak against
7	 407.00	fight with
6	 407.00	move away_from
5	 407.00	due <-xcomp sign in
4	 407.00	refute
3	 18.00	backer along_with
2	 407.00	say <-ccomp welcome
1	 1.00	reject
--------------------------------------------------------------------------------
IND GBR 736
10	 10.00	fight <-prepc_since gain <-iobj independence from
9	 6.00	fight <-advcl independent from
8	 10.00	fight <-prepc_after gain from
7	 10.00	backer along_with
6	 6.00	fight since
5	 4.00	fight <-prepc_since gain from
4	 5.00	be <-advcl gain from
3	 3.00	fight <-advcl gain from
2	 2.00	fight <-advcl win from
1	 1.00	fight from
--------------------------------------------------------------------------------
IND IGOUNO 315
10	 186.00	agree <-xcomp take against Korea at
9	 186.00	want <-ccomp refer to
8	 186.00	have veto on
7	 186.00	oppose <-partmod argue <-ccomp disarm through
6	 186.00	call for implementation of
5	 186.00	call for reform of
4	 28.00	oppose without
3	 18

10	 1.00	bomb
9	 8.00	drive
8	 23.00	administer <-advcl end by
7	 6.00	launch against
6	 15.00	run <-advcl bomb
5	 9.00	halt
4	 3.00	strike at
3	 2.00	end
2	 10.00	run <-advcl end
1	 15.00	run <-advcl force
--------------------------------------------------------------------------------
IGONAT AFG 806
10	 41.00	provide in
9	 19.00	lead in
8	 283.00	begin against
7	 19.00	increase in
6	 2.00	take in
5	 4.00	say <-ccomp kill in
4	 125.00	stay in
3	 19.00	have soldier in
2	 29.00	injure in
1	 14.00	take command of force in
--------------------------------------------------------------------------------
IGONAT GEO 216
10	 114.00	have peacekeeper in
9	 114.00	be <-xcomp call on
8	 12.00	refuse <-xcomp argue <-ccomp withdraw from
7	 114.00	end on
6	 114.00	take control of
5	 114.00	move into
4	 114.00	launch <-xcomp stop on
3	 114.00	due <-xcomp sign in
2	 114.00	bomb
1	 114.00	drive
--------------------------------------------------------------------------------
IGONAT ALB 346
10	 4.00	halt

4	 139.00	deploy in preparation for invasion of
3	 56.00	set zone over
2	 190.00	use zone over
1	 5.00	insist <-ccomp be in
--------------------------------------------------------------------------------
GBR FRA 503
10	 24.00	continue <-xcomp lobby from
9	 300.00	prepare with
8	 300.00	due <-xcomp sign in
7	 300.00	block by
6	 300.00	insist <-ccomp try in
5	 300.00	talk with
4	 300.00	partner after
3	 96.00	side with
2	 96.00	win
1	 5.00	be at odds with
--------------------------------------------------------------------------------
GBR USA 1705
10	 28.00	country <-infmod join
9	 17.00	insist <-ccomp try in
8	 149.00	extradite to
7	 13.00	stand with
6	 68.00	have national in
5	 6.00	agree to
4	 11.00	side with
3	 149.00	partner after
2	 718.00	be at odds with
1	 95.00	talk with
--------------------------------------------------------------------------------
GBR ISR 416
10	 203.00	threaten <-advcl recognize
9	 203.00	work partmod-> roadmap require from
8	 203.00	propose <-xcomp include

1	 147.00	say <-ccomp submit to
--------------------------------------------------------------------------------
EGY SDN 398
10	 175.00	report <-ccomp hit
9	 175.00	act as mediator between
8	 175.00	due <-xcomp sign in
7	 175.00	fight with
6	 45.00	recall to
5	 3.00	blame
4	 6.00	expel
3	 175.00	break with
2	 11.00	accuse government <-poss
1	 175.00	sever with
--------------------------------------------------------------------------------
IGOEEC IRQ 361
10	 213.00	have number of troops in
9	 213.00	blame in
8	 213.00	stay in
7	 213.00	join in
6	 213.00	use zone over
5	 213.00	arrive for talk on
4	 9.00	condemn attack in
3	 213.00	fail <-xcomp reach <-prepc_on return to
2	 213.00	present on
1	 213.00	agree <-xcomp send to
--------------------------------------------------------------------------------
IGOEEC FRA 454
10	 45.00	present Iran with package <-appos
9	 253.00	say <-ccomp admit
8	 10.00	appoint
7	 253.00	have ban in place for
6	 5.00	Luxembourg <-appos
5	 253.00	apply to
4	 20

10	 213.00	say <-ccomp consider by
9	 6.00	summon
8	 22.00	agree <-xcomp suspend in return for
7	 22.00	condemn <-ccomp draw by
6	 10.00	agree <-xcomp suspend during talk with
5	 15.00	deny
4	 15.00	suspend as gesture toward negotiation with
3	 2.00	seize
2	 3.00	agree with
1	 10.00	promise minister of
--------------------------------------------------------------------------------
IRN ISR 821
10	 58.00	condemn killing <-poss
9	 15.00	trade over
8	 237.00	country <-rcmod make with
7	 237.00	say <-ccomp fire at
6	 20.00	term
5	 6.00	refuse <-xcomp recognize
4	 237.00	accept with
3	 58.00	express over
2	 237.00	call for withdrawal of
1	 3.00	call for destruction <-poss
--------------------------------------------------------------------------------
IRN DEU 279
10	 139.00	term
9	 10.00	insist advcl-> engage <-nsubjpass
8	 10.00	reject by
7	 10.00	agree <-xcomp suspend in return for
6	 3.00	agree <-xcomp suspend during talk with
5	 10.00	condemn <-ccomp draw by
4	 6.00	suspend as gesture t

10	 122.00	province in
9	 122.00	administer <-advcl end by
8	 122.00	be <-advcl drive
7	 122.00	back <-nsubjpass plan nsubjpass-> oppose by
6	 122.00	be <-advcl halt
5	 122.00	be <-advcl help <-xcomp drive
4	 122.00	run <-advcl end
3	 122.00	proclaim from
2	 122.00	run <-advcl force
1	 122.00	declare from
--------------------------------------------------------------------------------
TWN USA 534
10	 276.00	burn
9	 90.00	reach
8	 90.00	target of
7	 276.00	study in
6	 90.00	agree <-xcomp pay
5	 20.00	take delivery of
4	 276.00	due <-xcomp sign in
3	 276.00	partner after
2	 90.00	extradite to
1	 90.00	lose
--------------------------------------------------------------------------------
TWN IGOUNO 261
10	 74.00	abstain from
9	 74.00	head
8	 74.00	oppose <-partmod argue <-ccomp disarm through
7	 74.00	meet on sideline of
6	 74.00	have veto on
5	 74.00	call for reform of
4	 74.00	oppose without
3	 74.00	part of
2	 74.00	say <-ccomp submit to
1	 1.00	lose
------------------------------------

10	 241.00	_ <-ccomp speak <-xcomp stabilize
9	 67.00	partner in war on
8	 67.00	say <-ccomp pull out_of
7	 113.00	suffer fatality in
6	 67.00	announce <-ccomp withdraw from
5	 8.00	represent interest in
4	 49.00	supporter in
3	 4.00	have soldier in
2	 241.00	present on
1	 29.00	stay in
--------------------------------------------------------------------------------
POL USA 332
10	 155.00	contribute
9	 155.00	due <-xcomp sign in
8	 155.00	agree <-xcomp pay
7	 155.00	donate
6	 155.00	regret
5	 31.00	send <-xcomp support
4	 57.00	join
3	 155.00	extradite to
2	 155.00	partner after
1	 1.00	represent
--------------------------------------------------------------------------------
POL IGOEEC 509
10	 213.00	take presidency of
9	 7.00	be <-xcomp join
8	 8.00	member of
7	 213.00	take presidency <-poss
6	 213.00	hold presidency of
5	 3.00	hope <-xcomp join
4	 3.00	due <-xcomp join
3	 213.00	hold presidency <-poss
2	 1.00	join
1	 213.00	represent
-------------------------------------------------

5	 12.00	apply for
4	 94.00	hold presidency of
3	 5.00	due <-xcomp join
2	 94.00	hold presidency <-poss
1	 2.00	hope <-xcomp join
--------------------------------------------------------------------------------
BGR IRQ 260
10	 125.00	have number of troops in
9	 125.00	blame in
8	 125.00	use zone over
7	 125.00	_ <-ccomp speak <-xcomp stabilize
6	 125.00	say <-ccomp pull out_of
5	 125.00	suffer fatality in
4	 125.00	announce <-ccomp withdraw from
3	 28.00	supporter in
2	 125.00	present on
1	 47.00	stay in
--------------------------------------------------------------------------------
BGR IGOEEC 323
10	 139.00	assume
9	 139.00	take presidency of
8	 7.00	negotiate <-xcomp join
7	 3.00	apply for
6	 139.00	take presidency <-poss
5	 139.00	hold presidency of
4	 1.00	join
3	 2.00	hope <-xcomp join
2	 10.00	due <-xcomp join
1	 139.00	hold presidency <-poss
--------------------------------------------------------------------------------
CYP IGOUNO 262
10	 123.00	invade ccomp-> recognize by
9	 