In [2]:
# Import relevant modules
%matplotlib notebook

import sys
sys.path.append("../")

from Models.SGNS import SourceReceiverClassifier, SourceReceiverModel
from Preprocessing.FullContextProcessor import FullContextProcessor

import numpy as np
import pandas as pd
import torch
import pickle

# Std CV Results

In [2]:
# Read the tab-separated cross-validation results and list each parameter
# setting next to its mean test log-loss, largest value first.
# NOTE(review): the displayed scores are negative, so this is presumably a
# negated log-loss where larger is better -- confirm against the CV scorer.
cv_results = pd.read_csv("sr-cv-std-results.txt", sep="\t")
ranked_cols = ["params", "mean_test_Log-Loss"]
cv_results[ranked_cols].sort_values("mean_test_Log-Loss", ascending=False)

Unnamed: 0,params,mean_test_Log-Loss
90,"{'r_std': 0.0001, 's_std': 0.0001, 'w_std': 0.1}",-1.612109
110,"{'r_std': 1e-05, 's_std': 0.001, 'w_std': 0.1}",-1.612670
20,"{'r_std': 0.1, 's_std': 1e-05, 'w_std': 0.1}",-1.612795
4,"{'r_std': 0.1, 's_std': 0.1, 'w_std': 1e-05}",-1.612898
25,"{'r_std': 0.01, 's_std': 0.1, 'w_std': 0.1}",-1.612898
2,"{'r_std': 0.1, 's_std': 0.1, 'w_std': 0.001}",-1.612919
70,"{'r_std': 0.001, 's_std': 1e-05, 'w_std': 0.1}",-1.613127
55,"{'r_std': 0.001, 's_std': 0.01, 'w_std': 0.1}",-1.613272
80,"{'r_std': 0.0001, 's_std': 0.01, 'w_std': 0.1}",-1.614103
76,"{'r_std': 0.0001, 's_std': 0.1, 'w_std': 0.01}",-1.614394


# Load Trained SR Model

In [3]:
# Load the original tab-separated data through the project's FullContextProcessor.
fcp = FullContextProcessor(
    data_fpath="../Data/OConnor2013/ocon-verb-noun-extracted.txt",
    sep="\t",
)

# Build the two-way maps for all three columns first, then convert the columns
# to indices, visiting the columns in the same order both times.
# NOTE(review): convertColToIdx presumably rewrites fcp.df[column] in place with
# integer indices (later cells index the embedding matrices with those values) --
# confirm against FullContextProcessor.
id_columns = ("SOURCE", "RECEIVER", "WORD")
for column in id_columns:
    fcp.createTwoWayMap(column)
for column in id_columns:
    fcp.convertColToIdx(column)

In [5]:
# Rebuild the model, sized by the number of distinct SOURCE, RECEIVER and WORD
# values in the processed data, then restore the trained weights on the CPU.
# NOTE(review): K=50 is presumably the embedding dimension -- confirm against
# SourceReceiverModel and the training run that produced the checkpoint.
entity_counts = {
    "s_cnt": len(fcp.df["SOURCE"].unique()),
    "r_cnt": len(fcp.df["RECEIVER"].unique()),
    "w_cnt": len(fcp.df["WORD"].unique()),
}
model = SourceReceiverModel(K=50, **entity_counts)

trained_state = torch.load("sr-best-20neg-wstd0.001.pt", map_location="cpu")
model.load_state_dict(trained_state)

In [7]:
# Pull the source, receiver and word embedding weights out of the model as
# numpy arrays (detached from the autograd graph).
s_embeds, r_embeds, w_embeds = (
    layer.weight.detach().numpy()
    for layer in (model.s_embeds, model.r_embeds, model.w_embeds)
)

# Word and SR file generation for the TensorFlow Embedding Projector

In [17]:
# Write the word vectors and their labels out for the TensorFlow projector.
# Line i of w_labels.txt has to describe row i of w_embeds.txt.
np.savetxt(fname="w_embeds.txt",
           X=w_embeds,
           fmt="%.8f",
           delimiter="\t")

word_idx_to_label = fcp.twoway_maps["WORD"]["idx_to_col"]
# Emit labels in ascending index order instead of relying on the mapping's
# iteration order, so labels cannot drift out of step with the matrix rows.
# UTF-8 is set explicitly so non-ASCII words do not depend on the locale.
with open("w_labels.txt", "w", encoding="utf-8") as f:
    for _idx, w in sorted(word_idx_to_label.items(), key=lambda item: item[0]):
        f.write(str(w) + "\n")

In [18]:
# Write the source vectors and their labels out for the TensorFlow projector.
# Line i of s_labels.txt has to describe row i of s_embeds.txt.
np.savetxt(fname="s_embeds.txt",
           X=s_embeds,
           fmt="%.8f",
           delimiter="\t")

source_idx_to_label = fcp.twoway_maps["SOURCE"]["idx_to_col"]
# Emit labels in ascending index order instead of relying on the mapping's
# iteration order, so labels cannot drift out of step with the matrix rows.
# UTF-8 is set explicitly so non-ASCII labels do not depend on the locale.
with open("s_labels.txt", "w", encoding="utf-8") as f:
    for _idx, s in sorted(source_idx_to_label.items(), key=lambda item: item[0]):
        f.write(str(s) + "\n")

In [19]:
# Write the receiver vectors and their labels out for the TensorFlow projector.
# Line i of r_labels.txt has to describe row i of r_embeds.txt.
np.savetxt(fname="r_embeds.txt",
           X=r_embeds,
           fmt="%.8f",
           delimiter="\t")

receiver_idx_to_label = fcp.twoway_maps["RECEIVER"]["idx_to_col"]
# Emit labels in ascending index order instead of relying on the mapping's
# iteration order, so labels cannot drift out of step with the matrix rows.
# UTF-8 is set explicitly so non-ASCII labels do not depend on the locale.
with open("r_labels.txt", "w", encoding="utf-8") as f:
    for _idx, r in sorted(receiver_idx_to_label.items(), key=lambda item: item[0]):
        f.write(str(r) + "\n")

In [20]:
# Write one combined embedding (source vector + receiver vector) for every
# (SOURCE, RECEIVER) pair that occurs in the data, together with a matching
# "<source>-<receiver>" label line, for the TensorFlow projector.
source_idx_to_label = fcp.twoway_maps["SOURCE"]["idx_to_col"]
receiver_idx_to_label = fcp.twoway_maps["RECEIVER"]["idx_to_col"]

with open("sr_embeds.txt", "w") as embeds_file, \
        open("sr_labels.txt", "w", encoding="utf-8") as labels_file:
    # Only the group keys are needed here; the per-pair sub-frame is unused.
    for (s, r), _pair_rows in fcp.df.groupby(["SOURCE", "RECEIVER"]):
        sr_embed = s_embeds[s, :] + r_embeds[r, :]
        embeds_file.write("\t".join(str(sr_val) for sr_val in sr_embed) + "\n")
        # format() coerces the labels to text, so a non-string label cannot
        # raise TypeError the way "+" concatenation would.
        labels_file.write("{}-{}\n".format(source_idx_to_label[s],
                                           receiver_idx_to_label[r]))
        

# Word analysis per SR pair

In [8]:
# For every (SOURCE, RECEIVER) pair in the data, print the pair, its number of
# rows, and the TOP_K words whose embeddings have the largest dot product with
# the pair embedding (source vector + receiver vector).
TOP_K = 5
source_idx_to_label = fcp.twoway_maps["SOURCE"]["idx_to_col"]
receiver_idx_to_label = fcp.twoway_maps["RECEIVER"]["idx_to_col"]
word_idx_to_label = fcp.twoway_maps["WORD"]["idx_to_col"]

for (s, r), df in fcp.df.groupby(["SOURCE", "RECEIVER"]):
    print(source_idx_to_label[s], receiver_idx_to_label[r], df.shape[0])
    sr_embed = s_embeds[s, :] + r_embeds[r, :]
    sr_word_prod = np.dot(sr_embed, w_embeds.T)
    # argsort is ascending, so the last TOP_K indices are the highest-scoring
    # words (best match printed last). Slice the indices before looking up
    # labels so the whole vocabulary is not mapped to strings for every pair.
    top_word_idxs = np.argsort(sr_word_prod)[-TOP_K:]
    print([word_idx_to_label[word_idx] for word_idx in top_word_idxs])

IGOUNO IRQ 5374
['last', 'personnel', 'mission', 'Tour', 'Party']
IGOUNO FRA 630
['Russia', 'indict', 'approve', 'schedule', 'comprise']
IGOUNO USA 2857
['need', 'Tour', 'Baghdad', 'Party', 'remind']
IGOUNO GBR 475
['request', 'Party', 'indict', 'Iraq', 'Baghdad']
IGOUNO ISR 2151
['Party', 'killing', 'resolution', 'bombing', 'deplore']
IGOUNO PSE 1143
['roadmap', 'representative', 'Israel', 'deplore', 'envisage']
IGOUNO DEU 312
['include', 'Party', 'interview', 'delegate', 'pipe']
IGOUNO BIH 1530
['Tour', 'Party', 'indict', 'mission', 'peacekeep']
IGOUNO IND 277
['Baghdad', 'deplore', 'approve', 'Trust', 'comprise']
IGOUNO PRK 958
['adopt', 'willingness', 'Republic', 'Korea', 'test']
IGOUNO CIV 480
['authorize', 'mission', 'consultation', 'extend', 'embargo']
IGOUNO IGOEEC 350
['assume', 'comprise', 'act', 'presidency', 'approve']
IGOUNO SYR 862
['Tour', 'Party', 'direct', 'belong', 'implicate']
IGOUNO CHN 898
['Party', 'praise', 'comprise', 'invitation', 'overcome']
IGOUNO IGONAT 516


['supply', 'Party', 'Trust', 'supplier', 'sell']
RUS PRK 725
['arsenal', 'test', 'reactor', 'supply', 'willingness']
RUS IGOEEC 847
['quartet', 'co-sponsor', 'assume', 'presidency', 'collapse']
RUS CHN 1178
['conclude', 'Tour', 'Party', 'invitation', 'sell']
RUS UKR 746
['Tour', 'Party', 'restore', 'cut', 'States']
RUS IGONAT 1645
['oppose', 'Party', 'Partnership', 'airstrike', 'expansion']
RUS IRN 2023
['sell', 'collapse', 'entice', 'build', 'plant']
RUS SRB 428
['bomb', 'support', 'oppose', 'position', 'drive']
RUS AFG 350
['build', 'sell', 'Singapore', 'Tour', 'Party']
RUS JPN 672
['Party', 'past', 'sink', 'Sea', 'seize']
RUS TJK 472
['Party', 'interest', 'soldier', 'station', 'troops']
RUS GEO 3192
['pour', 'jockey', 'route', 'inherit', 'repel']
RUS POL 394
['install', 'base', 'defens', 'wedge', 'interceptor']
RUS BLR 254
['Cooperation', 'Tour', 'start', 'Party', 'route']
RUS KSV 622
['control', 'oppose', 'statehood', 'independence', 'declaration']
PAK IRQ 307
['war', 'Tour', 'figh

['contingent', 'troops', 'supporter', 'serve', 'soldier']
ITA USA 447
['need', 'Tour', 'Party', 'serve', 'supporter']
ITA IGOUNO 208
['implementation', 'serve', 'contingent', 'contribute', 'supporter']
ITA IGOEEC 521
['Party', 'reflect', 'supporter', 'assume', 'presidency']
JOR IRQ 981
['Tour', 'dispatch', 'kidnap', 'Party', 'abducted']
JOR USA 518
['treaty', 'side', 'Party', 'receive', 'nation']
JOR ISR 1864
['broker', 'treaty', 'country', 'nation', 'state']
JOR PSE 598
['freeze', 'broker', 'Party', 'kidnap', 'abducted']
SRB BIH 366
['Tour', 'territory', 'republic', 'Party', 'commit']
SRB IGOUNO 470
['Singapore', 'requirement', 'Dollars', 'Tour', 'Party']
SRB HRV 197
['Singapore', 'Party', 'missile', 'break', 'commit']
SRB KSV 853
['recognize', 'control', 'statehood', 'independence', 'declaration']
MMR THA 349
['comprise', 'appreciate', 'ASEAN', 'close', 'envoy']
MEX USA 730
['buy', 'Tour', 'extradite', 'Party', 'sue']
POL IRQ 1062
['interest', 'force', 'soldier', 'represent', 'comman