In [2]:
# Import relevant modules
% matplotlib notebook

import sys
sys.path.append("../")

from Models.SGNS import SourceReceiverClassifier, SourceReceiverModel
from Preprocessing.FullContextProcessor import FullContextProcessor

import numpy as np
import pandas as pd
import torch
import pickle

# Std CV Results

In [12]:
# Read the tab-separated cross-validation results table.
cv_results = pd.read_csv("sr-cv-std-results.txt", sep="\t")

# Show each parameter setting next to its mean test log-loss score,
# ordered from the largest score to the smallest.
score_col = "mean_test_Log-Loss"
cv_summary = cv_results.loc[:, ["params", score_col]]
cv_summary.sort_values(by=score_col, ascending=False)

Unnamed: 0,params,mean_test_Log-Loss
90,"{'r_std': 0.0001, 's_std': 0.0001, 'w_std': 0.1}",-1.612109
110,"{'r_std': 1e-05, 's_std': 0.001, 'w_std': 0.1}",-1.612670
20,"{'r_std': 0.1, 's_std': 1e-05, 'w_std': 0.1}",-1.612795
4,"{'r_std': 0.1, 's_std': 0.1, 'w_std': 1e-05}",-1.612898
25,"{'r_std': 0.01, 's_std': 0.1, 'w_std': 0.1}",-1.612898
2,"{'r_std': 0.1, 's_std': 0.1, 'w_std': 0.001}",-1.612919
70,"{'r_std': 0.001, 's_std': 1e-05, 'w_std': 0.1}",-1.613127
55,"{'r_std': 0.001, 's_std': 0.01, 'w_std': 0.1}",-1.613272
80,"{'r_std': 0.0001, 's_std': 0.01, 'w_std': 0.1}",-1.614103
76,"{'r_std': 0.0001, 's_std': 0.1, 'w_std': 0.01}",-1.614394


# Load Trained SR Model

In [2]:
# Read the extracted verb-noun data (tab-separated) into the context processor.
fcp = FullContextProcessor(
    data_fpath="../Data/OConnor2013/ocon-verb-noun-extracted.txt",
    sep="\t",
)

# First build the two-way map for every column, then convert every column to
# indices, keeping the SOURCE -> RECEIVER -> WORD order in both passes.
mapped_cols = ("SOURCE", "RECEIVER", "WORD")
for col_name in mapped_cols:
    fcp.createTwoWayMap(col_name)
for col_name in mapped_cols:
    fcp.convertColToIdx(col_name)

In [3]:
# Build the model with the number of distinct sources, receivers and words
# found in the processed data.
n_sources = len(fcp.df["SOURCE"].unique())
n_receivers = len(fcp.df["RECEIVER"].unique())
n_words = len(fcp.df["WORD"].unique())

model = SourceReceiverModel(s_cnt=n_sources, r_cnt=n_receivers, w_cnt=n_words, K=50)

# Restore the saved parameters, mapping all stored tensors onto the CPU.
saved_state = torch.load("sr-best-20neg-srstd0.1-wstd0.001.pt", map_location="cpu")
model.load_state_dict(saved_state)

In [4]:
# Pull the source, receiver and word embedding weights out of the model as
# numpy arrays, detached from the autograd graph.
s_embeds, r_embeds, w_embeds = (
    layer.weight.detach().numpy()
    for layer in (model.s_embeds, model.r_embeds, model.w_embeds)
)

# Word and SR TensorFlow Projector file generation

In [5]:
# Dump the word embeddings as tab-separated rows for the TensorFlow projector.
np.savetxt("w_embeds.txt", w_embeds, fmt="%.8f", delimiter="\t")

# Write one word label per line, following the iteration order of the
# index -> word map.
word_lookup = fcp.twoway_maps["WORD"]["idx_to_col"]
with open("w_labels.txt", "w") as labels_out:
    labels_out.writelines(str(word) + "\n" for _idx, word in word_lookup.items())

In [6]:
# Dump the source embeddings as tab-separated rows for the TensorFlow projector.
np.savetxt("s_embeds.txt", s_embeds, fmt="%.8f", delimiter="\t")

# Write one source label per line, following the iteration order of the
# index -> source map.
source_lookup = fcp.twoway_maps["SOURCE"]["idx_to_col"]
with open("s_labels.txt", "w") as labels_out:
    labels_out.writelines(str(source) + "\n" for _idx, source in source_lookup.items())

In [7]:
# Dump the receiver embeddings as tab-separated rows for the TensorFlow projector.
np.savetxt("r_embeds.txt", r_embeds, fmt="%.8f", delimiter="\t")

# Write one receiver label per line, following the iteration order of the
# index -> receiver map.
receiver_lookup = fcp.twoway_maps["RECEIVER"]["idx_to_col"]
with open("r_labels.txt", "w") as labels_out:
    labels_out.writelines(str(receiver) + "\n" for _idx, receiver in receiver_lookup.items())

In [8]:
# For every (source, receiver) pair present in the data, write the sum of the
# two embeddings as a tab-separated row and a matching "SOURCE-RECEIVER" label.
with open("sr_embeds.txt", "w") as embeds_file, open("sr_labels.txt", "w") as labels_file:
    for (src_idx, rcv_idx), _pair_rows in fcp.df.groupby(["SOURCE", "RECEIVER"]):
        pair_vec = s_embeds[src_idx, :] + r_embeds[rcv_idx, :]
        embeds_file.write("\t".join(map(str, pair_vec)) + "\n")

        src_name = fcp.twoway_maps["SOURCE"]["idx_to_col"][src_idx]
        rcv_name = fcp.twoway_maps["RECEIVER"]["idx_to_col"][rcv_idx]
        labels_file.write("-".join([src_name, rcv_name]) + "\n")
        

# Word analysis per SR pair

In [9]:
# Boolean mask, ordered by word index, that is True where the word occurs more
# than zero times in the data; intended to skip embedding vectors that were
# never touched.
word_counts = fcp.df["WORD"].value_counts().sort_index()
valid_words = (word_counts > 0).values

In [11]:
# For every (source, receiver) pair present in the data, print the pair's
# names and row count, followed by the words whose embeddings have the largest
# dot product with the summed source + receiver embedding (best match last).
TOP_N_WORDS = 10

# The index -> name maps do not change between pairs, so look them up once.
source_names = fcp.twoway_maps["SOURCE"]["idx_to_col"]
receiver_names = fcp.twoway_maps["RECEIVER"]["idx_to_col"]
word_names = fcp.twoway_maps["WORD"]["idx_to_col"]

for (s, r), df in fcp.df.groupby(["SOURCE", "RECEIVER"]):
    print(source_names[s], receiver_names[r], df.shape[0])
    sr_embed = s_embeds[s, :] + r_embeds[r, :]
    sr_word_prod = np.dot(sr_embed, w_embeds.T)
    # Slice the ascending ranking first so only the best indices are turned
    # into labels, instead of labelling the whole vocabulary and discarding
    # all but the last few entries.
    top_word_idxs = np.argsort(sr_word_prod)[-TOP_N_WORDS:]
    print([word_names[word_idx] for word_idx in top_word_idxs])

IGOUNO IRQ 5374
['disarm', 'Dollars', 'mandate', 'bombing', 'Singapore', 'last', 'Tour', 'mission', 'Party', 'impose']
IGOUNO FRA 630
['interview', 'resignation', 'Morocco', 'representative', 'last', 'Singapore', 'Press', 'Dollars', 'Tour', 'Party']
IGOUNO USA 2857
['Singapore', 'Dollars', 'last', 'need', 'Press', 'authorize', 'renew', 'appeal', 'Tour', 'Party']
IGOUNO GBR 475
['Iraq', 'Baghdad', 'resignation', 'Morocco', 'last', 'Press', 'Singapore', 'Dollars', 'Tour', 'Party']
IGOUNO ISR 2151
['persuade', 'Singapore', 'Dollars', 'pass', 'last', 'deplore', 'Tour', 'renew', 'Party', 'bombing']
IGOUNO PSE 1143
['last', 'debate', 'deplore', 'impose', 'roadmap', 'Tour', 'Party', 'representative', 'envisage', 'situation']
IGOUNO DEU 312
['resignation', 'Morocco', 'persuade', 'last', 'Dollars', 'Press', 'Singapore', 'representative', 'Tour', 'Party']
IGOUNO BIH 1530
['forward', 'Symposium', 'resignation', 'Morocco', 'Press', 'Dollars', 'Singapore', 'last', 'Tour', 'Party']
IGOUNO IND 277
['

['resignation', 'Morocco', 'Symposium', 'forward', 'Press', 'last', 'Dollars', 'Singapore', 'Tour', 'Party']
USA KSV 667
['Press', 'statehood', 'Morocco', 'forward', 'last', 'Dollars', 'Singapore', 'Tour', 'Party', 'independence']
CHN IRQ 700
['resignation', 'Morocco', 'Symposium', 'forward', 'Press', 'last', 'Dollars', 'Singapore', 'Tour', 'Party']
CHN FRA 747
['Morocco', 'delegation', 'attach', 'Singapore', 'Press', 'Dollars', 'Tour', 'side', 'Party', 'States']
CHN USA 5277
['earn', 'Singapore', 'Dollars', 'Press', 'steal', 'Tour', 'donate', 'side', 'market', 'Party']
CHN GBR 639
['shoreline', 'resignation', 'Morocco', 'side', 'Press', 'Singapore', 'Dollars', 'Tour', 'Party', 'States']
CHN ISR 427
['Morocco', 'shoreline', 'last', 'forward', 'broker', 'Press', 'Singapore', 'Dollars', 'Tour', 'Party']
CHN DEU 538
['conclude', 'attach', 'Press', 'Dollars', 'Singapore', 'delegation', 'side', 'Tour', 'States', 'Party']
CHN IGOUNO 2055
['attend', 'introduce', 'Dollars', 'abstain', 'submit'

['respond', 'notify', 'implement', 'name', 'Party', 'kick', 'threaten', 'submit', 'admit', 'expel']
PRK CHN 578
['admit', 'States', 'Tour', 'test-fired', 'agency', 'normalize', 'Party', 'return', 'kick', 'boycott']
PRK JPN 1566
['Party', 'defect', 'missile', 'splash', 'normalize', 'Sea', 'boycott', 'kidnap', 'test-fired', 'admit']
PRK KOR 1510
['expel', 'defect', 'normalize', 'Party', 'name', 'test-fired', 'boycott', 'agency', 'stand', 'admit']
EGY IRQ 623
['train', 'Symposium', 'forward', 'play', 'last', 'Dollars', 'role', 'Singapore', 'Tour', 'Party']
EGY USA 837
['shoreline', 'Press', 'Singapore', 'Dollars', 'receive', 'Tour', 'treaty', 'country', 'nation', 'Party']
EGY ISR 3641
['nation', 'mediator', 'sign', 'recall', 'Party', 'treaty', 'state', 'mediate', 'broker', 'country']
EGY PSE 1640
['last', 'tour', 'Dollars', 'Singapore', 'invite', 'Tour', 'broker', 'Party', 'situation', 'mediate']
EGY IGOUNO 345
['Morocco', 'last', 'Singapore', 'implementation', 'limit', 'Dollars', 'role',

['Press', 'Symposium', 'forward', 'last', 'Dollars', 'Singapore', 'Tour', 'War', 'Party', 'liberate']
KWT USA 564
['side', 'shoreline', 'Dollars', 'Press', 'Singapore', 'nation', 'Tour', 'buy', 'Party', 'treaty']
CAN USA 504
['shoreline', 'forward', 'resignation', 'Morocco', 'last', 'Press', 'Dollars', 'Singapore', 'Tour', 'Party']
CAN AFG 518
['Dollars', 'Singapore', 'suffer', 'Tour', 'troops', 'Party', 'deploy', 'command', 'serve', 'soldier']
COL USA 499
['forward', 'Morocco', 'shoreline', 'last', 'Dollars', 'Press', 'Singapore', 'Tour', 'extradite', 'Party']
AFG USA 618
['forward', 'shoreline', 'resignation', 'last', 'Dollars', 'need', 'Press', 'Singapore', 'Tour', 'Party']
AFG PAK 1018
['shelter', 'hide', 'last', 'Press', 'Singapore', 'Tour', 'area', 'Party', 'haven', 'agency']
RWA COD 412
['withdraw', 'forward', 'Press', 'last', 'Dollars', 'Singapore', 'Tour', 'Party', 'cross', 'flee']
RWA COG 559
['Dollars', 'Singapore', 'intervene', 'pour', 'Tour', 'cross', 'withdraw', 'supporte