# Load Trained SR Model

In [1]:
# Import relevant modules
% matplotlib notebook

import sys
sys.path.append("../")

from Models.SGNS import SourceReceiverClassifier, SourceReceiverModel
from Preprocessing.FullContextProcessor import FullContextProcessor

import numpy as np
import pandas as pd
import torch
import pickle

In [2]:
# Load mappings and original data
fcp = FullContextProcessor(data_fpath="../Data/OConnor2013/ocon-verb-noun-extracted.txt", sep="\t")

# Create mappings
fcp.createTwoWayMap("SOURCE")
fcp.createTwoWayMap("RECEIVER")
fcp.createTwoWayMap("WORD")
fcp.convertColToIdx("SOURCE")
fcp.convertColToIdx("RECEIVER")
fcp.convertColToIdx("WORD")

# Load model
model = SourceReceiverModel(s_cnt=len(fcp.df["SOURCE"].unique()),
                            r_cnt=len(fcp.df["RECEIVER"].unique()),
                            w_cnt=len(fcp.df["WORD"].unique()),
                            K=50)

model.load_state_dict(torch.load("sr-best.pt"))

In [14]:
# Extract the emebddings into numpy arrays
s_embeds = model.s_embeds.weight.detach().numpy()
r_embeds = model.r_embeds.weight.detach().numpy()
w_embeds = model.w_embeds.weight.detach().numpy()

# Word and SR Tensorflow projection generation

In [74]:
# Write word vectors out for tensorflow projector
np.savetxt(fname="w_embeds.txt",
           X=w_embeds,
           fmt="%.8f",
           delimiter="\t",)

with open("w_labels.txt", "w") as f:
    for idx, w in fcp.twoway_maps["WORD"]["idx_to_col"].items():
        f.write(str(w) + "\n")

In [75]:
with open("sr_embeds.txt", "w") as embeds_file, open("sr_labels.txt", "w") as labels_file:
    for (s, r), df in fcp.df.groupby(["SOURCE", "RECEIVER"]):
        sr_embed = s_embeds[s, :] + r_embeds[r, :]
        embeds_file.write("\t".join([str(sr_val) for sr_val in sr_embed]) + "\n")
        labels_file.write(fcp.twoway_maps["SOURCE"]["idx_to_col"][s] + "-" + fcp.twoway_maps["RECEIVER"]["idx_to_col"][r] + "\n")    
        

# Word analysis per SR pair

In [121]:
fcp.df["WORD"].value_counts()[fcp.twoway_maps["WORD"]["col_to_idx"]["eye"]]

7

In [124]:
# Words at least certain count avoiding vectors that haven't been touched
valid_words = (fcp.df["WORD"].value_counts().sort_index() > 1000).values

In [125]:
for (s, r), df in fcp.df.groupby(["SOURCE", "RECEIVER"]):
    print(fcp.twoway_maps["SOURCE"]["idx_to_col"][s], fcp.twoway_maps["RECEIVER"]["idx_to_col"][r], df.shape[0])
    sr_word_prod = np.dot(sr_embed, w_embeds.T)
    print([fcp.twoway_maps["WORD"]["idx_to_col"][word_idx] for word_idx in np.argsort(sr_word_prod) if valid_words[word_idx]][-5:])

IGOUNO IRQ 5374
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO FRA 630
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO USA 2857
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO GBR 475
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO ISR 2151
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO PSE 1143
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO DEU 312
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO BIH 1530
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO IND 277
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO PRK 958
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO CIV 480
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO IGOEEC 350
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO SYR 862
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO CHN 898
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO IGONAT 516
['maintain', 'ally', 'have', 'deploy', 'troops']
IGOUNO RUS 559
['maintain', '

KOR CHN 508
['maintain', 'ally', 'have', 'deploy', 'troops']
KOR JPN 710
['maintain', 'ally', 'have', 'deploy', 'troops']
IND USA 976
['maintain', 'ally', 'have', 'deploy', 'troops']
IND GBR 1042
['maintain', 'ally', 'have', 'deploy', 'troops']
IND IGOUNO 405
['maintain', 'ally', 'have', 'deploy', 'troops']
IND CHN 444
['maintain', 'ally', 'have', 'deploy', 'troops']
IND RUS 356
['maintain', 'ally', 'have', 'deploy', 'troops']
IND PAK 5415
['maintain', 'ally', 'have', 'deploy', 'troops']
GRC IGOUNO 512
['maintain', 'ally', 'have', 'deploy', 'troops']
GRC IGOEEC 953
['maintain', 'ally', 'have', 'deploy', 'troops']
GRC MKD 304
['maintain', 'ally', 'have', 'deploy', 'troops']
GRC TUR 973
['maintain', 'ally', 'have', 'deploy', 'troops']
GRC CYP 679
['maintain', 'ally', 'have', 'deploy', 'troops']
ISR IRQ 302
['maintain', 'ally', 'have', 'deploy', 'troops']
ISR USA 2326
['maintain', 'ally', 'have', 'deploy', 'troops']
ISR EGY 1486
['maintain', 'ally', 'have', 'deploy', 'troops']
ISR PSE 295

['maintain', 'ally', 'have', 'deploy', 'troops']
HRV IGOUNO 366
['maintain', 'ally', 'have', 'deploy', 'troops']
HRV SRB 227
['maintain', 'ally', 'have', 'deploy', 'troops']
TWN USA 592
['maintain', 'ally', 'have', 'deploy', 'troops']
TWN IGOUNO 283
['maintain', 'ally', 'have', 'deploy', 'troops']
TWN CHN 1759
['maintain', 'ally', 'have', 'deploy', 'troops']
CUB USA 936
['maintain', 'ally', 'have', 'deploy', 'troops']
UKR IGONAT 294
['maintain', 'ally', 'have', 'deploy', 'troops']
UKR RUS 706
['maintain', 'ally', 'have', 'deploy', 'troops']
TMP IDN 258
['maintain', 'ally', 'have', 'deploy', 'troops']
CHNTIB CHN 287
['maintain', 'ally', 'have', 'deploy', 'troops']
LBY USA 444
['maintain', 'ally', 'have', 'deploy', 'troops']
LBY IGOUNO 380
['maintain', 'ally', 'have', 'deploy', 'troops']
ESP IRQ 764
['maintain', 'ally', 'have', 'deploy', 'troops']
ESP USA 538
['maintain', 'ally', 'have', 'deploy', 'troops']
ESP IGOUNO 317
['maintain', 'ally', 'have', 'deploy', 'troops']
ESP IGOEEC 416
['

1

In [None]:
# Need to remove words that appeared < threshold