# Load Trained SR Model

In [1]:
# Import relevant modules
% matplotlib notebook

import sys
sys.path.append("../")

from Models.SGNS import SourceReceiverClassifier, SourceReceiverModel
from Preprocessing.FullContextProcessor import FullContextProcessor

import numpy as np
import pandas as pd
import torch
import pickle

In [3]:
# Read the tab-separated source data through the project's context processor
fcp = FullContextProcessor(
    data_fpath="../Data/OConnor2013/ocon-verb-noun-extracted.txt",
    sep="\t",
)

# Build all three two-way maps first, then convert the three columns to
# indices (same call order as three createTwoWayMap calls followed by three
# convertColToIdx calls).
for column in ("SOURCE", "RECEIVER", "WORD"):
    fcp.createTwoWayMap(column)
for column in ("SOURCE", "RECEIVER", "WORD"):
    fcp.convertColToIdx(column)

# Size each embedding table by the number of distinct values in its column;
# K=50 is passed through unchanged (presumably the embedding width - confirm
# against SourceReceiverModel).
model = SourceReceiverModel(
    s_cnt=len(fcp.df["SOURCE"].unique()),
    r_cnt=len(fcp.df["RECEIVER"].unique()),
    w_cnt=len(fcp.df["WORD"].unique()),
    K=50,
)

# Restore the saved weights, mapping all tensors onto the CPU
model.load_state_dict(torch.load("sr-best-20neg.pt", map_location="cpu"))

In [4]:
# Extract the source, receiver and word embedding weights as NumPy arrays
# (detached from the autograd graph first).
s_embeds, r_embeds, w_embeds = (
    table.weight.detach().numpy()
    for table in (model.s_embeds, model.r_embeds, model.w_embeds)
)

# Word and SR TensorFlow projection generation

In [6]:
# Export the word embeddings for the TensorFlow projector: one vector per
# line, tab-separated, eight decimal places.
np.savetxt("w_embeds.txt", w_embeds, fmt="%.8f", delimiter="\t")

# Companion label file: one word per line, in the iteration order of the
# index-to-label map.
word_labels = fcp.twoway_maps["WORD"]["idx_to_col"]
with open("w_labels.txt", "w") as labels_out:
    labels_out.writelines(str(label) + "\n" for label in word_labels.values())

In [11]:
# Export the source embeddings for the TensorFlow projector: one vector per
# line, tab-separated, eight decimal places.
np.savetxt("s_embeds.txt", s_embeds, fmt="%.8f", delimiter="\t")

# Companion label file: one source label per line, in the iteration order of
# the index-to-label map.
source_labels = fcp.twoway_maps["SOURCE"]["idx_to_col"]
with open("s_labels.txt", "w") as labels_out:
    labels_out.writelines(str(label) + "\n" for label in source_labels.values())

In [12]:
# Export the receiver embeddings for the TensorFlow projector: one vector per
# line, tab-separated, eight decimal places.
np.savetxt("r_embeds.txt", r_embeds, fmt="%.8f", delimiter="\t")

# Companion label file: one receiver label per line, in the iteration order
# of the index-to-label map.
receiver_labels = fcp.twoway_maps["RECEIVER"]["idx_to_col"]
with open("r_labels.txt", "w") as labels_out:
    labels_out.writelines(str(label) + "\n" for label in receiver_labels.values())

In [5]:
# Write one combined vector (source embedding + receiver embedding) and one
# "SOURCE-RECEIVER" label for every (source, receiver) pair present in the data.
source_names = fcp.twoway_maps["SOURCE"]["idx_to_col"]
receiver_names = fcp.twoway_maps["RECEIVER"]["idx_to_col"]

with open("sr_embeds.txt", "w") as embeds_file, open("sr_labels.txt", "w") as labels_file:
    for (src_idx, rcv_idx), pair_rows in fcp.df.groupby(["SOURCE", "RECEIVER"]):
        pair_vector = s_embeds[src_idx, :] + r_embeds[rcv_idx, :]
        # Values are written with plain str(), tab-separated
        embeds_file.write("\t".join(map(str, pair_vector)) + "\n")
        labels_file.write("-".join((source_names[src_idx], receiver_names[rcv_idx])) + "\n")
        

# Word analysis per SR pair

In [14]:
# Boolean array, ordered by ascending word index, marking words whose
# occurrence count is above zero (meant to skip vectors never touched).
# NOTE(review): value_counts() only lists values that occur in the column,
# so with a threshold of 0 this mask is presumably all True - confirm intent.
word_counts = fcp.df["WORD"].value_counts().sort_index()
valid_words = (word_counts > 0).values

In [19]:
# For every (source, receiver) pair in the data, print the pair's labels, its
# number of rows, and the five words whose embeddings have the largest dot
# product with the pair's combined (source + receiver) embedding.
source_names = fcp.twoway_maps["SOURCE"]["idx_to_col"]
receiver_names = fcp.twoway_maps["RECEIVER"]["idx_to_col"]
word_names = fcp.twoway_maps["WORD"]["idx_to_col"]

for (s, r), df in fcp.df.groupby(["SOURCE", "RECEIVER"]):
    print(source_names[s], receiver_names[r], df.shape[0])
    sr_embed = s_embeds[s, :] + r_embeds[r, :]
    sr_word_prod = np.dot(sr_embed, w_embeds.T)
    # argsort is ascending, so the last five indices are the top scorers.
    # Slice before the label lookup so only five words are translated per
    # pair instead of the whole vocabulary; the printed list is unchanged.
    top_word_idxs = np.argsort(sr_word_prod)[-5:]
    print([word_names[word_idx] for word_idx in top_word_idxs])

IGOUNO IRQ 5374
['requirement', 'Symposium', 'Dollars', 'Tour', 'Party']
IGOUNO FRA 630
['representative', 'envoy', 'Tour', 'persuade', 'schedule']
IGOUNO USA 2857
['Dollars', 'Press', 'appeal', 'Party', 'Tour']
IGOUNO GBR 475
['envoy', 'Party', 'Singapore', 'Tour', 'Baghdad']
IGOUNO ISR 2151
['persuade', 'renew', 'killing', 'Tour', 'Party']
IGOUNO PSE 1143
['situation', 'Press', 'Party', 'representative', 'envisage']
IGOUNO DEU 312
['representative', 'delegate', 'Press', 'pipe', 'schedule']
IGOUNO BIH 1530
['adherence', 'peacekeeper', 'Tour', 'Party', 'party']
IGOUNO IND 277
['mandate', 'slap', 'Symposium', 'Tour', 'appeal']
IGOUNO PRK 958
['refer', 'program', 'adopt', 'inspector', 'test']
IGOUNO CIV 480
['mission', 'party', 'Party', 'peacekeeper', 'embargo']
IGOUNO IGOEEC 350
['act', 'Party', 'Tour', 'appeal', 'meeting']
IGOUNO SYR 862
['concern', 'Party', 'determine', 'direct', 'implicate']
IGOUNO CHN 898
['Singapore', 'representative', 'invitation', 'Tour', 'conclude']
IGOUNO IGONA

['hopeful', 'last', 'Tour', 'Singapore', 'Party']
USA CUB 1293
['forward', 'lease', 'Press', 'isolate', 'Bay']
USA THA 389
['renew', 'cloud', 'minister', 'Party', 'Tour']
USA SDN 812
['pinpoint', 'Tour', 'Singapore', 'Sunday', 'Party']
USA MMR 857
['prisoner', 'Press', 'Party', 'ruler', 'Tour']
USA ITA 407
['last', 'Belize', 'law', 'hopeful', 'crash']
USA GEO 806
['jockey', 'pour', 'activity', 'repel', 'basis']
USA HRV 347
['Nations', 'Singapore', 'deploy', 'Party', 'authority']
USA SAU 1228
['Press', 'shut', 'Tour', 'Singapore', 'foreigner']
USA KWT 934
['subcontractor', 'Symposium', 'invasion', 'invade', 'crash']
USA LBY 844
['Tour', 'Press', 'sanction', 'pinpoint', 'Party']
USA COL 711
['Press', 'train', 'decertify', 'crash', 'Party']
USA NGA 324
['Party', 'Singapore', 'abduction', 'cloud', 'wound']
USA POL 616
['missile', 'install', 'base', 'plan', 'interceptor']
USA PHL 1180
['Belize', 'rape', 'Symposium', 'advise', 'train']
USA VEN 468
['Party', 'enable', 'buyer', 'Singapore', 'T

['Cooperation', 'Party', 'come', 'Tour', 'map']
THA MMR 452
['gunfire', 'conclude', 'prisoner', 'Tour', 'Association']
THA KHM 380
['conclude', 'Association', 'award', 'downgrade', 'rule']
TUR IRQ 3916
['PKK', 'Party', 'cross', 'border', 'mass']
TUR USA 1510
['converge', 'PKK', 'Party', 'Press', 'Tour']
TUR ISR 778
['Mediterranean', 'converge', 'friend', 'Tour', 'Party']
TUR IGOUNO 447
['Tour', 'implementation', 'comply', 'Party', 'Press']
TUR IGOEEC 1321
['realize', 'Party', 'Tour', 'Press', 'candidate']
TUR SYR 435
['converge', 'border', 'Party', 'Istanbul', 'host']
TUR IGONAT 307
['archipelago', 'Tour', 'invoke', 'Party', 'Press']
TUR IRN 539
['Symposium', 'border', 'Party', 'suspect', 'converge']
TUR GRC 1146
['last', 'Tour', 'blueprint', 'union', 'coup']
TUR CYP 903
['year', 'delimitation', 'accession', 'guarantor', 'island']
TUR X-TURCYP 704
['maintain', 'republic', 'declare', 'Republic', 'Party']
AUS IRQ 930
['gunfire', 'invasion', 'Singapore', 'Dollars', 'Party']
AUS USA 847
['