In [62]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tabulate import tabulate
from sklearn.decomposition import PCA
from gensim.models.doc2vec import Doc2Vec
import os
import re

import sys
sys.path.append('utils')
import labels
import plots 
import interpret
import accuracy
from accuracy import pairwise_accuracy
from interpret import Interpret

#import utils.labels as labels
#import utils.plots as plots
#from utils.interpret import Interpret
#from utils.accuracy import pairwise_accuracy

## Figure 3 - Party Placement in Canada

In [63]:
# Load the trained Doc2Vec model for the Canadian corpus.
canmodel = Doc2Vec.load('model/updated_model')

# Party label mapping and per-tag metadata come from the project's `labels`
# helper module (its internals are not visible from this notebook).
can_dict = labels.party_labels('Canada')
cannames, canparties, cancols, canmkers = labels.party_tags(canmodel, 'Canada')

# Reverse lookup of can_dict: mapped value -> original key.
can_dict_inverted = dict((value, key) for key, value in can_dict.items())

# Translate each tag through the reverse lookup, keeping only tags that are
# actually present in it.
# NOTE(review): because of this filter, canlabs can be shorter than canparties;
# confirm downstream consumers do not assume the two are aligned.
canlabs = [can_dict_inverted[tag] for tag in canparties if tag in can_dict_inverted]

# Problem dimensions: number of tags and embedding size.
Pcan = len(canparties)
Mcan = canmodel.vector_size


In [64]:
# Build a (Pcan x Mcan) matrix holding one document vector per tag in canparties.
# gensim >= 4 exposes document vectors as `.dv`; `.docvecs` is the older name
# (presumably the source of the warning this cell used to emit), so fall back
# to it only when `.dv` is not available.
doc_vectors = canmodel.dv if hasattr(canmodel, 'dv') else canmodel.docvecs

zcan = np.zeros((Pcan, Mcan))
for i, tag in enumerate(canparties):
    zcan[i, :] = doc_vectors[tag]

# Project onto the first two principal components. random_state is fixed so
# the projection is reproducible if scikit-learn selects a randomized solver;
# it has no effect with the exact solvers.
pca_can = PCA(n_components=2, random_state=0)
Zcan = pd.DataFrame(pca_can.fit_transform(zcan), columns=['dim1', 'dim2'])

# TODO(Grace): labels are still the raw model tags at this point; they are
# shortened and cleaned up in later cells.
Zcan['label'] = canparties

  zcan[i,:] = canmodel.docvecs[canparties[i]]


In [65]:
# Preview the two PCA coordinates (dim1, dim2) and the raw tag stored as label.
Zcan

Unnamed: 0,dim1,dim2,label
0,8.263308,-9.224139,New Democratic Party_2013
1,8.472222,-9.399086,New Democratic Party_2014
2,5.877826,-10.291950,New Democratic Party_2015
3,7.701108,-8.616850,New Democratic Party_2012
4,-5.111331,-11.023739,New Democratic Party_1988
...,...,...,...
509,-11.894556,-2.139780,Conservative_2005
510,-8.705391,0.334099,Progressive Conservative_2002
511,-8.015775,0.937191,Progressive Conservative_2003
512,-9.517301,-1.676125,Conservative_2004


In [66]:
# Shorten the party names embedded in each label. The substitutions are literal
# and applied in sequence; the order matters because 'Conservative' is first
# shortened to 'Cons', which turns 'Progressive Conservative' into
# 'Progressive Cons', and only then is that string rewritten to 'RefAll'.
#
# Author's note: the Reform Party / Canadian Alliance merged with the
# Progressive Conservative party, hence the last substitution.
# NOTE(review): this relabels Progressive Conservative rows as 'RefAll' -
# confirm that this matches the labels used in the gold-standard file.
Zcan['label'] = (
    Zcan['label']
    .str.replace('New Democratic Party', 'NDP', regex=False)
    .str.replace('Conservative', 'Cons', regex=False)
    .str.replace('Bloc Québécois', 'Bloc', regex=False)
    .str.replace('Progressive Cons', 'RefAll', regex=False)
)

In [70]:
# Replace every underscore in the label with a space (literal replacement,
# no regex), e.g. 'NDP_2013' becomes 'NDP 2013'.
Zcan['label'] = Zcan['label'].str.replace('_', ' ', regex=False)

In [67]:
# Re-orient the first axis for substantive interpretation: flip dim1 when the
# NDP 2015 point lies above the Cons 2015 point, so that NDP ends up on the
# lower side of the axis.
#
# Labels are compared with underscores normalised to spaces so the check works
# whether or not the underscore-removal cell has already run. Comparing against
# 'NDP_2015' directly raised IndexError when the notebook was run top to bottom.
labels_normalised = Zcan['label'].str.replace('_', ' ', regex=False)
ndp_2015_dim1 = Zcan.loc[labels_normalised == 'NDP 2015', 'dim1']
cons_2015_dim1 = Zcan.loc[labels_normalised == 'Cons 2015', 'dim1']

if ndp_2015_dim1.empty or cons_2015_dim1.empty:
    # Fail with an explicit message instead of an opaque IndexError.
    raise ValueError(
        "Cannot orient dim1: expected labels 'NDP 2015' and 'Cons 2015' in Zcan['label']"
    )

# The flip is idempotent: once applied, the condition below is no longer true.
if ndp_2015_dim1.iloc[0] > cons_2015_dim1.iloc[0]:
    Zcan['dim1'] = Zcan['dim1'] * (-1)

In [28]:
# Draw Figure 3b with the project's plotting helper and report where it was saved.
fig3b_path = 'figures/figure3b.pdf'
plots.plot_3b(Zcan, canlabs, cancols, canmkers, savepath=fig3b_path)
print("Saved Figure 3b to file " + fig3b_path)

Saved Figure 3b to file figures/figure3b.pdf


## Table 2 - Accuracy of Placements

In [71]:
# Inspect Zcan again before it is merged with the gold-standard file on label.
Zcan

Unnamed: 0,dim1,dim2,label
0,-8.263308,-9.224139,NDP 2013
1,-8.472222,-9.399086,NDP 2014
2,-5.877826,-10.291950,NDP 2015
3,-7.701108,-8.616850,NDP 2012
4,5.111331,-11.023739,NDP 1988
...,...,...,...
509,11.894556,-2.139780,Cons 2005
510,8.705391,0.334099,RefAll 2002
511,8.015775,0.937191,RefAll 2003
512,9.517301,-1.676125,Cons 2004


In [72]:
# Read the gold-standard scores and attach the PCA coordinates by label.
# The left join keeps every gold-standard row; rows whose label has no match
# in Zcan get NaN for dim1/dim2.
gold_raw = pd.read_csv('data/goldstandard_canada.csv')
gold_can = pd.merge(gold_raw, Zcan, on='label', how='left')

In [81]:
# Compare the first PCA dimension with each gold-standard score, per country.
# For every gold standard two cells are produced: the Pearson correlation with
# dim1 and the pairwise accuracy returned by the project's pairwise_accuracy.
gold_scores = ['voteview', 'experts_stand', 'rile', 'vanilla', 'legacy']
# Display names for the table, parallel to gold_scores.
gold_display_names = ['Voteview', 'Experts Surveys', 'rile', 'vanilla', 'legacy']
metric_names = ['Correlation', 'Accuracy']
countries = [('Canada', gold_can)]

# One row per (gold standard, metric) pair and one column per country.
results = np.zeros((len(gold_scores) * len(metric_names), len(countries)), dtype=object)

for idx, (c, df) in enumerate(countries):
    for gdx, g in enumerate(gold_scores):
        jdx = gdx * len(metric_names)
        if g == 'voteview' and 'voteview' not in df.columns:
            # Voteview scores are optional; leave both cells blank when absent.
            results[jdx:(jdx+2), idx] = ['', '']
            continue
        # Drop rows missing either the gold score or dim1. dim1 is NaN for gold
        # rows whose label found no match in the left merge; filtering only on
        # the gold column let those NaNs through to pairwise_accuracy, while
        # Series.corr silently ignored them, so the two metrics were computed
        # on different samples.
        temp = df.dropna(subset=[g, 'dim1'])
        if len(temp) < 2:
            # Not enough matched observations for either metric.
            results[jdx:(jdx+2), idx] = ['', '']
            continue
        corr = '%0.3f' % temp.dim1.corr(temp[g])
        acc = '%0.2f%%' % pairwise_accuracy(temp[g].tolist(), temp.dim1.tolist())
        results[jdx:(jdx+2), idx] = [corr, acc]

# Assemble the final table: descriptive columns first, then one column per country.
results = pd.DataFrame(results, columns=[c for c, df in countries])
results.insert(loc=0, column='Metric', value=metric_names * len(gold_scores))
results.insert(
    loc=0,
    column='Gold Standard',
    value=[name for name in gold_display_names for _ in metric_names],
)

In [82]:
# Display the assembled table: one row per (gold standard, metric) pair.
results

Unnamed: 0,Gold Standard,Metric,Canada
0,Voteview,Correlation,
1,Voteview,Accuracy,
2,Experts Surveys,Correlation,0.116
3,Experts Surveys,Accuracy,19.70%
4,rile,Correlation,-0.131
5,rile,Accuracy,20.35%
6,vanilla,Correlation,-0.074
7,vanilla,Accuracy,20.66%
8,legacy,Correlation,-0.180
9,legacy,Accuracy,19.49%


In [83]:
# Write Table 2 as an org-mode style text table.
# Create the output directory first so the cell also works on a fresh checkout
# where tables/ does not exist yet (open() would raise FileNotFoundError).
os.makedirs('tables', exist_ok=True)
with open('tables/table2.txt', 'w') as f:
    print("Table 2: Accuracy of Party Placement against Gold Standards\n"+"-"*83, file=f)
    print(tabulate(results, headers="keys", showindex=False, tablefmt="orgtbl"), file=f)
print("Saved Table 2 to file tables/table2.txt")

Saved Table 2 to file tables/table2.txt
