In [1]:
import os
import sys
import inspect
# Resolve the directory that contains the currently executing file, then put
# its parent directory at the front of sys.path before the imports below run.
currentdir = os.path.split(
    os.path.abspath(inspect.getfile(inspect.currentframe()))
)[0]
parentdir = os.path.split(currentdir)[0]
sys.path[:0] = [parentdir]

import pandas as pd
from functions import dirs, readFile, VIF, canonicalNames, readSet
from wordEmbedders import WESCScore
import pickle
import chevron
from re import sub
from latexTable import LatexTable

In [2]:
# Load the pickled naming/selection metadata. The context manager closes the
# file handle as soon as loading finishes (a bare open() inside pickle.load()
# leaves it open until garbage collection).
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load FinalNames.pickle from a trusted source.
with open('./FinalNames.pickle', mode='rb') as namesFile:
    finalNames = pickle.load(namesFile)
datasets = finalNames['datasets'].keys()
names = canonicalNames('../data')
picked = finalNames['picked']
sentiments = ['positive', 'negative']

# Collect one column per "<dataset>-<sentiment>" pair, holding the 'VIF'
# column returned by the project helper VIF() for the picked columns.
results = pd.DataFrame()
for dataset in datasets:
    dimsFile = f"../data/{dataset}/Dimensions.csv"
    predictionFile = f"../data/{dataset}/Word2Vec-Prediction.csv"
    dims = pd.read_csv(dimsFile)
    pred = pd.read_csv(predictionFile)
    # DataFrame.join aligns the two frames on their index, so this assumes
    # both CSV files list the same documents in the same order (TODO confirm).
    df = dims.join(pred)
    for sentiment in sentiments:
        # Keep only the rows whose 'truth' label equals the current sentiment.
        data = df[df['truth'] == sentiment]
        vif = VIF(data[picked])
        results[f'{dataset}-{sentiment}'] = vif['VIF']

In [3]:
# Drop the row labelled 'const' from the VIF table; the table-building cell
# below looks up the remaining rows through `result`.
result = results.drop(labels=['const'], axis='index')

In [4]:
# Build the body rows of the LaTeX table: one row per picked dimension,
# grouped by aspect, with the aspect label placed in the first row of its group.
rows = []

# Fold the 'postagwords' dimensions into the 'syntactic' aspect. Entries are
# only appended when they are not already present, so re-running this cell
# does not append them a second time (a plain `+=` on the loaded metadata
# would duplicate table rows and inflate the \multirow count on every re-run).
syntactic = list(finalNames['dimensions']['syntactic'])
for extra in finalNames['dimensions']['postagwords']:
    if extra not in syntactic:
        syntactic.append(extra)
finalNames['dimensions']['syntactic'] = syntactic

for aspect in [x for x in finalNames['dimensions'] if not x == 'postagwords']:
    first = True
    # NOTE: this rebinds the notebook-level name `picked` to the dimensions of
    # the current aspect that are also listed in finalNames['picked'].
    picked = [x for x in finalNames['dimensions'][aspect] if x in finalNames['picked']]
    n = len(picked)
    for dimension in picked:
        firstCell = ''
        if first:
            # Rotated aspect label spanning all n rows of the group. Raw
            # strings keep the LaTeX backslashes literal without relying on
            # invalid escape sequences such as "\p" and "\m".
            firstCell = (
                r"\parbox[t]{2mm}{\multirow{" + str(n)
                + r"}{*}{\rotatebox[origin=c]{90}{"
                + finalNames['aspects'][aspect] + "}}}"
            )
            first = False
        # Display name of the dimension followed by its values in `result`,
        # each formatted with two decimals.
        row = [firstCell, finalNames['dimensionNames'][dimension]] + [
            f"{value:.2f}" for value in result.loc[dimension]
        ]
        rows.append(row)


In [5]:
# Render the VIF table as LaTeX, patch the rule commands, write the result to
# Results-VIF.tex and print it.
# NOTE: this cell rebinds `results` (DataFrame -> rendered string) and `rows`
# (table rows -> text lines); re-run the cells above before re-running it.
table = LatexTable()
table.emptyFirstHeader = True
table.boldHeaders = False
table.boldIndexColumn = False
table.nrColumns = 6
# Two header lines. Raw strings keep the LaTeX backslashes literal without
# relying on invalid escape sequences ("\m", "\c"); the explicit "\n" pieces
# are the only real newlines.
table.customHeader = (
    r"\multicolumn{2}{c|}{} & \multicolumn{2}{c|}{\textbf{Airline tweets}} & \multicolumn{2}{c|}{\textbf{IMDb reviews}}\\ \cline{3-6}" "\n"
    r"\multicolumn{2}{c|}{} & \textbf{Positive} & \textbf{Negative} & \textbf{Positive} & \textbf{Negative} \\ \hline" "\n"
)
# Body rows, then "!boldLine" (presumably a LatexTable directive; confirm in
# latexTable), then a final row with the column means of the VIF table
# without its 'const' row.
table.rows = rows + [
    "!boldLine",
    [r'\multicolumn{1}{c|}{}', "Average VIF"]
    + [f"{value:.2f}" for value in results.drop('const').mean(axis=0)],
]
results = table.render()
# Rewrite every "2-6" in the rendered text to "3-6" (presumably targeting a
# \cline{2-6} emitted by render() -- TODO confirm against LatexTable).
results = sub(r'2\-6', '3-6', results)
rows = results.split("\n")
out = []
for i, r in enumerate(rows):
    nxt = rows[i + 1] if i + 1 < len(rows) else None
    # Replace \hline by \cline{2-6} on the last line and on any line that is
    # followed by a line starting with a space or by \end{tabular}.
    # startswith() also copes with an empty following line, where indexing
    # nxt[0] would raise IndexError.
    if nxt is None or nxt.startswith(' ') or nxt == r'\end{tabular}':
        r = sub(r"\\hline", r"\\cline{2-6}", r)
    out.append(r)
results = '\n'.join(out)
outputFile = 'Results-VIF.tex'
with open(outputFile, mode='w') as output:
    output.write(results)
print(results)

\begin{tabular}{|l|l|l|l|l|l|} \cline{3-6}
\multicolumn{2}{c|}{} & \multicolumn{2}{c|}{\textbf{Airline tweets}} & \multicolumn{2}{c|}{\textbf{IMDb reviews}}\\ \cline{3-6}
\multicolumn{2}{c|}{} & \textbf{Positive} & \textbf{Negative} & \textbf{Positive} & \textbf{Negative} \\ \hline
\parbox[t]{2mm}{\multirow{3}{*}{\rotatebox[origin=c]{90}{Lexical}}} & nrLetters/nrWords & 1.69 & 1.55 & 1.99 & 1.83 \\ \cline{2-6}
 & uniquenessMean & 2.03 & 1.72 & 3.23 & 3.17 \\ \cline{2-6}
 & uniquenessSTD & 1.39 & 1.47 & 3.29 & 3.16 \\ \hline
\parbox[t]{2mm}{\multirow{4}{*}{\rotatebox[origin=c]{90}{Semantic}}} & nrSynsets/nrWords & 1.38 & 1.20 & 1.33 & 1.36 \\ \cline{2-6}
 & nrSlangWords/nrWords & 1.05 & 1.07 & 1.01 & 1.01 \\ \cline{2-6}
 & nrHardWordsSAT/nrWords & 1.04 & 1.04 & 1.26 & 1.22 \\ \cline{2-6}
 & nrHardWordsDC/nrWords & 2.42 & 1.79 & 2.22 & 1.98 \\ \hline
\parbox[t]{2mm}{\multirow{7}{*}{\rotatebox[origin=c]{90}{Syntactic}}} & nrWords/nrSentences & 1.62 & 1.22 & 1.05 & 1.05 \\ \cline{2-6}
 & n