In [1]:
import os
import sys
import inspect
# Put the parent of this file's directory at the front of the import path.
# NOTE(review): presumably the project-local modules imported below
# (functions, wordEmbedders, latexTable) live there; confirm.
thisFile = inspect.getfile(inspect.currentframe())
currentdir = os.path.dirname(os.path.abspath(thisFile))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

import pandas as pd
from functions import dirs, readFile, VIF, canonicalNames, readSet
from wordEmbedders import WESCScore
import pickle
import chevron
from re import sub
from latexTable import LatexTable

In [2]:
# Collect one column of variance inflation factors (VIF) per
# (dataset, sentiment) pair, computed over the picked dimensions only.
datasets = dirs('../data')
names = canonicalNames('../data')
picked = readSet('../data/Dimensions-Picked-Final.txt')
sentiments = ['positive', 'negative']

# pandas >= 2.0 refuses a set as a column indexer, so select through a list.
# NOTE(review): readSet presumably returns a set; list() is harmless either way.
# Column order does not matter here: later cells look rows up by label.
pickedColumns = list(picked)

results = pd.DataFrame()
for dataset in datasets:
    dimsFile = f"../data/{dataset}/Dimensions.csv"
    predictionFile = f"../data/{dataset}/Word2Vec-Prediction.csv"
    dims = pd.read_csv(dimsFile)
    pred = pd.read_csv(predictionFile)
    # Index-aligned join: assumes both CSVs list the same documents in the
    # same row order - TODO confirm.
    df = dims.join(pred)
    for sentiment in sentiments:
        # Keep only the rows whose 'truth' column equals this sentiment.
        data = df[df['truth'] == sentiment]
        vif = VIF(data[pickedColumns])
        # One result column per dataset/sentiment; rows follow the index of
        # the 'VIF' column returned by the VIF helper.
        results[f'{dataset}-{sentiment}'] = vif['VIF']

In [3]:
# Remove the 'const' row so only the dimension rows remain.
# errors='ignore' keeps this cell re-runnable: `results` is rebound here, so a
# second execution would otherwise raise KeyError because 'const' is already gone.
results = results.drop(index='const', errors='ignore')

In [4]:
# Load the pickled name mappings. The `with` block closes the file handle
# deterministically instead of leaving it to the garbage collector.
# NOTE: unpickling can execute arbitrary code; only load a file you trust.
with open('./FinalNames.pickle', mode='rb') as namesFile:
    finalNames = pickle.load(namesFile)

In [5]:
# Order the picked dimensions the way they appear in
# finalNames['dimensionNames']. The key list is built once rather than once
# per element; a dimension missing from it still raises ValueError.
dimensionOrder = list(finalNames['dimensionNames'].keys())
picked = sorted(picked, key=dimensionOrder.index)

In [6]:


# Build the LaTeX table of VIF values: one row per picked dimension (display
# name followed by the values formatted to two decimals), then a mean row.
rows = [
    [finalNames['dimensionNames'][row]] + [f"{f:.2f}" for f in results.loc[row]]
    for row in picked
]

table = LatexTable()
table.emptyFirstHeader = True
table.boldHeaders = False
table.nrColumns = 5
# Every backslash is escaped explicitly: "\m" and "\c" are invalid escape
# sequences in a normal string literal and trigger a warning on modern Python.
# The resulting header text is unchanged.
# The dataset names and their order are hard-coded here, so they must match
# the column order of `results`.
table.customHeader = (
    "\\multicolumn{1}{c|}{} & \\multicolumn{2}{c|}{\\textbf{AirlineTweets}} & \\multicolumn{2}{c|}{\\textbf{IMDB}}\\\\ \\cline{2-5}\n"
    "\\multicolumn{1}{c|}{} & \\textbf{Positive} & \\textbf{Negative} & \\textbf{Positive} & \\textbf{Negative} \\\\ \\hline\n"
)
table.rows = rows + [
    "!boldLine",
    # Column-wise mean over all rows of `results`.
    ["Mean"] + [f"{f:.2f}" for f in results.mean(axis=0)]
]
# NOTE: this rebinds `results` from the VIF DataFrame to the rendered text.
# Re-running this cell without first re-running the cells that build the
# DataFrame will fail at `results.loc`.
results = table.render()
outputFile = 'Results-VIF.tex'
with open(outputFile, mode='w') as output:
    output.write(results)
print(results)

\begin{tabular}{|l|l|l|l|l|} \cline{2-5}
\multicolumn{1}{c|}{} & \multicolumn{2}{c|}{\textbf{AirlineTweets}} & \multicolumn{2}{c|}{\textbf{IMDB}}\\ \cline{2-5}
\multicolumn{1}{c|}{} & \textbf{Positive} & \textbf{Negative} & \textbf{Positive} & \textbf{Negative} \\ \hline
\textbf{nrConjunctions/nrWords} & 1.53 & 1.27 & 1.29 & 1.28 \\ \hline
\textbf{nrAdjectives/nrWords} & 1.54 & 1.27 & 1.50 & 1.48 \\ \hline
\textbf{nrAdverbs/nrWords} & 1.24 & 1.19 & 1.18 & 1.19 \\ \hline
\textbf{nrComplexVerbs/nrWords} & 1.12 & 1.06 & 1.10 & 1.10 \\ \hline
\textbf{nrPossesives/nrWords} & 2.79 & 1.55 & 1.93 & 1.71 \\ \hline
\textbf{nrDeterminers/nrWords} & 1.40 & 1.26 & 1.48 & 1.45 \\ \hline
\textbf{uniquenessMean} & 2.03 & 1.72 & 3.23 & 3.17 \\ \hline
\textbf{uniquenessSTD} & 1.39 & 1.47 & 3.29 & 3.16 \\ \hline
\textbf{nrLetters/nrWords} & 1.69 & 1.55 & 1.99 & 1.83 \\ \hline
\textbf{nrSynsets/nrWords} & 1.38 & 1.20 & 1.33 & 1.36 \\ \hline
\textbf{nrSlangWords/nrWords} & 1.05 & 1.07 & 1.01 & 1.01 \\ \hli

AirlineTweets2-positive     7.637811
AirlineTweets2-negative     9.742087
IMDB-positive              48.328242
IMDB-negative              53.925573
dtype: float64