In [1]:
import os
import sys
import inspect
# Put the directory one level above this code's location at the front of the
# module search path.
# NOTE(review): presumably this is where the project-local modules imported
# below (functions, wordEmbedders, latexTable) live -- confirm.
currentdir = os.path.split(
    os.path.abspath(inspect.getfile(inspect.currentframe()))
)[0]
parentdir = os.path.split(currentdir)[0]
sys.path[:0] = [parentdir]

import pandas as pd
from functions import dirs, readFile, canonicalNames, readSet
from wordEmbedders import WESCScore
import pickle
import chevron
from re import sub
from latexTable import LatexTable

In [2]:
def stars(z):
    """Format a regression coefficient with significance stars for LaTeX.

    Parameters
    ----------
    z : tuple
        A ``(coef, p)`` pair: the coefficient and its p-value.

    Returns
    -------
    str
        The coefficient rounded to two decimals, followed by one star for
        ``p < 0.1``, two for ``p < 0.05`` and three for ``p < 0.01``.
        Values whose rendered text has no leading minus sign are prefixed
        with ``\\phantom{-}`` so that numbers line up in a table column.
    """
    (coef, p) = z
    res = f"{coef:.2f}"
    # Each threshold that is met contributes one star, so p < 0.01 yields three.
    res += '*' * sum(p < threshold for threshold in (0.01, 0.05, 0.1))
    # Decide on the rendered text rather than on ``coef > 0`` so that an exact
    # zero ("0.00") is also padded and stays aligned with negative entries.
    if not res.startswith('-'):
        res = "\\phantom{-}" + res
    return res

In [3]:
# Load the naming metadata and collect, for every dataset/sentiment pair, one
# table column holding the pseudo R-squared followed by the starred coefficients.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles produced by this project.
with open('./FinalNames.pickle', mode='rb') as namesFile:
    finalNames = pickle.load(namesFile)
datasets = finalNames['datasets'].keys()
sentiments = ['Positive', 'Negative']

results = {}
columns = {}
for dataset in datasets:
    for sentiment in sentiments:
        regressionFile = f"../data/{dataset}/Regression-{sentiment}.pickle"
        # Use a context manager so each file handle is closed right after loading.
        with open(regressionFile, mode='rb') as regressionHandle:
            result = pickle.load(regressionHandle)
        coefs = list(map(stars, zip(result.params, result.pvalues)))
        col = [result.prsquared] + coefs
        columns[f"{dataset}-{sentiment}"] = col

# Build the frame once from all collected columns instead of growing it
# column by column inside the loop.
# `result` intentionally stays bound to the last regression loaded: the next
# cell reads `result.params.index` to label the rows.
df = pd.DataFrame(columns)

In [4]:
# Label the rows: the first row is the pseudo R-squared, the remaining rows
# follow the parameter order of the last loaded regression result.
df.index = ['r2', *result.params.index]

# Give the intercept a display name, then list it after the picked dimensions.
finalNames['dimensionNames']['const'] = 'constant'
order = finalNames['picked'] + ['const']

# One table row per dimension: display name first, then that row's cells.
coefs = [
    [finalNames['dimensionNames'][dimension], *df.loc[dimension]]
    for dimension in order
]

In [5]:
# Render the regression results as a LaTeX table, write it to disk and echo it.
table = LatexTable()
table.emptyFirstHeader = True
table.boldHeaders = False
table.nrColumns = 5
# Two-level header: dataset names spanning two columns each, then the
# sentiment labels. Backslashes are escaped explicitly because "\m" and "\c"
# are invalid escape sequences in a normal string literal, while the "\n"
# line breaks must remain real newlines (so a raw string cannot be used).
table.customHeader = (
    "\\multicolumn{1}{c|}{} & \\multicolumn{2}{c|}{\\textbf{AirlineTweets}} & \\multicolumn{2}{c|}{\\textbf{IMDB}}\\\\ \\cline{2-5}\n"
    "\\multicolumn{1}{c|}{} & \\textbf{Positive} & \\textbf{Negative} & \\textbf{Positive} & \\textbf{Negative} \\\\ \\hline\n"
)
# NOTE(review): the rendered output shows the custom header above, so these
# headers appear to be superseded by customHeader -- confirm in LatexTable.
table.headers = ['\\multicolumn{2}{|c|}{\\textbf{AirlineTweets}}', '\\multicolumn{2}{|c|}{\\textbf{IMDB}}']
table.rows = [
    # Row label spelling corrected ("Pseudo", previously "Psuedo").
    ['Pseudo-R\\textsuperscript{2}'] + [f"{x:.2f}" for x in df.loc['r2']],
    '!boldLine'
] + coefs
results = table.render()
outputFile = 'Results-Regression.tex'
with open(outputFile, mode='w') as output:
    output.write(results)
print(results)

\begin{tabular}{|l|l|l|l|l|} \cline{2-5}
\multicolumn{1}{c|}{} & \multicolumn{2}{c|}{\textbf{AirlineTweets}} & \multicolumn{2}{c|}{\textbf{IMDB}}\\ \cline{2-5}
\multicolumn{1}{c|}{} & \textbf{Positive} & \textbf{Negative} & \textbf{Positive} & \textbf{Negative} \\ \hline
\textbf{Psuedo-R\textsuperscript{2}} & 0.19 & 0.17 & 0.21 & 0.20\\ \hline
 \multicolumn{3}{c}{} \\ [-1.5ex] \hline
\textbf{nrLetters/nrWords} & -0.44*** & \phantom{-}0.58*** & \phantom{-}1.83*** & -1.69*** \\ \hline
\textbf{uniquenessMean} & \phantom{-}4.85*** & \phantom{-}3.39*** & -12.67*** & \phantom{-}21.25*** \\ \hline
\textbf{uniquenessSTD} & \phantom{-}6.71** & -1.11 & \phantom{-}16.41*** & -21.50*** \\ \hline
\textbf{nrSynsets/nrWords} & -0.06** & \phantom{-}0.01 & \phantom{-}0.23*** & -0.26*** \\ \hline
\textbf{nrSlangWords/nrWords} & \phantom{-}2.66 & -7.53*** & -0.41 & \phantom{-}0.96 \\ \hline
\textbf{nrHardWordsSAT/nrWords} & -1.74 & -3.56*** & -10.18*** & \phantom{-}14.46*** \\ \hline
\textbf{nrHardWordsD