In [33]:
import os
import sys
import inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

import pandas as pd
from functions import dirs, readFile, canonicalNames, readSet
from wordEmbedders import WESCScore
import pickle
import chevron
from re import sub
from latexTable import LatexTable

In [34]:
def stars(z):
    r"""Format a coefficient with significance stars for a LaTeX table cell.

    Parameters
    ----------
    z : tuple
        A ``(coef, p)`` pair: the coefficient and its p-value.

    Returns
    -------
    str
        The coefficient rendered with two decimals, followed by one ``*`` for
        every threshold (0.01, 0.05, 0.1) the p-value is strictly below, so
        ``p < 0.01`` yields three stars. When the rendered number has no
        leading minus sign it is prefixed with ``\phantom{-}`` so that it
        occupies the same width as a negative number.
    """
    (coef, p) = z
    res = f"{coef:.2f}"
    # The thresholds are nested, so each one that is passed adds one star.
    for threshold in (0.01, 0.05, 0.1):
        if p < threshold:
            res += '*'
    # Decide on the rendered text rather than on `coef > 0`: a coefficient of
    # exactly zero prints as "0.00" and needs the placeholder as well, while
    # a tiny negative value prints as "-0.00" and already carries its sign.
    if not res.startswith('-'):
        res = r"\phantom{-}" + res
    return res

In [35]:
# Load the naming metadata used throughout the notebook.
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load pickles that were produced by this project.
with open('./FinalNames.pickle', mode='rb') as namesFile:
    finalNames = pickle.load(namesFile)
datasets = finalNames['datasets'].keys()
sentiments = ['Positive', 'Negative']

# Build one column per dataset/sentiment pair: the value of
# `result.prsquared` first, followed by the formatted coefficients.
df = pd.DataFrame()
for dataset in datasets:
    for sentiment in sentiments:
        regressionFile = f"../data/{dataset}/Regression-{sentiment}.pickle"
        # Context manager so the handle is closed as soon as the object is read.
        with open(regressionFile, mode='rb') as regressionHandle:
            result = pickle.load(regressionHandle)
        coefs = [stars(pair) for pair in zip(result.params, result.pvalues)]
        df[f"{dataset}-{sentiment}"] = [result.prsquared] + coefs
# `result` intentionally stays bound to the last loaded regression: the next
# cell reads `result.params.index` to label the rows of `df`.

In [36]:
# Label the rows of `df`: 'r2' for the first row, then the parameter names of
# the last regression loaded by the previous cell.
# NOTE(review): this assumes every regression shares that parameter order —
# confirm against the fitting code.
df.index = ['r2'] + list(result.params.index)
order = finalNames['picked'] + ['const']

# Fold the 'postagwords' dimensions into the 'syntactic' group. The list is
# extended in place (as before), but only with entries it does not already
# contain, so re-running this cell no longer appends the same dimensions again.
# Assumes both entries are lists — TODO confirm.
syntacticDims = finalNames['dimensions']['syntactic']
syntacticDims += [d for d in finalNames['dimensions']['postagwords'] if d not in syntacticDims]

rows = []
for aspect in [a for a in finalNames['dimensions'] if a != 'postagwords']:
    # Keep only the dimensions of this aspect that were picked for the table.
    picked = [d for d in finalNames['dimensions'][aspect] if d in order]
    for position, dimension in enumerate(picked):
        firstCell = ''
        if position == 0:
            # The first row of a group carries the rotated aspect label,
            # spanning as many rows as the group has picked dimensions.
            firstCell = (
                r"\parbox[t]{2mm}{\multirow{" + str(len(picked))
                + r"}{*}{\rotatebox[origin=c]{90}{" + finalNames['aspects'][aspect] + "}}}"
            )
        rows.append([firstCell, finalNames['dimensionNames'][dimension]] + list(df.loc[dimension]))

# The constant gets its own final row with an empty, border-less first cell.
rows.append([r'\multicolumn{1}{c|}{}', 'Constant'] + list(df.loc['const']))


In [37]:
# Configure the LaTeX table and fill it with the rows built in the previous cell.
table = LatexTable()
table.emptyFirstHeader = True
table.boldHeaders = False
table.boldIndexColumn = False
table.nrColumns = 6
# Two header lines, written as raw strings so the LaTeX backslashes need no
# escaping; the emitted text is otherwise unchanged.
# NOTE(review): "IMDBb reviews" looks like a typo (IMDb/IMDB?) — left as is, confirm.
table.customHeader = (
    r"\multicolumn{2}{c|}{} & \multicolumn{2}{c|}{\textbf{Airline tweets}} & \multicolumn{2}{c|}{\textbf{IMDBb reviews}}\\ \cline{3-6}" + "\n"
    + r"\multicolumn{2}{c|}{} & \textbf{Positive} & \textbf{Negative} & \textbf{Positive} & \textbf{Negative} \\ \hline" + "\n"
)
table.headers = [r'\multicolumn{2}{|c|}{\textbf{AirlineTweets}}', r'\multicolumn{2}{|c|}{\textbf{IMDB}}']
table.rows = [
    [r' \multicolumn{1}{c|}{}', r'Pseudo-R\textsuperscript{2}'] + [f"{x:.2f}" for x in df.loc['r2']],
    '!boldLine'
] + rows
results = table.render()
# Rewrite every "2-6" span in the rendered text to "3-6".
results = sub(r'2\-6', '3-6', results)

# Post-process line by line. A dedicated name is used for the split text so
# the `rows` list from the previous cell is not overwritten and this cell can
# be re-run on its own.
renderedLines = results.split("\n")
out = []
for i, line in enumerate(renderedLines):
    nxt = renderedLines[i + 1] if i + 1 < len(renderedLines) else None
    # When the following line is missing, starts with a space, or closes the
    # tabular, this line's \hline is replaced by \cline{2-6}.
    # `startswith` (instead of indexing `nxt[0]`) tolerates empty lines.
    if nxt is None or nxt.startswith(' ') or nxt == r'\end{tabular}':
        line = sub(r"\\hline", r"\\cline{2-6}", line)
    out.append(line)
results = '\n'.join(out)

outputFile = 'Results-Regression.tex'
with open(outputFile, mode='w') as output:
    output.write(results)
print(results)

\begin{tabular}{|l|l|l|l|l|l|} \cline{3-6}
\multicolumn{2}{c|}{} & \multicolumn{2}{c|}{\textbf{Airline tweets}} & \multicolumn{2}{c|}{\textbf{IMDBbreviews}}\\ \cline{3-6}
\multicolumn{2}{c|}{} & \textbf{Positive} & \textbf{Negative} & \textbf{Positive} & \textbf{Negative} \\ \cline{2-6}
 \multicolumn{1}{c|}{} & Psuedo-R\textsuperscript{2} & 0.19 & 0.17 & 0.21 & 0.20\\ \cline{2-6}
 \multicolumn{3}{c}{} \\ [-1.5ex] \hline
\parbox[t]{2mm}{\multirow{3}{*}{\rotatebox[origin=c]{90}{Lexical}}} & nrLetters/nrWords & -0.44*** & \phantom{-}0.58*** & \phantom{-}1.83*** & -1.69*** \\ \cline{2-6}
 & uniquenessMean & \phantom{-}4.85*** & \phantom{-}3.39*** & -12.67*** & \phantom{-}21.25*** \\ \cline{2-6}
 & uniquenessSTD & \phantom{-}6.71** & -1.11 & \phantom{-}16.41*** & -21.50*** \\ \hline
\parbox[t]{2mm}{\multirow{4}{*}{\rotatebox[origin=c]{90}{Semantic}}} & nrSynsets/nrWords & -0.06** & \phantom{-}0.01 & \phantom{-}0.23*** & -0.26*** \\ \cline{2-6}
 & nrSlangWords/nrWords & \phantom{-}2.66 & -7.