In [1]:
import os
import sys
import inspect
# Put the parent of the directory this code runs from at the front of sys.path,
# so it is searched first by the imports that follow.
# NOTE(review): presumably the project-local modules imported below (functions,
# wordEmbedders, latexTable) live in that parent directory - confirm.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

import pandas as pd
from functions import dirs, readFile, canonicalNames, readSet
from wordEmbedders import WESCScore
import pickle
import chevron
from re import sub
from latexTable import LatexTable

In [2]:
# Load the picked dimensions through the project helper readSet.
# NOTE(review): presumably a set of feature-name strings (a later cell converts
# it with list() and sorts it) - confirm against readSet.
picked = readSet('../data/Dimensions-Picked-Final.txt')

In [3]:
# Load one pickled regression result for inspection.
# The file is opened in a `with` block so the handle is closed deterministically.
# NOTE: unpickling can execute arbitrary code; only load pickles from a trusted source.
with open("../data/AirlineTweets2/Regression-Positive.pickle", mode='rb') as regressionPickle:
    result = pickle.load(regressionPickle)

In [4]:
# Display the parameters stored on the loaded regression result.
result.params

const                                1.659492
nrPossesives/nrWords                 6.238790
nrStrongSentimentWords/nrWords      -0.703717
nrConjunctions/nrWords              -6.137505
nrSynsets/nrWords                   -0.058234
nrAdjectives/nrWords                -1.288943
opinionPolarity                     -0.094735
nrDeterminers/nrWords               -3.478717
nrAmbiguousSentimentWords/nrWords   -1.704586
nrDifficultWordsDaleChall/nrWords   -2.341873
nrDifficultWordsSAT/nrWords         -1.743940
nrSlangWords/nrWords                 2.663263
uniquenessMean                       4.854494
nrWords/nrSentences                  0.033311
uniquenessSTD                        6.711025
nrAdverbs/nrWords                    0.092676
nrLetters/nrWords                   -0.439648
nrComplexVerbs/nrWords               3.046090
nrSentences                          0.615171
dtype: float64

In [5]:
def stars(z):
    """Format a (coefficient, p-value) pair as a LaTeX table cell.

    The coefficient is rendered with two decimals and followed by one star per
    significance threshold the p-value is below: ``***`` for p < 0.01, ``**``
    for p < 0.05, ``*`` for p < 0.1, and no stars otherwise.

    Values rendered without a leading minus sign are prefixed with
    ``\\phantom{-}`` so that they line up with negative values in a column.

    Args:
        z: A ``(coef, p)`` pair of numbers.

    Returns:
        The formatted cell text as a string.
    """
    (coef, p) = z
    res = f"{coef:.2f}"
    # The thresholds are nested, so a p-value collects one star for each it passes.
    for threshold in (0.01, 0.05, 0.1):
        if p < threshold:
            res += '*'
    # Decide on the rendered text rather than on the sign of coef: an exact zero
    # prints as "0.00" and needs the placeholder, while a tiny negative value
    # prints as "-0.00" and already carries its own minus sign.
    if not res.startswith('-'):
        res = "\\phantom{-}" + res
    return res

In [6]:
# Build one column per dataset/sentiment pair. Each column holds the result's
# prsquared value followed by its star-annotated coefficients (see stars()).
datasets = dirs('../data')
# `names` and `results` are not read in this cell; they are kept because other
# cells may rely on them.
names = canonicalNames('../data')
sentiments = ['Positive', 'Negative']

results = {}
df = pd.DataFrame()
for dataset in datasets:
    for sentiment in sentiments:
        regressionFile = f"../data/{dataset}/Regression-{sentiment}.pickle"
        # Close each pickle file as soon as it has been read instead of leaking
        # one open handle per dataset/sentiment pair.
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(regressionFile, mode='rb') as regressionPickle:
            result = pickle.load(regressionPickle)
        coefs = [stars(pair) for pair in zip(result.params, result.pvalues)]
        col = [result.prsquared] + coefs
        df[f"{dataset}-{sentiment}"] = col

In [7]:
# Label the rows: 'r2' first, then one label per entry of result.params
# (taken from whichever `result` is currently bound).
df.index = ['r2', *result.params.index]

# Show the rows in a fixed order. The reindexed frame is only displayed; df
# itself is not reassigned here.
df.reindex(index=[
    'r2',
    'nrPossesives/nrWords',
    'nrStrongSentimentWords/nrWords',
    'nrConjunctions/nrWords',
    'nrSynsets/nrWords',
    'nrAdjectives/nrWords',
    'opinionPolarity',
    'nrDeterminers/nrWords',
    'nrAmbiguousSentimentWords/nrWords',
    'nrDifficultWordsDaleChall/nrWords',
    'nrDifficultWordsSAT/nrWords',
    'nrSlangWords/nrWords',
    'nrWords/nrSentences',
    'uniquenessMean',
    'uniquenessSTD',
    'nrAdverbs/nrWords',
    'nrLetters/nrWords',
    'nrComplexVerbs/nrWords',
    'const',
])

Unnamed: 0,AirlineTweets2-Positive,AirlineTweets2-Negative,IMDB-Positive,IMDB-Negative
r2,0.18786,0.169807,0.20689,0.201375
nrPossesives/nrWords,\phantom{-}6.24***,-6.88***,\phantom{-}2.36**,-2.79
nrStrongSentimentWords/nrWords,-0.70,\phantom{-}8.80***,\phantom{-}46.57***,\phantom{-}14.94***
nrConjunctions/nrWords,-6.14***,\phantom{-}8.69***,\phantom{-}7.27***,-8.61***
nrSynsets/nrWords,-0.06**,\phantom{-}0.01,\phantom{-}0.23***,-0.26***
nrAdjectives/nrWords,-1.29,\phantom{-}1.14**,\phantom{-}6.86***,-3.08*
opinionPolarity,-0.09,\phantom{-}0.05*,\phantom{-}0.17***,-0.12***
nrDeterminers/nrWords,-3.48***,\phantom{-}5.15***,-2.14***,\phantom{-}8.42***
nrAmbiguousSentimentWords/nrWords,-1.70,\phantom{-}6.30***,\phantom{-}6.77***,-1.83
nrDifficultWordsDaleChall/nrWords,-2.34***,\phantom{-}1.76***,\phantom{-}11.90***,-14.56***


In [8]:
# Load the pickled display-name mapping; later cells read its 'dimensionNames' entry.
# The file is opened in a `with` block so the handle is closed deterministically.
# NOTE: unpickling can execute arbitrary code; only load pickles from a trusted source.
with open('./FinalNames.pickle', mode='rb') as finalNamesPickle:
    finalNames = pickle.load(finalNamesPickle)

In [9]:
# Order the picked dimensions by their position among the keys of
# finalNames['dimensionNames']. The key list is built once, when the `key`
# argument is evaluated, instead of once per element being sorted. A picked
# name that is not among the keys still raises ValueError.
picked = sorted(picked, key=list(finalNames['dimensionNames'].keys()).index)

In [10]:
# Display the current contents of `picked`.
picked

['nrConjunctions/nrWords',
 'nrAdjectives/nrWords',
 'nrAdverbs/nrWords',
 'nrComplexVerbs/nrWords',
 'nrPossesives/nrWords',
 'nrDeterminers/nrWords',
 'uniquenessMean',
 'uniquenessSTD',
 'nrLetters/nrWords',
 'nrSynsets/nrWords',
 'nrSlangWords/nrWords',
 'nrWords/nrSentences',
 'nrDifficultWordsSAT/nrWords',
 'nrDifficultWordsDaleChall/nrWords',
 'opinionPolarity',
 'nrAmbiguousSentimentWords/nrWords',
 'nrStrongSentimentWords/nrWords']

In [12]:
# Render the regression results as a LaTeX table, write it to
# Results-Regression.tex and print it.

# Row order: the picked dimensions followed by the intercept.
order = picked + ['const']
# Adds a display name for the intercept row (this mutates finalNames in place).
finalNames['dimensionNames']['const'] = 'constant'

# One table row per dimension: its display name followed by the cells of df for that row.
coefs = [[finalNames['dimensionNames'][row]] + list(df.loc[row]) for row in order]

table = LatexTable()
table.emptyFirstHeader = True
table.boldHeaders = False
table.nrColumns = 5
# The LaTeX snippets are raw strings so that every backslash is literal. The
# previous non-raw literals only worked because Python passes unknown escapes
# such as "\m" and "\c" through unchanged, which now triggers a warning.
table.customHeader = (
    r"\multicolumn{1}{c|}{} & \multicolumn{2}{c|}{\textbf{AirlineTweets}}"
    r" & \multicolumn{2}{c|}{\textbf{IMDB}}\\ \cline{2-5}" "\n"
    r"\multicolumn{1}{c|}{} & \textbf{Positive} & \textbf{Negative}"
    r" & \textbf{Positive} & \textbf{Negative} \\ \hline" "\n"
)
table.headers = [
    r'\multicolumn{2}{|c|}{\textbf{AirlineTweets}}',
    r'\multicolumn{2}{|c|}{\textbf{IMDB}}',
]
table.rows = [
    # Row label spelling corrected from "Psuedo" to "Pseudo".
    [r'Pseudo-R\textsuperscript{2}'] + [f"{x:.2f}" for x in df.loc['r2']],
    '!boldLine'
] + coefs
results = table.render()
outputFile = 'Results-Regression.tex'
with open(outputFile, mode='w') as output:
    output.write(results)
print(results)

\begin{tabular}{|l|l|l|l|l|} \cline{2-5}
\multicolumn{1}{c|}{} & \multicolumn{2}{c|}{\textbf{AirlineTweets}} & \multicolumn{2}{c|}{\textbf{IMDB}}\\ \cline{2-5}
\multicolumn{1}{c|}{} & \textbf{Positive} & \textbf{Negative} & \textbf{Positive} & \textbf{Negative} \\ \hline
\textbf{Psuedo-R\textsuperscript{2}} & 0.19 & 0.17 & 0.21 & 0.20\\ \hline
 \multicolumn{3}{c}{} \\ [-1.5ex] \hline
\textbf{nrConjunctions/nrWords} & -6.14*** & \phantom{-}8.69*** & \phantom{-}7.27*** & -8.61*** \\ \hline
\textbf{nrAdjectives/nrWords} & -1.29 & \phantom{-}1.14** & \phantom{-}6.86*** & -3.08* \\ \hline
\textbf{nrAdverbs/nrWords} & \phantom{-}0.09 & \phantom{-}0.12 & -7.84*** & \phantom{-}9.93*** \\ \hline
\textbf{nrComplexVerbs/nrWords} & \phantom{-}3.05** & -0.22 & -16.15*** & \phantom{-}22.59*** \\ \hline
\textbf{nrPossesives/nrWords} & \phantom{-}6.24*** & -6.88*** & \phantom{-}2.36** & -2.79 \\ \hline
\textbf{nrDeterminers/nrWords} & -3.48*** & \phantom{-}5.15*** & -2.14*** & \phantom{-}8.42*** \\ \h