In [1]:
import os
import sys
import inspect
# Resolve the directory of the file backing the current frame, then put that
# directory's parent at the front of sys.path so modules located there can be
# imported by the statements that follow.
_this_file = os.path.abspath(inspect.getfile(inspect.currentframe()))
currentdir = os.path.dirname(_this_file)
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

import pandas as pd
from functions import dirs, readSet
import pickle
import chevron
from re import sub
from latexTable import LatexTable

In [2]:
# Load the pickled dimension data and the picked dimensions from ../data.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# pickle files that were produced by this project.
# The context manager closes the file handle deterministically instead of
# leaving it to the garbage collector.
with open('../data/Dimensions-All.pickle', mode='rb') as dims_file:
    dims = pickle.load(dims_file)
# `picked` is later used for membership tests (`item in picked`);
# presumably readSet returns a set of dimension names — confirm in functions.py.
picked = readSet('../data/Dimensions-Picked-Final.txt')

In [4]:
# Overview table of the base variables: one row per variable, with the
# variable name in the first column and a short definition in the second.
# The rendered LaTeX is written to Setup-Variable-Overview-Base.tex and printed.
table = LatexTable()
table.headers = ['Variable', 'Definition']
table.columnAlignments = ["l", "p{80mm}"]
table.boldIndexColumn = False
table.rows = [
    # Plain counts.
    ['nrLetters', 'Number of letters'],
    ['nrWords', 'Number of words'],
    ['nrSentences', 'Number of sentences'],
    ['nrSynsets', 'Number of synsets across all words'],
    ['nrSyllables', 'Number of syllables across all words'],
    ['nrMonoSyllables', 'Number of words with one syllable'],
    ['nrBiSyllables', 'Number of words with two syllables'],
    ['nrPolySyllables', 'Number of words with three or more syllables'],
    ['nrLongWords', 'Number of words with 6 or more letters'],
    # Counts based on word lists and synsets.
    ['nrSlangWords', 'Number of words that are found in a pre-defined list of slang words'],
    ['nrHardWordsSAT', 'Number of words that are found in the SAT difficult words list'],
    ['nrHardWordsDC', 'Number of words that are not found in the Dale-Chall easy words list'],
    ['nrStrongSentiWords', 'Number of words that are found in a pre-defined list of strong sentiment words'],
    ['nrMixedSentiWords', 'Number of words with both a positive and a negative synset'],
    # Counts based on POS tags.
    ['nrConjunctions', 'POS-tags pertaining to conjunctions: CC, IN'],
    ['nrAdjectives', 'POS-tags pertaining to adjectives: JJ, JJR, JJS'],
    ['nrAdverbs', 'POS-tags pertaining to adverbs: RB, RBR, RBS'],
    ['nrComplexVerbs', 'POS-tags pertaining to complex verbs: MD, VBG'],
    # '\\$' spells the backslash explicitly. The string value is unchanged
    # (backslash followed by '$', i.e. an escaped dollar sign for LaTeX), but
    # Python no longer warns about the invalid escape sequence '\$'.
    ['nrPossesives', 'POS-tags pertaining to possesives: POS, PRP, PRP\\$'],
    ['nrDeterminers', 'POS-tags pertaining to determiners: DT, PDT'],
    # A bare string is a directive rather than a data row; presumably
    # LatexTable draws a bold separator line here — confirm in latexTable.
    "!boldLine",
    ['uniquenessMean', 'Mean TF-IDF score for all words in an observation'],
    ['uniquenessSTD', 'Standard deviation of the TF-IDF scores for all words in an observation'],
    # opinionPolarity = max(#positiveSentences, #negativeSentences) / min(#positiveSentences, #negativeSentences)
    ['opinionPolarity', 'Measurement of how polarised an observation is.'],
]
result = table.render()
outputFile = 'Setup-Variable-Overview-Base.tex'
with open(outputFile, mode='w') as output:
    output.write(result)
print(result)

\begin{tabular}{|l|p{80mm}|} \hline
\textbf{Variable} & \textbf{Definition} \\ \hline
nrLetters & Number of letters \\ \hline
nrWords & Number of words \\ \hline
nrSentences & Number of sentences \\ \hline
nrSynsets & Number of synsets across all words \\ \hline
nrSyllables & Number of syllables across all words \\ \hline
nrMonoSyllables & Number of words with one syllable \\ \hline
nrBiSyllables & Number of words with two syllables \\ \hline
nrPolySyllables & Number of words with three or more syllables \\ \hline
nrLongWords & Number of words with 6 or more letters \\ \hline
nrSlangWords & Number of words that are found in a pre-defined list of slang words \\ \hline
nrHardWordsSAT & Number of words that are found in the SAT difficult words list \\ \hline
nrHardWordsDC & Number of words that are not found in the Dale-Chall easy words list \\ \hline
nrStrongSentiWords & Number of words that are found in a pre-defined list of strong sentiment words \\ \hline
nrMixedSentiWords & Number of

opinionPolarity is calculated by separately counting the number of positive and negative sentences in the document. The score is the larger of those two counts divided by the smaller. It can be used as a measure of how polarized the document is: a high score means that either positive or negative sentences clearly dominate, while a score close to one means the two counts are nearly equal.

In [5]:
# Load the pickled name mappings. Later cells index this object with the keys
# 'dimensions', 'dimensionNames' and 'aspects'.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# pickle files that were produced by this project.
# The context manager closes the file handle deterministically.
with open('./FinalNames.pickle', mode='rb') as final_names_file:
    finalNames = pickle.load(final_names_file)

In [11]:
# Base (non-derived) dimensions. items() filters these out, so only the
# remaining dimensions of an aspect end up in the derived-variable table.
# NOTE(review): 'nrSenteces' looks like a misspelling of 'nrSentences'. It is
# left untouched because it presumably has to match the keys stored in
# FinalNames.pickle — confirm against the data before renaming.
base = [
    'nrWords', 'nrSenteces', 'nrLetters', 'nrSynsets', 'nrSyllables',
    'nrMonoSyllables', 'nrBiSyllables', 'nrPolySyllables', 'nrLongWords',
    'nrAmbiguousSentimentWords', 'nrStrongSentimentWords', 'nrSlangWords',
    'nrDifficultWordsSAT', 'nrDifficultWordsDaleChall', 'opinionPolarity',
    'uniquenessMean', 'uniquenessSTD',
]

# Column layout of the derived-variable table:
#   left column:  lexical, syntactic, semantic
#   right column: postagwords, sentiment
# The former `left` / `right` lists holding these aspect names were never
# read: both names are rebound to LatexTable objects before any use, so the
# dead assignments were removed and the layout is recorded here instead.

def items(aspect):
    """Build the table rows for the non-base dimensions of one aspect.

    Every dimension in ``finalNames['dimensions'][aspect]`` that is not listed
    in ``base`` becomes a single-cell row holding its entry from
    ``finalNames['dimensionNames']``; a ``'*'`` is appended to that entry when
    the dimension is in ``picked``. The returned list always ends with the
    ``"!emptyRow"`` marker string.
    """
    rows = []
    for dimension in finalNames['dimensions'][aspect]:
        if dimension in base:
            continue
        label = finalNames['dimensionNames'][dimension]
        if dimension in picked:
            label = label + '*'
        rows.append([label])
    rows.append("!emptyRow")
    return rows

# Build the two single-column tables of derived variables, render each to
# LaTeX, and insert them into the mustache template that places them side by
# side. The result is written to Setup-Variable-Overview-Derived.tex and printed.

# Number of "!emptyRow" markers appended to the right column.
# Presumably this pads the right table to the height of the left one — confirm
# and adjust if the number of dimensions per aspect changes.
RIGHT_PADDING_ROWS = 11

# --- Left column: lexical, syntactic and semantic aspects -------------------
left_table = LatexTable()
# The first aspect name is the table header; it is wrapped in \textbf by hand,
# which is presumably why the table's own header bolding is switched off.
left_table.headers = ["\\textbf{" + finalNames['aspects']['lexical'] + "}"]
left_table.boldIndexColumn = False
left_table.boldHeaders = False
rows = items('lexical')
for aspect in ('syntactic', 'semantic'):
    # Each further aspect gets a bold heading row followed by its variables.
    rows.append(["\\textbf{" + finalNames['aspects'][aspect] + "}"])
    rows += items(aspect)
left_table.rows = rows
print(rows)
left = left_table.render()

# --- Right column: POS-tag and sentiment aspects ----------------------------
right_table = LatexTable()
right_table.headers = ["\\textbf{" + finalNames['aspects']['postagwords'] + "}"]
right_table.boldIndexColumn = False
right_table.boldHeaders = False
rows = items('postagwords')
rows.append(["\\textbf{" + finalNames['aspects']['sentiment'] + "}"])
rows += items('sentiment')
# Add the padding BEFORE handing the list to the table, so the result does not
# depend on LatexTable keeping a reference to this exact list object.
rows.extend(['!emptyRow'] * RIGHT_PADDING_ROWS)
right_table.rows = rows
right = right_table.render()

data = {
    'left': left,
    'right': right
}

outputFile = './Setup-Variable-Overview-Derived.tex'
# Render first and only then open the output for writing: a missing or broken
# template no longer leaves behind a truncated, empty .tex file.
with open(f'{outputFile}.mustache') as template:
    res = chevron.render(template, data)
with open(outputFile, mode='w') as output:
    output.write(res)
print(res)

[['nrLetters/nrWords*'], ['nrSyllables/nrWords'], ['nrMonoSyllables/nrWords'], ['nrBiSyllables/nrWords'], ['nrPolySyllables/nrWords'], ['nrLongWords/nrWords'], '!emptyRow', ['\\textbf{Syntactic}'], ['nrLetters/nrSentences'], ['nrWords/nrSentences*'], ['nrSyllables/nrSentences'], ['nrMonoSyllables/nrSentences'], ['nrBiSyllables/nrSentences'], ['nrPolySyllables/nrSentences'], ['nrLongWords/nrSentences'], '!emptyRow', ['\\textbf{Semantic}'], ['nrSynsets/nrWords*'], ['nrSlangWords/nrWords*'], ['nrHardWordsSAT/nrWords*'], ['nrHardWordsDC/nrWords*'], '!emptyRow']


\begin{tabular}{rl}
    \begin{minipage}{.44\linewidth}
        \begin{tabular}{|l|} \hline
\textbf{Lexical} \\ \hline
nrLetters/nrWords* \\ \hline
nrSyllables/nrWords \\ \hline
nrMonoSyllables/nrWords \\ \hline
nrBiSyllables/nrWords \\ \hline
nrPolySyllables/nrWords \\ \hline
nrLongWords/nrWords \\ \hline
\multicolumn{1}{c}{} \\ \hline
\textbf{Syntactic} \\ \hline
nrLetters/nrSentences \\ \hline
nrWords/nrSentences* \\ \hline
nrS