In [1]:
import os
import sys
import inspect
# Put the parent of the notebook's working directory on the import path so
# the project-local imports below can be resolved (presumably `functions` and
# `latexTable` live there -- confirm against the repository layout).
# The working directory is used instead of the current frame's file name:
# the file name reported for a notebook cell depends on the kernel version
# and may point into a temporary directory rather than the notebook folder.
# The relative '../data/...' paths used later rely on the working directory too.
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

import pandas as pd
from functions import dirs, readSet
import pickle
import chevron
from re import sub
from latexTable import LatexTable

In [2]:
# Load the pickled `dims` object (indexed by aspect name further down) and the
# collection of picked dimensions (used for membership tests further down).
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load pickle files that were produced by this project.
# The `with` block closes the file handle as soon as the data has been read.
with open('../data/Dimensions-All.pickle', mode='rb') as dimsFile:
    dims = pickle.load(dimsFile)
picked = readSet('../data/Dimensions-Picked-Final.txt')

In [3]:
# Base variables as (variable, definition) pairs; they are turned into
# two-element lists when assigned to table.rows below.
countRows = [
    ('nrWords', 'nr of words'),
    ('nrSenteces', 'nr of sentences'),
    ('nrLetters', 'nr of letters in all words'),
    ('nrSynsets', 'nr of synsets in all words'),
    ('nrSyllables', 'nr of syllables in all words'),
    ('nrMonoSyllables', 'nr of words with one syllable'),
    ('nrBiSyllables', 'nr of words with two syllables'),
    ('nrPolySyllables', 'nr of words with three or more syllables'),
    ('nrLongWords', 'nr of words with 6 or more letters'),
    ('nrAmbiguousSentimentWords', 'nr of words with a positive and a negative synset'),
    ('nrStrongSentimentWords', 'nr of words in the cluster words list'),
    ('nrSlangWords', 'nr of words in the slang words list'),
    ('nrDifficultWordsSAT', 'nr of words in the SAT difficult words list'),
    ('nrDifficultWordsDaleChall', 'nr of words not in the Dale-Chall easy words list'),
]
scoreRows = [
    ('uniquenessMean', 'mean TF-IDF score of all words'),
    ('uniquenessSTD', 'STD of the TF-IDF scores for all words'),
    # '*' presumably points at the explanatory note that follows the table:
    # max(#positiveSentences, #negativeSentences) / min(#positiveSentences, #negativeSentences)
    ('opinionPolarity', '*'),
]

table = LatexTable()
table.headers = ['Variable', 'Definition']
# Alternative column layout, currently disabled:
#table.columnAlignments = ["l", "p{70mm}"]
table.boldIndexColumn = False
# "!boldLine" is a marker string placed between the two groups of rows; in the
# rendered output the row before it ends with a thicker rule instead of \hline.
table.rows = (
    [list(row) for row in countRows]
    + ["!boldLine"]
    + [list(row) for row in scoreRows]
)

# Render once, store the LaTeX next to the notebook, and echo it in the cell output.
result = table.render()
outputFile = 'Setup-Software-Base.tex'
with open(outputFile, mode='w') as output:
    output.write(result)
print(result)

\begin{tabular}{|l|l|} \hline
\textbf{Variable} & \textbf{Definition} \\ \hline
nrWords & nr of words \\ \hline
nrSenteces & nr of sentences \\ \hline
nrLetters & nr of letters in all words \\ \hline
nrSynsets & nr of synsets in all words \\ \hline
nrSyllables & nr of syllables in all words \\ \hline
nrMonoSyllables & nr of words with one syllable \\ \hline
nrBiSyllables & nr of words with two syllables \\ \hline
nrPolySyllables & nr of words with three or more syllables \\ \hline
nrLongWords & nr of words with 6 or more letters \\ \hline
nrAmbiguousSentimentWords & nr of words with a positive and a negative synset \\ \hline
nrStrongSentimentWords & nr of words in the cluster words list \\ \hline
nrSlangWords & nr of words in the slang words list \\ \hline
nrDifficultWordsSAT & nr of words in the SAT difficult words list \\ \hline
nrDifficultWordsDaleChall & nr of words not in the Dale-Chall easy words list \\ \Xhline{3\arrayrulewidth}
uniquenessMean & mean TF-IDF score of all words \\

opinionPolarity is calculated by separately counting the number of positive and negative sentences in the document. The score is the max of those counts divided by the min of those counts. It can be used as a measure of how polarized the document is. If the number is high, there are considerably more sentences of one polarity (positive or negative) than of the other. If the number is close to one, the two counts are close to equal.

In [4]:
# Variables that are excluded from the per-aspect tables built below.
base = ['nrWords', 'nrSenteces', 'nrLetters', 'nrSynsets', 'nrSyllables', 'nrMonoSyllables', 'nrBiSyllables',
'nrPolySyllables', 'nrLongWords', 'nrAmbiguousSentimentWords', 'nrStrongSentimentWords', 'nrSlangWords',
'nrDifficultWordsSAT', 'nrDifficultWordsDaleChall', 'opinionPolarity', 'uniquenessMean', 'uniquenessSTD']

# Table heading for each aspect key looked up in `dims`.
names = {
    'postagwords': 'POS Groups',
    'lexical': 'Lexical',
    'syntactic': 'Syntactic',
    'semantic': 'Semantic',
    'sentiment': 'Sentiment'
}

# Rendered LaTeX table per aspect; passed to the mustache template as its data.
results = {}

# Iterating `names` visits exactly the aspects that have a heading defined.
for aspect in names:
    # One single-column row per variable that is not in `base`; variables that
    # are in `picked` get a trailing '*'.
    items = [
        [item + '*'] if item in picked else [item]
        for item in dims[aspect]
        if item not in base
    ]
    # Marker string interpreted by LatexTable (presumably an empty closing row).
    items.append("!emptyRow")

    table = LatexTable()
    table.headers = [names[aspect]]
    table.boldIndexColumn = False
    table.rows = items
    results[aspect] = table.render()

outputFile = './Setup-Variable-Overview-Derived.tex'
# Render before opening the output for writing: mode='w' truncates the file on
# open, so a missing template or a render error would otherwise leave an empty
# .tex file behind.
with open(f'{outputFile}.mustache') as template:
    rendered = chevron.render(template, results)
with open(outputFile, mode='w') as output:
    output.write(rendered)