# Tableau Prep

In [1]:
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
# pandas display options: wider text output, show up to 100 columns,
# and use the HTML table representation inside the notebook
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
# seaborn defaults for any figures: "whitegrid" style, "poster" context
sns.set_style("whitegrid")
sns.set_context("poster")

In [2]:
## MLJ: Additional Extras
import time
import itertools
import json
import pickle

In [3]:
# input root: prefix prepended to every JSON file name that gets loaded
# (used as the default `root_in` of the loader functions below)
root_in = "../../data/conditioned/corpus_vocabs/"
# output root: prefix prepended to every CSV file name that gets written
# (used as the default `root_out` of the writer functions below)
root_out = "../../viz/data/"

In [4]:
# function to ensure elements in list are ascii
def listAsAscii(lst):
    """Return a new list in which every text element is reduced to ASCII.

    Text elements (``unicode`` on Python 2, ``str`` on Python 3) have all
    non-ASCII characters dropped; every other element (numbers, None, ...)
    is passed through unchanged.

    Parameters
    ----------
    lst : iterable
        Elements to convert; dict key/value views are accepted as well.

    Returns
    -------
    list
        Same length and order as ``lst``. Converted text elements are the
        interpreter's native ``str`` type on both Python 2 and Python 3.
    """
    # `unicode` only exists on Python 2; on Python 3 all text is `str`.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    def _to_ascii(item):
        if not isinstance(item, text_type):
            return item
        encoded = item.encode('ascii', 'ignore')
        # Python 2: encode() already yields the native str.
        # Python 3: encode() yields bytes, so decode back to str; otherwise
        # the values would later be written out as "b'...'" literals.
        return encoded if isinstance(encoded, str) else encoded.decode('ascii')

    return [_to_ascii(item) for item in lst]

In [13]:
# function to sort dataframe; descending is the default
def sortDataframe(df,sort_col,ascending=False):
    """Return a copy of ``df`` sorted by ``sort_col``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to sort; it is not modified.
    sort_col : str or list of str
        Column label(s) to sort by.
    ascending : bool, default False
        Sort order; the default sorts from largest to smallest.

    Returns
    -------
    pandas.DataFrame
        Sorted frame with the original index labels carried along.
    """
    # DataFrame.sort(columns=...) was deprecated in pandas 0.17 and removed
    # in 0.20; sort_values(by=...) is the supported replacement.
    return df.sort_values(by=sort_col, ascending=ascending)

In [6]:
# load a JSON object (dict) file into a two-column dataframe
def jsonDictToDataframe(json_name, key_col_label="key", val_col_label="value", root_in=root_in):
    """Read ``root_in + json_name`` and return its key/value pairs as columns.

    The file is parsed with ``json.load``; the parsed object's keys fill the
    column named ``key_col_label`` and its values fill the column named
    ``val_col_label``. Both lists are passed through ``listAsAscii`` first.
    """
    json_path = root_in + json_name
    with open(json_path, 'r') as handle:
        mapping = json.load(handle)

    columns = {}
    columns[key_col_label] = listAsAscii(mapping.keys())
    columns[val_col_label] = listAsAscii(mapping.values())
    return pd.DataFrame(data=columns)

In [7]:
# load a JSON list of [key, value] pair lists into a two-column dataframe
def jsonListOfPairListsToDataframe(json_name, key_col_label="key", val_col_label="value", root_in=root_in):
    """Read ``root_in + json_name`` and split its pairs into two columns.

    The file is parsed with ``json.load`` and iterated; element ``[0]`` of
    every entry goes to the column named ``key_col_label`` and element ``[1]``
    to the column named ``val_col_label``. Both lists are passed through
    ``listAsAscii`` before the dataframe is built.
    """
    json_path = root_in + json_name
    with open(json_path, 'r') as handle:
        pairs = json.load(handle)

    firsts = [pair[0] for pair in pairs]
    seconds = [pair[1] for pair in pairs]

    columns = {}
    columns[key_col_label] = listAsAscii(firsts)
    columns[val_col_label] = listAsAscii(seconds)
    return pd.DataFrame(data=columns)

In [8]:
# write a dataframe out as a csv file
def dataframeToCsv(df, csv_name, root_out=root_out, index=False):
    """Save ``df`` to ``root_out + csv_name`` via ``DataFrame.to_csv``.

    ``index`` is forwarded to ``to_csv``; with the default ``False`` the
    index column is not written.
    """
    csv_path = root_out + csv_name
    df.to_csv(csv_path, index=index)

In [9]:
# convert a JSON dict file straight to a csv file
def jsonDictToCsv(json_name, csv_name, key_col_label="key", val_col_label="value",
                  root_in=root_in, root_out=root_out, index=False, sort_col=None):
    """Load a JSON dict file, optionally sort it, and write it as CSV.

    The JSON is loaded with ``jsonDictToDataframe``. When ``sort_col`` is
    truthy the frame is sorted on that column with ``sortDataframe`` (called
    without an ``ascending`` argument, so its descending default applies).
    The result is written with ``dataframeToCsv``.
    """
    frame = jsonDictToDataframe(
        json_name,
        key_col_label=key_col_label,
        val_col_label=val_col_label,
        root_in=root_in)

    # sorting is optional: None / empty sort_col leaves the load order as is
    frame = sortDataframe(frame, sort_col) if sort_col else frame

    dataframeToCsv(frame, csv_name, root_out=root_out, index=index)

In [17]:
# convert a JSON file holding a list of 2-entry lists straight to a csv file
def jsonListOfPairListsToCsv(json_name, csv_name, key_col_label="key", val_col_label="value",
                  root_in=root_in, root_out=root_out, index=False, sort_col=None):
    """Load a JSON list-of-pairs file, optionally sort it, and write it as CSV.

    The JSON is loaded with ``jsonListOfPairListsToDataframe``. When
    ``sort_col`` is truthy the frame is sorted on that column with
    ``sortDataframe`` (called without an ``ascending`` argument, so its
    descending default applies). The result is written with
    ``dataframeToCsv``.
    """
    frame = jsonListOfPairListsToDataframe(
        json_name,
        key_col_label=key_col_label,
        val_col_label=val_col_label,
        root_in=root_in)

    # sorting is optional: None / empty sort_col leaves the file order as is
    frame = sortDataframe(frame, sort_col) if sort_col else frame

    dataframeToCsv(frame, csv_name, root_out=root_out, index=index)

## N-Gram (Normal)

In [11]:
# base file name; reassigned before each conversion
name = None
# column label for the keys; stays the same for every n-gram conversion
key_col_label = "word"
# column label for the values; stays the same for every n-gram conversion
val_col_label = "count"

### Noun

In [14]:
# noun n-gram: dict-style JSON -> CSV, sorted on the count column
name = "noun-n-gram"
jsonDictToCsv(
    name + ".json",
    name + ".csv",
    key_col_label=key_col_label,
    val_col_label=val_col_label,
    sort_col=val_col_label)

![noun n-gram](../../viz/noun_n-gram.jpg)

### Adjective

In [15]:
# adjective n-gram: dict-style JSON -> CSV, sorted on the count column
name = "adj-n-gram"
jsonDictToCsv(
    name + ".json",
    name + ".csv",
    key_col_label=key_col_label,
    val_col_label=val_col_label,
    sort_col=val_col_label)

![adjective n-gram](../../viz/adj_n-gram.jpg)

## N-Gram (Reduced)

### Noun

In [18]:
# reduced noun n-gram: list-of-pairs JSON -> CSV, sorted on the count column
name = "noun_n-gram_reduced"
jsonListOfPairListsToCsv(
    name + ".json",
    name + ".csv",
    key_col_label=key_col_label,
    val_col_label=val_col_label,
    sort_col=val_col_label)

![noun n-gram reduced](../../viz/noun_n-gram_reduced.jpg)

### Adjective

In [19]:
# reduced adjective n-gram: list-of-pairs JSON -> CSV, sorted on the count column
name = "adj_n-gram_reduced"
jsonListOfPairListsToCsv(
    name + ".json",
    name + ".csv",
    key_col_label=key_col_label,
    val_col_label=val_col_label,
    sort_col=val_col_label)

![adjective n-gram reduced](../../viz/adj_n-gram_reduced.jpg)