In [39]:
import numpy as np
import pandas as pd
import math
from IPython.display import display
import seaborn as sns
import matplotlib.pyplot as plt
import locale
locale.setlocale(locale.LC_ALL, '')
%matplotlib inline

# Bokeh is cool.
from bokeh.io import output_file, show,curdoc, output_notebook
from bokeh.models import Quad
from bokeh.layouts import row, layout,widgetbox
from bokeh.models.widgets import Select,MultiSelect
from bokeh.plotting import ColumnDataSource,Figure,reset_output,gridplot

import pickle

# Matplotlib defaults for this notebook:
#   * vector exports (PDF / PS / SVG) keep their text as editable text
#   * every figure uses automatic layout
plt.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'svg.fonttype': 'none',
    'figure.autolayout': True,
})

# Load the preprocessed table, then the column metadata that was pickled
# alongside it (six objects, unpacked in the order they were saved).
# NOTE(review): pickle.load executes arbitrary code from the file -- only
# load a SavedVariables.pckl that this project produced itself.
df = pd.read_csv('./export_csv/processed_output_without_column_names.csv', encoding = 'utf8')
with open('SavedVariables.pckl', 'rb') as f:
    (continous_cols, cate_cols, cate_cols_v2,
     pipe_col, col_dict, pipe_col_dict) = pickle.load(f)


In [40]:
# Name of the column that flags AIGA membership; the code below compares it
# against 1 (member) and 0 (non-member).
AIGA_col = pipe_col_dict['V_32']['AIGA']

# Count matching rows with a boolean sum instead of materialising a filtered
# copy of the frame just to take its length.
print('The total number of AIGA members is: ' + str(int((df[AIGA_col] == 1).sum())))
print('The total number of Non AIGA members is: ' + str(int((df[AIGA_col] == 0).sum())))

In [41]:
def summary_cate_col(col, AllorAIGA = 'All'): #default is all records
    """Build a count / percent summary table for one categorical column.

    Reads the notebook-level globals ``df``, ``continous_cols``, ``pipe_col``
    and ``pipe_col_dict``.

    Parameters
    ----------
    col : str
        Column (or multi-select group key found in ``pipe_col``) to summarise.
    AllorAIGA : str, default 'All'
        Which rows to include: ``'All'`` uses every row, ``'NotAIGA'`` keeps
        rows whose AIGA flag equals 0, and any other value (the notebook
        passes ``'AIGA-Only'``) keeps rows whose AIGA flag equals 1.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``Options``, ``Count`` and ``Percent`` (count divided by the
        number of rows in the selected subset), sorted by ``Percent``
        descending. ``None`` when ``col`` is listed in ``continous_cols``.
    """
    # Continuous columns have no option counts; bail out before doing any work.
    if col in continous_cols:
        return None

    AIGA_col = pipe_col_dict['V_32']['AIGA']
    if AllorAIGA == 'All':
        df_filtered = df
    elif AllorAIGA == 'NotAIGA':
        df_filtered = df[df[AIGA_col] == 0]
    else:
        df_filtered = df[df[AIGA_col] == 1]

    if col in pipe_col:
        # Multi-select group: each option has its own indicator column, so the
        # option's count is that column's sum. Collect the rows first and build
        # the frame once -- DataFrame.append was removed in pandas 2.0 and
        # appending row by row is quadratic.
        records = [
            {'Options': option, 'Count': df_filtered[flag_col].sum()}
            for option, flag_col in pipe_col_dict[col].items()
        ]
        dff = pd.DataFrame(records, columns=['Options', 'Count'])
    else:
        # Single-select column: one row per distinct value. Naming the axis and
        # the values explicitly keeps the column labels independent of the
        # pandas version's value_counts() naming.
        dff = (
            df_filtered[col]
            .value_counts()
            .rename_axis('Options')
            .reset_index(name='Count')
        )

    dff['Percent'] = dff['Count'] / len(df_filtered)
    return dff.sort_values(by='Percent', ascending=False)


In [44]:
# Report the count of each option in a summary table (the bar plot call itself is currently commented out).
# Charts were sized to fit three (AIGA, Not AIGA, ALL) side by side on a 16-wide page.

def plotColumn(dff, AllorAIGA, column=None):
    """Print one ``<subset> - <column label>: <option>: <count>`` line per option.

    Despite the name, no chart is drawn: the seaborn bar plot
    (``sns.barplot(y='Options', x='Count', data=dff, color='blue', ci=None)``)
    is disabled and only the text report remains.

    Parameters
    ----------
    dff : pandas.DataFrame or None
        Summary table with ``Options`` and ``Count`` columns. ``None`` or an
        empty table prints nothing.
    AllorAIGA : str
        Subset label used as the prefix of every printed line.
    column : str, optional
        Key into the notebook-level ``col_dict`` used to look up the column's
        display label. When omitted, the notebook-level variable ``col`` (the
        loop variable of the driver cell) is used, as before.
    """
    # Nothing to report for a missing or empty summary table.
    if dff is None or len(dff) == 0:
        return

    if column is None:
        # Backward-compatible fallback on the global set by the calling loop.
        column = col
    label = col_dict[column]

    for count, option in zip(dff['Count'], dff['Options']):
        # Only byte strings need decoding; str (and numeric) options would
        # raise AttributeError on .decode under Python 3.
        if isinstance(option, bytes):
            option = option.decode('utf-8')
        print(AllorAIGA + ' - ' + label + ': ' + str(option) + ": " + str(count))


In [45]:
# Columns whose name contains one of these tags are left out of the report.
# NOTE(review): this is a substring match, so 'V_8' also matches names such as
# 'V_80' -- confirm that is intended.
SKIPPED_COLUMN_TAGS = ('V_8', 'V_35', 'V_36')

# Respondent subsets reported for every column, in output order.
# Add 'NotAIGA' here to also report non-members.
REPORTED_SUBSETS = ('All', 'AIGA-Only')

# The loop variable must stay named `col`: plotColumn reads it as a global to
# look up the column's display label.
for col in cate_cols:
    if any(tag in col for tag in SKIPPED_COLUMN_TAGS):
        continue

    for subset in REPORTED_SUBSETS:
        # get summary table for this column and subset
        dff = summary_cate_col(col, AllorAIGA = subset)

        # summary_cate_col returns None for continuous columns, and a subset
        # can have no rows for a column; neither has anything to print.
        if dff is None or dff.empty:
            continue

        plotColumn(dff, AllorAIGA = subset)