# Welcome to an example Binder

This notebook uses `seaborn`, which is available because we included it in our `requirements.txt` file.

## Set up the Python environment

In [None]:
%matplotlib inline
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.ndimage import gaussian_filter

## Constants

In [None]:
# Text that identifies the budget-question columns of the survey export
# (later matched as a substring of each column name).
budget_question = (
    'The following questions are based on a sliding scale of need for investment. '
    'After reading or hearing the category, indicate whether you think this category requires:  '
    'No Additional Investment Needed, Minimal Investment Needed, '
    'Moderate Investment Needed, or Major Investment Needed.'
)

# Name of the column used to de-duplicate responses by e-mail address.
email_question = 'What is your email address?.2'

# Only responses with a timestamp strictly after this moment are analysed.
start_date = pd.Timestamp(year=2021, month=3, day=20)

## Load the data

In [None]:
# Read the raw survey export and parse the submission time of every response.
golden_dust_df = pd.read_csv('./Golden-Hibiscus-Survey.csv')
# `infer_datetime_format` is deprecated since pandas 2.0 (format inference is
# the default behaviour), so it is no longer passed.
golden_dust_df['Timestamp'] = pd.to_datetime(golden_dust_df['Timestamp'])

# Keep only the responses submitted strictly after `start_date`.
date_filter = golden_dust_df['Timestamp'] > start_date
golden_dust_df = golden_dust_df[date_filter]
print("The total number of response to the survey was", len(golden_dust_df.index))

# De-duplicate by e-mail address: responses without an e-mail are all kept,
# responses with an e-mail are reduced to the first one per address.
has_email = golden_dust_df[email_question].notna()
responses_with_email_df = golden_dust_df[has_email]
responses_no_email_df = golden_dust_df[~has_email]
responses_with_unique_email_df = responses_with_email_df.drop_duplicates(subset=[email_question])
unique_responses = pd.concat([responses_no_email_df, responses_with_unique_email_df])
print("The number of unique responses to the survey was", len(unique_responses.index))
# Count the de-duplicated frame: counting `responses_with_email_df` would
# include repeated addresses and overstate the number of unique e-mails.
print("The number of unique email addresses we received was", len(responses_with_unique_email_df.index))

# Downstream cells work on the de-duplicated responses under the original name.
golden_dust_df = unique_responses

# Release the intermediates so they do not linger in the kernel namespace.
del unique_responses, responses_with_unique_email_df, responses_no_email_df
del responses_with_email_df, has_email

## Sort by Areas of Investment

In [None]:
# Spanish answer labels and the English label each one is tallied under, so
# that both language versions of the survey end up in the same rows.
spanish_to_english = {
    'Necesita inversión grande': 'Major Investment Needed',
    'Necesita inversión moderada': 'Moderate Investment Needed',
    'Necesita inversión mínima': 'Minimal Investment Needed',
    'No necesita inversión adicional': 'No Additional Investment Needed',
    'No tengo opinión': 'Unsure',
}

# Build one answer-count Series per budget-question column.
budget_responses = list()
for col in golden_dust_df.columns:
    if budget_question not in col:
        continue
    # The investment category is the text inside the square brackets of the
    # column header. Parse it from `col` itself: since pandas 2.0 the Series
    # returned by value_counts() is named 'count', not after the column.
    bracket_match = re.search(r"\[(.+)\]", str(col))
    # Fall back to the full header rather than crashing on a bracket-less column.
    category_name = bracket_match.group(1) if bracket_match else str(col)
    budget_responses.append(golden_dust_df[col].value_counts().rename(category_name))

# Rows are answer labels, columns are investment categories. Renaming the
# Spanish labels and summing rows that share a label merges the two languages
# without assuming that every label actually occurs in the data.
budget_responses_df = (
    pd.concat(budget_responses, axis=1)
    .fillna(0)
    .rename(index=spanish_to_english)
    .groupby(level=0, sort=False)
    .sum()
)

for col in budget_responses_df.columns:
    print(col)
    print(budget_responses_df[col].to_string(), "\n")

## Sort by Need for Investment Categories

In [None]:
# For every row label of the tally table, list its per-column counts from
# highest to lowest together with their mean and standard deviation.
for answer_label in budget_responses_df.index:
    ranked_counts = budget_responses_df.loc[answer_label].sort_values(ascending=False)
    mean_count = round(ranked_counts.mean(), 1)
    std_count = round(ranked_counts.std(), 1)
    print(ranked_counts.name)
    print("\t Average count is ", mean_count)
    print("\t Standard deviation is ", std_count)
    print(ranked_counts.to_string(), "\n")

## Make some plots!

In [None]:
# Demo plot: ten Gaussian-noise series of 500 samples each.
# A seeded generator makes the figure reproducible on Restart & Run All.
rng = np.random.default_rng(seed=0)
x = rng.standard_normal((10, 500))
# sigma=[0, 10]: no smoothing across series (axis 0), sigma 10 along each series (axis 1).
x = gaussian_filter(x, [0, 10])
# Transpose so that each of the ten series becomes one column, i.e. one line.
sns.lineplot(data=x.T, dashes=False, legend=False);

In [None]:
# Demo plot: heatmap of a 10 x 20 matrix of standard-normal samples.
# A seeded generator makes the figure reproducible on Restart & Run All.
rng = np.random.default_rng(seed=0)
mat = rng.standard_normal((10, 20))
ax = sns.heatmap(mat)
ax.set_title("Random 10 x 20 matrix");