# Goings-On to csv

In [1]:
import pandas as pd
import numpy as np
import math as math
import matplotlib.pyplot as plt
import seaborn as sns
import random as rd
%matplotlib inline 


In [2]:
# Let pandas render up to 60 columns so head() and describe() output is not truncated.
pd.options.display.max_columns = 60

In [3]:
# Load the raw goings-on diary file; the path is relative to the notebook's working directory.
goingson_path = "../aux_data/diaries_goingson_hh.csv"
df = pd.read_csv(goingson_path)

In [4]:
# Row and column counts of the freshly loaded frame, as a sanity check on the load.
df.shape

(5326, 12)

In [5]:
# Peek at the first rows to see the column names and how the answers are encoded.
df.head()

Unnamed: 0,hh_ids,int_date,goingson_q1,goingson_q2,goingson_q3,goingson_q4,goingson_q5,goingson_q6,goingson_q7,goingson_q8,goingson_q9,goingson_q10
0,KELDL09,08oct2012,No,No,No,No,No,No,No,No,No,
1,KELDL10,02oct2012,No,Yes,No,No,No,Yes,Yes,No,Yes,
2,KMAKL02,12oct2012,No,No,No,No,No,No,No,No,No,
3,KELDK12,02jul2013,No,No,No,No,No,No,No,No,No,No
4,KELDK31,02jul2013,No,No,No,No,No,No,No,No,No,Yes


## Below I rename the columns and convert the Yes/No answers to numerical values (1/0). I think this is the only way the plotting function will work.

# Explanation: 
###    q1: Was stopped, arrested or had some other problem with police or city council askari.
###    q2: Needed a doctor or medicine but went without
###    q3: Felt unsafe in home or community due to crime committed nearby
###    q4: Threatened with or experienced disconnection of electricity or water for not paying
###    q5: Had some assets taken to repay a debt
###    q6: Missed an appointment or work due to lack of transport or child care
###    q7: Expected an important source of income that did not come
###    q8: Started or stopped a romantic relationship (besides marriage/divorce)
###    q9: Went to sleep hungry or without eating
###    q10: A child was sent home from school FOR ANY REASON

## Let's first rename the columns with more meaningful names.

In [6]:
# Map each "goingson_qN" question column to a short descriptive name.
question_names = {
    "goingson_q1": "police",
    "goingson_q2": "doctor",
    "goingson_q3": "unsafe",
    "goingson_q4": "utilities",
    "goingson_q5": "asset_taken",
    "goingson_q6": "miss_app",
    "goingson_q7": "miss_inc",
    "goingson_q8": "romantic",
    "goingson_q9": "hungry",
    "goingson_q10": "school_sent",
}
df = df.rename(columns=question_names)

In [7]:
# Confirm the question columns now carry the descriptive names.
df.head()

Unnamed: 0,hh_ids,int_date,police,doctor,unsafe,utilities,asset_taken,miss_app,miss_inc,romantic,hungry,school_sent
0,KELDL09,08oct2012,No,No,No,No,No,No,No,No,No,
1,KELDL10,02oct2012,No,Yes,No,No,No,Yes,Yes,No,Yes,
2,KMAKL02,12oct2012,No,No,No,No,No,No,No,No,No,
3,KELDK12,02jul2013,No,No,No,No,No,No,No,No,No,No
4,KELDK31,02jul2013,No,No,No,No,No,No,No,No,No,Yes


In [8]:
# Renaming should leave the dimensions unchanged; compare with the shape shown right after loading.
df.shape

(5326, 12)

In [9]:
# Recode the survey answers as numbers: "Yes" becomes 1 and "No" becomes 0.
# The replacement is applied to the whole frame; any other value is left as it is.
answer_codes = {"Yes": 1, "No": 0}
df = df.replace(answer_codes)

In [10]:
# Check the recoding: the answer columns should now show 1/0 instead of Yes/No.
df.head()

Unnamed: 0,hh_ids,int_date,police,doctor,unsafe,utilities,asset_taken,miss_app,miss_inc,romantic,hungry,school_sent
0,KELDL09,08oct2012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1,KELDL10,02oct2012,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,
2,KMAKL02,12oct2012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
3,KELDK12,02jul2013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,KELDK31,02jul2013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [11]:
# Average the 0/1 answers over all rows of each household (hh_ids), giving the
# share of that household's records in which each event was reported.
# numeric_only=True keeps the string column int_date out of the aggregation:
# older pandas dropped such columns silently, but pandas 2.0+ raises a
# TypeError when asked to average a non-numeric column.
grouped = df.groupby("hh_ids").mean(numeric_only=True)

In [12]:
# List the columns that remain after aggregation (hh_ids is now the index, not a column).
grouped.columns

Index(['police', 'doctor', 'unsafe', 'utilities', 'asset_taken', 'miss_app',
       'miss_inc', 'romantic', 'hungry', 'school_sent'],
      dtype='object')

In [13]:
# Left disabled: hh_ids stays as the index of `grouped`.
# Uncomment to turn hh_ids back into a regular column.
#grouped = grouped.reset_index(level=0)

In [14]:
# Show five random households as a spot check of the aggregation.
# random_state pins the draw so a Restart & Run All displays the same rows every time.
grouped.sample(5, random_state=0)

Unnamed: 0_level_0,police,doctor,unsafe,utilities,asset_taken,miss_app,miss_inc,romantic,hungry,school_sent
hh_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
KMOMK06,0.0,0.0625,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0
KELDK28,0.0,0.0,0.076923,0.0,0.0,0.0,0.083333,0.0,0.0,0.222222
KNBOM19,0.0,0.111111,0.269231,0.038462,0.038462,0.24,0.16,0.0,0.08,0.473684
KMAKL21,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.266667
KNBOM32,0.75,0.083333,0.291667,0.0,0.0,0.041667,0.208333,0.086957,0.043478,0.0


## Null values?

In [15]:
# Summarise, per column, whether any household average is missing.
# The averages skip missing answers, so a null can only appear here when a
# household has no recorded answer at all for that question.
grouped.isna().describe()

Unnamed: 0,police,doctor,unsafe,utilities,asset_taken,miss_app,miss_inc,romantic,hungry,school_sent
count,298,298,298,298,298,298,298,298,298,298
unique,1,1,1,1,1,1,1,1,1,1
top,False,False,False,False,False,False,False,False,False,False
freq,298,298,298,298,298,298,298,298,298,298


In [16]:
# Save the per-household averages; hh_ids is the index, so it is written as the first CSV column.
grouped.to_csv(path_or_buf="goingson.csv")