# Well-being to CSV

In [2]:
import pandas as pd
import numpy as np
import math as math
import matplotlib.pyplot as plt
import seaborn as sns
import random as rd
%matplotlib inline 


In [3]:
# Raise the column display limit to 60 so head() and describe() show every column.
pd.options.display.max_columns = 60

In [8]:
# Load the individual well-being diary answers; m_ids is parsed as text, not as a number.
# NOTE(review): the preview below shows m_ids such as 5.01e+16, so the ids look like they
# are already stored in scientific notation in the file itself — confirm the upstream export.
df = pd.read_csv(
    "../../Notebooks/aux_data/diaries_wellbeing_ind.csv",
    dtype={"m_ids": str},
)

# Explanation: 
###    q1: How happy have you been since I saw you last?
###    q2: How have you been doing economically since I saw you last?
###    q3: How have your relationships with people been since I saw you last?
###    q4: How confident in yourself have you been feeling since I saw you last?

In [9]:
# Preview the first five rows of the answers as loaded (still text labels).
df.head(5)

Unnamed: 0,hh_ids,m_ids,int_date,wellbeing_q1,wellbeing_q2,wellbeing_q3,wellbeing_q4
0,KNBOK41,5.01e+16,26nov2012,Very happy,Very badly,Very well,Very confident
1,KNBOK41,5.01e+16,10dec2012,Neither happy nor unhappy,Moderately well,Very well,Very confident
2,KNBOK41,5.01e+16,21jan2013,Very happy,Moderately well,Very well,Very confident
3,KNBOK41,5.01e+16,05feb2013,Neither happy nor unhappy,Neither well nor badly,Very well,Very confident
4,KNBOK41,5.01e+16,19feb2013,Very happy,Moderately well,Moderately well,Very confident


## Let's first rename the columns with more meaningful names.

In [10]:
# Give the four questionnaire columns descriptive labels (modifies df in place).
question_labels = {
    "wellbeing_q1": "happiness",
    "wellbeing_q2": "economically",
    "wellbeing_q3": "relationships",
    "wellbeing_q4": "confidence",
}
df.rename(columns=question_labels, inplace=True)

In [11]:
# Check that the four question columns now carry the descriptive labels.
df.head(5)

Unnamed: 0,hh_ids,m_ids,int_date,happiness,economically,relationships,confidence
0,KNBOK41,5.01e+16,26nov2012,Very happy,Very badly,Very well,Very confident
1,KNBOK41,5.01e+16,10dec2012,Neither happy nor unhappy,Moderately well,Very well,Very confident
2,KNBOK41,5.01e+16,21jan2013,Very happy,Moderately well,Very well,Very confident
3,KNBOK41,5.01e+16,05feb2013,Neither happy nor unhappy,Neither well nor badly,Very well,Very confident
4,KNBOK41,5.01e+16,19feb2013,Very happy,Moderately well,Moderately well,Very confident


In [12]:
# (number of rows, number of columns) of the renamed frame.
df.shape

(7835, 7)

# Let's convert all the answers to numeric values on a 1–5 scale (1 = most negative, 5 = most positive).

### q1: How happy have you been since I saw you last?

In [13]:
# Encode the happiness answers on a 1-5 scale (1 = "Very unhappy", 5 = "Very happy").
happiness_scale = {
    "Very unhappy": 1,
    "Moderately unhappy": 2,
    "Neither happy nor unhappy": 3,
    "Moderately happy": 4,
    "Very happy": 5,
}
# Assign the result back to the column instead of calling replace(..., inplace=True) on
# df["happiness"]: that form is chained assignment through an in-place method, which
# pandas warns about and which leaves df unchanged once Copy-on-Write is active.
# astype makes the numeric dtype explicit (missing answers stay NaN) and fails loudly
# if a label outside the scale is still present.
df["happiness"] = df["happiness"].replace(happiness_scale).astype("float64")

### q2: How have you been doing economically since I saw you last?

In [14]:
# Encode the economic answers on a 1-5 scale (1 = "Very badly", 5 = "Very well").
economic_scale = {
    "Very badly": 1,
    "Moderately badly": 2,
    "Neither well nor badly": 3,
    "Moderately well": 4,
    "Very well": 5,
}
# Assign the result back to the column instead of calling replace(..., inplace=True) on
# df["economically"]: that form is chained assignment through an in-place method, which
# pandas warns about and which leaves df unchanged once Copy-on-Write is active.
# astype makes the numeric dtype explicit (missing answers stay NaN) and fails loudly
# if a label outside the scale is still present.
df["economically"] = df["economically"].replace(economic_scale).astype("float64")

### q3: How have your relationships with people been since I saw you last?

In [15]:
# Encode the relationship answers on a 1-5 scale (1 = "Very badly", 5 = "Very well").
relationship_scale = {
    "Very badly": 1,
    "Moderately badly": 2,
    "Neither well nor badly": 3,
    "Moderately well": 4,
    "Very well": 5,
}
# Assign the result back to the column instead of calling replace(..., inplace=True) on
# df["relationships"]: that form is chained assignment through an in-place method, which
# pandas warns about and which leaves df unchanged once Copy-on-Write is active.
# astype makes the numeric dtype explicit (missing answers stay NaN) and fails loudly
# if a label outside the scale is still present.
df["relationships"] = df["relationships"].replace(relationship_scale).astype("float64")

###    q4: How confident in yourself have you been feeling since I saw you last?

In [16]:
# Encode the confidence answers on a 1-5 scale (1 = "Very unconfident", 5 = "Very confident").
confidence_scale = {
    "Very unconfident": 1,
    "Moderately unconfident": 2,
    "Neither confident nor unconfident": 3,
    "Moderately confident": 4,
    "Very confident": 5,
}
# Assign the result back to the column instead of calling replace(..., inplace=True) on
# df["confidence"]: that form is chained assignment through an in-place method, which
# pandas warns about and which leaves df unchanged once Copy-on-Write is active.
# astype makes the numeric dtype explicit (missing answers stay NaN) and fails loudly
# if a label outside the scale is still present.
df["confidence"] = df["confidence"].replace(confidence_scale).astype("float64")

In [17]:
# Confirm that the four answer columns now hold numeric scores instead of text labels.
df.head(5)

Unnamed: 0,hh_ids,m_ids,int_date,happiness,economically,relationships,confidence
0,KNBOK41,5.01e+16,26nov2012,5.0,1.0,5.0,5.0
1,KNBOK41,5.01e+16,10dec2012,3.0,4.0,5.0,5.0
2,KNBOK41,5.01e+16,21jan2013,5.0,4.0,5.0,5.0
3,KNBOK41,5.01e+16,05feb2013,3.0,3.0,5.0,5.0
4,KNBOK41,5.01e+16,19feb2013,5.0,4.0,4.0,5.0


In [18]:
# Average each score per household: one row per hh_ids, computed over all of that
# household's rows in df (mean() skips missing answers by default).
score_columns = ["happiness", "economically", "relationships", "confidence"]
ndf = df.groupby("hh_ids")[score_columns].mean()

In [19]:
# Preview the household averages; hh_ids is the index at this point.
ndf.head(5)

Unnamed: 0_level_0,happiness,economically,relationships,confidence
hh_ids,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
KELDK01,4.045455,3.863636,4.863636,4.681818
KELDK02,4.105263,3.947368,4.842105,4.736842
KELDK08,4.038462,3.653846,4.730769,4.884615
KELDK09,4.052632,3.947368,4.947368,4.894737
KELDK10,4.111111,4.0,4.777778,4.944444


### Reset the index so that `hh_ids` becomes a regular column

In [20]:
# Turn the hh_ids index left by the groupby into an ordinary column.
# The guard makes the cell safe to re-run: an unguarded second reset_index() would
# prepend a stray "index" column, which would then be written to the CSV.
if "hh_ids" not in ndf.columns:
    ndf = ndf.reset_index()

In [21]:
# Preview again: hh_ids should now appear as a regular column.
ndf.head(5)

Unnamed: 0,hh_ids,happiness,economically,relationships,confidence
0,KELDK01,4.045455,3.863636,4.863636,4.681818
1,KELDK02,4.105263,3.947368,4.842105,4.736842
2,KELDK08,4.038462,3.653846,4.730769,4.884615
3,KELDK09,4.052632,3.947368,4.947368,4.894737
4,KELDK10,4.111111,4.0,4.777778,4.944444


In [22]:
# Missing-value check: summarise the boolean null mask per column.
# A column whose only value is False has no missing household averages.
ndf.isna().describe()

Unnamed: 0,hh_ids,happiness,economically,relationships,confidence
count,298,298,298,298,298
unique,1,1,1,1,1
top,False,False,False,False,False
freq,298,298,298,298,298


In [23]:
# Write the household averages next to the notebook.
# NOTE(review): the row index is written as an unnamed first column (pandas default,
# made explicit here); kept as-is so existing readers of wellbeing.csv see the same layout.
ndf.to_csv("wellbeing.csv", index=True)