In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import pysentani as snt
import sentani as sti

# Obtain the survey table through pysentani, pointing it at the relative
# anonymised-data directory. Later cells index `survey` by column name and
# call .dropna(subset=...) on it, i.e. it is used as a pandas DataFrame.
# NOTE(review): find_survey's internals are not visible here; the path printed
# below suggests it picks a single .xlsx workbook in that folder -- confirm.
survey = snt.find_survey("../data-anonymous")

../data-anonymous/sentani-merged-cleaned-anonymous-2014-11-29.xlsx


In [2]:
# Add an 'electricity_monthly' column to the survey table, filled with the
# result of pysentani's elec_expenditure_monthly helper applied to the table.
# NOTE(review): units and the handling of missing answers are defined inside
# pysentani and are not visible in this notebook -- confirm before interpreting.
survey['electricity_monthly'] = snt.elec_expenditure_monthly(survey)

In [3]:
# Tabulate every free-text answer to power_supply_other, keeping missing
# answers in the count (dropna=False), to see how respondents worded things.
# We are interested in answers that mention a neighbor's generator
# ("genset tetangga", also spelled "genset tetanga" by some respondents).
survey['power_supply_other'].value_counts(dropna=False)
# Other answers that refer to a neighbor's supply without the words "genset tetangga":
    # Bantuan listrik tetanga = electricity assistance from a neighbor
    # Sambung aliran genset dari tetangga = connected to a neighbor's generator supply
    # Di sambung dri tetangah = (apparent misspelling of "disambung dari tetangga")
    #     connected from the neighbor -- TODO: decide whether to omit this one
    # Sambungan dari tetangga = connection from the neighbor
    # Listrik tetangga = neighbor's electricity

NaN                                                                                     1039
Pln genyem                                                                               110
Genset tetangga.                                                                           7
Pln sentani                                                                                4
Minyak tanah                                                                               3
Genset tetanga                                                                             3
Pln genyem                                                                                 2
Lampu gas                                                                                  1
Listrik                                                                                    1
Minyak tanah.                                                                              1
Bantuan listrik tetanga.                                              

In [18]:
# Column-name lists reserved for later use when applying a single function
# across all appliances (currently disabled).
#hrs_series = ['app_TV_hrs','app_fridge_hrs','app_radio_hrs','app_lighting_hrs',
#              'app_fan_hrs','app_rice_cooker_hrs']

#wek_series = ['app_TV_per_wk','app_fridge_per_wk','app_radio_per_wk','app_lighting_per_wk',
#              'app_fan_per_wk','app_rice_cooker_per_wk']

In [5]:
# Keep only the households that gave any answer for power_supply_other.
df = survey.dropna(subset=['power_supply_other'])
# Boolean mask over df: True where the free-text answer contains the
# (case-sensitive) substring "tet". This filters -- it does not sort -- and
# picks up the neighbor-generator answers across the spelling variants seen
# in the data (tetanga / tetangga / tetangah).
tet = df['power_supply_other'].str.contains('tet')
# Hours per use and uses per week for each appliance, followed by the reported
# monthly electricity spend. Together these give insight into how much power
# the neighbor-supplied households use per week, which is not being reported
# in their monthly expenditures (except for one household).
tet_usage_columns = [
    'app_TV_hrs', 'app_TV_per_wk',
    'app_fridge_hrs', 'app_fridge_per_wk',
    'app_fan_hrs', 'app_fan_per_wk',
    'app_lighting_hrs', 'app_lighting_per_wk',
    'app_radio_hrs', 'app_radio_per_wk',
    'app_rice_cooker_hrs', 'app_rice_cooker_per_wk',
    'electricity_monthly',
]
# Missing answers are shown as 0; transposed so each household is a column.
df.loc[tet, tet_usage_columns].fillna(0).T

Unnamed: 0,258,272,286,305,505,520,522,524,562,584,711,853,899,923,928,939,969
app_TV_hrs,6,2,0,0,0,5,0,5,0,0,0,8,1,0,5,0,6
app_TV_per_wk,7,7,0,0,0,4,0,7,0,0,0,7,4,0,7,0,7
app_fridge_hrs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
app_fridge_per_wk,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
app_fan_hrs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
app_fan_per_wk,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
app_lighting_hrs,6,6,6,0,0,5,0,5,0,5,0,0,6,0,7,0,6
app_lighting_per_wk,7,7,7,0,0,4,0,7,0,7,0,0,4,0,7,0,7
app_radio_hrs,0,0,0,0,0,0,0,0,0,2,24,4,2,0,5,0,0
app_radio_per_wk,0,0,0,0,0,0,0,0,0,7,7,4,4,0,7,0,0


In [6]:
# TV hours per use for the households selected by the `tet` mask (missing -> 0).
# Assignment aligns on the index, so all other survey rows receive NaN.
survey['TV_hrs'] = df.loc[tet, 'app_TV_hrs'].fillna(0)
# TV uses per week for the same households, filled the same way.
survey['TV_week'] = df.loc[tet, 'app_TV_per_wk'].fillna(0)
# Weekly TV hours = hours per use x uses per week, row by row; dropna()
# discards the NaN rows, leaving only the `tet` households.
TV_df = survey[['TV_hrs']].mul(survey['TV_week'], axis='index').dropna()

In [7]:
# Weekly TV hours as a single row, one column per selected household.
TV_df.T

Unnamed: 0,258,272,286,305,505,520,522,524,562,584,711,853,899,923,928,939,969
TV_hrs,42,14,0,0,0,20,0,35,0,0,0,56,4,0,35,0,42


In [8]:
# Fridge hours per use and uses per week for the `tet` households (missing -> 0);
# index alignment leaves every other survey row as NaN.
survey['fridge_hrs'] = df.loc[tet, 'app_fridge_hrs'].fillna(0)
survey['fridge_week'] = df.loc[tet, 'app_fridge_per_wk'].fillna(0)
# Weekly fridge hours = hours per use x uses per week; dropna() keeps only
# the `tet` households.
fridge_df = survey[['fridge_hrs']].mul(survey['fridge_week'], axis='index').dropna()

In [9]:
# Weekly fridge hours per selected household -- no responses in this
# category, so every value is 0.
fridge_df.T

Unnamed: 0,258,272,286,305,505,520,522,524,562,584,711,853,899,923,928,939,969
fridge_hrs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [10]:
# Fan hours per use and uses per week for the `tet` households (missing -> 0);
# index alignment leaves every other survey row as NaN.
survey['fan_hrs'] = df.loc[tet, 'app_fan_hrs'].fillna(0)
survey['fan_week'] = df.loc[tet, 'app_fan_per_wk'].fillna(0)
# Weekly fan hours = hours per use x uses per week; dropna() keeps only
# the `tet` households.
fan_df = survey[['fan_hrs']].mul(survey['fan_week'], axis='index').dropna()

In [11]:
# Weekly fan hours per selected household -- no responses here either,
# so every value is 0.
fan_df.T

Unnamed: 0,258,272,286,305,505,520,522,524,562,584,711,853,899,923,928,939,969
fan_hrs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [12]:
# Lighting hours per use and uses per week for the `tet` households
# (missing -> 0); index alignment leaves every other survey row as NaN.
survey['lighting_hrs'] = df.loc[tet, 'app_lighting_hrs'].fillna(0)
survey['lighting_week'] = df.loc[tet, 'app_lighting_per_wk'].fillna(0)
# Weekly lighting hours = hours per use x uses per week; dropna() keeps only
# the `tet` households.
lighting_df = survey[['lighting_hrs']].mul(survey['lighting_week'], axis='index').dropna()

In [13]:
# Weekly lighting hours as a single row, one column per selected household.
lighting_df.T

Unnamed: 0,258,272,286,305,505,520,522,524,562,584,711,853,899,923,928,939,969
lighting_hrs,42,42,42,0,0,20,0,35,0,35,0,0,24,0,49,0,42


In [14]:
# Radio hours per use and uses per week for the `tet` households (missing -> 0);
# index alignment leaves every other survey row as NaN.
survey['radio_hrs'] = df.loc[tet, 'app_radio_hrs'].fillna(0)
survey['radio_week'] = df.loc[tet, 'app_radio_per_wk'].fillna(0)
# Weekly radio hours = hours per use x uses per week; dropna() keeps only
# the `tet` households.
radio_df = survey[['radio_hrs']].mul(survey['radio_week'], axis='index').dropna()

In [15]:
# Weekly radio hours as a single row, one column per selected household.
# NOTE(review): one household reports 24 hours x 7 uses = 168 h/week, i.e.
# the radio never off -- worth sanity-checking against the raw response.
radio_df.T

Unnamed: 0,258,272,286,305,505,520,522,524,562,584,711,853,899,923,928,939,969
radio_hrs,0,0,0,0,0,0,0,0,0,14,168,16,8,0,35,0,0


In [16]:
# Rice-cooker hours per use and uses per week for the `tet` households
# (missing -> 0); index alignment leaves every other survey row as NaN.
survey['rice_cooker_hrs'] = df.loc[tet, 'app_rice_cooker_hrs'].fillna(0)
survey['rice_cooker_week'] = df.loc[tet, 'app_rice_cooker_per_wk'].fillna(0)
# Weekly rice-cooker hours = hours per use x uses per week; dropna() keeps
# only the `tet` households.
rice_cooker_df = survey[['rice_cooker_hrs']].mul(survey['rice_cooker_week'], axis='index').dropna()

In [17]:
# Weekly rice-cooker hours as a single row, one column per selected household.
rice_cooker_df.T

Unnamed: 0,258,272,286,305,505,520,522,524,562,584,711,853,899,923,928,939,969
rice_cooker_hrs,0,14,0,0,0,0,0,35,0,0,0,0,0,0,0,0,0
