In [1]:
import pandas as pd
import pysentani as sti
# Read the merged, cleaned, anonymous survey workbook into a DataFrame.
survey_path = '../data-anonymous/sentani-merged-cleaned-anonymous-2014-11-29.xlsx'
survey_raw = pd.read_excel(survey_path)

# Derive the access-type column with the pysentani helper while the frame still
# holds its original nulls (the helper's logic lives in pysentani, not here).
survey_raw['access_type'] = sti.access_type(survey_raw)

# Null responses are treated as 0 everywhere, i.e. "no ownership / no use
# time reported", so the arithmetic below never propagates NaN.
survey = survey_raw.fillna(0)

Up to this point, survey responses regarding appliance use times and uses per week have been used to infer the amount of time that a given appliance is used and, subsequently, the energy use of each appliance. The next step in verifying that this analysis is plausible is to confirm that, in responses indicating appliance use, appliance ownership is also reported. Null responses are treated as 0, which indicates that no ownership/use time was reported.

We will start by looking only at the columns for a single appliance (TV): the ownership column and both use-response columns.

In [2]:
# TV ownership flag followed by the two TV usage columns.
tv_columns = ['app_now/TV', 'app_TV_per_wk', 'app_TV_hrs']
show_data_a = survey[tv_columns]

# Transpose the first 20 responses so each respondent becomes one column,
# which makes ownership and usage easy to compare side by side.
show_data_a.head(20).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
app_now/TV,1,1,1,1,0,1,1,0,0,1,1,1,1,0,0,0,1,1,1,1
app_TV_per_wk,7,7,7,2,0,7,0,0,0,7,7,7,7,0,0,0,7,7,3,3
app_TV_hrs,4,2,3,2,0,2,0,0,0,7,3,3,5,0,0,0,3,3,6,2


Respondents (the columns of the transposed table above) that report 0 (does not own) in app_now/TV should also report 0 use time in both use columns.

A new column is created with the product of both TV use columns (uses per week × hours), and then the sum is taken. This returns the total reported weekly TV use hours across the entire survey.

In [3]:
# Per-respondent TV usage: times used per week multiplied by hours of use.
tv_uses_per_week = survey['app_TV_per_wk']
tv_hours_per_use = survey['app_TV_hrs']
survey['TV_hrs'] = tv_uses_per_week.mul(tv_hours_per_use)

# Survey-wide total of the reported TV hours.
survey['TV_hrs'].sum()

37863.0

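Then multiply the same data by the app_now/TV column. Any use time reported without TV ownership will now be multiplied by 0 (does not own) and drop out of the total. Since use times cannot be negative, the two sums can only match if no response reports TV use without also reporting TV ownership. If both numbers match, there are no conflicting reports between appliance ownership and usage.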

In [4]:
# Same usage product as before ...
tv_reported_hours = survey['app_TV_per_wk'].mul(survey['app_TV_hrs'])

# ... now masked by the ownership flag: rows with app_now/TV == 0 contribute 0.
TV_check_a = tv_reported_hours.mul(survey['app_now/TV'])

# If this total equals the unmasked total, no usage was reported without ownership.
TV_check_a.sum()

37863.0

This method will now be used to check the remaining five appliance categories all at once.

In [5]:
# For each remaining appliance: ownership flag, uses per week, hours of use.
remaining_appliance_columns = [
    'app_now/fridge', 'app_fridge_per_wk', 'app_fridge_hrs',
    'app_now/radio', 'app_radio_per_wk', 'app_radio_hrs',
    'app_now/fan', 'app_fan_per_wk', 'app_fan_hrs',
    'app_now/rice_cooker', 'app_rice_cooker_per_wk', 'app_rice_cooker_hrs',
    'app_now/lighting', 'app_lighting_per_wk', 'app_lighting_hrs',
]
show_data_b = survey[remaining_appliance_columns]
show_data_b.head(20).T
# NOTE: one data point in survey['app_lighting_per_wk'] ("For lighting, how many
# times a week do you use it?") reflects a value of 2e5. Presumably the question
# was meant to ask how many days a week lighting is used.
# The next sanity check will need to find out, for each appliance, how many
# *_per_wk values are above 7 and, similarly, how many daily-use-hours values
# are above 24.

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
app_now/fridge,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
app_fridge_per_wk,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
app_fridge_hrs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
app_now/radio,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0
app_radio_per_wk,7,3,0,0,0,0,0,0,4,7,0,0,0,0,0,0,7,0,0,0
app_radio_hrs,2,2,0,0,0,0,0,0,2,7,0,0,0,0,0,0,4,0,0,0
app_now/fan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
app_fan_per_wk,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0
app_fan_hrs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0
app_now/rice_cooker,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [6]:
# (new total column, uses-per-week column, hours-of-use column) per appliance.
usage_column_specs = [
    ('fridge_hrs', 'app_fridge_per_wk', 'app_fridge_hrs'),
    ('radio_hrs', 'app_radio_per_wk', 'app_radio_hrs'),
    ('fan_hrs', 'app_fan_per_wk', 'app_fan_hrs'),
    ('rice_cooker_hrs', 'app_rice_cooker_per_wk', 'app_rice_cooker_hrs'),
    ('lighting_hrs', 'app_lighting_per_wk', 'app_lighting_hrs'),
]

# Reported usage per respondent = uses per week * hours of use.
for total_col, per_wk_col, hrs_col in usage_column_specs:
    survey[total_col] = survey[per_wk_col] * survey[hrs_col]

In [7]:
# Survey-wide total of reported usage hours, one entry per appliance.
usage_total_columns = ['fridge_hrs', 'radio_hrs', 'fan_hrs',
                       'rice_cooker_hrs', 'lighting_hrs']
appliance_hrs = survey[usage_total_columns].sum()
appliance_hrs

fridge_hrs          152226
radio_hrs             8735
fan_hrs               5349
rice_cooker_hrs      16075
lighting_hrs       1544640
dtype: float64

In [8]:
# (check column, uses-per-week column, hours-of-use column, ownership flag).
# Note the rice cooker check column uses the short name 'rc_check'.
ownership_check_specs = [
    ('fridge_check', 'app_fridge_per_wk', 'app_fridge_hrs', 'app_now/fridge'),
    ('radio_check', 'app_radio_per_wk', 'app_radio_hrs', 'app_now/radio'),
    ('fan_check', 'app_fan_per_wk', 'app_fan_hrs', 'app_now/fan'),
    ('rc_check', 'app_rice_cooker_per_wk', 'app_rice_cooker_hrs', 'app_now/rice_cooker'),
    ('lighting_check', 'app_lighting_per_wk', 'app_lighting_hrs', 'app_now/lighting'),
]

# Usage product masked by ownership: a row whose ownership flag is 0
# contributes 0 to the check column regardless of reported usage.
for check_col, per_wk_col, hrs_col, owned_col in ownership_check_specs:
    survey[check_col] = survey[per_wk_col] * survey[hrs_col] * survey[owned_col]

In [9]:
# Totals of the ownership-masked usage columns; these should equal the
# unmasked per-appliance totals if ownership and usage reports agree.
check_columns = ['fridge_check', 'radio_check', 'fan_check',
                 'rc_check', 'lighting_check']
hours_check = survey[check_columns].sum()
hours_check

fridge_check       152226
radio_check          8735
fan_check            5349
rc_check            16075
lighting_check    1544640
dtype: float64