In [1]:
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pandas as pd
import numpy as np
import re

# Show every column and every row when a DataFrame is displayed (None removes the limit).
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

## Obtain data from Google spreadsheet

In [2]:
# Read-only access to Google Sheets is all this notebook asks for.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']

# Start without credentials so the validity check below also works on a first
# run, when no token.pickle has been written yet.
creds = None
if os.path.exists('token.pickle'):
    # NOTE(security): pickle.load can execute arbitrary code contained in the file;
    # only load a token.pickle that this notebook wrote itself.
    with open('token.pickle', 'rb') as token:
        creds = pickle.load(token)

# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(
            'credentials.json', SCOPES)
        creds = flow.run_local_server(port=0)
    # Save the credentials for the next run
    with open('token.pickle', 'wb') as token:
        pickle.dump(creds, token)

service = build('sheets', 'v4', credentials=creds)

sheet = service.spreadsheets()

In [3]:
def gsheet2df(gsheet):
    """Convert a Google Sheets ``values`` response into a pandas DataFrame.

    The first row of ``gsheet['values']`` is used as the header and every
    following row becomes one DataFrame row.

    Rows shorter than the header are padded with empty strings (the Sheets API
    can return ragged rows when trailing cells are empty); cells beyond the
    header width are ignored.

    Parameters
    ----------
    gsheet : dict
        Response of ``spreadsheets().values().get(...).execute()``.

    Returns
    -------
    pandas.DataFrame or None
        The sheet contents, or ``None`` (after printing 'No data found.') when
        the response holds no data rows below the header.
    """
    rows = gsheet.get('values', [])
    # Fewer than two rows means no header or nothing below it.
    if len(rows) < 2:
        print('No data found.')
        return None

    header, values = rows[0], rows[1:]
    width = len(header)
    # Normalise every row to the header width so ragged rows cannot raise IndexError.
    padded_rows = [list(row[:width]) + [''] * (width - len(row)) for row in values]
    return pd.DataFrame(padded_rows, columns=header)

In [4]:
# Spreadsheet ID and the A1-notation range (tab name + columns) to download.
spreadsheet = '1EHTQy7nnT0JWgiWenlgxbGOpTN0jvc3ZWMSYVcb3reQ'
sheet_range = '8th April 2020!A:XX'

# Build the request first, then execute it to obtain the raw response.
values_request = sheet.values().get(spreadsheetId=spreadsheet, range=sheet_range)
survey_responses = values_request.execute()

In [5]:
survey = gsheet2df(survey_responses)
# gsheet2df returns None when the sheet has no data rows; stop here with a clear
# message instead of failing later with an AttributeError on None.
if survey is None:
    raise ValueError("The requested sheet range returned no survey rows.")

## Combine columns 

In [6]:
questions = ['NameoftheDistrict', '1_1', '1_2', '2_1', '2_2', '2_3', 
             '3_1.mgnrega', '3_1.pmKisan', '3_1.ujjwala', '3_1.janDhan', '3_1.oldAgeWidow', 
             '3_1.registeredLabour', '3_1.freeRation', '3_1.notEligible', '3_1.none',
             '3_2.mgnrega', '3_2.pmKisan', '3_2.ujjwala', '3_2.janDhan', '3_2.oldAgeWidow', 
             '3_2.registeredLabour', '3_2.bplRation', '3_2.generalRation', '3_2.none',
             '3_3', '3_4', '4_1', '4_2']

# For each question, merge every raw column whose name contains the question key
# (substring match) into one column named after the key, then drop the raw columns.
# NOTE(review): a raw column named exactly like the key would be part of `cols` and
# therefore dropped right after being assigned - confirm raw names always differ.
for question in questions:
    cols = [col for col in survey.columns if question in col]
    if question == 'NameoftheDistrict':
        # 'locationBihar' is folded into the district column as well.
        cols = cols + ['locationBihar']
    # Keyword arguments are required here: pandas 2.0 no longer accepts the axis
    # of DataFrame.drop positionally.
    survey[question] = survey[cols].sum(axis=1)
    survey = survey.drop(columns=cols)

# The stray '_2' column is merged into '4_2' and then removed.
cols = ['_2' , '4_2']
survey['4_2'] = survey[cols].sum(axis=1)
survey = survey.drop(columns=['_2'])

## Clean strings

In [7]:
# Column names
# Raw form field name -> analysis-friendly column name.
column_renames = {
    "_id": "ID",
    "created": "time_created",
    "modified": "time_modified",
    "introductionIam": "surveyer",
    "NUMBER_ID": "number_id",
    "introductionCallConnected": "call_connected",
    "introductionAvailability": "availability",
    "locationSelectState": "state",
    "locationNameofArea": "sub_district",
    "endCallStatusEndCallStatus": "end_call_status",
    "NameoftheDistrict": "district",
    "1_1": "need_meds",
    "1_2": "comments_health",
    "2_1": "have_food",
    "2_3": "comments_food",
    '3_1.mgnrega': "received_mgnrega",
    '3_1.pmKisan': "received_pm_kisan",
    '3_1.ujjwala': "received_ujjwala",
    '3_1.janDhan': "received_jan_dhan",
    '3_1.oldAgeWidow': "received_old_age_widow",
    '3_1.registeredLabour': "received_registered_labor",
    '3_1.freeRation': "received_free_ration",
    '3_1.notEligible': "received_not_eligible",
    '3_1.none': "received_none",
    '3_2.mgnrega': "eligible_mgnrega",
    '3_2.pmKisan': "eligible_pm_kisan",
    '3_2.ujjwala': "eligible_ujjwala",
    '3_2.janDhan': "eligible_jan_dhan",
    '3_2.oldAgeWidow': "eligible_old_age_widow",
    '3_2.registeredLabour': "eligible_registered_labor",
    '3_2.bplRation': "eligible_bpl_ration",
    '3_2.generalRation': "eligible_general_ration",
    '3_2.none': "eligible_none",
    "3_3": "employment_status",
    "3_4": "comments_benefits",
    "4_1": "need_to_revert",
    "4_2": "comments_additional",
}
survey = survey.rename(columns=column_renames)

In [8]:
# Turn textual yes/no/true/false answers into Python booleans.

binary_cols = [
    'call_connected', 'availability', 'need_meds', 'have_food',
    'received_mgnrega', 'received_pm_kisan', 'received_ujjwala', 'received_jan_dhan',
    'received_old_age_widow', 'received_registered_labor', 'received_free_ration',
    'received_not_eligible', 'received_none',
    'eligible_mgnrega', 'eligible_pm_kisan', 'eligible_ujjwala', 'eligible_jan_dhan',
    'eligible_old_age_widow', 'eligible_registered_labor', 'eligible_bpl_ration',
    'eligible_general_ration', 'eligible_none',
    'need_to_revert',
]

# Case-insensitive patterns anchored at the start of the cell, applied in this order.
boolean_patterns = (
    (r'(?i)^(yes|true)', True),
    (r'(?i)^(no|false)', False),
)
for pattern, flag in boolean_patterns:
    survey[binary_cols] = survey[binary_cols].replace(to_replace=pattern, value=flag, regex=True)

In [9]:
# Parse the creation timestamps (strings shaped like "2020-04-08 05:38 PM UTC") into datetimes.
# NOTE(review): the next cell starts with this exact conversion again, so this cell looks redundant.
survey['time_created'] = pd.to_datetime(survey['time_created'], format='%Y-%m-%d %I:%M %p UTC')

In [10]:
# date columns: parse as UTC, convert to Asia/Kolkata and drop the timezone information
for time_col in ('time_created', 'time_modified'):
    parsed = pd.to_datetime(survey[time_col], format='%Y-%m-%d %I:%M %p UTC')
    in_kolkata = parsed.dt.tz_localize("UTC").dt.tz_convert('Asia/Kolkata')
    survey[time_col] = in_kolkata.dt.tz_localize(None)


In [11]:
# state, district, sub district
def capitalizeName(name):
    """Normalise a place name to title case.

    Steps, in order: remove the word "village" (any case), insert a space before
    every capital letter that directly follows a word character (splits
    camelCase), lower-case everything but the first character, replace hyphens
    with spaces, strip surrounding whitespace and title-case the result.
    """
    without_village = re.sub(r"(?i)village", "", name)
    spaced = re.sub(r"(?<=\w)([A-Z])", r" \1", without_village)
    return spaced.capitalize().replace("-", " ").strip().title()

# Clean up the three location columns cell by cell.
location_cols = ['state', 'district', 'sub_district']
survey[location_cols] = survey[location_cols].applymap(capitalizeName)

In [12]:
# employment status: strip every run of non-ASCII characters
non_ascii_run = re.compile("([^\x00-\x7F])+")
survey['employment_status'] = survey['employment_status'].apply(lambda status: non_ascii_run.sub("", status))

In [13]:
# NaNs: treat empty / whitespace-only cells as missing, then drop columns that are
# entirely missing.
# np.nan (lowercase) is used because the np.NaN alias was removed in NumPy 2.0.
survey = (
    survey
    .replace(r'^\s*$', np.nan, regex=True)
    .dropna(axis=1, how="all")
)

## Other checks

In [14]:
# Rows whose ID has already appeared earlier in the frame
repeated_id = survey.duplicated('ID')
survey[repeated_id]

Unnamed: 0,ID,time_created,time_modified,surveyer,number_id,call_connected,availability,state,sub_district,end_call_status,district,need_meds,comments_health,have_food,comments_food,received_mgnrega,received_pm_kisan,received_ujjwala,received_jan_dhan,received_old_age_widow,received_registered_labor,received_free_ration,received_not_eligible,received_none,eligible_mgnrega,eligible_pm_kisan,eligible_ujjwala,eligible_jan_dhan,eligible_old_age_widow,eligible_registered_labor,eligible_bpl_ration,eligible_general_ration,eligible_none,employment_status,comments_benefits,need_to_revert,comments_additional


In [15]:
# data present when call not connected or no availability
not_reached = (survey['call_connected']==False) | (survey['availability']==False)
nodata_rows = survey[not_reached]

# Columns excluded from the check below
call_meta_cols = ['ID', 'time_created', 'time_modified', 'surveyer',
                  'number_id', 'call_connected',
                  'availability', 'end_call_status']
answer_cols = nodata_rows.columns.difference(call_meta_cols)

# Show the rows in which every remaining column is filled in
nodata_rows[nodata_rows[answer_cols].notnull().all(axis=1)]

Unnamed: 0,ID,time_created,time_modified,surveyer,number_id,call_connected,availability,state,sub_district,end_call_status,district,need_meds,comments_health,have_food,comments_food,received_mgnrega,received_pm_kisan,received_ujjwala,received_jan_dhan,received_old_age_widow,received_registered_labor,received_free_ration,received_not_eligible,received_none,eligible_mgnrega,eligible_pm_kisan,eligible_ujjwala,eligible_jan_dhan,eligible_old_age_widow,eligible_registered_labor,eligible_bpl_ration,eligible_general_ration,eligible_none,employment_status,comments_benefits,need_to_revert,comments_additional
8,5e8dbeb3f0d65d001aac7393,2020-04-08 17:38:00,2020-04-08 17:38:00,indusActionStaffVolunteer,510939.0,False,,Rajasthan,N A,wrongNumber,Ajmer,False,,True,,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployed,,False,
218,5e8d85e3f0d65d001aac590f,2020-04-08 13:35:00,2020-04-08 13:35:00,indusActionStaffVolunteer,327265.0,True,False,Chhattisgarh,G P Jhalmala,callDone,Balod,False,The family consist four members; it includes t...,True,They having ration but to purchase vegetable a...,False,False,False,False,False,False,True,False,False,True,False,True,True,False,False,True,False,False,unemployedPostLockdown,"By professional, he is working as construction...",False,"PDS scheme is functional, government need to s..."
336,5e8c7d1ef0d65d001aac4cee,2020-04-07 18:46:00,2020-04-07 18:46:00,indusActionStaffVolunteer,546461.0,False,,Uttar Pradesh,Na,callDone,Kanpur Nagar,False,No issue.,True,Sufficient food.,False,False,True,True,False,False,True,True,False,False,False,False,True,False,False,False,False,False,employedFormalSector,No issue with the job.,False,He is doing well and take care of himself.
399,5e8c6cf3f0d65d001aac469f,2020-04-07 17:37:00,2020-04-07 17:37:00,indusActionStaffVolunteer,,False,,Rajasthan,N A,notRespondingSwitchedOff,Ajmer,False,,True,,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployed,,False,
401,5e8c6c72f0d65d001aac4694,2020-04-07 17:35:00,2020-04-07 17:35:00,indusActionStaffVolunteer,,False,,Rajasthan,N A,wrongNumber,Ajmer,False,,True,,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployed,,False,


In [16]:
# call not connected but available
unconnected = nodata_rows['call_connected']==False
marked_available = nodata_rows['availability']==True
nodata_rows[unconnected & marked_available]

Unnamed: 0,ID,time_created,time_modified,surveyer,number_id,call_connected,availability,state,sub_district,end_call_status,district,need_meds,comments_health,have_food,comments_food,received_mgnrega,received_pm_kisan,received_ujjwala,received_jan_dhan,received_old_age_widow,received_registered_labor,received_free_ration,received_not_eligible,received_none,eligible_mgnrega,eligible_pm_kisan,eligible_ujjwala,eligible_jan_dhan,eligible_old_age_widow,eligible_registered_labor,eligible_bpl_ration,eligible_general_ration,eligible_none,employment_status,comments_benefits,need_to_revert,comments_additional


In [17]:
# removing duplicates: first fully identical rows, then rows repeating an ID
survey = survey.drop_duplicates().drop_duplicates(subset=['ID'])

In [18]:
# keeping only available: the call connected and the respondent was available
reached = (survey['call_connected'] == True) & (survey['availability'] == True)
survey = survey[reached]

In [19]:
# state not entered
state_missing = survey['state'].isna()
survey[state_missing]

Unnamed: 0,ID,time_created,time_modified,surveyer,number_id,call_connected,availability,state,sub_district,end_call_status,district,need_meds,comments_health,have_food,comments_food,received_mgnrega,received_pm_kisan,received_ujjwala,received_jan_dhan,received_old_age_widow,received_registered_labor,received_free_ration,received_not_eligible,received_none,eligible_mgnrega,eligible_pm_kisan,eligible_ujjwala,eligible_jan_dhan,eligible_old_age_widow,eligible_registered_labor,eligible_bpl_ration,eligible_general_ration,eligible_none,employment_status,comments_benefits,need_to_revert,comments_additional


In [20]:
# district not entered
district_missing = survey['district'].isna()
survey[district_missing]

Unnamed: 0,ID,time_created,time_modified,surveyer,number_id,call_connected,availability,state,sub_district,end_call_status,district,need_meds,comments_health,have_food,comments_food,received_mgnrega,received_pm_kisan,received_ujjwala,received_jan_dhan,received_old_age_widow,received_registered_labor,received_free_ration,received_not_eligible,received_none,eligible_mgnrega,eligible_pm_kisan,eligible_ujjwala,eligible_jan_dhan,eligible_old_age_widow,eligible_registered_labor,eligible_bpl_ration,eligible_general_ration,eligible_none,employment_status,comments_benefits,need_to_revert,comments_additional


In [21]:
# removing rows with no state entry
survey = survey.dropna(subset=['state'])

In [22]:
# 3.2 not eligible true but 3.1 not eligible false
eligible_for_nothing = survey['eligible_none']==True
not_marked_ineligible = survey['received_not_eligible']==False
survey[eligible_for_nothing & not_marked_ineligible]

Unnamed: 0,ID,time_created,time_modified,surveyer,number_id,call_connected,availability,state,sub_district,end_call_status,district,need_meds,comments_health,have_food,comments_food,received_mgnrega,received_pm_kisan,received_ujjwala,received_jan_dhan,received_old_age_widow,received_registered_labor,received_free_ration,received_not_eligible,received_none,eligible_mgnrega,eligible_pm_kisan,eligible_ujjwala,eligible_jan_dhan,eligible_old_age_widow,eligible_registered_labor,eligible_bpl_ration,eligible_general_ration,eligible_none,employment_status,comments_benefits,need_to_revert,comments_additional
1,5e8dbff4f0d65d001aac73ba,2020-04-08 17:43:00,2020-04-08 17:43:00,indusActionStaffVolunteer,370921.0,True,True,Rajasthan,N A,callDone,Jaipur,False,No need,True,No need,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployed,,False,He works in a private job in Jaipur but curren...
3,5e8dbfa7f0d65d001aac73b1,2020-04-08 17:42:00,2020-04-08 17:42:00,indusActionStaffVolunteer,753355.0,True,True,Uttar Pradesh,Naini A D A,callDone,Prayagraj,False,currently no any issue,True,filhal koi samsya ni hai .,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployedPostLockdown,banaras ke rehne wale hain .3 saal pehle prayg...,False,ola me driver the abhi berojgar hai. future ko...
7,5e8dbf17f0d65d001aac739e,2020-04-08 17:39:00,2020-04-08 17:39:00,indusActionStaffVolunteer,362007.0,True,True,Delhi,Not Specified,callDone,East Delhi,True,A member requires a medical attention but the ...,True,,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployed,,False,
23,5e8dbba2f0d65d001aac734d,2020-04-08 17:25:00,2020-04-08 17:25:00,indusActionStaffVolunteer,373701.0,True,True,Bihar,Chhapra,callDone,Saran,True,,True,,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,employedFormalSector,,False,
24,5e8dbb7df0d65d001aac7349,2020-04-08 17:24:00,2020-04-08 17:24:00,indusActionStaffVolunteer,857808.0,True,True,Uttar Pradesh,Kalyanpur,callDone,Lucknow,False,swasth sambdhi koi samsya ni hai,False,bijli ka chota mota kaam krte the lock down ke...,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployedPostLockdown,inke pas ration card nahi hai .inke pita ji ka...,False,bijli ka chota mota kaam krte the lock down ke...
65,5e8db388f0d65d001aac6c04,2020-04-08 16:50:00,2020-04-08 16:50:00,indusActionStaffVolunteer,224916.0,True,True,Rajasthan,N A,callDone,Jaipur,False,No comments,True,No comments,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,employedInformalSector,No comments,False,No comments
73,5e8db106f0d65d001aac6b28,2020-04-08 16:39:00,2020-04-08 16:39:00,indusActionStaffVolunteer,604257.0,True,True,Maharashtra,Govandi,callDone,Akola,False,He said that they are not in need of health se...,True,Izhar said that although he and his wife are c...,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployedPostLockdown,He said that there are many in his area who ar...,True,More information regarding essentials in govan...
75,5e8db03cf0d65d001aac6a9a,2020-04-08 16:36:00,2020-04-08 16:36:00,indusActionStaffVolunteer,156313.0,True,True,Uttar Pradesh,N A,callDone,Shamli,False,,True,,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployed,No comments,True,No comments
79,5e8daff6f0d65d001aac6a89,2020-04-08 16:35:00,2020-04-08 16:35:00,indusActionStaffVolunteer,401683.0,True,True,Rajasthan,N A,callDone,Bhilwara,False,,False,"2-4 days food available,",False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployedPostLockdown,If there was a survey before the lock-down it ...,False,No comments
92,5e8dad30f0d65d001aac6a2c,2020-04-08 16:23:00,2020-04-08 16:23:00,solveNinjaReapBenefitStaffVolunteer,833573.0,True,True,Chhattisgarh,Sakhara,callDone,Balod,False,Nil,True,Nil,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployed,Nil,False,Nil


In [23]:
# bpl and general ration both marked as eligible
bpl_eligible = survey['eligible_bpl_ration']==True
general_eligible = survey['eligible_general_ration']==True
survey[bpl_eligible & general_eligible]

Unnamed: 0,ID,time_created,time_modified,surveyer,number_id,call_connected,availability,state,sub_district,end_call_status,district,need_meds,comments_health,have_food,comments_food,received_mgnrega,received_pm_kisan,received_ujjwala,received_jan_dhan,received_old_age_widow,received_registered_labor,received_free_ration,received_not_eligible,received_none,eligible_mgnrega,eligible_pm_kisan,eligible_ujjwala,eligible_jan_dhan,eligible_old_age_widow,eligible_registered_labor,eligible_bpl_ration,eligible_general_ration,eligible_none,employment_status,comments_benefits,need_to_revert,comments_additional
101,5e8dab58f0d65d001aac6a03,2020-04-08 16:15:00,2020-04-08 16:15:00,indusActionStaffVolunteer,754674.0,True,True,Rajasthan,Shastri Nagar,callDone,Jaipur,False,,False,,False,False,False,False,False,False,False,False,True,True,False,True,True,False,False,True,True,False,employedInformalSector,,False,"Daily wage worker, did not know much about ser..."
106,5e8daa92f0d65d001aac69ee,2020-04-08 16:12:00,2020-04-08 16:12:00,solveNinjaReapBenefitStaffVolunteer,812495.0,True,True,Chhattisgarh,Sakhara,callAgainFollowUp,Balod,False,Nil,False,Only rice that also at the cost of 1 rs. And n...,False,False,False,False,False,False,True,False,False,True,False,True,False,False,False,True,True,False,employedInformalSector,Nil,True,The caller was aware of the MGNREGA scheme but...
112,5e8da990f0d65d001aac6875,2020-04-08 16:08:00,2020-04-08 16:08:00,indusActionStaffVolunteer,607727.0,True,True,Rajasthan,312024,callDone,Udaipur,False,,False,Will have problems after 10 day,False,False,False,False,False,False,False,False,True,False,False,False,True,False,True,True,True,False,employedInformalSector,,True,
172,5e8d9996f0d65d001aac5f88,2020-04-08 14:59:00,2020-04-08 14:59:00,indusActionStaffVolunteer,905897.0,True,True,Maharashtra,Wakad,callDone,Pune,False,All fine,True,All fine,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,True,True,False,employedFormalSector,,False,"Rahul Pawar, was living in Wakad, Pune but is ..."
392,5e8c6e45f0d65d001aac46c2,2020-04-07 17:42:00,2020-04-07 17:42:00,indusActionStaffVolunteer,,True,True,Rajasthan,N A,callDone,Bikaner,False,,True,,True,False,False,False,False,False,True,False,False,True,False,False,False,False,False,True,True,False,unemployedPostLockdown,He is a farmer but doesn't have PM Kisan accou...,False,He is a farmer but doesn't have PM Kisan accou...
458,5e8c6030f0d65d001aac417f,2020-04-07 16:42:00,2020-04-07 16:42:00,indusActionStaffVolunteer,915304.0,True,True,Maharashtra,"Sanatacruz, Bus Stand",callAgainFollowUp,Mumbai City,False,No,False,No food to eat. No government food has been re...,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,True,True,True,employedInformalSector,Ration has given that also with additional cos...,True,His brother-in-law has committed suicide.\nPol...
482,5e8c5833f0d65d001aac4037,2020-04-07 16:08:00,2020-04-07 16:08:00,indusActionStaffVolunteer,615981.0,True,True,Uttar Pradesh,Sandauli,callDone,Barabanki,False,-,True,-,False,False,False,True,False,False,False,False,False,False,True,True,True,False,False,True,True,False,unemployed,Farmer,False,-
491,5e8c55ccf0d65d001aac3e9d,2020-04-07 15:58:00,2020-04-07 15:58:00,indusActionStaffVolunteer,662950.0,True,True,Rajasthan,Bandikui,callDone,Dausa,False,"All good , no problems",False,"All good, home is well furnished",True,True,True,True,False,True,True,False,False,True,False,True,True,False,True,True,True,False,unemployedPostLockdown,,False,
494,5e8c54e3f0d65d001aac3e6b,2020-04-07 15:54:00,2020-04-07 15:54:00,indusActionStaffVolunteer,619473.0,True,True,Telangana,E C I L,callDone,Hyderabad,False,gave them emergency numbers to get assistance,True,had ration card and had no problem,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,True,True,False,employedInformalSector,was aware about his benefits,False,"the family was of 2, wife and husband had no p..."
495,5e8c54e2f0d65d001aac3e68,2020-04-07 15:54:00,2020-04-07 15:54:00,indusActionStaffVolunteer,619473.0,True,True,Telangana,E C I L,callDone,Hyderabad,False,gave them emergency numbers to get assistance,True,had ration card and had no problem,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,True,True,False,employedInformalSector,was aware about his benefits,False,"the family was of 2, wife and husband had no p..."


In [24]:
# overall eligibility vs scheme wise eligibility doesn't match:
# 'eligible_none' is True although at least one scheme column is True
scheme_cols = ['received_mgnrega', 'received_pm_kisan',
               'received_ujjwala', 'received_jan_dhan', 'received_old_age_widow',
               'received_registered_labor', 'received_free_ration',
               'eligible_mgnrega', 'eligible_pm_kisan', 'eligible_ujjwala', 'eligible_jan_dhan',
               'eligible_old_age_widow', 'eligible_registered_labor',
               'eligible_bpl_ration', 'eligible_general_ration']
says_eligible_none = survey['eligible_none']==True
any_scheme_true = survey[scheme_cols].isin([True]).any(axis=1)
survey[says_eligible_none & any_scheme_true]

Unnamed: 0,ID,time_created,time_modified,surveyer,number_id,call_connected,availability,state,sub_district,end_call_status,district,need_meds,comments_health,have_food,comments_food,received_mgnrega,received_pm_kisan,received_ujjwala,received_jan_dhan,received_old_age_widow,received_registered_labor,received_free_ration,received_not_eligible,received_none,eligible_mgnrega,eligible_pm_kisan,eligible_ujjwala,eligible_jan_dhan,eligible_old_age_widow,eligible_registered_labor,eligible_bpl_ration,eligible_general_ration,eligible_none,employment_status,comments_benefits,need_to_revert,comments_additional
107,5e8daa76f0d65d001aac69eb,2020-04-08 16:11:00,2020-04-08 16:11:00,indusActionStaffVolunteer,659303,True,True,Madhya Pradesh,Thencha,callAgainFollowUp,Barwani,True,A kid in the house has TB. Will have to check ...,True,Has food for now. Has ration card too. Gave hi...,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,True,unemployedPostLockdown,,True,F/o Jandhan account
119,5e8da7e4f0d65d001aac6853,2020-04-08 16:01:00,2020-04-08 16:01:00,indusActionStaffVolunteer,889293,True,True,Rajasthan,Shastri Nagar,callDone,Jaipur,False,,False,,False,False,True,True,False,False,True,False,False,False,False,False,False,False,False,False,False,True,employedFormalSector,,False,
295,5e8d5bd5f0d65d001aac537f,2020-04-08 10:36:00,2020-04-08 10:36:00,indusActionStaffVolunteer,501318,True,True,Telangana,Uppaguda N R I Colony,callAgainFollowUp,Sangareddy,False,ఏం లేదు,True,ఎం లేదు,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,unemployed,They have no ration card,False,వారికి రేషన్ కార్డు లేదుfood kii ఇబ్బంది పడు...
320,5e8c8350f0d65d001aac4d34,2020-04-07 19:12:00,2020-04-07 19:12:00,indusActionStaffVolunteer,836660,True,True,Karnataka,Shakangar,callDone,Bangalore Rural,False,none,False,none,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,unemployedPostLockdown,none,False,the stakeholder (Mr Raju) is still in Bangalor...
321,5e8c8229f0d65d001aac4d30,2020-04-07 19:07:00,2020-04-07 19:07:00,indusActionStaffVolunteer,403316,True,True,Uttar Pradesh,Harnatha,callDone,Amethi,False,No need,True,Bought their ration,False,False,True,True,False,False,True,False,False,False,False,False,False,False,False,False,False,True,employedInformalSector,Educated abt the govt schemes.,False,na
326,5e8c80bbf0d65d001aac4d1c,2020-04-07 19:01:00,2020-04-07 19:01:00,indusActionStaffVolunteer,344830,True,True,Uttar Pradesh,Goaltowli.,callDone,Kanpur Dehat,False,No medical need.,False,Needs ration.,False,False,True,True,False,False,False,False,False,False,False,False,True,False,False,True,False,True,employedInformalSector,Agriculture,True,He needs a ration supply.\nLocated in Uttar Pr...
458,5e8c6030f0d65d001aac417f,2020-04-07 16:42:00,2020-04-07 16:42:00,indusActionStaffVolunteer,915304,True,True,Maharashtra,"Sanatacruz, Bus Stand",callAgainFollowUp,Mumbai City,False,No,False,No food to eat. No government food has been re...,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,True,True,True,employedInformalSector,Ration has given that also with additional cos...,True,His brother-in-law has committed suicide.\nPol...
566,5e8c4594f0d65d001aac32b3,2020-04-07 14:49:00,2020-04-07 14:49:00,indusActionStaffVolunteer,354182,True,True,Telangana,"Yellareddyguda, Malkajigiri",callDone,Hyderabad,False,Using medicine which is available near by medi...,False,"They were labour construction worker, migrated...",False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,True,employedInformalSector,They not aware of govt benefits as migrant lab...,True,May be needed to follow up them. I have given ...
784,5e8b10f4f0d65d001aabf4d7,2020-04-06 16:52:00,2020-04-06 16:52:00,indusActionStaffVolunteer,919805,True,True,Haryana,Faridabad,callDone,Faridabad,False,Has Stocked up on all the necessary medicines.,True,All the essential shops are open,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,True,True,employedInformalSector,Haven't opted for any government schemes.,False,Well Informed about the current situation and ...


In [25]:
# Rows with a missing value in any column other than the ones listed here
ignored_cols = ['number_id', 'comments_additional','comments_food',
                'comments_health', 'sub_district', 'comments_benefits']
checked_cols = survey.columns.difference(ignored_cols)
survey[survey[checked_cols].isnull().any(axis=1)]

Unnamed: 0,ID,time_created,time_modified,surveyer,number_id,call_connected,availability,state,sub_district,end_call_status,district,need_meds,comments_health,have_food,comments_food,received_mgnrega,received_pm_kisan,received_ujjwala,received_jan_dhan,received_old_age_widow,received_registered_labor,received_free_ration,received_not_eligible,received_none,eligible_mgnrega,eligible_pm_kisan,eligible_ujjwala,eligible_jan_dhan,eligible_old_age_widow,eligible_registered_labor,eligible_bpl_ration,eligible_general_ration,eligible_none,employment_status,comments_benefits,need_to_revert,comments_additional


In [26]:
# Number of distinct calendar days on which responses were created
created_days = survey['time_created'].dt.strftime("%d/%m/%Y")
len(created_days.unique())

3

## Data aggregation

In [27]:
def get_dashboard_row(df, agg_level, state, district):
    """Summarise the survey rows in ``df`` as one flat dashboard row.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned survey rows for one state or district.
    agg_level, state, district
        Labels copied unchanged into the first three positions of the row.

    Returns
    -------
    list
        Labels, the latest creation date ("%d/%m/%Y"), the number of distinct
        days and the number of rows, followed by counts and ratios. Headline
        ratios are divided by the number of rows; each "received" ratio is
        divided by the matching eligible count and is -999 when that count is 0.
    """
    def count_rows(mask):
        # Number of rows selected by a boolean mask.
        return len(df[mask])

    def is_true(column):
        return df[column] == True

    def ratio_or_sentinel(numerator, denominator):
        # -999 marks a ratio that cannot be computed because the denominator is 0.
        if denominator > 0:
            return numerator / denominator
        return -999

    created = df['time_created']
    date = created.max().strftime("%d/%m/%Y")
    num_days = len(created.dt.strftime("%d/%m/%Y").unique())
    num_surveyed = len(df)
    row = [agg_level, state, district, date, num_days, num_surveyed]

    # Headline indicators: a count followed by its share of all surveyed rows.
    num_eligible_jan_dhan = count_rows(is_true('eligible_jan_dhan'))
    headline_counts = (
        count_rows(df['employment_status'] == 'unemployed'),
        count_rows(df['employment_status'] =='unemployedPostLockdown'),
        num_eligible_jan_dhan,
        count_rows(is_true('need_meds')),
        count_rows(df['have_food'] == False),
    )
    for headline_count in headline_counts:
        row.extend([headline_count, headline_count / num_surveyed])

    # Rows not marked 'eligible_none', and those among them marked 'received_none'.
    num_eligible_atleast_one = count_rows(df['eligible_none'] == False)
    num_received_no_benefit = count_rows(is_true('received_none') & (df['eligible_none'] == False))
    row.extend([num_eligible_atleast_one, num_received_no_benefit,
                ratio_or_sentinel(num_received_no_benefit, num_eligible_atleast_one)])

    def extend_with_scheme(eligible_col, received_col):
        # Appends eligible count, received count and their ratio for one scheme.
        num_eligible = count_rows(is_true(eligible_col))
        num_received = count_rows(is_true(received_col))
        row.extend([num_eligible, num_received, ratio_or_sentinel(num_received, num_eligible)])

    extend_with_scheme('eligible_mgnrega', 'received_mgnrega')
    extend_with_scheme('eligible_pm_kisan', 'received_pm_kisan')
    extend_with_scheme('eligible_ujjwala', 'received_ujjwala')

    # The Jan Dhan eligible count is already part of the headline block, so only
    # the received count and the ratio are appended here.
    num_received_jan_dhan = count_rows(is_true('received_jan_dhan'))
    row.extend([num_received_jan_dhan,
                ratio_or_sentinel(num_received_jan_dhan, num_eligible_jan_dhan)])

    extend_with_scheme('eligible_old_age_widow', 'received_old_age_widow')
    extend_with_scheme('eligible_registered_labor', 'received_registered_labor')

    # Free ration eligibility is either of the two ration columns being True.
    num_eligible_free_ration = count_rows(is_true('eligible_bpl_ration') | is_true('eligible_general_ration'))
    num_received_free_ration = count_rows(is_true('received_free_ration'))
    row.extend([num_eligible_free_ration, num_received_free_ration,
                ratio_or_sentinel(num_received_free_ration, num_eligible_free_ration)])

    return row

In [28]:
# Dashboard column headers. The order must match, position by position, the list
# returned by get_dashboard_row.
# NOTE(review): 'perc_unemplyed' looks like a misspelling of 'perc_unemployed'. It ends up
# as a header in the exported CSV, so confirm nothing reads that name before renaming it.
column_names = ['agg_level', 'state', 'district', 'date', 'num_days', 'num_surveyed', 
                'num_unemployed', 'perc_unemplyed', 'num_unemployed_post_lockdown', 'perc_unemployed_post_lockdown',
                'num_eligible_jan_dhan', 'perc_eligible_jan_dhan', 'num_need_med', 'perc_need_med', 
                'num_need_food', 'perc_need_food', 
                'num_eligible_atleast_one', 'num_received_no_benefit', 'perc_received_no_benefit', 
                'num_eligible_mgnrega', 'num_received_mgnrega', 'perc_received_mgnrega', 
                'num_eligible_pm_kisan', 'num_received_pm_kisan', 'perc_received_pm_kisan',
                'num_eligible_ujjwala', 'num_received_ujjwala', 'perc_received_ujjwala',
                'num_received_jan_dhan', 'perc_received_jan_dhan',
                'num_eligible_old_age_widow_disability', 'num_received_old_age_widow_disability', 
                'perc_received_old_age_widow_disability',
                'num_eligible_registered_labor', 'num_received_registered_labor', 'perc_received_registered_labor',
                'num_eligible_free_ration', 'num_received_free_ration', 'perc_received_free_ration']
# Empty frame that only carries the headers; rows are added in a later cell.
dashboard = pd.DataFrame(columns = column_names)

In [29]:
# Distinct state names, skipping any value whose string form is 'nan'
states = [state for state in survey['state'].unique().tolist() if str(state) != 'nan']

In [30]:
# Collect all rows in a plain list and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every step. Rebuilding also makes this cell safe to re-run.
dashboard_rows = []
for state in states:
    state_df = survey[survey['state'] == state]
    # One summary row for the whole state; -999 fills the district slot.
    dashboard_rows.append(get_dashboard_row(state_df, 'State', state, -999))
    districts = state_df['district'].unique().tolist()
    districts = [district for district in districts if str(district) != 'nan']
    for district in districts:
        dist_df = state_df[state_df['district'] == district]
        dashboard_rows.append(get_dashboard_row(dist_df, 'District', state, district))

# dtype=object stores each value as produced, so columns mixing float ratios with the
# integer -999 placeholder are not coerced to float (which would export -999 as -999.0).
dashboard = pd.DataFrame(dashboard_rows, columns=dashboard.columns, dtype=object)

In [31]:
# Make sure the output folder exists first: to_csv does not create missing directories.
os.makedirs('data', exist_ok=True)
dashboard.to_csv('data/dashboard_08_04.csv', index=False)