In [1]:
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pandas as pd
import numpy as np
import re

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

## Obtain data from Google spreadsheet

In [2]:
# Read-only access to Google Sheets is all this notebook needs.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']

# Start without credentials so that the validity check below also works on the
# very first run, when token.pickle does not exist yet (this used to raise a
# NameError because `creds` was only assigned inside the `if`).
creds = None
if os.path.exists('token.pickle'):
    # NOTE: unpickling can execute arbitrary code - only load a token.pickle
    # that was written by this notebook itself.
    with open('token.pickle', 'rb') as token:
        creds = pickle.load(token)

# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(
            'credentials.json', SCOPES)
        creds = flow.run_local_server(port=0)
    # Save the credentials for the next run
    with open('token.pickle', 'wb') as token:
        pickle.dump(creds, token)

service = build('sheets', 'v4', credentials=creds)

sheet = service.spreadsheets()

In [3]:
def gsheet2df(gsheet):
    """Convert a Google Sheets ``values().get()`` response to a Pandas DataFrame.

    The first row of ``gsheet['values']`` is used as the header and every
    following row becomes one record.  The Sheets API leaves out trailing empty
    cells, so a row can be shorter than the header: short rows are padded with
    empty strings, and cells beyond the header width are ignored.

    Returns the DataFrame (object dtype, cells kept exactly as received), or
    None after printing 'No data found.' when there is no data row below the
    header (including a missing or empty 'values' entry).
    """
    rows = gsheet.get('values', [])
    if len(rows) < 2:
        # Nothing at all, or a header without any data row.
        print('No data found.')
        return None

    header, values = rows[0], rows[1:]
    width = len(header)
    # Truncate over-long rows and pad short ones so every record has `width` cells.
    padded = [list(row[:width]) + [''] * (width - len(row)) for row in values]
    return pd.DataFrame(padded, columns=header, dtype=object)

In [4]:
# ID of the Google spreadsheet that holds the survey responses.
spreadsheet = '1EHTQy7nnT0JWgiWenlgxbGOpTN0jvc3ZWMSYVcb3reQ'
# Range to read: columns A to XX of the tab named '12th April 2020'.
sheet_range = '12th April 2020!A:XX'

# Fetch the cell values of that range through the Sheets client created earlier.
survey_responses = sheet.values().get(spreadsheetId=spreadsheet, range=sheet_range).execute()

In [5]:
survey = gsheet2df(survey_responses)

## Combine columns 

In [6]:
# Question keys of the survey. The answers to one key can be spread over several
# sheet columns whose names contain the key; they are merged into one column
# that is named after the key.
questions = ['NameoftheDistrict', '1_1', '1_2', '2_1', '2_2', '2_3',
             '3_1.mgnrega', '3_1.pmKisan', '3_1.ujjwala', '3_1.janDhan', '3_1.oldAgeWidow',
             '3_1.registeredLabour', '3_1.freeRation', '3_1.notEligible', '3_1.none',
             '3_2.mgnrega', '3_2.pmKisan', '3_2.ujjwala', '3_2.janDhan', '3_2.oldAgeWidow',
             '3_2.registeredLabour', '3_2.bplRation', '3_2.generalRation', '3_2.none',
             '3_3', '3_4', '4_1', '4_2']

for question in questions:
    cols = [col for col in survey.columns if question in col]
    if question == 'NameoftheDistrict':
        # 'locationBihar' is merged into the district column as well.
        cols = cols + ['locationBihar']
    # Row-wise sum of the source columns (string cells are concatenated).
    survey[question] = survey[cols].sum(axis=1)
    # The axis must be given by keyword: the positional form drop(cols, 1) is
    # rejected by pandas >= 2.0.  The merged column itself is never dropped,
    # even when a source column already carries exactly the key as its name.
    survey = survey.drop(columns=[col for col in cols if col != question])

# The '_2' column is folded into '4_2' in the same way.
cols = ['_2', '4_2']
survey['4_2'] = survey[cols].sum(axis=1)
survey = survey.drop(columns=['_2'])

## Clean strings

In [7]:
# Column names
# Rename the raw form keys to descriptive snake_case names: 3_1.* become
# received_* and 3_2.* become eligible_*.
# Note: '2_2' has no entry in this mapping and therefore keeps its raw name.
survey = survey.rename(columns={"_id": "ID", "created": "time_created", "modified": "time_modified",
                                "volunteerType": "surveyer", 
                                "Number_ID": "number_id", "introductionCallConnected": "call_connected",
                                "introductionAvailability": "availability", 
                                "endCallStatusEndCallStatus": "end_call_status", 
                                "locationSelectState": "state", "NameoftheDistrict": "district", 
                                "locationNameofArea": "area_name", "1_1": "need_meds", "1_2": "comments_health",
                                "2_1": "have_food", "2_3": "comments_food", 
                                '3_2.mgnrega': "eligible_mgnrega",
                                '3_2.pmKisan': "eligible_pm_kisan", '3_2.ujjwala': "eligible_ujjwala", 
                                '3_2.janDhan': "eligible_jan_dhan", '3_2.oldAgeWidow': "eligible_old_age_widow",
                                '3_2.registeredLabour': "eligible_registered_labor", 
                                '3_2.bplRation': "eligible_bpl_ration", 
                                '3_2.generalRation': "eligible_general_ration",
                                '3_2.none': "eligible_none", 
                                '3_1.mgnrega': "received_mgnrega", 
                                '3_1.pmKisan': "received_pm_kisan", '3_1.ujjwala': "received_ujjwala", 
                                '3_1.janDhan': "received_jan_dhan", '3_1.oldAgeWidow': "received_old_age_widow", 
                                '3_1.registeredLabour': "received_registered_labor", 
                                '3_1.freeRation': "received_free_ration", '3_1.notEligible': "received_not_eligible", 
                                '3_1.none': "received_none", "3_4": "comments_benefits",
                                "3_3": "employment_status", 
                                "4_1": "need_to_revert",
                                "4_2": "comments_additional"})

In [8]:
# yes / no / true / false etc
# Convert the textual answers of the yes/no columns to Python booleans.

binary_cols = ['call_connected', 'availability', 'need_meds', 'have_food', 'received_mgnrega', 'received_pm_kisan', 
                'received_ujjwala', 'received_jan_dhan', 'received_old_age_widow', 'received_registered_labor', 
                'received_free_ration', 'received_not_eligible', 'received_none', 'eligible_mgnrega',
                'eligible_pm_kisan', 'eligible_ujjwala', 'eligible_jan_dhan', 'eligible_old_age_widow', 
                'eligible_registered_labor', 'eligible_bpl_ration', 'eligible_general_ration', 'eligible_none',
                'need_to_revert']

# NOTE(review): both patterns are case-insensitive and anchored only at the start
# of the cell, so a value that merely begins with "yes"/"true" or "no"/"false"
# (e.g. "none", "not sure") presumably gets converted too - confirm against the
# values that actually occur in the sheet.
survey[binary_cols] = survey[binary_cols].replace(to_replace=r'(?i)^(yes|true)', value=True, regex=True)
survey[binary_cols] = survey[binary_cols].replace(to_replace=r'(?i)^(no|false)', value=False, regex=True)

In [9]:
# Parse the creation timestamp text into datetimes.
# NOTE(review): the next cell starts with this exact statement again, so this
# cell looks redundant.
survey['time_created'] = pd.to_datetime(survey['time_created'], format='%Y-%m-%d %I:%M %p UTC')

In [10]:
# date columns
# Parse both timestamp columns, interpret them as UTC and store them as naive
# Asia/Kolkata wall-clock times.
for time_col in ('time_created', 'time_modified'):
    parsed = pd.to_datetime(survey[time_col], format='%Y-%m-%d %I:%M %p UTC')
    as_utc = parsed.dt.tz_localize("UTC")
    survey[time_col] = as_utc.dt.tz_convert('Asia/Kolkata').dt.tz_localize(None)


In [11]:
# state, district, area name
def capitalizeName(name):
    """Normalise a state / district / area name to spaced Title Case.

    Removes the word "village" (any case), splits camelCase names
    ("UttarPradesh" -> "Uttar Pradesh"), turns hyphens into spaces, collapses
    runs of whitespace and title-cases the result.  Values that are not
    strings (e.g. NaN or None) are returned unchanged.
    """
    if not isinstance(name, str):
        return name
    name = re.sub(r"(?i)village", "", name)
    # Insert a space before a capital only when the preceding character is a
    # letter or digit that is not itself an ASCII capital; the former (?<=\w)
    # lookbehind also split all-caps input ("BIHAR" -> "B I H A R").
    name = re.sub(r"(?<=[^\W_A-Z])([A-Z])", r" \1", name)
    name = name.replace("-", " ")
    # split/join trims the ends and collapses the double spaces left behind by
    # the removals; title() already lower-cases the rest of every word.
    return " ".join(name.split()).title()

survey[['state', 'district', 'area_name']] = survey[['state', 'district', 'area_name']].applymap(capitalizeName)

In [12]:
# employment status
# Remove every run of non-ASCII characters from the employment status text.
survey['employment_status'] = survey['employment_status'].apply(lambda x: re.sub("([^\x00-\x7F])+","",x))

In [13]:
# NaNs
# Turn empty / whitespace-only cells into NaN, then drop the columns that hold
# no data at all.  np.nan is used because the np.NaN alias was removed in
# NumPy 2.0.
survey = survey.replace(r'^\s*$', np.nan, regex=True)
survey = survey.dropna(axis=1, how="all")

## Other checks

In [14]:
# removing duplicates
survey.drop_duplicates(inplace=True)

In [15]:
# duplicated rows
# Every occurrence of an ID after its first one.  Copy the selection so that
# adding the 'issue' column does not write into a slice of `survey`
# (SettingWithCopyWarning).
duplicated = survey[survey.duplicated('ID')].copy()
duplicated['issue'] = 'duplicated ID'

In [16]:
# removing ID duplicates 
survey.drop_duplicates(subset=['ID'], inplace=True)

In [17]:
# call not connected but available
# Copy the selection so that adding the 'issue' column does not write into a
# slice of `survey` (SettingWithCopyWarning).
available_issue = survey[(survey['call_connected']!=True) & (survey['availability']==True)].copy()
available_issue['issue'] = 'call connected is false but shown available'

In [18]:
# data present when call not connected or no availability
nodata_rows = survey[(survey['call_connected']!=True) | (survey['availability']!=True)]
# Keep the rows in which every column other than the call metadata listed below
# is filled in.
# NOTE(review): .all() requires all of those columns to be non-null, including
# the free-text comment columns - confirm that .any() was not intended.
# The copy avoids writing the 'issue' column into a slice (SettingWithCopyWarning).
nodata_rows = nodata_rows[pd.notnull(nodata_rows[nodata_rows.columns.difference(['ID', 'time_created',
                                                                                 'time_modified', 'surveyer',
                                                                                 'number_id', 'call_connected',
                                                                                 'availability',
                                                                                 'end_call_status'])]).all(axis=1)].copy()
nodata_rows['issue'] = 'data present when call connected or availability is false'

In [19]:
# keeping only available
survey = survey[survey['call_connected'] == True]
survey = survey[survey['availability'] == True]

# adding those rows where data is present 
survey = pd.concat([survey, nodata_rows[nodata_rows.columns.difference(['issue'], sort=False)]])

In [20]:
# end call status is empty
# Copy the selection so that adding the 'issue' column does not write into a
# slice of `survey` (SettingWithCopyWarning).
end_empty = survey[pd.isnull(survey['end_call_status'])].copy()
end_empty['issue'] = 'end call status is empty'

# Subset of those rows in which every column other than the call metadata
# listed below is filled in.
end_empty_data = end_empty[pd.notnull(end_empty[end_empty.columns.difference(['ID', 'time_created',
                                                                                 'time_modified', 'surveyer',
                                                                                 'number_id', 'call_connected',
                                                                                 'availability',
                                                                                 'end_call_status'])]).all(axis=1)]

In [21]:
# removing rows with empty end call status but keeping the ones which have data
survey = survey[pd.notnull(survey['end_call_status'])]
survey = pd.concat([survey, end_empty_data[end_empty_data.columns.difference(['issue'], sort=False)]])

In [22]:
# end call status is incomplete
call_incomplete = survey[survey['end_call_status']=='incompleteCall'].copy()
call_incomplete['issue'] = 'end call status - incomplete call'

In [23]:
# removing incomplete calls
survey = survey[survey['end_call_status']!='incompleteCall']

In [24]:
# state not entered
# Copy the selection so that adding the 'issue' column does not write into a
# slice of `survey` (SettingWithCopyWarning).
state_blank = survey[survey['state'].isnull()].copy()
state_blank['issue'] = 'state not entered'

In [25]:
# removing rows with no state entry
survey = survey[survey['state'].notnull()]

In [26]:
# district not entered
# Copy the selection so that adding the 'issue' column does not write into a
# slice of `survey` (SettingWithCopyWarning).
district_blank = survey[survey['district'].isnull()].copy()
district_blank['issue'] = 'district not entered'

In [27]:
# 3.2 not eligible true but 3.1 not eligible false
eligibility_none = survey[(survey['eligible_none']==True) & (survey['received_not_eligible']==False)].copy()
eligibility_none['issue'] = 'eligibility issue - schemewise vs overall, check both 3.2 and 3.1'

In [28]:
# removing the rows with 3.2 not eligible true but 3.1 not eligible false
# Negate the flagging condition itself so that exactly the flagged rows are
# removed.  The former filter (eligible_none == False or received_not_eligible
# == True) also discarded rows with a blank answer in either column without
# recording them anywhere; those rows now stay in and are reported by the
# later missing-data check.
survey = survey[~((survey['eligible_none']==True) & (survey['received_not_eligible']==False))]

In [29]:
# bpl and general ration
bpl_general_ration = survey[(survey['eligible_bpl_ration']==True) & (survey['eligible_general_ration']==True)].copy()
bpl_general_ration['issue'] = 'both bpl and general ration eligibility'

In [30]:
# removing the rows with both bpl and general ration eligibility 
# survey = survey[(survey['eligible_bpl_ration']==False) | (survey['eligible_general_ration']==False)]

In [31]:
# overall eligibility vs scheme wise eligibility doesn't match
eligibility_overall = survey[(survey['eligible_none']==True) & (survey[['received_mgnrega', 'received_pm_kisan',
                               'received_ujjwala', 'received_jan_dhan', 'received_old_age_widow',
                               'received_registered_labor', 'received_free_ration',
                               'eligible_mgnrega', 'eligible_pm_kisan', 'eligible_ujjwala', 'eligible_jan_dhan',
                               'eligible_old_age_widow', 'eligible_registered_labor',
                               'eligible_bpl_ration', 'eligible_general_ration']].isin([True]).any(axis=1))].copy()
eligibility_overall['issue'] = 'eligibility issue - schemewise vs overall, check both 3.2 and 3.1'

In [32]:
# removing the rows where overall eligibility vs scheme wise eligibility doesn't match
# A row mismatches when eligible_none is True although at least one scheme is
# marked as received or eligible.  Remove exactly those rows by negating that
# condition: the former filter kept every row with at least one False scheme
# answer, so most mismatching rows stayed in the clean data.
survey = survey[~((survey['eligible_none']==True) & (survey[['received_mgnrega', 'received_pm_kisan',
                       'received_ujjwala', 'received_jan_dhan', 'received_old_age_widow',
                       'received_registered_labor', 'received_free_ration',
                       'eligible_mgnrega', 'eligible_pm_kisan', 'eligible_ujjwala', 'eligible_jan_dhan',
                       'eligible_old_age_widow', 'eligible_registered_labor',
                       'eligible_bpl_ration', 'eligible_general_ration']].isin([True]).any(axis=1)))]

In [33]:
# missing data
missing = survey[pd.isnull(survey[survey.columns.difference(['number_id', 'comments_additional','comments_food', 
                                                             'comments_health', 'area_name', 
                                                             'comments_benefits'])]).any(axis=1)].copy()
missing['issue'] = 'data in some cells missing'

In [34]:
# removing the rows with missing cells
survey = survey[pd.notnull(survey[survey.columns.difference(['number_id', 'comments_additional','comments_food', 
                                                             'comments_health', 'area_name', 
                                                             'comments_benefits'])]).all(axis=1)]

In [35]:
# number ID missing
num_id_missing = survey[pd.isnull(survey['number_id'])].copy()
num_id_missing['issue'] = 'number ID missing'

In [36]:
# received vs eligible mismatch
mgnrega = survey[(survey['received_mgnrega']==True) & (survey['eligible_mgnrega']==False)].copy()
mgnrega['issue'] = 'mgnrega received is true but mgnrega eligible is false'

pm_kisan = survey[(survey['received_pm_kisan']==True) & (survey['eligible_pm_kisan']==False)].copy()
pm_kisan['issue'] = 'pm kisan received is true but pm kisan eligible is false'

ujjwala = survey[(survey['received_ujjwala']==True) & (survey['eligible_ujjwala']==False)].copy()
ujjwala['issue'] = 'ujjwala received is true but ujjwala eligible is false'

jan_dhan = survey[(survey['received_jan_dhan']==True) & (survey['eligible_jan_dhan']==False)].copy()
jan_dhan['issue'] = 'jan dhan received is true but jan dhan eligible is false'

old_age = survey[(survey['received_old_age_widow']==True) & (survey['eligible_old_age_widow']==False)].copy()
old_age['issue'] = 'old age disability received is true but old age disability a eligible is false'

reg_labor = survey[(survey['received_registered_labor']==True) & (survey['eligible_registered_labor']==False)].copy()
reg_labor['issue'] = 'registered labor is true but registered labor eligible is false'

ration = survey[(survey['received_free_ration']==True) & (survey[['eligible_bpl_ration', 'eligible_general_ration']].isin([False]).all(axis=1))].copy()
ration['issue'] = 'ration received is true but ration eligible is false'


In [37]:
# if scheme received is True and scheme eligible is False, marking scheme eligible as True
# (received column, eligible column) for every scheme with one eligibility column
scheme_columns = [
    ('received_mgnrega', 'eligible_mgnrega'),
    ('received_pm_kisan', 'eligible_pm_kisan'),
    ('received_ujjwala', 'eligible_ujjwala'),
    ('received_jan_dhan', 'eligible_jan_dhan'),
    ('received_old_age_widow', 'eligible_old_age_widow'),
    ('received_registered_labor', 'eligible_registered_labor'),
]
for received_col, eligible_col in scheme_columns:
    inconsistent = (survey[received_col]==True) & (survey[eligible_col]==False)
    survey.loc[inconsistent, eligible_col] = True

# Free ration has two eligibility columns: when it was received although both
# are False, the general ration eligibility is the one that gets set.
no_ration_eligibility = survey[['eligible_bpl_ration', 'eligible_general_ration']].isin([False]).all(axis=1)
survey.loc[(survey['received_free_ration']==True) & no_ration_eligibility, 'eligible_general_ration'] = True


In [38]:
#received none true when at least one benefit has been received 
received_none = survey[(survey['received_none']==True) & (survey[['received_mgnrega', 'received_pm_kisan', 
                               'received_ujjwala', 'received_jan_dhan', 'received_old_age_widow', 
                               'received_registered_labor', 'received_free_ration']].isin([True]).any(axis=1))].copy()

received_none['issue'] = 'received none is true when at least one benefit has been received'

In [39]:
# if survey received none is True and any of the benefits received is also True, marking survey received none as False
survey.loc[(survey['received_none']==True) & (survey[['received_mgnrega', 'received_pm_kisan', 'received_ujjwala', 
                   'received_jan_dhan', 'received_old_age_widow', 'received_registered_labor', 
                   'received_free_ration']].isin([True]).any(axis=1)), 'received_none'] = False
                                          

## Exporting rows with issues

In [40]:
# Stack all flagged rows into one frame.
issues = pd.concat([duplicated, available_issue, nodata_rows, end_empty,
                    call_incomplete, state_blank, district_blank, eligibility_none,
                    bpl_general_ration, eligibility_overall, missing, num_id_missing,
                    mgnrega, pm_kisan, ujjwala, jan_dhan, old_age, reg_labor,
                    ration, received_none]).reset_index(drop=True)

# A row that was flagged several times is kept once, with the first issue found.
issues.drop_duplicates(subset=issues.columns.difference(['issue']), inplace=True)

cols = ["issue", "ID", "time_created", "time_modified", "surveyer", "number_id", "call_connected", "availability",
        "end_call_status", "state", "district", "area_name", "need_meds", "comments_health","have_food",
        "comments_food", "eligible_mgnrega", "eligible_pm_kisan", "eligible_ujjwala", "eligible_jan_dhan",
        "eligible_old_age_widow", "eligible_registered_labor", "eligible_bpl_ration", "eligible_general_ration",
        "eligible_none", "received_mgnrega", "received_pm_kisan", "received_ujjwala", "received_jan_dhan",
        "received_old_age_widow", "received_registered_labor", "received_free_ration", "received_not_eligible",
        "received_none", "comments_benefits", "employment_status",  "need_to_revert", "comments_additional"]

# reindex instead of issues[cols]: a column without a single value was removed
# by the all-NaN column drop during cleaning and would raise a KeyError here;
# reindex brings it back as an empty column so the export layout stays fixed.
issues = issues.reindex(columns=cols)

# Make sure the output folder exists before the first export.
os.makedirs('data', exist_ok=True)
issues.to_csv('data/issues_12_04.csv', index=False)

## Exporting clean data (which is also used for the analysis in the rest of this notebook)

In [41]:
# Columns of the clean export, in output order.
cols = ["ID", "time_created", "time_modified", "surveyer", "number_id", "call_connected", "availability",
        "end_call_status", "state", "district", "area_name", "need_meds", "comments_health","have_food",
        "comments_food", "eligible_mgnrega", "eligible_pm_kisan", "eligible_ujjwala", "eligible_jan_dhan",
        "eligible_old_age_widow", "eligible_registered_labor", "eligible_bpl_ration", "eligible_general_ration",
        "eligible_none", "received_mgnrega", "received_pm_kisan", "received_ujjwala", "received_jan_dhan",
        "received_old_age_widow", "received_registered_labor", "received_free_ration", "received_not_eligible",
        "received_none", "comments_benefits", "employment_status",  "need_to_revert", "comments_additional"]

# reindex instead of survey[cols]: a column without a single value was removed
# by the all-NaN column drop during cleaning and would raise a KeyError here;
# reindex brings it back as an empty column so the export layout stays fixed.
survey = survey.reindex(columns=cols)

survey.to_csv('data/clean_data_12_04.csv', index=False)

## Data aggregation

In [42]:
def get_dashboard_row(df, agg_level, state, district, full=True):
    """Build one dashboard row of counts and shares for the responses in ``df``.

    ``agg_level``, ``state`` and ``district`` are copied into the row as given.
    Employment / need / Jan Dhan eligibility shares are relative to the number
    of rows in ``df``.  Every "received" share is relative to the number of
    rows eligible for that scheme and is -999 when no row is eligible.

    With ``full=True`` the row holds counts and shares (39 values); otherwise
    only the identifying fields and the shares (19 values).
    """
    not_applicable = -999

    def tally(mask):
        # Number of rows of df selected by a boolean mask.
        return len(df[mask])

    def share(part, whole):
        # part / whole, or the sentinel when nothing is eligible.
        if whole > 0:
            return part / whole
        return not_applicable

    def uptake(eligible_mask, received_mask):
        # (number eligible, number received, received share) of one scheme.
        eligible = tally(eligible_mask)
        received = tally(received_mask)
        return [eligible, received, share(received, eligible)]

    created = df['time_created']
    date = created.max().strftime("%d/%m/%Y")
    num_days = len(created.dt.strftime("%d/%m/%Y").unique())
    num_surveyed = len(df)
    head = [agg_level, state, district, date, num_days, num_surveyed]

    # Indicators measured against all surveyed rows.
    num_unemployed = tally(df['employment_status'] == 'unemployed')
    perc_unemployed = num_unemployed / num_surveyed
    num_post_lockdown = tally(df['employment_status'] == 'unemployedPostLockdown')
    perc_post_lockdown = num_post_lockdown / num_surveyed
    num_eligible_jan_dhan = tally(df['eligible_jan_dhan'] == True)
    perc_eligible_jan_dhan = num_eligible_jan_dhan / num_surveyed
    num_need_med = tally(df['need_meds'] == True)
    perc_need_med = num_need_med / num_surveyed
    num_need_food = tally(df['have_food'] == False)
    perc_need_food = num_need_food / num_surveyed

    # Eligible for at least one scheme, yet nothing received.
    has_eligibility = df['eligible_none'] == False
    no_benefit = uptake(has_eligibility, (df['received_none'] == True) & has_eligibility)

    # Scheme-wise uptake among the eligible rows.
    mgnrega = uptake(df['eligible_mgnrega'] == True, df['received_mgnrega'] == True)
    pm_kisan = uptake(df['eligible_pm_kisan'] == True, df['received_pm_kisan'] == True)
    ujjwala = uptake(df['eligible_ujjwala'] == True, df['received_ujjwala'] == True)
    num_received_jan_dhan = tally(df['received_jan_dhan'] == True)
    perc_received_jan_dhan = share(num_received_jan_dhan, num_eligible_jan_dhan)
    old_age = uptake(df['eligible_old_age_widow'] == True, df['received_old_age_widow'] == True)
    reg_labor = uptake(df['eligible_registered_labor'] == True, df['received_registered_labor'] == True)
    ration = uptake((df['eligible_bpl_ration'] == True) | (df['eligible_general_ration'] == True),
                    df['received_free_ration'] == True)

    if not full:
        return head + [perc_unemployed, perc_post_lockdown,
                       perc_eligible_jan_dhan, perc_need_med, perc_need_food, no_benefit[2],
                       mgnrega[2], pm_kisan[2], ujjwala[2], perc_received_jan_dhan,
                       old_age[2], reg_labor[2], ration[2]]

    return (head
            + [num_unemployed, perc_unemployed, num_post_lockdown, perc_post_lockdown,
               num_eligible_jan_dhan, perc_eligible_jan_dhan, num_need_med, perc_need_med,
               num_need_food, perc_need_food]
            + no_benefit + mgnrega + pm_kisan + ujjwala
            + [num_received_jan_dhan, perc_received_jan_dhan]
            + old_age + reg_labor + ration)


## Exporting dashboard with both counts and percentages

In [43]:
column_names = ['agg_level', 'state', 'district', 'date', 'num_days', 'num_surveyed', 
                'num_unemployed', 'perc_unemployed', 'num_unemployed_post_lockdown', 'perc_unemployed_post_lockdown',
                'num_eligible_jan_dhan', 'perc_eligible_jan_dhan', 'num_need_med', 'perc_need_med', 
                'num_need_food', 'perc_need_food', 
                'num_eligible_atleast_one', 'num_received_no_benefit', 'perc_received_no_benefit', 
                'num_eligible_mgnrega', 'num_received_mgnrega', 'perc_received_mgnrega', 
                'num_eligible_pm_kisan', 'num_received_pm_kisan', 'perc_received_pm_kisan',
                'num_eligible_ujjwala', 'num_received_ujjwala', 'perc_received_ujjwala',
                'num_received_jan_dhan', 'perc_received_jan_dhan',
                'num_eligible_old_age_widow_disability', 'num_received_old_age_widow_disability', 
                'perc_received_old_age_widow_disability',
                'num_eligible_registered_labor', 'num_received_registered_labor', 'perc_received_registered_labor',
                'num_eligible_free_ration', 'num_received_free_ration', 'perc_received_free_ration']

full_dashboard = pd.DataFrame(columns = column_names)

In [44]:
# Distinct states in the survey, skipping entries whose text form is 'nan'.
states = [state for state in survey['state'].unique().tolist() if str(state) != 'nan']

In [45]:
# Collect all rows first and build the frame once.  DataFrame.append was removed
# in pandas 2.0 (and copied the whole frame on every call); building from a list
# also makes the cell safe to re-run without duplicating rows.
full_rows = [get_dashboard_row(survey, 'Country', -999, -999)]
for state in states:
    state_df = survey[survey['state'] == state]
    full_rows.append(get_dashboard_row(state_df, 'State', state, -999))
    districts = state_df['district'].unique().tolist()
    districts = [district for district in districts if str(district) != 'nan']
    for district in districts:
        dist_df = state_df[state_df['district'] == district]
        full_rows.append(get_dashboard_row(dist_df, 'District', state, district))

# dtype=object keeps every value exactly as computed (e.g. the -999 sentinel
# stays an int next to float shares), as the row-wise append did.
full_dashboard = pd.DataFrame(full_rows, columns=full_dashboard.columns, dtype=object)

In [46]:
full_dashboard.to_csv('data/full_dashboard_12_04.csv', index=False)

## Exporting dashboard with percentages only

In [47]:
column_names = ['agg_level', 'state', 'district', 'date', 'num_days', 'num_surveyed', 'perc_unemployed', 
                'perc_unemployed_post_lockdown', 'perc_eligible_jan_dhan', 'perc_need_med', 'perc_need_food', 
                'perc_received_no_benefit', 'perc_received_mgnrega', 'perc_received_pm_kisan',
                'perc_received_ujjwala', 'perc_received_jan_dhan', 'perc_received_old_age_widow_disability',
                'perc_received_registered_labor', 'perc_received_free_ration']

dashboard = pd.DataFrame(columns = column_names)

In [48]:
# Collect all rows first and build the frame once.  DataFrame.append was removed
# in pandas 2.0 (and copied the whole frame on every call); building from a list
# also makes the cell safe to re-run without duplicating rows.
perc_rows = [get_dashboard_row(survey, 'Country', -999, -999, False)]
for state in states:
    state_df = survey[survey['state'] == state]
    perc_rows.append(get_dashboard_row(state_df, 'State', state, -999, False))
    districts = state_df['district'].unique().tolist()
    districts = [district for district in districts if str(district) != 'nan']
    for district in districts:
        dist_df = state_df[state_df['district'] == district]
        perc_rows.append(get_dashboard_row(dist_df, 'District', state, district, False))

# dtype=object keeps every value exactly as computed (e.g. the -999 sentinel
# stays an int next to float shares), as the row-wise append did.
dashboard = pd.DataFrame(perc_rows, columns=dashboard.columns, dtype=object)

In [49]:
dashboard.to_csv('data/dashboard_12_04.csv', index=False)

## Exporting qualitative responses

In [50]:
survey.to_csv('data/qualitative_12_04.csv', columns=['ID', 'number_id', 'time_created', 'surveyer', 
                                                     'state', 'district', 'comments_health', 
                                                     'comments_food', 'comments_benefits',
                                                     'comments_additional'], index=False)