In [101]:
%matplotlib inline

import matplotlib.pyplot as plt
import geopandas as gpd
import palettable as pltt
import seaborn as sns
from seaborn import palplot
import pandas as pd

In [108]:
# Relative path to the health-infrastructure Stata (.dta) file
health_infra_dta_path = 'data/Hospital/ddl_health_infra_2011.dta'

# Load the Stata file into a DataFrame, then preview its first rows
health_infra_df = pd.read_stata(filepath_or_buffer=health_infra_dta_path)
health_infra_df.head()

Unnamed: 0,pc11_district_id,pc11_district_name,pc11_state_id,pc11_state_name,count_2011_sub,count_2011_dish,count_2011_sth,count_2011_phc,count_2011_chc,pr_2011_phc,pr_2011_chc,pr_2011_sub,pr_2011_dish,pr_2011_sth
0,1,Kupwara,1,Jammu & Kashmir,157,1,0,32,6,0.344717,0.064634,1.691268,0.010772,0.0
1,2,Badgam,1,Jammu & Kashmir,139,1,0,56,8,0.696582,0.099512,1.729016,0.012439,0.0
2,3,LehLadakh,1,Jammu & Kashmir,122,1,0,15,3,1.053564,0.210713,8.568987,0.070238,0.0
3,4,Kargil,1,Jammu & Kashmir,118,1,0,16,4,1.065417,0.266354,7.857453,0.066589,0.0
4,5,Punch,1,Jammu & Kashmir,155,1,0,34,4,0.668528,0.07865,3.0477,0.019663,0.0


### Data Documentation

Health Infrastructure: These data were available on Government of India’s Open Government Data Platform (https://data.gov.in/catalog/all-india-health-centres-directory?filters%5Bfield_catalog_reference%5D=3786581&format=json&offset=0&limit=6&sort%5Bcreated%5D=desc) and reflect the distribution of health infrastructure on 7th October 2016.

We export these data into ArcGIS, where we join them with shapefiles of district boundaries for 2011 (census year) and 2019 (latest) to obtain the number of facilities of each type at the district level.  

Population data in 2016: To get the facilities per 10000 population, we merge these data with the population data in 2016. We use geocoded population data from Gridded Population of the World, Version 4 (GPWv4) hosted at the Socioeconomic Data and Applications Center (SEDAC) (https://sedac.ciesin.columbia.edu/theme/population). This database contains gridded population data at the sub-district level from the 2011 Census. We estimated aggregate population in 2016 using the population growth rates calculated from the US Census Bureau’s estimates for India (https://www.census.gov/data-tools/demo/idb/informationGateway.php).


|   Variable Name                   |             Definition                                                         |
|-----------------------------------|--------------------------------------------------------------------------------|
|pc11_district_id                   |2011 Census District ID                                                         |
|pc11_district_name                 |2011 Census District name                                                       |
|pc11_state_id                      |2011 Census State ID                                                            |
|pc11_state_name                    |2011 Census State Name                                                          |
|count_2011_sub                     |Number of sub-centres in the district in 2016                                   |
|count_2011_dish                    |Number of district hospitals in the district in 2016                            |
|count_2011_sth                     |Number of sub-district/taluk hospitals in the district in 2016                  |
|count_2011_phc                     |Number of primary health centers in the district in 2016                        |
|count_2011_chc                     |Number of community health centers in the district in 2016                      | 
|pr_2011_phc                        |Number of primary health centers per 10000 people in the district in 2016       |
|pr_2011_chc                        |Number of community health centers per 10000 people in the district in 2016     |
|pr_2011_sub                        |Number of sub-centres per 10000 people in the district in 2016                  |
|pr_2011_dish                       |Number of district hospitals per 10000 people in the district in 2016           |
|pr_2011_sth                        |Number of sub-district/taluk hospitals per 10000 people in the district in 2016 |


In [94]:
# General info: index range, column names, non-null counts, dtypes and memory usage.
# NOTE(review): the recorded output below lists 9 columns with names such as
# `district_id` / `primery_health_centers`, which does not match the frame loaded
# above; it looks like a stale result from an earlier, out-of-order run.
# Confirm by re-running the notebook from a fresh kernel.
health_infra_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 640 entries, 0 to 639
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   district_id               640 non-null    object 
 1   district_name             640 non-null    object 
 2   state_id                  640 non-null    int8   
 3   state_name                640 non-null    object 
 4   primery_health_centers    640 non-null    float32
 5   community_health_centers  640 non-null    float32
 6   sub_centers               640 non-null    float32
 7   district_hospitals        640 non-null    float32
 8   sub_taluk_hospitals       640 non-null    float32
dtypes: float32(5), int8(1), object(3)
memory usage: 53.1+ KB


In [97]:
# Count the missing (NaN) entries in each column
health_infra_df.isna().sum()

pc11_district_id      0
pc11_district_name    0
pc11_state_id         0
pc11_state_name       0
count_2011_sub        0
count_2011_dish       0
count_2011_sth        0
count_2011_phc        0
count_2011_chc        0
pr_2011_phc           0
pr_2011_chc           0
pr_2011_sub           0
pr_2011_dish          0
pr_2011_sth           0
dtype: int64

In [98]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
health_infra_df.describe()

Unnamed: 0,pc11_state_id,count_2011_sub,count_2011_dish,count_2011_sth,count_2011_phc,count_2011_chc,pr_2011_phc,pr_2011_chc,pr_2011_sub,pr_2011_dish,pr_2011_sth
count,640.0,640.0,640.0,640.0,640.0,640.0,640.0,640.0,640.0,640.0,640.0
mean,17.114062,101.807812,1.445312,1.939062,45.971875,8.353125,0.297052,0.062479,0.698854,0.018468,0.011016
std,9.426486,183.125231,1.189473,2.984735,38.561243,6.691526,0.287464,0.099366,1.150004,0.057902,0.020868
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,9.0,0.0,1.0,0.0,19.0,3.0,0.159362,0.028572,0.0,0.004302,0.0
50%,18.0,2.0,1.0,1.0,37.0,7.0,0.240226,0.046316,0.008252,0.007258,0.003896
75%,24.0,150.0,2.0,3.0,61.25,12.0,0.351808,0.070428,1.196458,0.013279,0.013957
max,35.0,1513.0,11.0,17.0,254.0,36.0,4.455616,1.771778,10.396439,1.17139,0.290844


In [103]:
# Drop the district/state name columns and the raw facility-count columns,
# leaving the ids and the per-10,000-population rate (`pr_*`) columns.
health_infra_df = health_infra_df.drop(
    columns=[
        'pc11_district_name',
        'pc11_state_name',
        'count_2011_sub',
        'count_2011_dish',
        'count_2011_sth',
        'count_2011_phc',
        'count_2011_chc',
    ]
)
# Preview the first rows of the reduced table
health_infra_df.head()

Unnamed: 0,pc11_district_id,pc11_state_id,pr_2011_phc,pr_2011_chc,pr_2011_sub,pr_2011_dish,pr_2011_sth
0,1,1,0.344717,0.064634,1.691268,0.010772,0.0
1,2,1,0.696582,0.099512,1.729016,0.012439,0.0
2,3,1,1.053564,0.210713,8.568987,0.070238,0.0
3,4,1,1.065417,0.266354,7.857453,0.066589,0.0
4,5,1,0.668528,0.07865,3.0477,0.019663,0.0


In [104]:
# Give the per-10,000-population rate columns descriptive names.
# NOTE(review): 'primery_health_centers' is a misspelling of "primary"; it is kept
# unchanged here because other cells may reference this exact column name.
health_infra_df = health_infra_df.rename(
    {
        'pr_2011_phc': 'primery_health_centers',
        'pr_2011_chc': 'community_health_centers',
        'pr_2011_sub': 'sub_centers',
        'pr_2011_dish': 'district_hospitals',
        'pr_2011_sth': 'sub_taluk_hospitals',
    },
    axis='columns',
)
# Preview the first rows with the new column names
health_infra_df.head()

Unnamed: 0,pc11_district_id,pc11_state_id,primery_health_centers,community_health_centers,sub_centers,district_hospitals,sub_taluk_hospitals
0,1,1,0.344717,0.064634,1.691268,0.010772,0.0
1,2,1,0.696582,0.099512,1.729016,0.012439,0.0
2,3,1,1.053564,0.210713,8.568987,0.070238,0.0
3,4,1,1.065417,0.266354,7.857453,0.066589,0.0
4,5,1,0.668528,0.07865,3.0477,0.019663,0.0


In [106]:
# Group by state and take the mean of the district-level rates.
# Caveat: this is an unweighted average of district rates, not a
# population-weighted state rate, so it is only a rough indicator (but it makes
# correlations easier to see).
# `numeric_only=True` keeps the non-numeric `pc11_district_id` column out of the
# aggregation; without it, pandas >= 2.0 raises a TypeError instead of silently
# dropping that column as older versions did.
health_infra_df.groupby('pc11_state_id').mean(numeric_only=True)


Unnamed: 0_level_0,primery_health_centers,community_health_centers,sub_centers,district_hospitals,sub_taluk_hospitals
pc11_state_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.55873,0.075288,2.517695,0.023306,0.0
2,1.086273,0.197573,3.553399,0.05011,0.073942
3,0.186799,0.054559,0.004164,0.011972,0.017877
4,0.346446,0.017766,0.159898,0.035533,0.008883
5,0.221672,0.050771,0.007392,0.02311,0.015373
6,0.189849,0.048686,1.00963,0.011058,0.008259
7,0.464051,0.011952,0.080956,0.055422,0.013098
8,0.329906,0.081615,0.00277,0.005239,0.006241
9,0.153661,0.030154,0.002628,0.00852,9.6e-05
10,0.173246,0.005943,0.767142,0.00438,0.002422
