In [58]:
%matplotlib inline

import matplotlib.pyplot as plt
import geopandas as gpd
import palettable as pltt
import seaborn as sns
from seaborn import palplot
import pandas as pd

In [59]:
# Path to the hospitals dataset (Stata .dta file)
hospitals_dta_path = 'data/Hospital/dlhs4_hospitals_dist_pc11.dta'

# Load the Stata file into a DataFrame
hospitals_df = pd.read_stata(hospitals_dta_path)
# Preview the first five rows
hospitals_df.head(n=5)

Unnamed: 0,pc11_state_id,pc11_district_id,dlhs4_dh_beds,dlhs4_dh_count,dlhs4_dh_staff,dlhs4_dh_icu_beds,dlhs4_chc_beds,dlhs4_chc_count,dlhs4_chc_staff,dlhs4_chc_beds_ven,dlhs4_phc_beds,dlhs4_phc_count,dlhs4_phc_staff,dlhs4_phc_pop,dlhs4_phc_beds_oxy,pc11_pca_tot_p,dlhs4_phc_mult,dlhs4_total_beds,dlhs4_total_staff,dlhs4_total_facilities
0,2,23,200,1,42,0.0,165,8,86,0.0,68,16,41,157586.0,38.0,519080,3.293947,588.988403,263.051849,61.703159
1,2,24,300,1,53,0.0,262,11,183,30.0,71,19,100,867424.0,60.0,1510075,1.740873,685.60199,410.087311,45.076588
2,2,25,70,1,17,1.0,21,2,26,0.0,33,8,27,24337.0,13.0,31564,1.296955,133.79953,78.017792,13.375642
3,2,26,200,1,68,2.0,105,6,101,0.0,24,5,31,91380.0,12.0,437903,4.79211,420.010651,317.55542,30.96055
4,2,27,300,1,92,3.0,303,10,178,0.0,50,14,65,207184.0,30.0,999777,4.825551,844.277527,583.660828,78.557716


### Data Documentation

***No documentation***

|   Variable Name                   |             Definition                                      |
|-----------------------------------|-------------------------------------------------------------|
|pc11_district_id                   |Unique District ID Census 2011                               |
|pc11_state_id                      |State ID in census 2011                                      |
|dlhs4_dh_beds                      |District hospital beds                                       |
|dlhs4_dh_count                     |Number of district hospitals                                 |
|dlhs4_dh_staff                     |District hospitals staff                                     |
|dlhs4_dh_icu_beds                  |District hospitals Intensive care beds                       | 
|dlhs4_chc_beds                     |Community health centers beds                                |
|dlhs4_chc_count                    |Number of Community health centers                           |
|dlhs4_chc_staff                    |Community health centers staff                               |
|dlhs4_chc_beds_ven                 |Community health centers beds with ventilator                |
|dlhs4_phc_beds                     |Primary health centers beds                                  |
|dlhs4_phc_count                    |Number of Primary health centers                             |
|dlhs4_phc_staff                    |Primary health centers staff                                 |
|dlhs4_phc_pop                      |Definition unknown (undocumented)                            |
|dlhs4_phc_beds_oxy                 |Primary health centers oxygen beds                           |
|pc11_pca_tot_p                     |Populations as of 2011                                       |
|dlhs4_phc_mult                     |Definition unknown (undocumented)                            |
|dlhs4_total_beds                   |Total beds                                                   |
|dlhs4_total_staff                  |Total staff                                                  |
|dlhs4_total_facilities             |Total facilities                                             |




In [60]:
# General info: index range, column names, non-null counts and dtypes
hospitals_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 552 entries, 0 to 551
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   pc11_state_id           552 non-null    object 
 1   pc11_district_id        552 non-null    object 
 2   dlhs4_dh_beds           552 non-null    int16  
 3   dlhs4_dh_count          552 non-null    int8   
 4   dlhs4_dh_staff          552 non-null    int16  
 5   dlhs4_dh_icu_beds       541 non-null    float64
 6   dlhs4_chc_beds          552 non-null    int16  
 7   dlhs4_chc_count         552 non-null    int8   
 8   dlhs4_chc_staff         552 non-null    int16  
 9   dlhs4_chc_beds_ven      535 non-null    float64
 10  dlhs4_phc_beds          552 non-null    int16  
 11  dlhs4_phc_count         552 non-null    int8   
 12  dlhs4_phc_staff         552 non-null    int16  
 13  dlhs4_phc_pop           542 non-null    float64
 14  dlhs4_phc_beds_oxy      542 non-null    fl

In [61]:
# Cast the listed numeric columns to float64 (one shared target dtype,
# so the mapping is built from a plain list of column names).
hospitals_df = hospitals_df.astype(
    dict.fromkeys(
        [
            'dlhs4_dh_beds',
            'dlhs4_dh_count',
            'dlhs4_dh_staff',
            'dlhs4_chc_beds',
            'dlhs4_chc_count',
            'dlhs4_chc_staff',
            'dlhs4_phc_beds',
            'dlhs4_phc_count',
            'dlhs4_phc_staff',
            'pc11_pca_tot_p',
            'dlhs4_phc_mult',
            'dlhs4_total_beds',
            'dlhs4_total_staff',
            'dlhs4_total_facilities',
        ],
        'float64',
    )
)

In [62]:
# General info again, to verify the dtypes after the float64 cast above
hospitals_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 552 entries, 0 to 551
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   pc11_state_id           552 non-null    object 
 1   pc11_district_id        552 non-null    object 
 2   dlhs4_dh_beds           552 non-null    float64
 3   dlhs4_dh_count          552 non-null    float64
 4   dlhs4_dh_staff          552 non-null    float64
 5   dlhs4_dh_icu_beds       541 non-null    float64
 6   dlhs4_chc_beds          552 non-null    float64
 7   dlhs4_chc_count         552 non-null    float64
 8   dlhs4_chc_staff         552 non-null    float64
 9   dlhs4_chc_beds_ven      535 non-null    float64
 10  dlhs4_phc_beds          552 non-null    float64
 11  dlhs4_phc_count         552 non-null    float64
 12  dlhs4_phc_staff         552 non-null    float64
 13  dlhs4_phc_pop           542 non-null    float64
 14  dlhs4_phc_beds_oxy      542 non-null    fl

In [63]:
# Count the missing (NaN) entries in each column
hospitals_df.isna().sum()

pc11_state_id              0
pc11_district_id           0
dlhs4_dh_beds              0
dlhs4_dh_count             0
dlhs4_dh_staff             0
dlhs4_dh_icu_beds         11
dlhs4_chc_beds             0
dlhs4_chc_count            0
dlhs4_chc_staff            0
dlhs4_chc_beds_ven        17
dlhs4_phc_beds             0
dlhs4_phc_count            0
dlhs4_phc_staff            0
dlhs4_phc_pop             10
dlhs4_phc_beds_oxy        10
pc11_pca_tot_p             0
dlhs4_phc_mult             0
dlhs4_total_beds           0
dlhs4_total_staff          0
dlhs4_total_facilities     0
dtype: int64

In [64]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
hospitals_df.describe()

Unnamed: 0,dlhs4_dh_beds,dlhs4_dh_count,dlhs4_dh_staff,dlhs4_dh_icu_beds,dlhs4_chc_beds,dlhs4_chc_count,dlhs4_chc_staff,dlhs4_chc_beds_ven,dlhs4_phc_beds,dlhs4_phc_count,dlhs4_phc_staff,dlhs4_phc_pop,dlhs4_phc_beds_oxy,pc11_pca_tot_p,dlhs4_phc_mult,dlhs4_total_beds,dlhs4_total_staff,dlhs4_total_facilities
count,552.0,552.0,552.0,541.0,552.0,552.0,552.0,535.0,552.0,552.0,552.0,542.0,542.0,552.0,552.0,552.0,552.0,552.0
mean,399.173913,2.789855,343.54529,26.042514,211.085145,8.713768,221.128623,6.203738,85.536232,15.21558,146.496377,640105.1,64.175277,1950634.0,5.007963,927.654522,1115.031263,68.050684
std,568.074487,2.396164,1333.831039,93.496762,181.208571,6.210578,308.662573,25.357474,63.131834,8.542978,191.17535,661390.0,58.47641,1528864.0,9.590714,827.93385,2409.172681,48.042209
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2810.0,0.0,8004.0,0.0,46.80735,18.0,1.0
25%,120.0,1.0,61.75,0.0,84.75,4.0,77.0,0.0,39.75,9.0,55.0,263278.0,17.0,955886.0,2.118562,395.741592,365.764168,32.555521
50%,259.0,2.0,119.0,4.0,179.0,8.0,151.0,0.0,69.0,14.5,97.5,478384.5,49.0,1641280.0,3.072459,753.407745,615.950684,59.293962
75%,521.75,4.0,255.5,16.0,272.25,12.0,237.25,0.0,122.0,20.0,173.25,776422.2,95.0,2642438.0,5.038369,1224.57016,1073.755646,90.378647
max,10522.0,16.0,17642.0,911.0,1315.0,35.0,2787.0,292.0,408.0,47.0,2138.0,4614076.0,396.0,11060150.0,175.017868,11334.803711,28652.914062,366.035736


In [65]:
# Drop the "total" columns, because they have no connection to the other variables
hospitals_df = hospitals_df.drop(
    columns=['dlhs4_total_beds', 'dlhs4_total_staff', 'dlhs4_total_facilities']
)
# Preview the first rows of the reduced table
hospitals_df.head()

Unnamed: 0,pc11_state_id,pc11_district_id,dlhs4_dh_beds,dlhs4_dh_count,dlhs4_dh_staff,dlhs4_dh_icu_beds,dlhs4_chc_beds,dlhs4_chc_count,dlhs4_chc_staff,dlhs4_chc_beds_ven,dlhs4_phc_beds,dlhs4_phc_count,dlhs4_phc_staff,dlhs4_phc_pop,dlhs4_phc_beds_oxy,pc11_pca_tot_p,dlhs4_phc_mult
0,2,23,200.0,1.0,42.0,0.0,165.0,8.0,86.0,0.0,68.0,16.0,41.0,157586.0,38.0,519080.0,3.293947
1,2,24,300.0,1.0,53.0,0.0,262.0,11.0,183.0,30.0,71.0,19.0,100.0,867424.0,60.0,1510075.0,1.740873
2,2,25,70.0,1.0,17.0,1.0,21.0,2.0,26.0,0.0,33.0,8.0,27.0,24337.0,13.0,31564.0,1.296955
3,2,26,200.0,1.0,68.0,2.0,105.0,6.0,101.0,0.0,24.0,5.0,31.0,91380.0,12.0,437903.0,4.79211
4,2,27,300.0,1.0,92.0,3.0,303.0,10.0,178.0,0.0,50.0,14.0,65.0,207184.0,30.0,999777.0,4.825551


In [66]:
# List of indicator columns; each column name appears exactly once
# (a duplicated 'dlhs4_dh_icu_beds' entry was removed).
# NOTE(review): 'dlhs4_phc_beds' exists in the data but is not listed here —
# confirm whether that omission is intentional.
indicators = [
    'dlhs4_dh_beds', 'dlhs4_dh_count', 'dlhs4_dh_staff', 'dlhs4_dh_icu_beds',
    'dlhs4_chc_beds', 'dlhs4_chc_count', 'dlhs4_chc_staff', 'dlhs4_chc_beds_ven',
    'dlhs4_phc_count', 'dlhs4_phc_staff', 'dlhs4_phc_pop', 'dlhs4_phc_beds_oxy',
]

### Data by state

In [67]:
# Create the state-based dataset as an independent copy, so that changes made
# to hospitals_st cannot propagate back to the district-level hospitals_df
hospitals_st = hospitals_df.copy()
# Display top of table
hospitals_st.head()

Unnamed: 0,pc11_state_id,pc11_district_id,dlhs4_dh_beds,dlhs4_dh_count,dlhs4_dh_staff,dlhs4_dh_icu_beds,dlhs4_chc_beds,dlhs4_chc_count,dlhs4_chc_staff,dlhs4_chc_beds_ven,dlhs4_phc_beds,dlhs4_phc_count,dlhs4_phc_staff,dlhs4_phc_pop,dlhs4_phc_beds_oxy,pc11_pca_tot_p,dlhs4_phc_mult
0,2,23,200.0,1.0,42.0,0.0,165.0,8.0,86.0,0.0,68.0,16.0,41.0,157586.0,38.0,519080.0,3.293947
1,2,24,300.0,1.0,53.0,0.0,262.0,11.0,183.0,30.0,71.0,19.0,100.0,867424.0,60.0,1510075.0,1.740873
2,2,25,70.0,1.0,17.0,1.0,21.0,2.0,26.0,0.0,33.0,8.0,27.0,24337.0,13.0,31564.0,1.296955
3,2,26,200.0,1.0,68.0,2.0,105.0,6.0,101.0,0.0,24.0,5.0,31.0,91380.0,12.0,437903.0,4.79211
4,2,27,300.0,1.0,92.0,3.0,303.0,10.0,178.0,0.0,50.0,14.0,65.0,207184.0,30.0,999777.0,4.825551


In [68]:
# Group the rows by state ID and sum every column within each group.
# Note: the object-typed ID columns (pc11_state_id, pc11_district_id) are
# string-concatenated rather than added, as the output below shows; they are
# dropped in the following cell.
# NOTE(review): this passes the Python builtin `sum` to apply(); how pandas
# treats a builtin here appears to differ between versions — confirm against
# the pandas version in use.
hospitals_st=hospitals_st.groupby(['pc11_state_id']).apply(sum)

# Display top of table
hospitals_st.head()

Unnamed: 0_level_0,pc11_state_id,pc11_district_id,dlhs4_dh_beds,dlhs4_dh_count,dlhs4_dh_staff,dlhs4_dh_icu_beds,dlhs4_chc_beds,dlhs4_chc_count,dlhs4_chc_staff,dlhs4_chc_beds_ven,dlhs4_phc_beds,dlhs4_phc_count,dlhs4_phc_staff,dlhs4_phc_pop,dlhs4_phc_beds_oxy,pc11_pca_tot_p,dlhs4_phc_mult
pc11_state_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2,020202020202020202020202,023024025026027028029030031032033034,2330.0,12.0,738.0,18.0,1905.0,77.0,1212.0,30.0,686.0,149.0,589.0,2519734.0,351.0,6864602.0,37.691905
3,0303030303030303030303030303030303030303,0350360370380390400410420430440450460470480490...,5119.0,58.0,2977.0,250.0,2850.0,120.0,3351.0,252.0,1077.0,162.0,1573.0,6642305.0,895.0,27743338.0,92.852465
4,04,055,550.0,1.0,492.0,20.0,75.0,2.0,65.0,25.0,0.0,0.0,0.0,0.0,0.0,1055450.0,0.0
5,05050505050505050505050505,056057058059060061062063064065066067068,4360.0,39.0,1887.0,89.0,1391.0,60.0,1323.0,0.0,365.0,84.0,704.0,3739781.0,307.0,10086292.0,62.901037
6,060606060606060606060606060606060606060606,0690700710720730740750760770780790800810820830...,3965.0,42.0,2330.0,83.0,1871.0,106.0,2365.0,0.0,1258.0,244.0,1853.0,11292847.0,1207.0,25351462.0,50.508115


In [69]:
# Remove the concatenated ID columns and the dlhs4_phc_mult column from the state table
hospitals_st = hospitals_st.drop(
    columns=['pc11_state_id', 'pc11_district_id', 'dlhs4_phc_mult']
)

# Preview the first rows of the state table
hospitals_st.head()

Unnamed: 0_level_0,dlhs4_dh_beds,dlhs4_dh_count,dlhs4_dh_staff,dlhs4_dh_icu_beds,dlhs4_chc_beds,dlhs4_chc_count,dlhs4_chc_staff,dlhs4_chc_beds_ven,dlhs4_phc_beds,dlhs4_phc_count,dlhs4_phc_staff,dlhs4_phc_pop,dlhs4_phc_beds_oxy,pc11_pca_tot_p
pc11_state_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2,2330.0,12.0,738.0,18.0,1905.0,77.0,1212.0,30.0,686.0,149.0,589.0,2519734.0,351.0,6864602.0
3,5119.0,58.0,2977.0,250.0,2850.0,120.0,3351.0,252.0,1077.0,162.0,1573.0,6642305.0,895.0,27743338.0
4,550.0,1.0,492.0,20.0,75.0,2.0,65.0,25.0,0.0,0.0,0.0,0.0,0.0,1055450.0
5,4360.0,39.0,1887.0,89.0,1391.0,60.0,1323.0,0.0,365.0,84.0,704.0,3739781.0,307.0,10086292.0
6,3965.0,42.0,2330.0,83.0,1871.0,106.0,2365.0,0.0,1258.0,244.0,1853.0,11292847.0,1207.0,25351462.0
