# Exploring health facility data

In [1]:
import pandas as pd
import numpy as np
import re


In [2]:
# Notebook-wide display settings: no cap on the number of columns shown,
# and up to 200 rows before pandas truncates the output.
pd.options.display.max_columns = None
pd.options.display.max_rows = 200

In [3]:
# Load the cleaned assessment CSV, using its first column as the row index.
df = pd.read_csv(
    'health_facility_assessment_cleaned.csv',
    index_col=0,
)
# Show the dtype pandas assigned to each column.
df.dtypes

facility_name                                                                 object
facility_gps_lat                                                             float64
facility_gps_lon                                                             float64
region_province                                                               object
district                                                                      object
chiefdom                                                                      object
facility_location                                                             object
facility_type_other                                                          float64
managing_authority_other                                                      object
outpatient_only                                                               object
capacity.number_consultation_rooms                                           float64
capacity.number_inpatient_beds                                   

In [9]:
# Pick columns by the last dot-separated segment of their name: keep a column
# when that segment contains 'number_' or starts with one of the prefixes below.
leaf_prefixes = ('month_', 'monthly_average', 'score_')
num_cols = [
    col
    for col in df.columns
    if 'number_' in col.split('.')[-1]
    or col.split('.')[-1].startswith(leaf_prefixes)
]
num_cols

['capacity.number_consultation_rooms',
 'capacity.number_inpatient_beds',
 'capacity.number_maternity_beds',
 'cho_number_in_post',
 'cho_number_present_at_visit',
 'cht_number_in_post',
 'cht_number_present_at_visit',
 'cha_number_in_post',
 'cha_number_present_at_visit',
 'cm_number_in_post',
 'cm_number_present_at_visit',
 'sechn_number_in_post',
 'sechn_number_present_at_visit',
 'lab_tech_number_in_post',
 'lab_tech_number_present_at_visit',
 'mch_aides_number_in_post',
 'mch_aides_number_present_at_visit',
 'cleaner_porter_number_in_post',
 'cleaner_porter_number_present_at_visit',
 'other_number_in_post',
 'other_number_present_at_visit',
 'consultations.head_count.month_1',
 'consultations.head_count.month_2',
 'consultations.head_count.month_3',
 'consultations.head_count.month_4',
 'consultations.general_outpatient.month_1',
 'consultations.general_outpatient.month_2',
 'consultations.general_outpatient.month_3',
 'consultations.general_outpatient.month_4',
 'consultations.de

In [5]:
# Column-wise mean, restricted to numeric columns via numeric_only=True.
df.mean(numeric_only=True)

facility_gps_lat                                                               9.731000
facility_gps_lon                                                              13.066807
facility_type_other                                                                 NaN
capacity.number_consultation_rooms                                             1.769231
capacity.number_inpatient_beds                                                 2.269231
capacity.number_maternity_beds                                                 2.469231
cho_number_in_post                                                             0.723077
cho_number_present_at_visit                                                    0.407692
cht_number_in_post                                                             0.200000
cht_number_present_at_visit                                                    0.169231
cha_number_in_post                                                             0.415385
cha_number_present_at_visit     

Let's check for extreme outliers: values that lie more than three standard deviations from their column's mean.

In [6]:
# Flag every value lying more than three standard deviations from its column mean.
numeric = df[num_cols]
deviation = (numeric - numeric.mean()).abs()
is_outlier = deviation > 3 * numeric.std()

# For each column with at least one flagged value, print the column name
# followed by the flagged rows.
for col in num_cols:
    flagged = is_outlier[col]
    if flagged.any():
        print('Col', col)
        print(df.loc[flagged, col])

Col capacity.number_consultation_rooms
103    12.0
Name: capacity.number_consultation_rooms, dtype: float64
Col capacity.number_inpatient_beds
70     12.0
124    11.0
Name: capacity.number_inpatient_beds, dtype: float64
Col capacity.number_maternity_beds
103    100.0
Name: capacity.number_maternity_beds, dtype: float64
Col cho_number_in_post
28      7.0
66      8.0
84     15.0
103    12.0
Name: cho_number_in_post, dtype: float64
Col cho_number_present_at_visit
84     12.0
103    12.0
Name: cho_number_present_at_visit, dtype: float64
Col cht_number_in_post
67      4.0
84      4.0
103    12.0
Name: cht_number_in_post, dtype: float64
Col cht_number_present_at_visit
103    12.0
Name: cht_number_present_at_visit, dtype: float64
Col cha_number_in_post
103    12.0
120     5.0
Name: cha_number_in_post, dtype: float64
Col cha_number_present_at_visit
103    12.0
Name: cha_number_present_at_visit, dtype: float64
Col cm_number_in_post
103    12.0
Name: cm_number_in_post, dtype: float64
Col cm_numb

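None of the flagged values look too wild, with one caveat: row 103 is flagged in almost every column shown (12.0 for most counts and 100.0 maternity beds), so it may be a data-entry artefact worth checking before drawing conclusions from it.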

In [10]:
# Leave out every column whose name contains 'score', then compute the
# pairwise correlations between the remaining numeric columns.
num_cols_no_score = [col for col in num_cols if 'score' not in col]
corr = df[num_cols_no_score].corr()
# Display the correlation matrix with a colour gradient applied to its cells.
corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,capacity.number_consultation_rooms,capacity.number_inpatient_beds,capacity.number_maternity_beds,cho_number_in_post,cho_number_present_at_visit,cht_number_in_post,cht_number_present_at_visit,cha_number_in_post,cha_number_present_at_visit,cm_number_in_post,cm_number_present_at_visit,sechn_number_in_post,sechn_number_present_at_visit,lab_tech_number_in_post,lab_tech_number_present_at_visit,mch_aides_number_in_post,mch_aides_number_present_at_visit,cleaner_porter_number_in_post,cleaner_porter_number_present_at_visit,other_number_in_post,other_number_present_at_visit,consultations.head_count.month_1,consultations.head_count.month_2,consultations.head_count.month_3,consultations.head_count.month_4,consultations.general_outpatient.month_1,consultations.general_outpatient.month_2,consultations.general_outpatient.month_3,consultations.general_outpatient.month_4,consultations.deliveries.month_1,consultations.deliveries.month_2,consultations.deliveries.month_3,consultations.deliveries.month_4,consultations.pent_vaccines.month_1,consultations.pent_vaccines.month_2,consultations.pent_vaccines.month_3,consultations.pent_vaccines.month_4,consultations.head_count.monthly_average_head_counts,consultations.general_outpatient.monthly_average_general_outpatient,consultations.deliveries.monthly_average_deliveries,consultations.pent_vaccines.monthly_average_pent_vaccines
capacity.number_consultation_rooms,1.0,0.522649,0.724965,0.37888,0.487655,0.608046,0.667492,0.632321,0.700466,0.747703,0.731407,0.617135,0.666623,0.71222,0.710419,0.445757,0.42129,0.605762,0.558479,0.196924,0.344839,0.061264,0.092066,0.097029,0.034331,0.134419,0.131563,0.13301,0.063005,0.111617,0.103734,0.140786,0.045234,0.103843,0.158077,0.108247,0.025977,0.075166,0.124291,0.113623,0.114026
capacity.number_inpatient_beds,0.522649,1.0,0.275859,0.223971,0.240981,0.210542,0.238654,0.329095,0.336387,0.446899,0.383224,0.388959,0.338604,0.350525,0.339908,0.180795,0.097555,0.320276,0.273041,0.031439,0.168578,0.266315,0.348257,0.320669,0.265728,0.349639,0.322235,0.286802,0.199008,0.186646,0.139469,0.213596,0.129506,0.380559,0.378666,0.300561,0.280685,0.314237,0.308959,0.178932,0.381178
capacity.number_maternity_beds,0.724965,0.275859,1.0,0.511029,0.666481,0.882327,0.946914,0.805809,0.896593,0.853688,0.912811,0.499277,0.689431,0.876901,0.936,0.55731,0.687655,0.754745,0.791435,0.247202,0.570538,-0.104334,-0.102391,-0.099758,-0.102067,-0.103086,-0.095417,-0.09457,-0.105129,-0.108136,-0.118128,-0.079317,-0.088917,-0.090736,-0.081293,-0.0677,-0.096553,-0.106519,-0.104137,-0.107949,-0.096104
cho_number_in_post,0.37888,0.223971,0.511029,1.0,0.847236,0.649319,0.596579,0.4189,0.464862,0.649776,0.615326,0.452537,0.478787,0.515995,0.520567,0.416814,0.473299,0.503899,0.484316,0.067783,0.267156,-0.152474,-0.118745,-0.126298,-0.112549,-0.099651,-0.080717,-0.076608,-0.106152,-0.105387,-0.068822,-0.016926,-0.067645,0.016502,0.043686,-0.000567,-0.041898,-0.133276,-0.094299,-0.069137,0.006433
cho_number_present_at_visit,0.487655,0.240981,0.666481,0.847236,1.0,0.8089,0.762498,0.540509,0.590723,0.748831,0.740055,0.468197,0.586029,0.601625,0.651484,0.468192,0.569085,0.615936,0.633324,0.124064,0.388871,-0.200732,-0.173374,-0.18941,-0.185706,-0.15333,-0.131422,-0.140628,-0.173098,-0.128955,-0.140382,-0.092272,-0.140319,-0.027538,-0.020514,-0.073784,-0.08145,-0.195202,-0.155569,-0.135194,-0.056417
cht_number_in_post,0.608046,0.210542,0.882327,0.649319,0.8089,1.0,0.978706,0.715635,0.791755,0.813296,0.859374,0.454764,0.625939,0.762029,0.821635,0.544989,0.676259,0.716664,0.734332,0.190746,0.479768,-0.147059,-0.123893,-0.136639,-0.128333,-0.159656,-0.148857,-0.156157,-0.167849,-0.178696,-0.164668,-0.126236,-0.150332,-0.085197,-0.098213,-0.102681,-0.095993,-0.139737,-0.165369,-0.169965,-0.109176
cht_number_present_at_visit,0.667492,0.238654,0.946914,0.596579,0.762498,0.978706,1.0,0.774739,0.854045,0.845532,0.902637,0.468952,0.655931,0.822223,0.88489,0.550001,0.682326,0.744123,0.766603,0.21966,0.525855,-0.143475,-0.12275,-0.135599,-0.127432,-0.146053,-0.134035,-0.142725,-0.152431,-0.163528,-0.156,-0.119003,-0.134628,-0.088914,-0.103362,-0.092921,-0.097687,-0.137979,-0.150394,-0.157479,-0.109451
cha_number_in_post,0.632321,0.329095,0.805809,0.4189,0.540509,0.715635,0.774739,1.0,0.897481,0.77015,0.773053,0.502945,0.599741,0.744871,0.779098,0.513215,0.565278,0.686706,0.689225,0.23734,0.441203,-0.04908,-0.008106,0.003515,-0.006538,-0.014436,0.017499,0.04569,0.035308,-0.063114,-0.054389,0.010427,-0.02341,-0.058759,-0.014426,0.000535,-0.051513,-0.015982,0.021396,-0.032163,-0.035138
cha_number_present_at_visit,0.700466,0.336387,0.896593,0.464862,0.590723,0.791755,0.854045,0.897481,1.0,0.807109,0.831827,0.491546,0.66089,0.804457,0.847323,0.479899,0.588552,0.750599,0.773559,0.256906,0.505086,-0.054468,-0.042014,-0.031318,-0.021427,-0.056661,-0.033217,-0.005202,-0.025004,-0.10893,-0.115806,-0.087633,-0.083867,-0.064034,-0.065163,-0.054313,-0.050266,-0.039393,-0.031688,-0.108966,-0.067036
cm_number_in_post,0.747703,0.446899,0.853688,0.649776,0.748831,0.813296,0.845532,0.77015,0.807109,1.0,0.950946,0.712136,0.723216,0.88941,0.882331,0.618943,0.572907,0.728336,0.709056,0.211414,0.491251,0.011074,0.030375,0.012038,-0.019059,0.009727,0.028941,0.025205,-0.044505,0.042201,0.027926,0.132535,0.032011,0.09066,0.147895,0.12045,-0.017274,0.009859,0.00807,0.070598,0.100486


There are some obvious correlations above; for example, the consultation counts are correlated with one another.