# Accessible Healthcare Facilities in Nairobi County

In [8]:
# This is an exploitory Data Analysis project on the Healthcare Facilities accessible in Nairobi County, Kenya.

**Importing packages**

In [1]:
# I will begin by importing python packages that I will need for this analysis

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



**Importing Data**

In [2]:
#Loading data from data source(MSexcel)
#I will load my CSV file using pandas

file_name = "Healthcare Facilities in Nairobi"
df = pd.read_csv(r"C:/Users/akeyo/OneDrive/Documents/DA Projects/Healthcare Facilities in Nairobi.csv")


In [3]:
# Displaying the loaded data
print(df)

     Facility Number                   Facility Name Sub-County  \
0                  1       MAMA LUCY KIBAKI HOSPITAL      NJIRU   
1                  3    APTC  EMBAKASI HEALTH CENTRE   EMBAKASI   
2                  4                 AYANY ESTATE HC      KIBRA   
3                  5                    BABA DOGO HC   KASARANI   
4                  6                     BAHATI DISP   MAKADARA   
..               ...                             ...        ...   
194              863  VICTORY CENTRAL MEDICAL CLINIC   EMBAKASI   
195              872      WESTLANDS COTTAGE HOSPITAL  WESTLANDS   
196              176                MEDANTA AFRICARE  WESTLANDS   
197               18  NYINA WA MUMBI HEALTH SERVICES  DAGORETTI   
198              418              MEDIHEAL HOSPITALS  WESTLANDS   

                                        Facility Type   
0    Hospital Moh and Mission Districts, sub-districts  
1                                        Health Centre  
2                       

**Data Exploration**

In [4]:
# Exploring the first 5 lines of my dataframe

df.head()

Unnamed: 0,Facility Number,Facility Name,Sub-County,Facility Type
0,1,MAMA LUCY KIBAKI HOSPITAL,NJIRU,"Hospital Moh and Mission Districts, sub-districts"
1,3,APTC EMBAKASI HEALTH CENTRE,EMBAKASI,Health Centre
2,4,AYANY ESTATE HC,KIBRA,Health Centre
3,5,BABA DOGO HC,KASARANI,Health Centre
4,6,BAHATI DISP,MAKADARA,Dispensary


In [5]:
# Viewing the last 10 lines

df.tail(10)

Unnamed: 0,Facility Number,Facility Name,Sub-County,Facility Type
189,781,PANGANI CRESCENT MEDICAL AID CLINIC,KAMUKUNJI,Private Clinics and Medical centres
190,825,ST. JAMES HOSPITAL,EMBAKASI,Private Hospital
191,828,ST. MARY'S MEDICAL DISP,EMBAKASI,Dispensary
192,833,ST. THOMAS MEDICAL HEALTH SERVICES,EMBAKASI,Private Clinics and Medical centres
193,834,ST. VERONICA HEALTH SERVICES & LABORATORY,KAMUKUNJI,Private Clinics and Medical centres
194,863,VICTORY CENTRAL MEDICAL CLINIC,EMBAKASI,Private Clinics and Medical centres
195,872,WESTLANDS COTTAGE HOSPITAL,WESTLANDS,Private Hospital
196,176,MEDANTA AFRICARE,WESTLANDS,Private Hospital
197,18,NYINA WA MUMBI HEALTH SERVICES,DAGORETTI,Health Centre
198,418,MEDIHEAL HOSPITALS,WESTLANDS,Private Hospital


In [6]:
# Exploring the data using summary statistics

df.describe()

Unnamed: 0,Facility Number
count,199.0
mean,200.798995
std,209.176137
min,1.0
25%,57.5
50%,133.0
75%,243.0
max,872.0


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 199 entries, 0 to 198
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Facility Number  199 non-null    int64 
 1   Facility Name    199 non-null    object
 2   Sub-County       199 non-null    object
 3   Facility Type    199 non-null    object
dtypes: int64(1), object(3)
memory usage: 6.3+ KB


In [8]:
df.dtypes

Facility Number     int64
Facility Name      object
Sub-County         object
Facility Type      object
dtype: object

In [9]:
df.columns

Index(['Facility Number', 'Facility Name', 'Sub-County', 'Facility Type '], dtype='object')

**Data Cleaning**

In [10]:
# Which are the Sub-counties tallied in the dataset?

df['Sub-County'].unique()

array(['NJIRU', 'EMBAKASI', 'KIBRA', 'KASARANI', 'MAKADARA', 'KAMUKUNJI',
       'WESTLANDS', 'STAREHE', "LANG'ATA", 'MATHARE', 'DAGORETTI',
       'PUMWANI'], dtype=object)

In [11]:
# Renaming Columns

new_columns = {'Facility Number' : 'facility_No',
               'Facility Name': 'Name',
               'Sub-County': 'Area',
               'Facility Type': 'Type'}

df.rename(columns = new_columns, inplace=True)

In [12]:
df.head()

Unnamed: 0,facility_No,Name,Area,Facility Type
0,1,MAMA LUCY KIBAKI HOSPITAL,NJIRU,"Hospital Moh and Mission Districts, sub-districts"
1,3,APTC EMBAKASI HEALTH CENTRE,EMBAKASI,Health Centre
2,4,AYANY ESTATE HC,KIBRA,Health Centre
3,5,BABA DOGO HC,KASARANI,Health Centre
4,6,BAHATI DISP,MAKADARA,Dispensary


In [13]:
# Changing data types

df['Name'].astype(str).str.isdigit()
df['Area'].astype(str).str.isdigit()


0      False
1      False
2      False
3      False
4      False
       ...  
194    False
195    False
196    False
197    False
198    False
Name: Area, Length: 199, dtype: bool

In [14]:
# Viewing the sum of missing values in each column

df.isnull().sum()

facility_No       0
Name              0
Area              0
Facility Type     0
dtype: int64

NB: I cleaned most of the data on MSexcel while analyzing it. This is why there are no missing data or data that needs to be dropped.
This included dropping missing data and locations.


**Data Aggregation**

Grouping the Data

In [15]:
# First I'll aggregate facilities based on each Sub-County
# I'll create my groupby object

df.groupby('Area')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000256653FEE30>

In [16]:
# Using my object

Area_groups = df.groupby('Area')
Area_groups.get_group('WESTLANDS')

Unnamed: 0,facility_No,Name,Area,Facility Type
6,8,BETTER LIVING CENTRE SDA DISP,WESTLANDS,Dispensary
26,33,GETRUDES GARDENS CHILDREN'S HOSPITAL,WESTLANDS,"Hospital Moh and Mission Districts, sub-districts"
27,34,HIGHRIDGE HC,WESTLANDS,Health Centre
37,46,KABETE TECHNICAL AND TRADE DISP,WESTLANDS,Dispensary
42,51,KANGEMI HEALTH CENTRE,WESTLANDS,Health Centre
55,68,LADY NORTH HEALTH CENTRE,WESTLANDS,Health Centre
60,75,LOWER KABETE SHC,WESTLANDS,Health Centre
91,124,STATE HOUSE DISP,WESTLANDS,Dispensary
92,125,STATE HOUSE ROAD DISP,WESTLANDS,Dispensary
94,127,ULINZI HOUSE MIR DISP,WESTLANDS,Dispensary


In [17]:
# groups made

# It is a dictionary where each key corresponds to a Sub-County name
# The values are the index ranges for each group

Area_groups.groups

{'DAGORETTI': [38, 50, 52, 65, 69, 75, 85, 86, 93, 97, 101, 105, 106, 108, 111, 138, 158, 165, 173, 175, 197], 'EMBAKASI': [1, 15, 23, 44, 95, 107, 112, 113, 120, 129, 147, 153, 157, 159, 178, 190, 191, 192, 194], 'KAMUKUNJI': [5, 9, 10, 14, 16, 19, 20, 21, 28, 30, 31, 32, 33, 34, 35, 40, 48, 54, 56, 70, 73, 74, 81, 83, 84, 87, 88, 90, 114, 115, 119, 131, 136, 137, 139, 142, 144, 149, 152, 154, 155, 156, 189, 193], 'KASARANI': [3, 12, 39, 41, 45, 46, 47, 49, 58, 76, 77, 80, 109, 110, 118, 121, 145, 172], 'KIBRA': [2, 53, 57, 100, 116, 122, 125, 126, 127, 133, 140, 146], 'LANG'ATA': [13, 24, 25, 36, 43, 72, 128, 150, 166, 167, 168, 169, 174, 188], 'MAKADARA': [4, 11, 22, 59, 61, 62, 63, 64, 66, 71, 78, 79, 102, 103, 104, 124, 130, 132, 161, 186], 'MATHARE': [29, 67, 68, 82, 96, 123, 143], 'NJIRU': [0, 8, 17, 18, 51, 89, 177, 183], 'PUMWANI': [141], 'STAREHE': [7, 148, 179], 'WESTLANDS': [6, 26, 27, 37, 42, 55, 60, 91, 92, 94, 98, 99, 117, 134, 135, 151, 160, 162, 163, 164, 170, 171, 176

In [18]:
# I can use Size to get a count of the number of items in each group

Area_groups.size()

Area
DAGORETTI    21
EMBAKASI     19
KAMUKUNJI    44
KASARANI     18
KIBRA        12
LANG'ATA     14
MAKADARA     20
MATHARE       7
NJIRU         8
PUMWANI       1
STAREHE       3
WESTLANDS    32
dtype: int64

**Pivot Tables**

In [None]:
from matplotlib import pyplot as plt

# Enlarge the plot]