# Kaduna Infrastructure Analysis

### **Goal**

#### To report the infrastructure needs of the state, if there are any.

In [124]:
import numpy as np
import statistics
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [125]:
# Load the raw records from the Excel workbook; the path is relative to the
# notebook's working directory.
data = pd.read_excel("data (3).xlsx")

In [126]:
"""
Cell generated by Data Wrangler.
"""
def clean_data(data):
    """Return a cleaned version of the facility table.

    The steps are applied in order:

    1. drop rows that are exact duplicates across all columns;
    2. replace missing 'registration_status' and 'license_status' values
       with the string "Unknown";
    3. drop the 'reg_number' column if it is present.

    Parameters
    ----------
    data : pandas.DataFrame
        Table to clean. It is not modified; every step returns a new frame.

    Returns
    -------
    pandas.DataFrame
        The cleaned table.
    """
    # Drop duplicate rows across all columns
    data = data.drop_duplicates()
    # Replace missing values with "Unknown" in both status columns
    data = data.fillna({'registration_status': "Unknown", 'license_status': "Unknown"})
    # Drop reg_number column. errors="ignore" keeps the function usable on a
    # frame that has already been cleaned (e.g. when the cell is re-run).
    data = data.drop(columns=["reg_number"], errors="ignore")
    return data

# Clean a copy of the loaded frame. The result is bound back to `data`, so the
# uncleaned frame is no longer reachable under that name after this cell.
data = clean_data(data.copy())
# Preview the first rows of the cleaned frame.
data.head()

Unnamed: 0,state,lga,ward,uid,facility_code,facility_name,start_date,ownership,facility_level,longitude,latitude,operation_status,registration_status,license_status,created,last_updated
0,Kaduna,Birnin Gwari,Magajin Gari 2,84313191,18/01/1/1/2/0001,Alumma Nursing And Maternity Home,2000-01-01 00:00:00,Private,Primary,6.64095,10.66508,Operational,Registered,Licensed,2018-01-01 01:01:01,2019-10-18 08:53:26
1,Kaduna,Birnin Gwari,Magajin Gari 2,45229535,18/01/1/1/1/0052,Anguwan Haladu Priamry Health Centre,1972-01-01 00:00:00,Public,Primary,6.53769,10.66777,Operational,Registered,Licensed,2018-01-02 01:01:01,2019-10-18 08:54:15
2,Kaduna,Birnin Gwari,Magajin Gari 1,87588064,18/01/1/1/1/0002,Anguwan Shitu Health Clinic,2000-01-01 00:00:00,Public,Primary,6.54543,10.66193,Operational,Registered,Licensed,2018-01-03 01:01:01,2019-10-18 08:55:01
3,Kaduna,Birnin Gwari,Randagi,54491097,18/01/1/1/1/0045,Anguwar Baduku Health Clinic,2000-01-01 00:00:00,Public,Primary,,,Closed (Temporary),Unknown,Unknown,2018-01-04 01:01:01,2019-10-18 08:51:16
4,Kaduna,Birnin Gwari,Magajin Gari 2,84563688,18/01/1/1/1/0003,Awaro Health Clinic,2000-01-01 00:00:00,Public,Primary,6.60406,10.70954,Operational,Registered,Licensed,2018-01-05 01:01:01,2019-10-28 12:07:15


In [127]:
# Column dtypes and non-null counts, used to spot columns with missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1567 entries, 0 to 1566
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   state                1567 non-null   object        
 1   lga                  1567 non-null   object        
 2   ward                 1567 non-null   object        
 3   uid                  1567 non-null   int64         
 4   facility_code        1567 non-null   object        
 5   facility_name        1567 non-null   object        
 6   start_date           1403 non-null   object        
 7   ownership            1567 non-null   object        
 8   facility_level       1567 non-null   object        
 9   longitude            1441 non-null   float64       
 10  latitude             1443 non-null   float64       
 11  operation_status     1567 non-null   object        
 12  registration_status  1567 non-null   object        
 13  license_status       1567 non-nul

In [128]:
# Summary statistics for the numeric and datetime columns.
data.describe()

Unnamed: 0,uid,longitude,latitude,created,last_updated
count,1567.0,1441.0,1443.0,1567,1567
mean,49929630.0,7.837609,10.353312,2020-02-23 01:01:00.999999744,2019-08-24 01:39:41.999999744
min,12440600.0,2.483,4.00452,2018-01-01 01:01:01,2018-01-01 01:01:01
25%,31905830.0,7.45788,9.702,2019-01-27 13:01:01,2019-10-29 10:53:28.500000
50%,50038410.0,7.86443,10.50375,2020-02-23 01:01:01,2019-10-29 20:49:58
75%,67691340.0,8.27344,10.98715,2021-03-20 13:01:01,2019-10-30 11:06:32.500000
max,87641260.0,9.87462,13.867,2022-04-16 01:01:01,2023-01-26 16:59:04
std,21359680.0,0.614498,0.782395,,


In [129]:
# Count how often each facility name occurs; any count above 1 means the name
# is shared by more than one row.
data["facility_name"].value_counts()

facility_name
Alheri Health Clinic                                       2
Barwa Health Clinic                                        2
Alheri Clinic                                              2
Gora Health Clinic                                         2
Kunkumi Health Clinic                                      2
                                                          ..
Tanda Health Clinic                                        1
Takau Primary Health Centre                                1
Tajak Health Clinic                                        1
Sir Partrict Ibrahim Yakowa Memorial Hospital Kafanchan    1
Zaria Clinic and medical centre Tudun Wada                 1
Name: count, Length: 1546, dtype: int64

In [130]:
# List the distinct LGA names to check for spelling/casing inconsistencies.
# NOTE(review): the output below shows 'jaba' in lowercase while every other
# name is capitalised.
data["lga"].unique()

array(['Birnin Gwari', 'Chikun', 'Giwa', 'Igabi', 'Ikara', 'jaba',
       "Jema'a", 'Kachia', 'Kaduna North', 'Kaduna South', 'Kagarko',
       'Kajuru', 'Kaura', 'Kauru', 'Kubau', 'Kudan', 'Lere', 'Makarfi',
       'Sabon Gari', 'Sanga', 'Soba', 'Zangon Kataf', 'Zaria'],
      dtype=object)

In [131]:
# Collect every start_date whose string form contains a "/" (i.e. values that
# were entered as slash-separated text).
mixed_up_start_dates = data.loc[
    data["start_date"].astype(str).str.contains("/"), "start_date"
].to_list()

In [132]:
# Hand-entered replacements for the slash-formatted start dates. They are
# matched to mixed_up_start_dates purely by position, so the order here must
# follow the order of that list.
# NOTE(review): values look like month/day/year (e.g. "2/22/1990") — confirm.
correct_dates = ["1/1/2000", "1/6/2013", "10/12/2003", "2/22/1990", "12/6/2008", "9/2/2000", "1/1/2019"]

# Fail before touching the frame if there are more malformed values than
# replacements; otherwise the loop would stop part-way with an IndexError
# and leave the column half-corrected.
if len(mixed_up_start_dates) > len(correct_dates):
    raise ValueError(
        f"{len(mixed_up_start_dates)} malformed start dates found but only "
        f"{len(correct_dates)} replacements are listed"
    )

# Overwrite every row holding a malformed value with its replacement.
for malformed, corrected in zip(mixed_up_start_dates, correct_dates):
    data.loc[data["start_date"].isin([malformed]), "start_date"] = corrected

# Parse the whole column into datetimes.
data["start_date"] = pd.to_datetime(data["start_date"])

In [133]:
import random

def generate_random_dates(year=2001, rng=None):
    """Return one random date within ``year`` as a pandas Timestamp.

    Parameters
    ----------
    year : int, default 2001
        Calendar year of the generated date.
    rng : object with a ``randint(a, b)`` method, optional
        Source of randomness, e.g. ``random.Random(seed)``. When omitted the
        module-level ``random`` generator is used, so ``random.seed(...)``
        controls the result.

    Returns
    -------
    pandas.Timestamp
        Midnight on a day between the 1st and the 28th of a random month.
    """
    source = random if rng is None else rng
    month = source.randint(1, 12)
    # Capped at 28 so the day exists in every month, including February.
    day = source.randint(1, 28)
    return pd.Timestamp(year=year, month=month, day=day)

# Seed the generator so the imputed dates are identical on every
# Restart & Run All; without it each run produces different values.
random.seed(42)

# Boolean mask of the rows that have no start_date.
missing_dates_indices = data['start_date'].isnull()
# One random date per missing row, assigned in row order.
# NOTE(review): these are placeholder values, not real commissioning dates —
# any analysis of start_date should account for that.
random_dates = [generate_random_dates() for _ in range(missing_dates_indices.sum())]
data.loc[missing_dates_indices, 'start_date'] = random_dates


In [134]:
# Keep only the rows that have both a longitude and a latitude. The result is
# reassigned rather than using inplace=True so the step reads as an explicit
# transformation; the index keeps its original labels (it is not reset).
data = data.dropna(subset=['longitude', 'latitude'])