In [1]:
import pandas as pd
import os

In [2]:
# Load the data: read Resources/isabel.csv (path is relative to the working directory) into a DataFrame
Hdf = pd.read_csv('Resources/isabel.csv')

# Display the first few rows of the DataFrame to verify the load
Hdf.head()

Unnamed: 0,IndicatorCode,Indicator,ValueType,ParentLocationCode,ParentLocation,Location type,SpatialDimValueCode,Location,Period type,Period,...,FactValueUoM,FactValueNumericLowPrefix,FactValueNumericLow,FactValueNumericHighPrefix,FactValueNumericHigh,Value,FactValueTranslationID,FactComments,Language,DateModified
0,WHS4_154,Antenatal care coverage - at least four visits...,text,EMR,Eastern Mediterranean,Country,SAU,Saudi Arabia,Year,2023,...,,,,,,88.8,,Women Health and Reproductive Care Survey 2023.,EN,2024-05-13 00:00:00
1,WHS4_154,Antenatal care coverage - at least four visits...,text,AFR,Africa,Country,SEN,Senegal,Year,2021-2023,...,,,,,,68.4,,DHS 2023 (DHS Continuous) Key indicators.,EN,2024-05-13 00:00:00
2,WHS4_154,Antenatal care coverage - at least four visits...,text,EMR,Eastern Mediterranean,Country,TUN,Tunisia,Year,2021-2023,...,,,,,,79.0,,MICS 2023 (snaphots),EN,2024-05-13 00:00:00
3,WHS4_154,Antenatal care coverage - at least four visits...,text,EMR,Eastern Mediterranean,Country,JOR,Jordan,Year,2021-2023,...,,,,,,93.1,,PFHS 2023 (key indicators).,EN,2024-05-13 00:00:00
4,WHS4_154,Antenatal care coverage - at least four visits...,text,EMR,Eastern Mediterranean,Country,YEM,Yemen,Year,2020-2023,...,,,,,,32.4,,MICS 2022-2023.,EN,2024-05-13 00:00:00


In [3]:
# Keep only the columns used further down; every other column of the raw
# frame is left behind here.
Isabeldf_cleaned = Hdf[
    [
        'IndicatorCode',
        'Indicator',
        'ParentLocation',
        'Location',
        'Period',
        'Dim2',
        'Value',
        'FactComments',
    ]
]

# Preview the trimmed frame.
Isabeldf_cleaned.head()

Unnamed: 0,IndicatorCode,Indicator,ParentLocation,Location,Period,Dim2,Value,FactComments
0,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Saudi Arabia,2023,,88.8,Women Health and Reproductive Care Survey 2023.
1,WHS4_154,Antenatal care coverage - at least four visits...,Africa,Senegal,2021-2023,,68.4,DHS 2023 (DHS Continuous) Key indicators.
2,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Tunisia,2021-2023,,79.0,MICS 2023 (snaphots)
3,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Jordan,2021-2023,,93.1,PFHS 2023 (key indicators).
4,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Yemen,2020-2023,,32.4,MICS 2022-2023.


In [4]:
# Build a renamed copy of the trimmed frame:
#   'Value' -> 'Anatal_care%'  (the indicator value; per the Indicator column this
#                               is antenatal care coverage)
#   'Dim2'  -> 'Age-group'
# rename() returns a new DataFrame, so Isabeldf_cleaned keeps its original column
# names and this cell produces the same result every time it is re-run.
IsDf_cleaned = Isabeldf_cleaned.rename(columns={'Value': 'Anatal_care%', 'Dim2': 'Age-group'})


In [5]:
# Preview the first rows to confirm the renamed columns ('Age-group', 'Anatal_care%')
IsDf_cleaned.head()

Unnamed: 0,IndicatorCode,Indicator,ParentLocation,Location,Period,Age-group,Anatal_care%,FactComments
0,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Saudi Arabia,2023,,88.8,Women Health and Reproductive Care Survey 2023.
1,WHS4_154,Antenatal care coverage - at least four visits...,Africa,Senegal,2021-2023,,68.4,DHS 2023 (DHS Continuous) Key indicators.
2,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Tunisia,2021-2023,,79.0,MICS 2023 (snaphots)
3,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Jordan,2021-2023,,93.1,PFHS 2023 (key indicators).
4,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Yemen,2020-2023,,32.4,MICS 2022-2023.


# NEW CLEANING

In [7]:
#IA Define the function to split the period
def split_period(period):
    """Split a period label into its starting and ending year.

    Parameters
    ----------
    period : str, int or missing value
        Either a single year (``'2023'`` or ``2023``) or a range written with a
        hyphen (``'2021-2023'``).

    Returns
    -------
    tuple
        ``(starting_year, ending_year)`` as strings with surrounding whitespace
        removed. A single year is returned as both start and end. A missing
        value (``None`` / ``NaN`` / ``pd.NA``) yields ``(None, None)``.
    """
    # Missing periods cannot be split; report both years as missing instead of
    # failing on the membership test below.
    if period is None or pd.isna(period):
        return None, None

    # read_csv parses a column holding only single years as integers, so
    # normalise to text before looking for the range separator.
    text = str(period).strip()
    if '-' not in text:
        return text, text

    # Use the first and last pieces so that a label with more than one hyphen
    # still produces a (start, end) pair instead of an unpacking error.
    pieces = text.split('-')
    return pieces[0].strip(), pieces[-1].strip()


# Split every 'Period' label once and spread the (start, end) tuples over two
# new columns. One DataFrame is built from the whole list of tuples instead of
# one pandas Series per row; index= keeps the rows aligned on assignment and
# columns= keeps the shape at two columns even when the frame has no rows.
IsDf_cleaned[['Starting_year', 'Ending_year']] = pd.DataFrame(
    IsDf_cleaned['Period'].map(split_period).tolist(),
    index=IsDf_cleaned.index,
    columns=['Starting_year', 'Ending_year'],
)




In [13]:
#IA
def categorize_period(ending_year, first_year=2003, last_year=2023, width=3):
    """Map an ending year onto a fixed-width period label.

    With the defaults the labels are '2003-2005', '2006-2008', '2009-2011',
    '2012-2014', '2015-2017', '2018-2020' and '2021-2023'.

    Parameters
    ----------
    ending_year : str, int or float
        Year to classify; anything ``int()`` accepts.
    first_year : int, default 2003
        First year covered by the earliest bucket.
    last_year : int, default 2023
        Last year covered by the latest bucket.
    width : int, default 3
        Number of years per bucket.

    Returns
    -------
    str
        The ``'start-end'`` label of the bucket containing the year, or
        ``'Out_of_Range'`` when the year lies outside ``first_year..last_year``
        or cannot be converted to an integer (missing or non-numeric value).
    """
    # Convert once; missing or malformed values fall into the catch-all label
    # instead of aborting the whole column transformation.
    try:
        year = int(ending_year)
    except (TypeError, ValueError, OverflowError):
        return 'Out_of_Range'

    if not first_year <= year <= last_year:
        return 'Out_of_Range'

    # Snap the year down to the first year of its bucket.
    bucket_start = first_year + ((year - first_year) // width) * width
    # Clamp so the final bucket never advertises years beyond last_year.
    bucket_end = min(bucket_start + width - 1, last_year)
    return f'{bucket_start}-{bucket_end}'

# Period_Range: label each row with the period bucket of its ending year.
IsDf_cleaned['Period_Range'] = IsDf_cleaned['Ending_year'].map(categorize_period)


In [15]:
#IA
# Remove the raw 'Period' column, which the Starting_year / Ending_year /
# Period_Range columns now replace. Reassigning with errors='ignore' (rather than
# dropping in place) lets this cell be re-run without a KeyError once the column
# is already gone.
IsDf_cleaned = IsDf_cleaned.drop(columns=['Period'], errors='ignore')
IsDf_cleaned.head()

Unnamed: 0,IndicatorCode,Indicator,ParentLocation,Location,Age-group,Anatal_care%,FactComments,Starting_year,Ending_year,Period_Range
0,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Saudi Arabia,,88.8,Women Health and Reproductive Care Survey 2023.,2023,2023,2021-2023
1,WHS4_154,Antenatal care coverage - at least four visits...,Africa,Senegal,,68.4,DHS 2023 (DHS Continuous) Key indicators.,2021,2023,2021-2023
2,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Tunisia,,79.0,MICS 2023 (snaphots),2021,2023,2021-2023
3,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Jordan,,93.1,PFHS 2023 (key indicators).,2021,2023,2021-2023
4,WHS4_154,Antenatal care coverage - at least four visits...,Eastern Mediterranean,Yemen,,32.4,MICS 2022-2023.,2020,2023,2021-2023
