In [1]:
import pandas as pd
import os

In [2]:
# Load the data
Hdf = pd.read_csv('Resources/Hdata.csv')

# Display the first few rows of the DataFrame to verify
Hdf.head()

Unnamed: 0,IndicatorCode,Indicator,ValueType,ParentLocationCode,ParentLocation,Location type,SpatialDimValueCode,Location,Period type,Period,...,FactValueUoM,FactValueNumericLowPrefix,FactValueNumericLow,FactValueNumericHighPrefix,FactValueNumericHigh,Value,FactValueTranslationID,FactComments,Language,DateModified
0,MDG_0000000003,Adolescent birth rate (per 1000 women),text,EUR,Europe,Country,HRV,Croatia,Year,2023,...,,,,,,0.0,,Registration National Statistics,EN,2024-03-26 00:00:00
1,MDG_0000000003,Adolescent birth rate (per 1000 women),text,EUR,Europe,Country,HRV,Croatia,Year,2023,...,,,,,,6.7,,Registration National Statistics,EN,2024-03-26 00:00:00
2,MDG_0000000003,Adolescent birth rate (per 1000 women),text,EMR,Eastern Mediterranean,Country,YEM,Yemen,Year,2023,...,,,,,,77.0,,Yemen 2022-2023 Multiple Indicator Cluster Survey,EN,2024-03-26 00:00:00
3,MDG_0000000003,Adolescent birth rate (per 1000 women),text,GLOBAL,Global,Country,MAC,"China, Macao Special Administrative Region",Year,2022,...,,,,,,0.0,,Registration National Statistics & WPP2022,EN,2024-03-26 00:00:00
4,MDG_0000000003,Adolescent birth rate (per 1000 women),text,EUR,Europe,Country,DNK,Denmark,Year,2022,...,,,,,,0.0,,Registration National Statistics & WPP2022,EN,2024-03-26 00:00:00


In [3]:
Hdf_cleaned = Hdf[['ParentLocation', 'Location', 'Period', 'Dim2', 'Value', 'FactComments']]

Hdf_cleaned.head()

Unnamed: 0,ParentLocation,Location,Period,Dim2,Value,FactComments
0,Europe,Croatia,2023,10-14 years,0.0,Registration National Statistics
1,Europe,Croatia,2023,15-19 years,6.7,Registration National Statistics
2,Eastern Mediterranean,Yemen,2023,15-19 years,77.0,Yemen 2022-2023 Multiple Indicator Cluster Survey
3,Global,"China, Macao Special Administrative Region",2022,10-14 years,0.0,Registration National Statistics & WPP2022
4,Europe,Denmark,2022,10-14 years,0.0,Registration National Statistics & WPP2022


In [4]:
# Create a copy of the DataFrame
hDf_cleaned = Hdf_cleaned.copy()

# Rename the 'Value' column to 'Adolescent birth rate (per 1000 women)'
hDf_cleaned.rename(columns={'Value': 'Adolescent birth rate (per 1000 women)', 'Dim2': 'Age-group'}, inplace=True)


In [5]:
hDf_cleaned.head()

Unnamed: 0,ParentLocation,Location,Period,Age-group,Adolescent birth rate (per 1000 women),FactComments
0,Europe,Croatia,2023,10-14 years,0.0,Registration National Statistics
1,Europe,Croatia,2023,15-19 years,6.7,Registration National Statistics
2,Eastern Mediterranean,Yemen,2023,15-19 years,77.0,Yemen 2022-2023 Multiple Indicator Cluster Survey
3,Global,"China, Macao Special Administrative Region",2022,10-14 years,0.0,Registration National Statistics & WPP2022
4,Europe,Denmark,2022,10-14 years,0.0,Registration National Statistics & WPP2022


In [6]:
# Export the cleaned data to CSV
hDf_cleaned.to_csv("Output/Halima.csv", index=False)

In [7]:
# Group by 'Location' (country) and find the maximum Adolescent birth rate
max_birth_rate =hDf_cleaned.groupby('Location')['Adolescent birth rate (per 1000 women)'].max()

# Find the country with the highest Adolescent birth rate
highest_birth_rate_country = max_birth_rate.idxmax()
highest_birth_rate_value = max_birth_rate.max()

print(f"The country with the highest Adolescent birth rate (per 1000 women) is {highest_birth_rate_country} with a rate of {highest_birth_rate_value}")

The country with the highest Adolescent birth rate (per 1000 women) is Central African Republic with a rate of 184.4


In [8]:
min_birth_rate = hDf_cleaned.groupby('Location')['Adolescent birth rate (per 1000 women)'].min()

# Find the country with the lowest Adolescent birth rate
lowest_birth_rate_country = min_birth_rate.idxmin()
lowest_birth_rate_value = min_birth_rate.min()

print(f"The country with the lowest Adolescent birth rate (per 1000 women) is {lowest_birth_rate_country} with a rate of {lowest_birth_rate_value}")

The country with the lowest Adolescent birth rate (per 1000 women) is Algeria with a rate of 0.0


In [9]:
highest_rate = hDf_cleaned.sort_values(by='Adolescent birth rate (per 1000 women)', ascending=False)

highest_rate.head(10)

Unnamed: 0,ParentLocation,Location,Period,Age-group,Adolescent birth rate (per 1000 women),FactComments
310,Africa,Central African Republic,2018,15-19 years,184.4,Central African Republic 2018-2019 Multiple In...
409,Africa,Equatorial Guinea,2010,15-19 years,176.0,Equatorial Guinea 2011 Demographic and Health ...
386,Africa,Angola,2014,15-19 years,162.7,Angola 2015-2016 Demographic and Health Survey
418,Africa,South Sudan,2008,15-19 years,158.1,South Sudan 2010 Household Health Survey Secon...
149,Africa,Mozambique,2021,15-19 years,158.0,Mozambique 2022-2023 Demographic and Health Su...
233,Africa,Niger,2020,15-19 years,150.3,Niger 2021 Malaria Indicator Survey
231,Africa,Mali,2020,15-19 years,144.8,Mali 2021 Malaria Indicators Survey
267,Africa,Madagascar,2019,15-19 years,143.0,Madagascar 2021 Demographic and Health Survey
307,Africa,Chad,2018,15-19 years,138.5,Chad 2019 Multiple Indicator Cluster Survey
306,Africa,Malawi,2018,15-19 years,135.6,Malawi 2019-2020 Multiple Indicator Cluster Su...


In [10]:
# Sort the DataFrame in ascending order based on 'Adolescent birth rate (per 1000 women)'
lowest_rate = hDf_cleaned.sort_values(by='Adolescent birth rate (per 1000 women)', ascending=True)

# Display the sorted DataFrame
lowest_rate.head(10)

Unnamed: 0,ParentLocation,Location,Period,Age-group,Adolescent birth rate (per 1000 women),FactComments
0,Europe,Croatia,2023,10-14 years,0.0,Registration National Statistics
74,South-East Asia,Bhutan,2021,10-14 years,0.0,Registration National Statistics & WPP2022
73,Americas,Bermuda,2021,10-14 years,0.0,Registration National Statistics & WPP2022
72,Europe,Belgium,2021,10-14 years,0.0,Registration Eurostat
71,Europe,Azerbaijan,2021,10-14 years,0.0,Registration National Statistics & WPP2022
70,Europe,Austria,2021,10-14 years,0.0,Registration Eurostat
211,Europe,Armenia,2020,10-14 years,0.0,Registration Eurostat
212,Americas,Montserrat,2020,10-14 years,0.0,Registration National Statistics & WPP2022
213,South-East Asia,Nepal,2020,10-14 years,0.0,Nepal 2022 Demographic and Health Survey
254,Eastern Mediterranean,Bahrain,2019,10-14 years,0.0,Registration National Statistics & WPP2022


In [11]:
# Count occurrences of Age-group values
age_group_counts = hDf_cleaned['Age-group'].value_counts()

# Display the counts
print("Age-group counts:")
print(age_group_counts)

Age-group counts:
Age-group
15-19 years    223
10-14 years    215
Name: count, dtype: int64


In [13]:
hDf_cleaned.dtypes

ParentLocation                             object
Location                                   object
Period                                      int64
Age-group                                  object
Adolescent birth rate (per 1000 women)    float64
FactComments                               object
dtype: object