In [1]:
# Dependencies
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

# Input CSV locations (relative paths, so they depend on the working directory)
literacy_total = 'Data Files/literacy_rate_total.csv'
literacy_male = 'Data Files/literacy_rate_male.csv'
# NOTE(review): unlike the two paths above, this one does not live under
# "Data Files/" -- confirm the location is intentional.
literacy_female = 'literacy_rate_female/API_SE.ADT.LITR.FE.ZS_DS2_en_csv_v2_1928530.csv'

In [2]:
# Load the total adult (15+) literacy rates.
# skiprows=4 discards the first four lines of the file, so the header is read
# from the fifth line (presumably a metadata preamble -- confirm against the raw file).
# Only the country name and the 2015-2019 year columns are kept.
total_lit_df = (
    pd.read_csv(literacy_total, skiprows=4)
    .loc[:, ['Country Name', '2015', '2016', '2017', '2018', '2019']]
)
total_lit_df.head()

Unnamed: 0,Country Name,2015,2016,2017,2018,2019
0,Aruba,,,,97.807419,
1,Afghanistan,,,,43.019718,
2,Angola,,,,,
3,Albania,,,,98.141151,
4,Andorra,,,,,


In [3]:
# Load the adult male literacy rates.
# skiprows=4 discards the first four lines of the file, so the header is read
# from the fifth line (presumably a metadata preamble -- confirm against the raw file).
# Only the country name and the 2015-2019 year columns are kept.
male_lit_df = (
    pd.read_csv(literacy_male, skiprows=4)
    .loc[:, ['Country Name', '2015', '2016', '2017', '2018', '2019']]
)
male_lit_df.head()

Unnamed: 0,Country Name,2015,2016,2017,2018,2019
0,Aruba,,,,97.800133,
1,Afghanistan,,,,55.475449,
2,Angola,,,,,
3,Albania,,,,98.513618,
4,Andorra,,,,,


In [4]:
# Load the adult female literacy rates.
# skiprows=4 discards the first four lines of the file, so the header is read
# from the fifth line (presumably a metadata preamble -- confirm against the raw file).
# Only the country name and the 2015-2019 year columns are kept.
female_lit_df = (
    pd.read_csv(literacy_female, skiprows=4)
    .loc[:, ['Country Name', '2015', '2016', '2017', '2018', '2019']]
)
female_lit_df.head()

Unnamed: 0,Country Name,2015,2016,2017,2018,2019
0,Aruba,,,,97.812683,
1,Afghanistan,,,,29.80521,
2,Angola,,,,,
3,Albania,,,,97.761124,
4,Andorra,,,,,


In [5]:
# Combine the male and female literacy frames, matching rows on country name.
# The left join keeps every row of male_lit_df. Because both frames share the
# same year column names, pandas appends its default suffixes: _x for the left
# (male) columns and _y for the right (female) columns.
# NOTE(review): if 'Country Name' is ever duplicated in either frame the merge
# will multiply rows -- consider passing validate='one_to_one'.
lit_merge_1 = pd.merge(male_lit_df, female_lit_df, on='Country Name', how='left')
lit_merge_1.head()

Unnamed: 0,Country Name,2015_x,2016_x,2017_x,2018_x,2019_x,2015_y,2016_y,2017_y,2018_y,2019_y
0,Aruba,,,,97.800133,,,,,97.812683,
1,Afghanistan,,,,55.475449,,,,,29.80521,
2,Angola,,,,,,,,,,
3,Albania,,,,98.513618,,,,,97.761124,
4,Andorra,,,,,,,,,,


In [6]:
# Replace the merge suffixes with descriptive labels:
#   '<year>_x' -> '<year> Male Literacy'
#   '<year>_y' -> '<year> Female Literacy'
# for each year from 2015 through 2019.
lit_merge_1 = lit_merge_1.rename(
    columns={
        f'{year}{suffix}': f'{year} {sex} Literacy'
        for suffix, sex in (('_x', 'Male'), ('_y', 'Female'))
        for year in range(2015, 2020)
    }
)
lit_merge_1.head()

Unnamed: 0,Country Name,2015 Male Literacy,2016 Male Literacy,2017 Male Literacy,2018 Male Literacy,2019 Male Literacy,2015 Female Literacy,2016 Female Literacy,2017 Female Literacy,2018 Female Literacy,2019 Female Literacy
0,Aruba,,,,97.800133,,,,,97.812683,
1,Afghanistan,,,,55.475449,,,,,29.80521,
2,Angola,,,,,,,,,,
3,Albania,,,,98.513618,,,,,97.761124,
4,Andorra,,,,,,,,,,


In [7]:
# Attach the male/female columns to the total literacy rates, matching rows on
# country name. The left join keeps every row of total_lit_df.
# NOTE(review): if 'Country Name' is ever duplicated in either frame the merge
# will multiply rows -- consider passing validate='one_to_one'.
adult_literacy_df = pd.merge(total_lit_df, lit_merge_1, on='Country Name', how='left')
adult_literacy_df.head()


Unnamed: 0,Country Name,2015,2016,2017,2018,2019,2015 Male Literacy,2016 Male Literacy,2017 Male Literacy,2018 Male Literacy,2019 Male Literacy,2015 Female Literacy,2016 Female Literacy,2017 Female Literacy,2018 Female Literacy,2019 Female Literacy
0,Aruba,,,,97.807419,,,,,97.800133,,,,,97.812683,
1,Afghanistan,,,,43.019718,,,,,55.475449,,,,,29.80521,
2,Angola,,,,,,,,,,,,,,,
3,Albania,,,,98.141151,,,,,98.513618,,,,,97.761124,
4,Andorra,,,,,,,,,,,,,,,


In [8]:
# Label the bare year columns (2015 through 2019) as total literacy:
#   '<year>' -> '<year> Total Literacy'
adult_literacy_df = adult_literacy_df.rename(
    columns={str(year): f'{year} Total Literacy' for year in range(2015, 2020)}
)
adult_literacy_df.head()

Unnamed: 0,Country Name,2015 Total Literacy,2016 Total Literacy,2017 Total Literacy,2018 Total Literacy,2019 Total Literacy,2015 Male Literacy,2016 Male Literacy,2017 Male Literacy,2018 Male Literacy,2019 Male Literacy,2015 Female Literacy,2016 Female Literacy,2017 Female Literacy,2018 Female Literacy,2019 Female Literacy
0,Aruba,,,,97.807419,,,,,97.800133,,,,,97.812683,
1,Afghanistan,,,,43.019718,,,,,55.475449,,,,,29.80521,
2,Angola,,,,,,,,,,,,,,,
3,Albania,,,,98.141151,,,,,98.513618,,,,,97.761124,
4,Andorra,,,,,,,,,,,,,,,


In [9]:
# Write the combined dataframe to a CSV file, omitting the row index.
# to_csv does not create missing directories, so make sure the output folder
# exists first; otherwise a fresh checkout without "Output/" fails on this cell.
output_path = Path('Output/adult_literacy.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
adult_literacy_df.to_csv(output_path, index=False)