In [2]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

# Cleaning ART CSV

In [3]:
# Load the raw ART dataset from disk and preview the first rows
art = "CSV Files/ART.csv"
art_df = pd.read_csv(filepath_or_buffer=art)

art_df.head()

Unnamed: 0,Entity,Code,Year,(% of people living with HIV)
0,Albania,ALB,2000,0.0
1,Albania,ALB,2001,0.0
2,Albania,ALB,2002,0.0
3,Albania,ALB,2003,0.0
4,Albania,ALB,2004,4.0


In [4]:
# Keep only the ART rows whose Year is 2015 or earlier.
# Re-running this cell is safe: filtering an already-filtered frame is a no-op.
art_df = art_df.loc[art_df["Year"].le(2015)]

art_df.head()

Unnamed: 0,Entity,Code,Year,(% of people living with HIV)
0,Albania,ALB,2000,0.0
1,Albania,ALB,2001,0.0
2,Albania,ALB,2002,0.0
3,Albania,ALB,2003,0.0
4,Albania,ALB,2004,4.0


In [6]:
# Print each column label on its own line; the output exposes the exact
# header text (the percentage column starts with a space).
for column_label in art_df.columns:
    print(column_label)

Entity
Code
Year
 (% of people living with HIV)


In [8]:
# Give the percentage column a short, identifier-friendly name.
# The source header begins with a space, so the lookup key must include it.
art_df = art_df.rename(
    {" (% of people living with HIV)": "Percent_Living_With_HIV"},
    axis="columns",
)

art_df.head()

Unnamed: 0,Entity,Code,Year,Percent_Living_With_HIV
0,Albania,ALB,2000,0.0
1,Albania,ALB,2001,0.0
2,Albania,ALB,2002,0.0
3,Albania,ALB,2003,0.0
4,Albania,ALB,2004,4.0


In [5]:
# Save the cleaned ART frame as CSV (header row included, no index column).
# When the notebook is run top to bottom, art_df here is year-filtered and
# carries the renamed percentage column.
# NOTE(review): the saved execution counts show this cell was run before the
# rename cell; re-run in order to be sure the file has the renamed header.
art_df.to_csv(
    "CSV Files/art_2000to2015.csv",
    index=False,
    header=True,
    encoding="utf-8",
)

# Cleaning Life Expectancy CSV

In [None]:
# Load the raw life-expectancy dataset from disk and preview the first rows
life_expectancy = "CSV Files/life-expectancy.csv"
life_expectancy_df = pd.read_csv(filepath_or_buffer=life_expectancy)

life_expectancy_df.head()

In [None]:
# Keep the life-expectancy rows from the year 2000 onwards.
# NOTE(review): the name five_years_life suggests a five-year window, but the
# filter keeps every row with Year >= 2000 — confirm the intended range.
five_years_life = life_expectancy_df.loc[life_expectancy_df["Year"].ge(2000)]

five_years_life.head()

In [None]:
# Replace the long source header with a short, identifier-friendly column name
five_years_life = five_years_life.rename(
    {
        "Life expectancy (Clio-Infra up to 1949; UN Population Division for 1950 to 2015) (years)": "Life_Expectancy",
    },
    axis="columns",
)

five_years_life.head()

In [None]:
# Save the cleaned life-expectancy frame as CSV (header row included, no index column).
# NOTE(review): the file name says "2000to2017", but this notebook applies no
# upper year bound and the source header mentions data up to 2015 — confirm.
five_years_life.to_csv(
    "CSV Files/life_expectancy_2000to2017.csv",
    index=False,
    header=True,
    encoding="utf-8",
)

# Cleaning AIDS CSV

In [None]:
# Load the raw AIDS dataset from disk and preview the first rows
aids = "CSV Files/aids.csv"
aids_df = pd.read_csv(filepath_or_buffer=aids)

aids_df.head()

In [None]:
# Keep the AIDS rows from the year 2013 onwards
five_years = aids_df.loc[aids_df["Year"].ge(2013)]

five_years.head()

In [None]:
# Map the verbose source headers onto short column names.
# Columns not listed here keep their original labels.
renamed_df = five_years.rename(
    {
        "Deaths from HIV/AIDS (Number)": "Deaths",
        "New infections of HIV/AIDS (new cases of HIV infection)": "New_Infections",
        "Number of people living with HIV (tens) (tens of people living with HIV)": "HIV_Incidents(tens)",
    },
    axis="columns",
)

renamed_df.head()

In [None]:
# Save the filtered, renamed AIDS frame as CSV (header row included, no index column)
renamed_df.to_csv(
    "CSV Files/aids_2013_to_2017.csv",
    index=False,
    header=True,
    encoding="utf-8",
)

# Cleaning HIV Death Rate by Age Groups CSV

In [None]:
# Load the raw HIV death-rates-by-age dataset from disk and preview the first rows
death = "CSV Files/hiv-death-rates-by-age.csv"
death_df = pd.read_csv(filepath_or_buffer=death)

death_df.head()

In [None]:
# Keep the death-rate rows from the year 2013 onwards
death_five_years = death_df.loc[death_df["Year"].ge(2013)]

death_five_years.head()

In [None]:
# Shorten the per-100,000 age-group headers to compact age-range labels.
# NOTE(review): "Under-5s" is labelled "(1-4)" — confirm whether "(0-4)" was
# intended; the later column selection and the database table use this label.
renamed_death_df = death_five_years.rename(
    {
        "Under-5s (per 100,000)": "(1-4)",
        "5-14 years old (per 100,000)": "(5-14)",
        "15-49 years old (per 100,000)": "(15-49)",
        "50-69 years old (per 100,000)": "(50-69)",
        "70+ years old (per 100,000)": "(70+)",
    },
    axis="columns",
)

renamed_death_df.head()

In [None]:
# Keep just the entity, the year and the five age-group columns, in age order
renamed_death_df = renamed_death_df.loc[
    :,
    ["Entity", "Year", "(1-4)", "(5-14)", "(15-49)", "(50-69)", "(70+)"],
]

renamed_death_df.head()

In [None]:
# Save the trimmed death-rate frame as CSV (header row included, no index column)
renamed_death_df.to_csv(
    "CSV Files/death_age_2013_to_2017.csv",
    index=False,
    header=True,
    encoding="utf-8",
)

# Loading DataFrames into the Database

In [None]:
# Build the MySQL connection from environment variables so that no password is
# stored in the notebook (or in its version-control history).
#   HIV_DB_PASSWORD  required; a KeyError is raised here if it is not set
#   HIV_DB_USER      optional, defaults to "root"
#   HIV_DB_HOST      optional, defaults to "127.0.0.1"
#   HIV_DB_NAME      optional, defaults to "HIV"
db_user = os.environ.get("HIV_DB_USER", "root")
db_password = os.environ["HIV_DB_PASSWORD"]
db_host = os.environ.get("HIV_DB_HOST", "127.0.0.1")
db_name = os.environ.get("HIV_DB_NAME", "HIV")

# quote_plus escapes characters such as "@" or "/" that would otherwise be
# misread as URL delimiters when they appear in the user name or password.
connection_string = (
    f"{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}/{db_name}?charset=utf8mb4"
)

# Pooled engine shared by every to_sql cell below.
engine = create_engine(f'mysql+pymysql://{connection_string}', pool_size=10, max_overflow=50)

In [None]:
# Append the ART rows to the "ART" table; pandas creates the table if it is missing.
# NOTE(review): with if_exists="append", re-running this cell inserts the same rows again.
art_df.to_sql(
    "ART",
    engine,
    if_exists="append",
    index=False,
)

In [None]:
# Append the life-expectancy rows to the "Life_Expectancy" table; pandas creates the table if it is missing.
# NOTE(review): with if_exists="append", re-running this cell inserts the same rows again.
five_years_life.to_sql(
    "Life_Expectancy",
    engine,
    if_exists="append",
    index=False,
)

In [None]:
# Append the AIDS rows to the "AIDS" table; pandas creates the table if it is missing.
# NOTE(review): with if_exists="append", re-running this cell inserts the same rows again.
renamed_df.to_sql(
    "AIDS",
    engine,
    if_exists="append",
    index=False,
)

In [None]:
# Append the death-rate rows to the "Death" table; pandas creates the table if it is missing.
# NOTE(review): with if_exists="append", re-running this cell inserts the same rows again.
renamed_death_df.to_sql(
    "Death",
    engine,
    if_exists="append",
    index=False,
)