# BAIS:3250 - Final Project
### Transforming Columns for Analysis

**Author(s):** Natalie Brown, Max Kaiser

**Date Modified:** 12-13-2024 (*date created:* 12-02-2024)


**Description:** Transforming various columns for a seamless analysis across countries

---

### Import Libraries
* **pandas:** for data frames and data cleaning functions
* **numpy:** for the conditional (`np.where`) calculation of the covid death rate

In [60]:
import pandas as pd
import numpy as np

---
### Load Data
* **06_country_merged_imputed.csv**

In [63]:
# read the cleaned and imputed country table (comma-separated, UTF-8 encoded)
country_df = pd.read_csv(
    '06_country_merged_imputed.csv',
    sep=',',
    encoding='utf-8',
)

# preview the first rows
country_df.head()

Unnamed: 0,country,birth_rt,fertility_rt,infant_mortality_rt,life_expectancy,physicians_per_thousand,lat,long,gross_domestic_product_usd(b),primary_education_enrollment_pct,secondary_education_enrollment_pct,tax_revenue_pct,unemployment_rt,continent,covid_cases,covid_deaths,average_population
0,Afghanistan,32.49,4.47,47.9,64.5,0.28,33.93911,67.709953,19.1014,104.0,9.7,9.3,11.12,Asia,46498.0,1774.0,38673974
1,Albania,11.78,1.62,7.8,78.5,1.2,41.153332,20.168331,15.2781,107.0,55.0,18.6,12.33,Europe,37625.0,798.0,2865340
2,Algeria,24.28,3.02,20.1,76.7,1.72,28.033886,1.659626,169.9882,109.9,51.4,37.2,11.7,Africa,83199.0,2431.0,43621542
3,Andorra,7.2,1.27,2.7,83.8,3.33,42.506285,1.521801,3.1541,106.4,97.5,69.0,3.7,Europe,6712.0,76.0,77230
4,Angola,40.73,5.52,51.6,60.8,0.21,-11.202692,17.873887,94.6354,113.5,9.3,9.2,6.89,Africa,15139.0,348.0,32569069


---
### Address Data Types Before Transformation

In [66]:
# report the dtype of every column before any transformation is applied
column_dtypes = country_df.dtypes
print(f'Country df Data Types:\n{column_dtypes}')

Country df Data Types:
country                                object
birth_rt                              float64
fertility_rt                          float64
infant_mortality_rt                   float64
life_expectancy                       float64
physicians_per_thousand               float64
lat                                   float64
long                                  float64
gross_domestic_product_usd(b)         float64
primary_education_enrollment_pct      float64
secondary_education_enrollment_pct    float64
tax_revenue_pct                       float64
unemployment_rt                       float64
continent                              object
covid_cases                           float64
covid_deaths                          float64
average_population                      int64
dtype: object


### Transform Columns
- [x] make df copy for transformations
- [x] make covid death rate column (covid deaths / covid cases)
    - [x] drop the `covid_cases` and `covid_deaths` columns afterwards
- [x] rename population

---

**covid death rate**

In [70]:
# make a real copy: plain assignment would only give country_df a second name,
# so adding a column below would also change country_df
country_transformed_df=country_df.copy()

# covid death rate as a percentage: covid deaths / covid cases * 100
covid_deaths=country_transformed_df['covid_deaths']
covid_cases=country_transformed_df['covid_cases']

# rows where either count is zero get a rate of 0 instead of the inf/NaN
# that the division would otherwise produce
zero_count_mask=(covid_deaths == 0) | (covid_cases == 0)

# scale to percent first and round last, so the stored values carry exactly two
# decimals (rounding before multiplying by 100 can reintroduce float noise)
country_transformed_df['covid_death_rt']=np.where(
    zero_count_mask,
    0,
    (covid_deaths/covid_cases*100).round(2)
)

# review
country_transformed_df[['country','covid_death_rt']]

Unnamed: 0,country,covid_death_rt
0,Afghanistan,3.82
1,Albania,2.12
2,Algeria,2.92
3,Andorra,1.13
4,Angola,2.30
...,...,...
187,Venezuela,0.00
188,Vietnam,0.00
189,Yemen,29.12
190,Zambia,2.02


In [72]:
# remove the raw covid count columns from the transformed frame
covid_count_columns = ['covid_cases', 'covid_deaths']
country_transformed_df = country_transformed_df.drop(columns=covid_count_columns)

---
**rename population**

In [75]:
# give the population column its shorter final name
population_rename = {'average_population': 'population'}
country_transformed_df = country_transformed_df.rename(columns=population_rename)

---
### Re-Order Columns

In [78]:
# list the column labels in their present order
current_columns = country_transformed_df.columns
current_columns

Index(['country', 'birth_rt', 'fertility_rt', 'infant_mortality_rt',
       'life_expectancy', 'physicians_per_thousand', 'lat', 'long',
       'gross_domestic_product_usd(b)', 'primary_education_enrollment_pct',
       'secondary_education_enrollment_pct', 'tax_revenue_pct',
       'unemployment_rt', 'continent', 'population', 'covid_death_rt'],
      dtype='object')

In [80]:
# final column order, grouped by theme
# NOTE(review): 'fertility_rt' is present in the frame but not in this list, so this
# selection drops it from the final data - confirm that the omission is intentional
final_column_order = [
    # identity and location
    'country', 'population', 'lat', 'long', 'continent',
    # economy
    'gross_domestic_product_usd(b)', 'tax_revenue_pct', 'unemployment_rt',
    # education
    'primary_education_enrollment_pct', 'secondary_education_enrollment_pct',
    # health
    'life_expectancy', 'birth_rt', 'infant_mortality_rt', 'physicians_per_thousand', 'covid_death_rt',
]

# keep only the listed columns, in the listed order
country_transformed_df = country_transformed_df[final_column_order]

# preview the first rows
country_transformed_df.head()

Unnamed: 0,country,population,lat,long,continent,gross_domestic_product_usd(b),tax_revenue_pct,unemployment_rt,primary_education_enrollment_pct,secondary_education_enrollment_pct,life_expectancy,birth_rt,infant_mortality_rt,physicians_per_thousand,covid_death_rt
0,Afghanistan,38673974,33.93911,67.709953,Asia,19.1014,9.3,11.12,104.0,9.7,64.5,32.49,47.9,0.28,3.82
1,Albania,2865340,41.153332,20.168331,Europe,15.2781,18.6,12.33,107.0,55.0,78.5,11.78,7.8,1.2,2.12
2,Algeria,43621542,28.033886,1.659626,Africa,169.9882,37.2,11.7,109.9,51.4,76.7,24.28,20.1,1.72,2.92
3,Andorra,77230,42.506285,1.521801,Europe,3.1541,69.0,3.7,106.4,97.5,83.8,7.2,2.7,3.33,1.13
4,Angola,32569069,-11.202692,17.873887,Africa,94.6354,9.2,6.89,113.5,9.3,60.8,40.73,51.6,0.21,2.3


---
### Save Data
* **07_country_final.csv**

In [83]:
# write the final table as comma-separated UTF-8 text, without the row index
country_transformed_df.to_csv(
    '07_country_final.csv',
    sep=',',
    encoding='utf-8',
    index=False,
)