<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [1]:
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path

In [2]:
# The csv files are expected in a 'data' folder located in the parent of the
# current working directory.
data_folder = Path.cwd().parent / 'data'

# Absolute paths (as strings) of every csv file in the data folder.
# glob yields files in arbitrary, OS-dependent order, so sort the result to
# keep any positional indexing into this list deterministic across machines.
data_filepaths = sorted(glob.glob(f"{data_folder}/*.csv"))

print(
    f'File names of csv files in data folder: {[Path(file).stem for file in data_filepaths]}'
)

File names of csv files in data folder: ['malaria_deaths', 'malaria_deaths_age', 'malaria_inc']


In [3]:
# Look each file up by its name (without extension) rather than by its
# position in the list, so the right csv is loaded regardless of the order in
# which the paths were collected. A missing file raises KeyError here.
csv_by_stem = {Path(filepath).stem: filepath for filepath in data_filepaths}

deaths = pd.read_csv(csv_by_stem['malaria_deaths'])
deaths_age = pd.read_csv(csv_by_stem['malaria_deaths_age'])
inc = pd.read_csv(csv_by_stem['malaria_inc'])

In [4]:
def get_entity_type(code, entity, income_demo):
    """Classify one row's entity into a coarse entity type.

    Parameters
    ----------
    code : str or missing value
        The row's code; "missing" is whatever ``pd.isnull`` reports as null.
    entity : str
        The row's entity name.
    income_demo : container of str
        Entity names that should be labelled 'Income/Demographic'.

    Returns
    -------
    str
        One of 'World', 'Country', 'SDI', 'Income/Demographic' or 'Region'.
    """
    # 'World' takes precedence, whether or not a code is present.
    if entity == 'World':
        return 'World'

    # Any other entity that carries a code is treated as a country.
    if not pd.isnull(code):
        return 'Country'

    # From here on the code is missing; classify by the entity name alone.
    if 'SDI' in entity:
        return 'SDI'
    if entity in income_demo:
        return 'Income/Demographic'

    # Everything left over is considered a region.
    return 'Region'

def assign_entity_type(df, income_demo):
    """Add an 'entity_type' column by classifying every row of ``df``.

    Each row's 'code' and 'entity' values are passed to ``get_entity_type``
    together with ``income_demo``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'code' and 'entity' columns.
    income_demo : container of str
        Forwarded to ``get_entity_type``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place with the new column.
    """
    # Iterate over the two columns directly instead of df.apply(..., axis=1):
    # on an empty frame apply(axis=1) hands back a DataFrame rather than a
    # Series, which cannot be assigned to a single column. Zipping the columns
    # also avoids building a Series object for every row.
    df['entity_type'] = [
        get_entity_type(code, entity, income_demo=income_demo)
        for code, entity in zip(df['code'], df['entity'])
    ]
    return df

def fill_uk_code(df, uk_list):
    """Set 'code' to "GBR" for rows with no code whose entity is in ``uk_list``.

    Rows that already have a code are left untouched. ``df`` is modified in
    place and the same object is returned.
    """
    missing_code = df['code'].isnull()
    is_uk_entity = df['entity'].isin(uk_list)

    df.loc[missing_code & is_uk_entity, 'code'] = "GBR"
    return df

def lowercase_columns(df):
    """Lower-case every column label of ``df``.

    ``df`` is modified in place and the same object is returned.
    """
    lowered_labels = df.columns.str.lower()
    df.columns = lowered_labels
    return df

def rename_columns(df, rename_dict):
    """Rename the columns of ``df`` according to ``rename_dict``.

    Mappings are applied one at a time, in dictionary order, and a mapping is
    skipped when its source name is not a column of ``df`` at that point.
    ``df`` is modified in place and the same object is returned.
    """
    for old_name, new_name in rename_dict.items():
        # Nothing to do when the source column is absent.
        if old_name not in df.columns:
            continue
        df.rename(columns={old_name: new_name}, inplace=True)

    return df

In [5]:
# Entity names that get_entity_type labels as 'Income/Demographic' when the
# row has no code (any other code-less, non-SDI entity is labelled 'Region').
income_demo = [
    'Fragile and conflict affected situations',
    'Heavily indebted poor countries (HIPC)',
    'Late-demographic dividend',
    'Least developed countries: UN classification',
    'Low & middle income', 
    'Low income', 
    'Lower middle income',
    'Middle income',
    'Pre-demographic dividend',
    'Upper middle income'
]

# Entities whose missing code is filled with "GBR" by fill_uk_code.
uk_list = ['Wales', 'England', 'Scotland', 'Northern Ireland', 'United Kingdom']

# Mapping of long source column names to short names, used by rename_columns.
# Keys are written in lower case, so they only match after the column labels
# have been passed through lowercase_columns.
rename_dict ={
    'deaths - malaria - sex: both - age: age-standardized (rate) (per 100,000 people)': 'age_std_death_rate',
    'incidence of malaria (per 1,000 population at risk) (per 1,000 population at risk)' : 'malaria_incidence_1000'
}

In [6]:
# Clean the deaths table step by step. Every helper edits the frame in place
# and returns it, so the calls can be chained with .pipe.
deaths = (
    deaths
    .pipe(lowercase_columns)
    .pipe(rename_columns, rename_dict)
    .pipe(fill_uk_code, uk_list)
)

# assign_entity_type returns the frame it was given, so `test` and `deaths`
# refer to the same object.
test = assign_entity_type(deaths, income_demo)

In [7]:
# Number of rows per assigned entity type.
test['entity_type'].value_counts()

Country    5373
Region      621
SDI         135
World        27
Name: entity_type, dtype: int64

In [9]:
# Scratch check: show the first collected csv path as stored in the list.
data_filepaths[0]

'C:\\Users\\PY\\Desktop\\portfolio\\projects\\malaria\\data\\malaria_deaths.csv'

In [11]:
# Scratch check: file name (with extension) of the first collected csv path.
Path(data_filepaths[0]).name

'malaria_deaths.csv'

In [12]:
# Scratch check: does the first collected path point at 'malaria_deaths.csv'?
'malaria_deaths.csv' == Path(data_filepaths[0]).name

True