In [2]:
import os
import numpy as np
import pandas as pd

# Population Data Preprocessing
### Data Source: [United Nations, World Population Prospects](https://population.un.org/wpp/Download/Standard/MostUsed)

### Data Description:
- The data contains the population of all countries from 1950 to 2020.
- The data is in excel format.
 
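We extract the rows for Pakistan and a subset of the indicator columns from the Excel file and save them in a new file in the directory "Refined". The cell below loads that saved file if it already exists; otherwise it builds it from the raw download.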

In [2]:
# Load the cached Pakistan population table if it exists; otherwise build it
# from the raw WPP2022 workbook and cache it under "Refined/".
try:
    df = pd.read_excel('Refined/Population.xlsx')
except FileNotFoundError:
    # Only a missing cache triggers a rebuild. Any other failure (corrupt file,
    # missing Excel engine, Ctrl-C) is surfaced instead of being silently
    # swallowed, which is what a bare `except:` would do.
    df = pd.read_excel('Data/WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx')
    # Keep only the rows whose third column equals 'Pakistan'.
    # NOTE(review): presumably the region/country-name column — confirm against the workbook.
    df = df[df.iloc[:, 2] == 'Pakistan']
    # Positional pick of the columns that are renamed just below.
    # NOTE(review): these positions are tied to this workbook's layout — re-check
    # them if the source file is replaced by a newer revision.
    df = df.iloc[:, [2, 10, 12, 13, 14, 16, 17, 21, 36]]
    df.columns = ['Country', 'Year', 'Total', 'Male', 'Female', 'Ratio', 'Median Age', 'Increase Rate', 'Life Expectancy']
    # Drop the raw workbook's row labels so a fresh build has the same 0..n-1
    # index as a reload of the cache (which is written with index=False).
    df = df.reset_index(drop=True)
    os.makedirs('Refined', exist_ok=True)
    df.to_excel('Refined/Population.xlsx', index=False)

# GDP Data Preprocessing
### Data Source: [World Bank](https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=PK)

### Data Description:
- The data contains the GDP of all countries from 1960 to 2020.
- The data is in excel format.

We extract the years and the GDP values for Pakistan only and save them in a new file in the directory "Refined". The cell below loads that saved file if it already exists; otherwise it builds it from the raw download.

In [7]:
# Load the cached Pakistan GDP series if it exists; otherwise build it from the
# raw World Bank workbook and cache it under "Refined/".
try:
    df = pd.read_excel('Refined/GDP.xlsx')
except FileNotFoundError:
    # Only a missing cache triggers a rebuild; other errors are not swallowed.
    df = pd.read_excel('Data/API_NY.GDP.MKTP.CD_DS2_en_excel_v2_339292.xls')
    # Row label of the first row whose first column equals 'Pakistan'.
    pakistan_row = df[df.iloc[:, 0] == 'Pakistan'].index[0]
    # Take row 2 together with the Pakistan row and transpose so each original
    # column becomes one record.
    # NOTE(review): row 2 is presumably the sheet's real header row (year labels) — confirm.
    df = df.iloc[[2, pakistan_row]].T.reset_index()
    # reset_index() prepends the old column labels as an extra first column.
    # It must be dropped BEFORE renaming: the frame has three columns at this
    # point, so assigning two names first raises a length-mismatch ValueError.
    df = df.iloc[:, 1:]
    df.columns = ['Year', 'Value']
    # Skip the first four records and the last one.
    # NOTE(review): presumably the country/indicator metadata fields and a
    # trailing empty year — confirm against the workbook.
    # The index is reset so a fresh build matches a reload of the cache.
    df = df.iloc[4:-1].reset_index(drop=True)

    os.makedirs('Refined', exist_ok=True)
    df.to_excel('Refined/GDP.xlsx', index=False)

# Inflation Data Preprocessing
### Data Source: [World Bank](https://data.worldbank.org/indicator/FP.CPI.TOTL.ZG?locations=PK)

### Data Description:
- The data contains the inflation (consumer prices) of all countries from 1960 to 2020.
- The data is in excel format.

We extract the years and the inflation rates for Pakistan only and save them in a new file in the directory "Refined". The cell below loads that saved file if it already exists; otherwise it builds it from the raw download.

In [5]:
# Load the cached Pakistan inflation series if it exists; otherwise build it
# from the raw World Bank workbook and cache it under "Refined/".
try:
    df = pd.read_excel('Refined/Inflation.xlsx')
except FileNotFoundError:
    # Only a missing cache triggers a rebuild; other errors are not swallowed.
    df = pd.read_excel('Data/API_FP.CPI.TOTL.ZG_DS2_en_excel_v2_340510.xls')
    # Row label of the first row whose first column equals 'Pakistan'.
    pakistan_row = df[df.iloc[:, 0] == 'Pakistan'].index[0]
    # Take row 2 together with the Pakistan row and transpose so each original
    # column becomes one record.
    # NOTE(review): row 2 is presumably the sheet's real header row (year labels) — confirm.
    df = df.iloc[[2, pakistan_row]].T.reset_index()
    # Drop the column of old labels that reset_index() prepended, then name
    # the two remaining columns.
    df = df.iloc[:, 1:]
    df.columns = ['Year', 'Value']
    # Skip the first four records and the last one.
    # NOTE(review): presumably the country/indicator metadata fields and a
    # trailing empty year — confirm against the workbook.
    # The index is reset so a fresh build matches a reload of the cache.
    df = df.iloc[4:-1].reset_index(drop=True)

    os.makedirs('Refined', exist_ok=True)
    df.to_excel('Refined/Inflation.xlsx', index=False)