# **Set-up**

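Run the following in a terminal to install the dependencies (`datetime` is part of Python's standard library, so the `pip install datetime` line can be skipped):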

```console
pip install numpy
pip install pandas
pip install openpyxl
pip install datetime
pip install matplotlib
```

In [34]:
import numpy as np
import pandas as pd
from datetime import datetime

Ensure the current working directory is the "EC1B1" folder, because the data files below are read via relative paths.

# **Read Excel**

In [3]:
# Load the two raw International Financial Statistics (IFS) workbooks.
# The paths are relative, so the working directory must contain `data/`.
df_spain_raw = pd.read_excel(io='./data/data_spain.xlsx')
df_us_raw = pd.read_excel(io='./data/data_united_states.xlsx')

We store the dataframes under new variable names to keep the raw data untouched.

In [43]:
# Work on independent copies. A plain assignment would only create a second
# name for the same DataFrame, so any in-place edit made through `spain_1` or
# `us_1` would also change the raw data.
spain_1 = df_spain_raw.copy()
us_1 = df_us_raw.copy()

Inspect the dataframe

In [44]:
# Preview the first five rows; row 0 holds the series descriptions, not data.
spain_1.head(n=5)

Unnamed: 0,International Financial Statistics (IFS),Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,,"Economic Activity, Industrial Production, Index","Exchange Rates, US Dollar per Domestic Currenc...","International Reserves and Liquidity, Reserves...","Prices, Consumer Price Index, All items, Index"
1,Jan 1960,,0.016667,233,2.857368
2,Feb 1960,,0.016667,253,2.855049
3,Mar 1960,,0.016667,299,2.851573
4,Apr 1960,,0.016667,326,2.853891


# **Initial Cleaning**

## For Spain:

In [45]:
# Give the IFS columns short snake_case names, then drop every row that has a
# missing value in any column. This removes the descriptive first row and the
# months for which no industrial production index is reported.
spain_2 = (
    spain_1
    .rename(columns={
        'International Financial Statistics (IFS)': 'date',
        'Unnamed: 1': 'industrial_index',
        'Unnamed: 2': 'exchange_rate',
        'Unnamed: 3': 'reserves',
        'Unnamed: 4': 'price_index',
    })
    .dropna()
)

In [46]:
# The source series is quoted as US dollars per unit of domestic currency;
# invert it to get domestic currency per US dollar.
# `assign` returns a new DataFrame, so `spain_2` is left unchanged and
# re-running this cell does not invert the rate a second time.
spain_3 = spain_2.assign(
    exchange_rate=spain_2['exchange_rate'].apply(lambda rate: 1 / rate)
)

In [47]:
# Display the frame to check the inverted exchange rate (pandas truncates long output).
spain_3

Unnamed: 0,date,industrial_index,exchange_rate,reserves,price_index
13,Jan 1961,18.681625,60.000,536,2.909531
14,Feb 1961,18.247714,60.000,565,2.885768
15,Mar 1961,18.423624,60.000,561.55,2.873017
16,Apr 1961,18.611261,60.000,593.5,2.880552
17,May 1961,19.584629,60.000,643.5,2.873017
...,...,...,...,...,...
368,Aug 1990,57.138375,97.029,53227.717281,52.200576
369,Sep 1990,97.803577,98.467,53790.187718,52.750025
370,Oct 1990,100.909947,95.697,55428.040577,53.218335
371,Nov 1990,104.110449,93.958,56347.862332,53.165017


In [48]:
# Parse the 'Mon YYYY' labels (e.g. 'Jan 1961') into timestamps, which land on
# the first day of each month.
# `assign` builds a new DataFrame instead of writing into `spain_3`, so the
# cell can be re-run safely: its input still holds the original strings.
spain_4 = spain_3.assign(
    date=pd.to_datetime(spain_3['date'], format='%b %Y')
)

In [51]:
# Value columns that must be numeric before any further analysis.
spain_columns = ['industrial_index', 'exchange_rate', 'reserves', 'price_index']

# Coerce each value column to a numeric dtype and round it to 2 decimal places.
# The result is a new DataFrame, so `spain_4` keeps its unrounded values.
spain_5 = spain_4.assign(
    **{column: pd.to_numeric(spain_4[column]).round(2) for column in spain_columns}
)

In [53]:
# Preview the first five rows to confirm the parsed dates and rounded values.
spain_5.head(n=5)

Unnamed: 0,date,industrial_index,exchange_rate,reserves,price_index
13,1961-01-01,18.68,60.0,536.0,2.91
14,1961-02-01,18.25,60.0,565.0,2.89
15,1961-03-01,18.42,60.0,561.55,2.87
16,1961-04-01,18.61,60.0,593.5,2.88
17,1961-05-01,19.58,60.0,643.5,2.87


In [54]:
# Final name for the cleaned Spain data.
# NOTE: this binds a second name to the same DataFrame object as `spain_5`
# (no copy is made), so in-place edits through either name affect both.
spain_basic_cleaned = spain_5

In [56]:
# Display the US frame to see its layout before cleaning (pandas truncates long output).
us_1

Unnamed: 0,International Financial Statistics (IFS),Unnamed: 1,Unnamed: 2
0,,"International Reserves and Liquidity, Reserves...","Prices, Consumer Price Index, All items, Index"
1,Jan 1960,21478.1,13.436946
2,Feb 1960,21395.7,13.482806
3,Mar 1960,21344.7,13.482806
4,Apr 1960,21278,13.528666
...,...,...,...
368,Aug 1990,78908.838357,60.351608
369,Sep 1990,80024.166133,60.856066
370,Oct 1990,82852.196532,61.222946
371,Nov 1990,83059.402774,61.360525


## Using similar code, repeat for the US:

In [57]:
# Repeat the Spain cleaning steps for the US data.
# The pipeline starts from `df_us_raw` rather than from `us_1` itself, so the
# cell gives the same result however many times it is run. Reassigning `us_1`
# from its own previous value made a second run fail on the already-parsed dates.
us_value_columns = ['reserves', 'price_index']

# Rename the IFS columns and drop every row with a missing value, which
# removes the descriptive first row.
us_renamed = (
    df_us_raw
    .rename(columns={
        'International Financial Statistics (IFS)': 'date',
        'Unnamed: 1': 'reserves',
        'Unnamed: 2': 'price_index',
    })
    .dropna()
)

# Parse the 'Mon YYYY' labels into timestamps, and coerce the value columns to
# a numeric dtype rounded to 2 decimal places.
us_1 = us_renamed.assign(
    date=pd.to_datetime(us_renamed['date'], format='%b %Y'),
    **{column: pd.to_numeric(us_renamed[column]).round(2) for column in us_value_columns},
)

# Final name for the cleaned US data (same object as `us_1`, not a copy).
us_basic_cleaned = us_1

# **Data manipulation**