### IMPORTS

In [1]:
import numpy as np
import pandas as pd

### 1. Reading raw historical exchange rates data from csv

In [2]:
# Load the raw historical exchange-rate table from the raw_data/ folder,
# parsing the 'Date' column into datetimes while reading.
converter_df = pd.read_csv(
    './raw_data/currency_exchange_rates/historicalCurrencyExchangeRates.csv',
    parse_dates=['Date'],
)

# Show which column labels were picked up from the file header
print(converter_df.columns)

Index(['Date', 'USD', 'JPY', 'BGN', 'CYP', 'CZK', 'DKK', 'EEK', 'GBP', 'HUF',
       'LTL', 'LVL', 'MTL', 'PLN', 'ROL', 'RON', 'SEK', 'SIT', 'SKK', 'CHF',
       'ISK', 'NOK', 'HRK', 'RUB', 'TRL', 'TRY', 'AUD', 'BRL', 'CAD', 'CNY',
       'HKD', 'IDR', 'ILS', 'INR', 'KRW', 'MXN', 'MYR', 'NZD', 'PHP', 'SGD',
       'THB', 'ZAR', 'VEF', 'Unnamed: 43', 'Unnamed: 44'],
      dtype='object')


### 2. Filling NaN values using date-based interpolation of known values

In [3]:
# Index the rates by 'Date' (the column is dropped once it becomes the index),
# then fill gaps between known values with time-based interpolation, which
# needs the datetime index to measure the interval between observations.
converter_df = (
    converter_df
    .set_index('Date')
    .interpolate(method='time')
)

# Preview the last three rows of the interpolated table
converter_df.tail(3)

Unnamed: 0_level_0,USD,JPY,BGN,CYP,CZK,DKK,EEK,GBP,HUF,LTL,...,MXN,MYR,NZD,PHP,SGD,THB,ZAR,VEF,Unnamed: 43,Unnamed: 44
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-01-04,1.4727,160.86,1.9558,,26.129,7.4509,15.6466,0.74495,253.64,3.4528,...,16.0171,4.8349,1.9043,60.337,2.1102,43.874,10.0643,0.316223,,
2008-01-03,1.4753,160.68,1.9558,,26.175,7.4532,15.6466,0.7448,253.67,3.4528,...,16.1123,4.8523,1.9066,60.6,2.1144,44.062,10.085,0.315665,,
2008-01-02,1.4688,163.83,1.9558,,26.364,7.4552,15.6466,0.7413,253.22,3.4528,...,16.0143,4.8625,1.8949,60.47,2.1135,44.013,10.0143,0.317062,,


### 3. Adding conversion rates for missing dates

In [4]:
# The raw data has no rows for holidays and weekends. Build a complete daily
# calendar, align the table to it, and fill the newly created gaps from the
# neighbouring days.

# Every calendar day from 2008-01-01 through 2018-12-31
date_range = pd.date_range(start='1/1/2008', end='12/31/2018', freq='D')

converter_df = (
    converter_df
    # Align to the daily calendar; dates absent from the raw data become NaN rows.
    # NOTE(review): reindex raises if the original 'Date' index has duplicates.
    .reindex(date_range)
    # The new index comes from date_range and has no name, so restore 'Date'
    .rename_axis('Date')
    # Carry the most recent earlier value forward into each gap ...
    .ffill()
    # ... then back-fill whatever is still empty at the start of the range
    # (e.g. 2008-01-01, which precedes the first available row).
    .bfill()
)

converter_df.head(3)

Unnamed: 0_level_0,USD,JPY,BGN,CYP,CZK,DKK,EEK,GBP,HUF,LTL,...,MXN,MYR,NZD,PHP,SGD,THB,ZAR,VEF,Unnamed: 43,Unnamed: 44
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-01-01,1.4688,163.83,1.9558,,26.364,7.4552,15.6466,0.7413,253.22,3.4528,...,16.0143,4.8625,1.8949,60.47,2.1135,44.013,10.0143,0.317062,,
2008-01-02,1.4688,163.83,1.9558,,26.364,7.4552,15.6466,0.7413,253.22,3.4528,...,16.0143,4.8625,1.8949,60.47,2.1135,44.013,10.0143,0.317062,,
2008-01-03,1.4753,160.68,1.9558,,26.175,7.4532,15.6466,0.7448,253.67,3.4528,...,16.1123,4.8523,1.9066,60.6,2.1144,44.062,10.085,0.315665,,


### 4. Dropping currency columns entirely filled with NaN values

In [5]:
# Discard every column that holds no non-NaN value at all
# (how='all' keeps any column with at least one real entry).
converter_df = converter_df.dropna(axis='columns', how='all')

In [6]:
# Preview the first three rows to check the result of dropping the all-NaN columns
converter_df.head(3)

Unnamed: 0_level_0,USD,JPY,BGN,CZK,DKK,EEK,GBP,HUF,LTL,LVL,...,INR,KRW,MXN,MYR,NZD,PHP,SGD,THB,ZAR,VEF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-01-01,1.4688,163.83,1.9558,26.364,7.4552,15.6466,0.7413,253.22,3.4528,0.6988,...,67.125,1376.19,16.0143,4.8625,1.8949,60.47,2.1135,44.013,10.0143,0.317062
2008-01-02,1.4688,163.83,1.9558,26.364,7.4552,15.6466,0.7413,253.22,3.4528,0.6988,...,67.125,1376.19,16.0143,4.8625,1.8949,60.47,2.1135,44.013,10.0143,0.317062
2008-01-03,1.4753,160.68,1.9558,26.175,7.4532,15.6466,0.7448,253.67,3.4528,0.6987,...,67.125,1382.06,16.1123,4.8523,1.9066,60.6,2.1144,44.062,10.085,0.315665


### 5. Saving to currency exchange rates input csv file

In [7]:
# Writing the cleaned exchange-rate table to the csv file in input_data/currency_exchange_rates/
converter_df.to_csv('./input_data/currency_exchange_rates/exRateHist.csv')

#### Note: List of currency columns dropped from the exchange rates csv because they contained only NaN values

In [8]:
# Re-read the raw file so its original column set can be compared with the cleaned table
input_df = pd.read_csv(
    './raw_data/currency_exchange_rates/historicalCurrencyExchangeRates.csv',
    parse_dates=['Date'],
)

# 'Date' is excluded because it is not a currency. Every other raw column is
# counted, including placeholder columns such as 'Unnamed: 43'.
total_currencies = set(input_df.columns).difference(['Date'])

# Columns that survived the cleaning steps
present_currencies = set(converter_df.columns)

# Columns present in the raw file but missing from the cleaned table
dropped_currencies = total_currencies.difference(present_currencies)

print(dropped_currencies)


{'MTL', 'Unnamed: 44', 'ROL', 'TRL', 'SIT', 'CYP', 'Unnamed: 43'}


### 6. Deleting dataframe

In [9]:
# Remove the converter_df name from the notebook namespace; it is not used after this point in the visible cells
del converter_df