In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# Show every file Kaggle has mounted under the input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# CSV with the exchange-rate history that the rest of the notebook reads.
file_path = '/kaggle/input/currency-exchange-rates/exchange_rates.csv'

# 📙 1. Load data

In [None]:
# Read the CSV: the first column becomes the index and 'date' is parsed as datetime.
dataset = pd.read_csv(
    file_path,
    index_col=0,
    parse_dates=['date'],
)
dataset.head()

In [None]:
# Summary statistics for the whole dataset, as reported by DataFrame.describe().
dataset.describe()

# 📙 2. EDA on Currency data

## 2.1 First check if any null values present

In [None]:
# Number of missing values in each column.
dataset.isnull().sum()

## 2.2 Types of Currency data present

In [None]:
# Distinct values of the 'currency' column (codes such as 'USD' and 'INR' are used below).
dataset['currency'].unique()

In [None]:
# Distinct values of the 'Country/Currency' column (the currency names).
dataset['Country/Currency'].unique()

In [None]:
unique_country = list(dataset['Country/Currency'].unique())
unique_currency = list(dataset['currency'].unique())

# Pair each name with its code row-wise. Indexing the two independently
# de-duplicated lists by position only works when the mapping is strictly
# one-to-one; otherwise it misaligns the pairs or raises IndexError.
country_currency_pairs = dataset[['Country/Currency', 'currency']].drop_duplicates()
for country, currency in country_currency_pairs.itertuples(index=False, name=None):
    print('Country name: {0} -> Currency: {1}'.format(country, currency))

## 2.3 Pick some currency codes for time series analysis

    1. USA Dollar -> USD
    2. India Rupee -> INR

In [None]:
# Keep only the rows whose currency code is USD.
is_usd = dataset['currency'] == 'USD'
data_usd = dataset.loc[is_usd]
data_usd.head()

## 2.4 Let's Visualize

In [None]:
from IPython.display import HTML, display

In [None]:
# Flourish embed snippet shared by both charts; only the visualisation id differs.
_FLOURISH_EMBED_TEMPLATE = '''
<iframe src='https://flo.uri.sh/visualisation/{viz_id}/embed' title='Interactive or visual content'
class='flourish-embed-iframe' frameborder='0' scrolling='no' style='width:100%;height:600px;' 
    sandbox='allow-same-origin allow-forms allow-scripts allow-downloads allow-popups allow-popups-to-escape-sandbox allow-top-navigation-by-user-activation'></iframe>
    <div style='width:100%!;margin-top:4px!important;text-align:right!important;'>
    <a class='flourish-credit' href='https://public.flourish.studio/visualisation/{viz_id}/?utm_source=embed&utm_campaign=visualisation/{viz_id}' target='_top' style='text-decoration:none!important'>
    <img alt='Made with Flourish' src='https://public.flourish.studio/resources/made_with_flourish.svg' style='width:105px!important;height:16px!important;border:none!important;margin:0!important;'> 
    </a></div>
'''


def _flourish_embed(viz_id):
    """Return an IPython ``HTML`` object embedding a Flourish visualisation.

    Parameters
    ----------
    viz_id : int or str
        Identifier of the public Flourish visualisation; it is substituted
        into the iframe URL and the credit link of the embed snippet.
    """
    return HTML(_FLOURISH_EMBED_TEMPLATE.format(viz_id=viz_id))


# Pre-built embeds; they are rendered later with display(...).
display_usd = _flourish_embed(11270785)
display_inr = _flourish_embed(11270830)

## 2.5 Visualize USD Value

In [None]:
# Render the embedded Flourish chart stored in display_usd.
display(display_usd)

In [None]:
# Keep only the rows whose currency code is INR.
is_inr = dataset['currency'] == 'INR'
data_inr = dataset.loc[is_inr]
data_inr.head()

## 2.6 Visualize INR Value

In [None]:
# Render the embedded Flourish chart stored in display_inr.
display(display_inr)

# 📙 3. Time series Analysis - INR value

In [None]:
# Rebuild the INR frame from `dataset` instead of mutating the earlier slice in place.
# The in-place drop raised SettingWithCopyWarning and made this cell fail with a
# KeyError when re-run (the 'date' column was already gone). This version is idempotent.
data_inr = (
    dataset.loc[dataset['currency'] == 'INR']
    .set_index('date')                                # date becomes the index
    .drop(columns=['Country/Currency', 'currency'])   # constant for this subset
)
data_inr.head()


## 3.1 Check for Stationarity - ADF Test

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the INR series; the lag order is selected by AIC.
result = adfuller(data_inr['value'].values, autolag='AIC')
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

# result[4] maps significance level -> critical value. Print the header once,
# not once per entry as before.
print('Critical Values:')
for level, critical_value in result[4].items():
    print(f'   {level}, {critical_value}')

## Obs - 

    From the ADF test, we observe that the data is not stationary.
    The p-value is about 30%, well above the usual 5% (0.05) threshold, so we cannot
    reject the null hypothesis of a unit root, and the ADF statistic is greater than
    all the critical values.
    
    Both results confirm that the data is non-stationary.

## 3.2 ACF and PACF Plot

In [None]:
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
import matplotlib.pyplot as plt

# Autocorrelation plot of the INR series.
plt.rc("figure", figsize=(10,5))
plot_acf(data_inr['value'])
# plt.show() renders the figure once; it replaces the bare print() that was only
# there to stop the returned Figure from being displayed a second time.
plt.show()

## Obs - 

    From the ACF plot, we can observe autocorrelation up to 25 lags.

In [None]:
# Partial autocorrelation plot of the INR series.
plt.rc("figure", figsize=(10,5))
plot_pacf(data_inr['value'])
# plt.show() renders the figure once; it replaces the bare print() that was only
# there to stop the returned Figure from being displayed a second time.
plt.show()

## Obs - 

    The partial autocorrelation plot shows that only a few lag values are actually correlated.

# 📙 4. Price forecasting

## 4.1 Train - Test Split

In [None]:
# Pull the 'value' column out as a plain NumPy array for the modelling steps below.
data = data_inr['value'].values
print('Shape of data: ', data.shape)

## Obs - 

    Very few data points are available, so we will go for either ARIMA or LSTM with fewer nodes. 

In [None]:
# Ordered 80/20 split: the first 80% of the samples train the model,
# the remaining samples are held out for testing (no shuffling).
train_length = int(0.8 * len(data))
print('Train length: ', train_length)

train_data = data[:train_length]
test_data = data[train_length:]
print('Shape of Train and Test data: ', len(train_data), len(test_data))

## 4.2 Make time-series data supervised

In [None]:
# split a univariate sequence into supervised learning [Input and Output]
from numpy import array
def split_sequence(sequence, n_steps):
    """Turn a univariate sequence into supervised (window, next value) pairs.

    Sample ``i`` consists of ``sequence[i:i + n_steps]`` as input and
    ``sequence[i + n_steps]`` as target.

    Parameters
    ----------
    sequence : sequence or 1-D array
        Ordered observations.
    n_steps : int
        Window length, i.e. how many past values form one input sample.
        Must be at least 1.

    Returns
    -------
    X : numpy.ndarray
        Input windows, shape ``(n_samples, n_steps)``.
    y : numpy.ndarray
        Targets, shape ``(n_samples,)``.

    Raises
    ------
    ValueError
        If ``n_steps`` is smaller than 1 (negative values would otherwise wrap
        around via negative indexing and silently produce wrong samples).
    """
    if n_steps < 1:
        raise ValueError('n_steps must be a positive integer')

    # Every window needs one value after it as the target.
    n_samples = max(len(sequence) - n_steps, 0)
    X = [sequence[start:start + n_steps] for start in range(n_samples)]
    y = [sequence[start + n_steps] for start in range(n_samples)]

    if n_samples == 0:
        # Keep X two-dimensional so callers can still read X.shape[1].
        return array(X).reshape(0, n_steps), array(y)
    return array(X), array(y)

## 4.3 Choose Lag value

In [None]:
lag = 3  # window length passed to split_sequence; chosen empirically
n_features = 1  # size of the last axis the inputs are reshaped to below

In [None]:
# Build (window, next value) samples separately for the train and test portions.
train_X, train_y = split_sequence(train_data, lag)
test_X, test_y = split_sequence(test_data, lag)

In [None]:
# Shapes of the inputs/targets before the feature axis is added.
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)

In [None]:
# Add the trailing feature axis: (samples, steps) -> (samples, steps, n_features),
# matching the input_shape=(lag, n_features) the LSTM layer is built with.
train_X = train_X.reshape(train_X.shape[0], train_X.shape[1], n_features)
test_X = test_X.reshape(test_X.shape[0], test_X.shape[1], n_features)

In [None]:
# Shapes after reshaping: train_X and test_X now carry a trailing feature axis.
print('Shape of train_X and train_y: ', train_X.shape, train_y.shape)
print('Shape of test_X and test_y: ', test_X.shape, test_y.shape)


## 4.4 Define Model

In [None]:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense

In [None]:
# Single LSTM layer (16 units) feeding one Dense output unit.
model = Sequential([
    LSTM(
        16,
        activation='relu',
        return_sequences=False,
        input_shape=(lag, n_features),
    ),
    Dense(1),
])
# Adam optimizer with mean squared error loss.
model.compile(optimizer='adam', loss='mse')
model.summary()

## 4.5 Fit the model - with training data

In [None]:
# Make tf.function-decorated code run eagerly (op by op) instead of as a traced graph.
# NOTE(review): presumably added as a workaround for an error seen when calling
# model.fit; eager execution is generally slower, so confirm it is still needed.
import tensorflow as tf
tf.config.run_functions_eagerly(True)

In [None]:
# Train for 25 epochs in mini-batches of 8; 10% of the training samples are
# held out by Keras as a validation set.
history = model.fit(
    train_X,
    train_y,
    epochs=25,
    batch_size=8,
    verbose=1,
    validation_split=0.1,
)

## 4.6 Summarize model Loss

In [None]:
# Training vs. validation loss per epoch.
# The second curve comes from validation_split in model.fit, not from the test
# set, so it is labelled 'validation' rather than 'test'.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.show()

## Obs - 

    We could have stopped at the 10th epoch.

## 4.7 Make prediction - with Test data

In [None]:
# Model predictions for the training windows and the held-out test windows.
train_predict = model.predict(train_X)
test_predict = model.predict(test_X)

print('Shape of train and test predict: ', train_predict.shape, test_predict.shape)

## 4.8 Model evaluation

In [None]:
# root mean squared error or rmse
import math
from sklearn.metrics import mean_squared_error

def measure_rmse(actual, predicted):
    """Return the root mean squared error between ``actual`` and ``predicted``.

    The mean squared error is computed with scikit-learn's
    ``mean_squared_error`` and its square root is returned as a float.
    """
    mse = mean_squared_error(actual, predicted)
    return math.sqrt(mse)

# RMSE on the training windows and on the held-out test windows.
train_score = measure_rmse(train_y, train_predict)
test_score = measure_rmse(test_y, test_predict)

print('Train and Test RMSE: ', train_score, test_score)

## 4.9 Plot test data and Predicted data

In [None]:
# Actual vs. predicted values on the test windows.
plt.rc("figure", figsize=(14,8))
plt.rcParams.update({'font.size': 16})

fig, ax = plt.subplots()
ax.plot(test_y, label='Actual')
ax.plot(test_predict, label='Predicted')
ax.set_xlabel('Time [in days]')
ax.set_ylabel('INR price')
ax.set_title('Currency price (INR) prediction using LSTM - Test data')
ax.legend()
plt.show()

## To be Continued ...

    

In [None]:
nan

In [None]:
nan

In [None]:
nan