# Outline
Goal: Predict the target 'Adj Close' from the following features: 'Open', 'High', 'Low', 'Close', and 'Volume'.

# Import libraries and data

In [None]:
# Import basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
from plotly.offline import plot

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, GRU

In [None]:
# Read the DOGE-USD price history CSV into a DataFrame and display it
csv_path = '../input/dogecoin-cryptocurrency-dataset/DOGE-USD.csv'
doge = pd.read_csv(csv_path)
doge

In [None]:
# Sorting DataFrame
# Oldest row first. make_dataset() takes `window_size` consecutive rows as the input
# and the row right after them as the label, so rows must run forward in time for the
# label to be the *next* day. Sorting newest-first made the model predict an earlier
# day from later ones and put the oldest data in the unshuffled test split.
# NOTE(review): assumes 'Date' sorts chronologically as stored (e.g. ISO YYYY-MM-DD) — confirm.
doge = doge.sort_values(by='Date', ascending=True).reset_index(drop=True)
doge

# Data Preprocessing

In [None]:
# Count the missing (NaN) entries in every column of 'doge'.
doge.isna().sum()

In [None]:
# Fill NaN with median values
# numeric_only=True restricts the median to numeric columns: 'Date' is not numeric,
# and pandas >= 2.0 raises TypeError when asked for the median of such a column.
# Columns without a median (e.g. 'Date') are simply left untouched by fillna.
doge = doge.fillna(doge.median(numeric_only=True))

In [None]:
# Normalization - MinMaxScaler
# Scale every feature column to [0, 1] in a single pass. The previous per-column loop
# never used its loop variable and refit/retransformed all five columns on each
# iteration; one fit_transform over the column list gives the same result.
# NOTE: the scaler is fit on the whole frame, before the train/test split further down,
# so min/max statistics from the test period leak into the training features.
columns = ['Open', 'High', 'Low', 'Close', 'Volume']
scaler = MinMaxScaler()
doge[columns] = scaler.fit_transform(doge[columns])

In [None]:
# Preprocessed DataFrame: display `doge` as it stands after the NaN-fill and
# min-max scaling cells above
doge

# Create Datasets

In [None]:
# Utility Function
def make_dataset(data, label, window_size=20):
    """Cut a frame into sliding windows paired with the label that follows each one.

    For every start position ``s`` in ``range(len(data) - window_size)`` the
    rows ``data.iloc[s:s + window_size]`` form one sample and
    ``label.iloc[s + window_size]`` is its target.

    Parameters
    ----------
    data : object supporting ``len()`` and ``.iloc`` slicing (e.g. a DataFrame)
        Feature rows, one row per time step.
    label : object supporting ``.iloc`` indexing (e.g. a Series)
        Target values aligned row-for-row with ``data``.
    window_size : int, default 20
        Number of consecutive rows in each sample.

    Returns
    -------
    tuple of np.ndarray
        ``(features, labels)``. When at least one window fits, ``features``
        stacks the windows along a new first axis and ``labels`` holds one
        value per window; otherwise both arrays are empty.
    """
    starts = range(len(data) - window_size)
    windows = [np.array(data.iloc[s:s + window_size]) for s in starts]
    targets = [np.array(label.iloc[s + window_size]) for s in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Pick the model inputs (five price/volume columns) and the prediction target
feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
train_feature = doge[feature_columns]
train_label = doge['Adj Close']

In [None]:
# train dataset
# Build the 10-step windows straight from `doge` instead of from `train_feature` /
# `train_label`. This cell rebinds those two names to NumPy arrays, and arrays have
# no `.iloc`, so feeding them back into make_dataset() broke the cell on a second run.
# Reading from `doge` keeps the cell re-runnable with the same result.
train_feature, train_label = make_dataset(
    doge[['Open', 'High', 'Low', 'Close', 'Volume']],
    doge['Adj Close'],
    window_size=10,
)

In [None]:
# Alias the windowed features / labels under the conventional X, y names
X, y = train_feature, train_label

In [None]:
# Split datasets
# Both splits keep row order (shuffle=False). Consecutive windows overlap in all but
# one row, so a shuffled validation split would put near-copies of training windows
# into the validation set and make val_loss look better than it is.
# Resulting order along the first axis: train | val | test (64% / 16% / 20%).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, shuffle=False)

# Modeling

In [None]:
# Seed NumPy and TensorFlow before any weights are created so that weight
# initialisation and training are repeatable across Restart & Run All.
# (Some GPU kernels may still introduce small run-to-run differences.)
np.random.seed(11)
tf.random.set_seed(11)

# One SimpleRNN layer (20 units, ReLU) reading windows shaped like X_train.shape[1:],
# followed by a single linear output unit for the regression target.
model = Sequential()
model.add(SimpleRNN(20, activation='relu', input_shape=X_train.shape[1:]))
model.add(Dense(1))

In [None]:
# Show the Keras summary of `model`
model.summary()

In [None]:
# Configure training: Adam optimizer, mean-squared-error loss, MSE also tracked as a metric
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mse'],
)

In [None]:
# Train for 100 epochs in batches of 80, scoring on the validation split after every epoch;
# the returned History object feeds the loss plot
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=80,
)

In [None]:
# Training vs. validation loss per epoch
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(history.history['loss'], label='train')
ax_loss.plot(history.history['val_loss'], label='val')
ax_loss.set_xlabel('epochs')
ax_loss.set_ylabel('loss')
ax_loss.legend()
plt.show()

In [None]:
# Evaluate model
from sklearn.metrics import mean_squared_error

# RMSE on the training windows
train_pred = model.predict(X_train)
train_rmse = np.sqrt(mean_squared_error(y_train, train_pred))

# RMSE on the held-out test windows
test_pred = model.predict(X_test)
test_rmse = np.sqrt(mean_squared_error(y_test, test_pred))

# The target ('Adj Close') is left unscaled, so the RMSE is in price units.
# Print six decimals instead of rounding to two, which can collapse a small
# error to 0.0 (presumably the case for a low-priced asset — confirm on the data).
print(f"Train RMSE: {train_rmse:.6f}")
print(f"Test RMSE: {test_rmse:.6f}")

In [None]:
# Interactive comparison of actual vs. predicted values over the test windows
import plotly.graph_objects as go

# Shared x-axis: one position per test sample
test_steps = np.arange(len(y_test))

actual_trace = go.Scatter(
    x=test_steps,
    y=y_test.ravel(),
    mode='lines',
    name='test',
)
predicted_trace = go.Scatter(
    x=test_steps,
    y=test_pred.ravel(),
    line=dict(dash='dash', width=3),
    name='pred',
)

fig = go.Figure(data=[actual_trace, predicted_trace])
fig.update_layout(title="Adj Close Prediction of DogeCoin", height=500)
fig.show()

# To be continued
**What needs to be improved**
1. Drop outliers so that predictions are more precise.
2. Apply other algorithms: LSTM, GRU.
3. Compare the SimpleRNN results with those of the other algorithms.