In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the ETH-USD CSV from the Kaggle input directory and preview the first rows.
ETH_CSV_PATH = "/kaggle/input/ethereum-data/ETH-USD.csv"
dataset_df = pd.read_csv(ETH_CSV_PATH)
dataset_df.head()

In [None]:
# Preview the last rows of the loaded frame.
dataset_df.tail()

In [None]:
# "Adj Close" is not referenced by any later cell shown here, so remove it in place.
dataset_df.drop(columns=["Adj Close"], inplace=True)

In [None]:
# Number of missing values in each column.
dataset_df.isna().sum()

In [None]:
# Discard, in place, every row that has at least one missing value.
dataset_df.dropna(axis="index", how="any", inplace=True)

In [None]:
# Summarise the frame's columns, dtypes and non-null counts.
dataset_df.info()

In [None]:
# Parse the Date column to datetimes, then expose its calendar parts as
# separate Year / Month / Day columns.
dataset_df["Date"] = pd.to_datetime(dataset_df["Date"])
date_parts = dataset_df["Date"].dt
dataset_df["Year"] = date_parts.year
dataset_df["Month"] = date_parts.month
dataset_df["Day"] = date_parts.day

In [None]:
# Return of each close relative to the first open price in the frame.
# The first open is fetched by position (.iloc[0]) rather than by label ([0]):
# rows were removed with dropna() earlier, so a row labelled 0 is not
# guaranteed to exist and the label lookup could raise KeyError.
first_open = dataset_df['Open'].iloc[0]
dataset_df['Return'] = dataset_df['Close'] / first_open - 1

In [None]:
# Preview the frame after the Year/Month/Day and Return columns were added.
dataset_df.head()

In [None]:
# Sum of the close column per date, drawn as a single line.
tmp_df = dataset_df.groupby("Date", as_index=False)["Close"].sum()
fig, ax = plt.subplots(figsize=(20, 5))
sns.lineplot(x="Date", y="Close", data=tmp_df, color="dodgerblue", ax=ax)
ax.set_title('Close Price USD over Date')
plt.show()

In [None]:
import matplotlib.dates as mdates

# Bar chart of the per-date close values held in tmp_df.
# matplotlib's bar() is used instead of sns.barplot() for two reasons:
#   * barplot draws x on a categorical axis, so the date locator/formatter
#     below would label category positions instead of the real dates;
#   * recent seaborn releases no longer accept x and y as positional arguments.
fig, ax = plt.subplots(figsize=(20, 5))
ax.bar(tmp_df['Date'], tmp_df['Close'], width=1.0, color="dodgerblue")
# One tick every four months, formatted as day-month-year.
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=4))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y'))
ax.set_xlabel('Date')
ax.set_ylabel('Close')
ax.set_title('Close Price USD over Date')
fig.autofmt_xdate()
plt.show()

In [None]:
# Sum of the close column per calendar year.
tmp_df = dataset_df.groupby("Year")["Close"].sum().reset_index()
plt.figure(figsize=(20, 5))
yearly_ax = sns.lineplot(x="Year", y="Close", data=tmp_df, color="dodgerblue")
yearly_ax.set_title('Close Price USD over Year')
plt.show()

In [None]:
# Sum of the close column per month number (all years pooled together).
tmp_df = dataset_df.groupby("Month", as_index=False)["Close"].sum()
fig, ax = plt.subplots(figsize=(20, 5))
sns.lineplot(x="Month", y="Close", data=tmp_df, color="dodgerblue", ax=ax)
ax.set_title('Close Price USD over Month')
plt.show()

In [None]:
# Sum of the open column per date, drawn as a single line.
tmp_df = dataset_df.groupby(["Date"])["Open"].sum().reset_index()
plt.figure(figsize=(20,5))
sns.lineplot(data=tmp_df, x="Date", y="Open", color="dodgerblue")
# Title names the plotted column (it previously said "Close" although Open is drawn).
plt.title('Open Price USD over Date')
plt.show()

In [None]:
# Sum of the volume column per date, drawn as a single line.
tmp_df = dataset_df.groupby(["Date"])["Volume"].sum().reset_index()
plt.figure(figsize=(20,5))
sns.lineplot(data=tmp_df, x="Date", y="Volume", color="dodgerblue")
# Title names the plotted column (it previously said "Close Price USD" although Volume is drawn).
plt.title('Volume over Date')
plt.show()

In [None]:
# Per-date sums of Open, Close, Volume and Return.
# The columns are selected with a list ([[...]]): indexing a groupby with a bare
# tuple of names (["Open", "Close", ...]) was deprecated and is rejected by
# pandas >= 2.0.
tmp_df = dataset_df.groupby(["Date"])[["Open", "Close", "Volume", "Return"]].sum().reset_index()
# Volume is log-transformed in tmp_df; it is not drawn in the figure below.
tmp_df["Volume"] = np.log(tmp_df["Volume"])
# Open-to-close difference for each date.
tmp_df["Diff"] = tmp_df["Close"] - tmp_df["Open"]

plt.figure(figsize=(20,5))
sns.lineplot(data=tmp_df, x="Date", y="Close", color="dodgerblue", label="Close")
sns.lineplot(data=tmp_df, x="Date", y="Open", color="orange", label="Open")
sns.lineplot(data=tmp_df, x="Date", y="Diff", color="red", label="Diff")
sns.lineplot(data=tmp_df, x="Date", y="Return", label="Return")
# Title lists every series drawn (it previously mentioned only the close price).
plt.title('Open, Close, Diff and Return over Date')
plt.legend()
plt.show()

In [None]:
# Correlation heatmap over the numeric columns only.
# The frame still contains the datetime "Date" column at this point, and whether
# corr() silently skips non-numeric columns depends on the pandas version
# (the numeric_only default changed in pandas 2.0), so select them explicitly.
sns.heatmap(dataset_df.select_dtypes(include="number").corr())

In [None]:
# Pairwise scatter/distribution grid over the columns of the frame.
sns.pairplot(data=dataset_df)

In [None]:
# Keep only the columns used from here on; Close comes first after Date.
model_columns = ["Date", "Close", "Volume", "Year", "Month", "Day"]
dataset_df = dataset_df[model_columns]

In [None]:
# Preview the frame after narrowing it to the modelling columns.
dataset_df.head()

In [None]:
# Chronological split of the close series: rows dated up to the cutoff go to
# train_df, later rows to test_df. Both frames are indexed by Date.
split_point = '2021'
in_train = dataset_df['Date'] <= split_point
in_test = dataset_df['Date'] > split_point
train_df = dataset_df.loc[in_train, ['Close', 'Date']].set_index('Date')
test_df = dataset_df.loc[in_test, ['Close', 'Date']].set_index('Date')

# Draw both segments on the same axes.
plt.plot(train_df)
plt.plot(test_df)
plt.legend(['train', 'test'])

In [None]:
# Remove the Date column in place; the remaining columns are fed to the scaler.
dataset_df.drop(columns=["Date"], inplace=True)

In [None]:
# Share of all rows that belong to the hold-out frame (a fraction between 0 and 1).
TEST_PERCENT = test_df.shape[0] / dataset_df.shape[0]
TEST_PERCENT

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit the min-max scaler on the training rows only, then apply it to every row.
# Fitting on the whole frame would leak the hold-out period's minimum/maximum
# into the training data. Hold-out values may therefore fall outside (0, 1).
# int() truncation keeps the fitting window inside the training rows.
n_fit_rows = int(len(dataset_df) * (1 - TEST_PERCENT))
scaler = MinMaxScaler(feature_range=(0,1))
scaler.fit(dataset_df.iloc[:n_fit_rows])
dataset = scaler.transform(dataset_df)

In [None]:
# Display the scaled array produced by the scaler.
dataset

In [None]:
# Number of leading rows used for training; the remainder is the test portion.
# round() is used instead of int(): the product is a float that can land a hair
# below the intended whole number (e.g. 0.9999999999999998), and int() would
# then truncate it to one row too few.
train_len = round(len(dataset) * (1 - TEST_PERCENT))
test_len = len(dataset) - train_len

In [None]:
# The leading train_len rows of the scaled array form the training portion.
train = dataset[:train_len]
train

In [None]:
# Every row after the training portion is held out for testing.
n_train_rows = len(train)
test = dataset[n_train_rows:]
test

In [None]:
def split_sequence(sequence, n_steps_in, n_steps_out):
    """Cut a 2-D array into sliding (input window, target) pairs.

    Each sample takes ``n_steps_in`` consecutive rows as input and the
    ``n_steps_out`` rows that follow as target. Inputs keep every column
    except column 0; targets keep column 0 only. Windows that would run
    past the end of ``sequence`` are not produced.

    Parameters
    ----------
    sequence : array supporting ``[rows, cols]`` indexing (e.g. a 2-D ndarray)
    n_steps_in : number of rows per input window
    n_steps_out : number of rows per target

    Returns
    -------
    (X, y) : tuple of ndarrays built with ``np.array`` from the collected
        windows and targets (both are empty 1-D arrays when no window fits).
    """
    total_rows = len(sequence)
    # Number of window start positions whose target still fits in the sequence.
    n_windows = min(total_rows, total_rows - n_steps_in - n_steps_out + 1)

    inputs, targets = [], []
    for start in range(n_windows):
        split = start + n_steps_in
        stop = split + n_steps_out
        inputs.append(sequence[start:split][:, 1:])
        targets.append(sequence[split:stop][:, 0])
    return np.array(inputs), np.array(targets)

In [None]:
# Window configuration: 180 past rows per sample, 1 row to predict,
# and 4 input columns per row (every scaled column except the first).
n_steps_in = 180
n_steps_out = 1
n_features = 4

# Build the windows separately for each portion so no window spans the split.
X_train, y_train = split_sequence(sequence=train, n_steps_in=n_steps_in, n_steps_out=n_steps_out)
X_test, y_test = split_sequence(sequence=test, n_steps_in=n_steps_in, n_steps_out=n_steps_out)

In [None]:
# Report the input/target array shapes for the train portion, then the test portion.
for features, targets in ((X_train, y_train), (X_test, y_test)):
    print(features.shape, targets.shape)

In [None]:
# Install livelossplot, which provides the PlotLossesKerasTF callback imported below.
!pip install livelossplot

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM 
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from livelossplot import PlotLossesKerasTF

In [None]:
# File that receives the best checkpoint (name kept as-is for compatibility).
model_wieght_file = "best_model.hdf5"

# Shrink the learning rate when validation loss stops improving.
# factor must be a positive fraction: the previous value of 0 would have
# multiplied the learning rate by zero on the first plateau.
lr_reduce = ReduceLROnPlateau(monitor="val_loss", factor=0.1, min_delta=0.001, patience=1, verbose=1)

# Keep only the checkpoint with the lowest validation loss.
checkpoint = ModelCheckpoint(model_wieght_file, monitor="val_loss", verbose=1, save_best_only=True, mode="min")

# Stop training after 5 epochs without a validation-loss improvement.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)

# lr_reduce is now part of the list; it was previously created but never
# handed to training, so it had no effect.
callbacks = [PlotLossesKerasTF(), lr_reduce, es, checkpoint]

In [None]:
# Two stacked 256-unit LSTM layers, each followed by 40% dropout, then a
# 64-unit ReLU layer and a single linear output unit.
model = Sequential([
    LSTM(256, input_shape=(n_steps_in, n_features), return_sequences=True),
    Dropout(0.4),
    LSTM(256),
    Dropout(0.4),
    Dense(64, activation='relu'),
    Dense(1),
])

model.summary()

In [None]:
# Train on mean squared error and report the same quantity as a metric.
# The optimizer's step size is passed as `learning_rate`; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
model.compile(loss="mean_squared_error", 
              optimizer=Adam(learning_rate=0.0005), 
              metrics=["mean_squared_error"])

In [None]:
EPOCHS = 20
BATCH_SIZE = 16

# NOTE(review): the test windows double as validation data, so the val_loss
# monitored by the callbacks comes from the same data that is later scored
# as the test set.
fit_options = dict(
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
    validation_data=(X_test, y_test),
)
history = model.fit(X_train, y_train, **fit_options)

In [None]:
import math

def model_score(model, X_train, y_train, X_test, y_test):
    """Print MSE and RMSE of ``model`` on the train data, then on the test data.

    ``model.evaluate`` is called once per split with ``verbose=1``; the first
    element of its result is treated as the MSE and its square root is
    reported as the RMSE. Nothing is returned.
    """
    reports = (
        ("Train score: %0.5f MSE(%.2f RMSE)", X_train, y_train),
        ('Test Score: %.5f MSE (%.2f RMSE)', X_test, y_test),
    )
    for template, features, targets in reports:
        mse = model.evaluate(features, targets, verbose=1)[0]
        print(template % (mse, math.sqrt(mse)))

In [None]:
# Print the MSE/RMSE of the model on the training and test windows.
model_score(model, X_train, y_train, X_test, y_test)

In [None]:
def invTransform(scaler, data):
    """Undo the scaler for values that belong to the scaler's first column.

    ``data`` is placed in column 0 of a zero-filled frame that has as many
    columns as the scaler was fitted on (``scaler.n_features_in_``), the whole
    frame is passed through ``scaler.inverse_transform``, and column 0 of the
    result is returned as an array.
    """
    n_rows, n_cols = len(data), scaler.n_features_in_
    # Zero padding for the other columns; only column 0 is read back.
    padded = pd.DataFrame(np.zeros((n_rows, n_cols)))
    padded[0] = data
    restored = scaler.inverse_transform(padded)
    return pd.DataFrame(restored, columns=padded.columns)[0].values

In [None]:
# Predict on the test windows, then map the scaled outputs back through the
# scaler's first column and preview the first ten values.
scaled_pred = model.predict(X_test)
pred = invTransform(scaler, scaled_pred)
pred[:10]

In [None]:
# Prediction frame with the same Date index as test_df.
# The Close column is blanked first so rows without a prediction show as gaps
# instead of silently repeating the true price.
pred_df = test_df.copy()
pred_df["Close"] = np.nan

# Each test window spans n_steps_in rows and predicts the row right after it,
# so the predictions belong to rows n_steps_in onward (not to the first rows).
# A single .iloc assignment is used because chained indexing
# (pred_df[...]["Close"] = ...) may write to a temporary copy and leave
# pred_df unchanged.
close_col = pred_df.columns.get_loc("Close")
pred_df.iloc[n_steps_in:n_steps_in + len(pred), close_col] = pred

In [None]:
# Compare the true hold-out closes (blue) with the prediction frame (red).
# The spelling of "Ethereum" is corrected in the printed key, and the same
# information is attached to the figure as a legend.
print("Red - Predicted Ethereum Series  ,  Blue - Ethereum Series")
plt.rcParams["figure.figsize"] = (15,7)
plt.plot(test_df["Close"], 'b', label='Ethereum series')
plt.plot(pred_df["Close"] , 'r', label='Predicted Ethereum series')
plt.xlabel('Time')
plt.ylabel('Close')
plt.title('Check the accuracy of the model with time')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Last training row next to the first hold-out row, to inspect the split boundary.
print(train_df.iloc[-1])
print(test_df.iloc[0])

In [None]:
# First hold-out row next to the first row of the prediction frame.
print(test_df.iloc[0])
print(pred_df.iloc[0])

In [None]:
# First rows of the true hold-out closes.
test_df.head()

In [None]:
# First rows of the prediction frame, for comparison with test_df above.
pred_df.head()

In [None]:
# Overlay the training closes (green), the true hold-out closes (blue)
# and the prediction frame (red) on one set of axes.
for frame, line_style in ((train_df, 'g'), (test_df, 'b'), (pred_df, 'r')):
    plt.plot(frame, line_style)
plt.legend(['true trained', 'true tested', 'pred'])