In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<h1><center style="color: navy"> Bitcoin Price Prediction using LSTM </center></h1>
<center><img src = "https://img.etimg.com/photo/84370297.cms" alt="Bitcoin" width=700px></center>

#### What is Bitcoin?

Bitcoins can be used to buy merchandise anonymously. In addition, international payments are easy and cheap because bitcoins are not tied to any country or subject to regulation. Small businesses may like them because there are no credit card fees. Some people just buy bitcoins as an investment, hoping that they’ll go up in value.

#### How is Bitcoin traded?

Many marketplaces called “bitcoin exchanges” allow people to buy or sell bitcoins using different currencies. Coinbase is a leading exchange, along with Bitstamp and Bitfinex. But security can be a concern: bitcoins worth tens of millions of dollars were stolen from Bitfinex when it was hacked in 2016.

People can send bitcoins to each other using mobile apps or their computers. It’s similar to sending cash digitally.

People compete to “mine” bitcoins using computers to solve complex math puzzles. This is how bitcoins are created. Currently, a winner is rewarded with 6.25 bitcoins roughly every 10 minutes.

## Objective
The aim of this kernel is to train an LSTM model to predict the future price of Bitcoin based on past time series data. This kernel uses the LSTM model from the Keras library.

## What is LSTM?

LSTM stands for long short-term memory networks, used in the field of Deep Learning. It is a variety of recurrent neural networks (RNNs) that are capable of learning long-term dependencies, especially in sequence prediction problems. LSTM has feedback connections, i.e., it is capable of processing the entire sequence of data, apart from single data points such as images. This finds application in speech recognition, machine translation, etc. LSTM is a special kind of RNN, which shows outstanding performance on a large variety of problems.

<center><img src = "https://miro.medium.com/max/1400/1*goJVQs-p9kgLODFNyhl9zA.gif" alt="LSTM animation" width=700px></center>



## Checking if a GPU is available

In [None]:
import tensorflow as tf
# tf.test.is_gpu_available() is deprecated; listing the physical GPU devices is
# the supported way to ask the same question. The cell still evaluates to a
# boolean: True when at least one GPU is visible to TensorFlow.
len(tf.config.list_physical_devices('GPU')) > 0

In [None]:
# Stop the notebook early unless TensorFlow reports the first GPU device;
# otherwise print the device name that was found.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

In [None]:
# A bare tf.device(...) call only creates a context manager; without a `with`
# block nothing is ever placed on that device, so the original line had no effect.
# TensorFlow already places Keras ops on the first visible GPU by default, so
# simply show the logical GPU devices that training will use.
tf.config.list_logical_devices('GPU')

## Libraries Used
<ul>
    <li>Numpy</li>
    <li>Pandas</li>
    <li>Keras</li>
    <li>Matplotlib</li>
    <li>Sklearn</li>
</ul>

In [None]:
import matplotlib.pyplot as plt
import datetime
import matplotlib.dates as mdates
from datetime import datetime, timedelta
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import keras_tuner as kt

In [None]:
# Let matplotlib understand pandas datetime values on plot axes.
register_matplotlib_converters()

# Global seaborn look for the notebook: white grid background, larger fonts.
sns.set(
    font_scale=1.5,
    style="whitegrid",
)

## Loading Dataset and Performing EDA



In [None]:
# Historical Bitcoin prices (quoted in rupees) from the attached Kaggle dataset.
csv_path = "../input/historical-bitcoin-data/Bitcoin Historical Data - In Rupees.csv"
df = pd.read_csv(csv_path)

In [None]:
# Rewrite the textual dates step by step: drop commas, turn spaces into
# dashes, then swap each month abbreviation for its two-digit number.
# The substitutions run in exactly this order, one regex replace per entry.
date_substitutions = [
    (",", ""),
    (" ", "-"),
    ("Jan", "01"),
    ("Feb", "02"),
    ("Mar", "03"),
    ("Apr", "04"),
    ("May", "05"),
    ("Jun", "06"),
    ("Jul", "07"),
    ("Aug", "08"),
    ("Sep", "09"),
    ("Oct", "10"),
    ("Nov", "11"),
    ("Dec", "12"),
]
for pattern, replacement in date_substitutions:
    df["Date"] = df["Date"].replace(pattern, replacement, regex=True)

In [None]:
# Inspect the Date column after the string substitutions in the previous cell.
df['Date']

In [None]:
# Preview the first rows of the frame.
df.head()

In [None]:
# Remove thousands separators from the four price columns ...
for price_col in ("Price", "Open", "High", "Low"):
    df[price_col] = df[price_col].replace(",", "", regex=True)

# ... and the percent sign from the change column, so they can be parsed as numbers later.
df["Change %"] = df["Change %"].replace("%", "", regex=True)

In [None]:
def value_to_float(x):
    """Convert an abbreviated volume such as ``"1.5K"``, ``"2M"`` or ``"3B"`` to a number.

    Parameters
    ----------
    x : int, float or str
        Numbers (including subclasses such as ``numpy.float64``) are returned
        unchanged. Anything else is read as text, optionally ending in a
        ``K`` (thousand), ``M`` (million) or ``B`` (billion) suffix.

    Returns
    -------
    float (or the original number)
        The scaled value. A bare suffix (``"K"``, ``"M"``, ``"B"``) counts as
        one unit of that magnitude. Text without a suffix is parsed as a plain
        number; text that is not a number at all (e.g. ``"-"``) yields ``0.0``.

    Raises
    ------
    ValueError
        If a suffix is present but the remaining text is not a valid number.
    """
    # isinstance also accepts numeric subclasses that exact type() checks miss.
    if isinstance(x, (int, float)):
        return x

    # Thousands separators would make float() fail, so drop them up front.
    text = str(x).strip().replace(",", "")

    # Checked in the same K -> M -> B order as before.
    multipliers = {"K": 1_000.0, "M": 1_000_000.0, "B": 1_000_000_000.0}
    for suffix, multiplier in multipliers.items():
        if suffix in text:
            number = text.replace(suffix, "")
            # A lone suffix means "one thousand/million/billion".
            return float(number) * multiplier if number else multiplier

    # No suffix: keep plain numbers instead of silently zeroing them.
    try:
        return float(text)
    except ValueError:
        return 0.0

# Turn every abbreviated volume entry into a plain number, element by element.
df['Vol.'] = df['Vol.'].map(value_to_float)

In [None]:
# The cleaned strings are month-day-year with dashes; parse them into datetimes.
date_format = '%m-%d-%Y'
df['Date'] = pd.to_datetime(df["Date"], format=date_format)

In [None]:
# Preview the first rows now that 'Date' has been parsed with pd.to_datetime.
df.head()

In [None]:
# Summary statistics via DataFrame.describe().
# NOTE(review): the price columns are only converted with pd.to_numeric in the
# next cell, so they may not be treated as numeric here yet.
df.describe()

In [None]:
# Parse each value column as a number, downcasting to the smallest float dtype.
numeric_columns = ["Price", "Open", "High", "Low", "Vol.", "Change %"]
for numeric_col in numeric_columns:
    df[numeric_col] = pd.to_numeric(df[numeric_col], downcast="float")

In [None]:
# Print column dtypes and non-null counts to confirm the conversions above.
df.info()

In [None]:
# Matplotlib 3.6 renamed the bundled 'seaborn' style to 'seaborn-v0_8' and later
# removed the old name; use whichever of the two this installation provides.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

## Plot of price variation as per the date
The following plot shows how the prices vary day to day in chronological order.

In [None]:
price_date = df['Date']
price_close = df['Price']

# plt.plot_date is deprecated; plt.plot handles datetime x-values directly.
# marker='o' with a solid line reproduces plot_date's default 'o' format
# combined with the original linestyle='solid'.
plt.plot(price_date, price_close, marker='o', linestyle='solid')

# Label the figure so it can be read on its own (prices are quoted in rupees,
# per the dataset file name).
plt.title('Bitcoin Price over Time')
plt.xlabel('Date')
plt.ylabel('Price (INR)')
plt.tight_layout()
plt.show()

## Plot of volume variation as per the date
The following plot shows how the trading volume varies from day to day in chronological order.

In [None]:
# Daily traded volume against the date, drawn on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df["Date"], df["Vol."], color='maroon')
ax.set_title("Trading Volume Chart")
plt.show()

Now that we've seen the visualizations for the closing price and the volume traded each day, let's go ahead and calculate the moving average for Bitcoin.

## What was the moving average of Bitcoin?

The 50-day moving average is calculated by summing up the past 50 data points and then dividing the result by 50, while the 200-day moving average is calculated by summing the past 200 days and dividing the result by 200.

In [None]:
ma_day = [50, 200]

# A moving average must look back over the *previous* rows in time. The frame is
# still in file order here (it is only reversed later in the notebook), so roll
# over the prices sorted by date. Assigning the result back aligns on the row
# index, which leaves the frame's own row order untouched.
price_by_date = df.sort_values('Date')['Price']

for ma in ma_day:
    column_name = f"MA for {ma} days"
    df[column_name] = price_by_date.rolling(ma).mean()

In [None]:
# Overlay the price with its 50- and 200-day moving averages.
plt.style.use("fivethirtyeight")
plotted_columns = ['Price', 'MA for 50 days', 'MA for 200 days']
plt.plot(df['Date'], df[plotted_columns])
plt.legend(plotted_columns)

In [None]:
# From here on the rows are addressed by their date instead of a running number.
df = df.set_index(keys="Date")

## What was the daily return of Bitcoin on average?

Now that we've done some baseline analysis, let's go ahead and dive a little deeper. We're now going to analyze the risk of Bitcoin investments. In order to do so, we'll need to take a closer look at the daily changes of the cryptocurrency, and not just its absolute value. Let's go ahead and use pandas to retrieve the daily returns for Bitcoin.

In [None]:
# pct_change compares each row with the row before it, so the prices must be in
# chronological order for this to be "today vs. yesterday". Sort by the Date
# index first; the assignment aligns on that index, so the frame keeps its
# current row order. (Assumes one row per date.)
df['Daily Return'] = df['Price'].sort_index().pct_change()
df['Daily Return'].plot(legend=True, linestyle='--', marker='o')

In [None]:
# sns.distplot is deprecated; a density-scaled histplot with a KDE overlay is
# its replacement.
sns.histplot(df['Daily Return'].dropna(), bins=100, color='purple', kde=True, stat='density')
# The returns are on the x-axis; the y-axis is the estimated density.
plt.xlabel('Daily Return')
plt.ylabel('Density')

## Predicting the closing price of Bitcoin

In [None]:
# Closing price over the full history. The dataset file is the "In Rupees"
# export, so the axis is labelled in INR rather than USD.
plt.figure(figsize=(16,6))
plt.title('Close Price History')
plt.plot(df['Price'])
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price (INR)', fontsize=18)
plt.show()

In [None]:
# Remove the columns that were only needed for the exploratory plots.
# Rebinding instead of inplace=True, plus errors="ignore", keeps the cell safe
# to re-run: a second execution no longer raises KeyError for columns that are
# already gone.
df = df.drop(
    columns=["Change %", "MA for 50 days", "MA for 200 days", "Daily Return"],
    errors="ignore",
)

In [None]:
# Display the frame after dropping the exploratory columns.
df

In [None]:
# The unshuffled train/test split below needs the rows oldest-first. Sorting on
# the Date index states that intent explicitly and is idempotent, whereas
# df[::-1] flips the order back every time the cell is re-run.
df = df.sort_index()

In [None]:
# Unscaled hold-out split: the last 5% of rows (no shuffling) form the test set.
feature_frame = df[["Vol.", "Open", "High", "Low"]]
target_series = df['Price']
X_train0, X_test0, y_train0, y_test0 = train_test_split(
    feature_frame,
    target_series,
    test_size=0.05,
    random_state=42,
    shuffle=False,
)

In [None]:
# Inspect the unscaled training features produced by the split above.
X_train0

In [None]:


# Fit both scalers on the training rows only (X_train0 / y_train0 from the
# unshuffled split above) so that no statistics of the hold-out period leak
# into preprocessing, then transform the full frame with those fitted ranges.
# Hold-out values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(X_train0)
scaled_data = scaler.transform(df[["Vol.", "Open", "High", "Low"]])

# Separate scaler for the target so predictions can be inverse-transformed later.
scaler_y = MinMaxScaler()
scaler_y.fit(y_train0.to_frame())
scaled_data_y = scaler_y.transform(df[["Price"]])

In [None]:
# Inspect the scaled feature array returned by the MinMaxScaler.
scaled_data

In [None]:
# Same unshuffled 95/5 split as before, this time on the scaled arrays.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_data,
    scaled_data_y,
    test_size=0.05,
    random_state=42,
    shuffle=False,
)

In [None]:
# Inspect the scaled training features.
X_train

In [None]:
# Shape of the scaled training features before they are reshaped for the LSTM.
print(X_train.shape)

In [None]:
# Make sure all four splits are plain NumPy arrays before reshaping.
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

In [None]:
# Add a trailing axis of length 1 so the inputs are 3-D, as the LSTM layers expect.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
X_train.shape

In [None]:
# Targets as column vectors: one scaled price per sample.
y_train = y_train.reshape((y_train.shape[0], 1))
y_test = y_test.reshape((y_test.shape[0], 1))
y_train.shape

In [None]:
def model_builder(hp):
    """Build and compile a stacked-LSTM regressor whose sizes are drawn from ``hp``.

    Hyperparameters requested from ``hp``, in this order: ``input_unit``,
    ``n_layers``, one ``lstm_{i}_units`` per intermediate layer,
    ``layer_2_neurons``, ``Dropout_rate`` and ``dense_activation`` (requested
    once for each of the two Dense layers under the same name).

    The input shape is read from the notebook-level ``X_train`` array.

    Returns the model compiled with the Adam optimizer, mean-squared-error
    loss and an ``mse`` metric.
    """
    def lstm_units(name):
        # Every LSTM width is searched over 32..512 in steps of 32.
        return hp.Int(name, min_value=32, max_value=512, step=32)

    def dense_activation():
        return hp.Choice('dense_activation', values=['relu', 'sigmoid'], default='relu')

    model = Sequential()

    # Input LSTM layer; keeps the full sequence for the layers stacked on top.
    model.add(LSTM(lstm_units('input_unit'),
                   return_sequences=True,
                   input_shape=(X_train.shape[1], X_train.shape[2])))

    # Between one and four intermediate sequence-returning LSTM layers.
    n_intermediate = hp.Int('n_layers', 1, 4)
    for i in range(n_intermediate):
        model.add(LSTM(lstm_units(f'lstm_{i}_units'), return_sequences=True))

    # Final LSTM collapses the sequence, followed by dropout and the dense head.
    model.add(LSTM(lstm_units('layer_2_neurons')))
    model.add(Dropout(hp.Float('Dropout_rate', min_value=0, max_value=0.5, step=0.1)))
    model.add(Dense(30, activation=dense_activation()))
    model.add(Dense(1, activation=dense_activation()))

    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

    return model
    
# Rank trials by validation loss: the previous "mse" objective was the training
# metric, which rewards over-fitting. The seed makes the sampled
# hyperparameters repeatable.
tuner = kt.RandomSearch(
    model_builder,
    objective=kt.Objective("val_loss", direction="min"),
    max_trials=4,
    executions_per_trial=2,
    seed=42,
    directory="./",
)

# Validate on a slice of the training data (validation_split holds out the last
# 10% of the samples) instead of the test set, so the test set stays unseen
# until the final evaluation.
tuner.search(x=X_train, y=y_train, epochs=20, batch_size=64, validation_split=0.1)

In [None]:
# Ask the tuner for its single best model and keep that one.
top_models = tuner.get_best_models(num_models=1)
best_model = top_models[0]

In [None]:
# Show the tuner's summary of the search results.
tuner.results_summary()

In [None]:
# Predictions for the hold-out inputs (still on the scaled target range).
y_pred = best_model.predict(x=X_test)

In [None]:
# from keras.models import Sequential
# from keras.layers import Dense, LSTM

# # Build the LSTM model
# model = Sequential()
# model.add(LSTM(128, return_sequences=True, input_shape= ( X_train.shape[1],X_train.shape[2])))
# model.add(LSTM(64, return_sequences=False))
# model.add(Dense(25))
# model.add(Dense(1))

# # Compile the model
# model.compile(optimizer='adam', loss='mean_squared_error')

# # Train the model
# model.fit(X_train, y_train, batch_size=2, epochs=100, verbose=0)

In [None]:
# Shape of the predictions returned by best_model.predict (not yet inverse-transformed).
y_pred.shape

In [None]:
# y_pred=model.predict(X_test)

In [None]:
# Map the scaled predictions back to actual prices with the target scaler.
scaled_predictions = y_pred
y_pred = scaler_y.inverse_transform(scaled_predictions)

In [None]:
# Debug check of the hold-out target's type; it comes from train_test_split on
# df['Price'], so a pandas Series is expected here.
type(y_test0)

In [None]:
# Actual hold-out prices side by side with the model's predictions.
valid = pd.DataFrame(y_test0).assign(Predicted=y_pred)

In [None]:
# Training prices, actual hold-out prices and predictions on one chart.
# The data comes from the "In Rupees" file, so the axis is labelled in INR
# rather than USD.
plt.figure(figsize=(16,6))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price (INR)', fontsize=18)
plt.plot(y_train0)
plt.plot(valid[['Price', 'Predicted']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()

In [None]:
# Show the actual prices next to the predicted ones for the hold-out rows.
valid