<a href="https://colab.research.google.com/github/cepdnaclk/e19-co544-Bitcoin-Cost-Forecast-System/blob/main/Models/RNN_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Necessary Libraries

In [1]:
# Notebook-wide imports and one-time display setup.
import warnings  # Standard-library warning control
warnings.filterwarnings("ignore")  # Suppress every warning category for the rest of the session

import numpy as np  # Import numpy for numerical computing
import pandas as pd  # Import pandas for data manipulation
import statsmodels.api as sm  # Import statsmodels for statistical models
from scipy import stats  # Import stats from SciPy for statistical functions
from sklearn.metrics import mean_squared_error  # Import mean_squared_error from sklearn for model evaluation
from math import sqrt  # Import sqrt from math for square root function
from random import randint  # Import randint from random for generating random integers

from keras.models import Sequential  # Import Sequential from keras for sequential model
from keras.layers import Dense  # Import Dense from keras for fully connected layers
from keras.layers import LSTM  # Import LSTM from keras for LSTM layers
from keras.layers import GRU  # Import GRU from keras for GRU layers
from keras.callbacks import EarlyStopping  # Import EarlyStopping from keras for early stopping during model training
from keras import initializers  # Import initializers from keras for initializing model parameters

from matplotlib import pyplot as plt  # Import pyplot from matplotlib for plotting
from datetime import datetime  # Import datetime for date and time operations
from datetime import date  # Import the date class from the datetime module

import plotly.offline as py  # Import offline module from plotly for offline plotting
import plotly.graph_objs as go  # Import graph_objs from plotly for creating plots
py.init_notebook_mode(connected=True)  # Initialize plotly notebook mode (runs at import time, so keep it after the plotly imports)

# %matplotlib inline  # Magic command to display matplotlib plots inline in Jupyter notebooks (left disabled)


# Import the Dataset

In [2]:
import yfinance as yf  # Yahoo Finance client used to download historical market data

# Yahoo Finance symbol for Bitcoin quoted in US dollars.
ticker = 'BTC-USD'

# Build the ticker handle first, then request the longest history the API offers.
btc_ticker = yf.Ticker(ticker)
data = btc_ticker.history(period='max')

# Volume-Weighted Average Price (VWAP)

In [3]:
# Build the running volume-weighted average price (VWAP).
# The cumulative sums run over the whole frame, so each row's value is the
# VWAP from the first row up to and including that row.
traded_value = data['Close'] * data['Volume']   # price x volume for each row
running_volume = data['Volume'].cumsum()        # total volume so far
running_value = traded_value.cumsum()           # total traded value so far

# Store the intermediates on `data` under the same column names and in the same order as before.
data['PV'] = traded_value
data['cumulative_volume'] = running_volume
data['cumulative_PV'] = running_value
data['Weighted_Price'] = running_value / running_volume

# Group by 'Date' only after the derived columns exist, then average within each date.
group = data.groupby('Date')
Daily_Price = group['Weighted_Price'].mean()

# Preview the first rows of the resulting series.
Daily_Price.head()



Date
2014-09-17 00:00:00+00:00    457.334015
2014-09-18 00:00:00+00:00    436.911062
2014-09-19 00:00:00+00:00    419.823580
2014-09-20 00:00:00+00:00    416.734836
2014-09-21 00:00:00+00:00    413.700159
Name: Weighted_Price, dtype: float64

In [4]:
# Show the last rows of the weighted-price series to check the most recent dates and values.
Daily_Price.tail()

Date
2024-05-17 00:00:00+00:00    29673.378562
2024-05-18 00:00:00+00:00    29683.629636
2024-05-19 00:00:00+00:00    29695.219963
2024-05-20 00:00:00+00:00    29725.323465
2024-05-21 00:00:00+00:00    29760.808104
Name: Weighted_Price, dtype: float64

# Split Data

In [10]:
def _inclusive_days(start, end):
    """Return the number of calendar days from `start` to `end`, counting both endpoints."""
    return (end - start).days + 1


# Look-back window: the full span of history used by the notebook (train + test).
d0 = date(2014, 9, 17)  # Start date
d1 = date(2024, 5, 21)  # End date
days_look = _inclusive_days(d0, d1)
print(days_look)

# Hold-out window: the most recent span reserved for testing.
d0 = date(2023, 5, 17)
d1 = date(2024, 5, 21)
delta = d1 - d0  # kept so `delta` is still defined for any later cell that reads it
days_from_train = _inclusive_days(d0, d1)
print(days_from_train)

# Turn the day counts into positional offsets measured from the end of the series.
# NOTE(review): this treats one row as one calendar day -- confirm the series has no gaps.
# The offsets are clamped at 0 because a negative slice start means "count from the end"
# in Python, which would silently return an empty or wrong window whenever the series
# holds fewer rows than the hard-coded day counts.
n_rows = len(Daily_Price)
train_start = max(n_rows - days_look, 0)
split_at = max(n_rows - days_from_train, 0)

# Training set: from `days_look` rows before the end up to (not including) the split point.
df_train = Daily_Price.iloc[train_start:split_at]

# Testing set: the final `days_from_train` rows of the series.
df_test = Daily_Price.iloc[split_at:]

# Print the length of the training and testing datasets
print(len(df_train), len(df_test))

3535
371
3164 371


# Exploratory Data Analysis (EDA)

In [13]:
# Combine the training and testing datasets into a single DataFrame
working_data = [df_train, df_test]
working_data = pd.concat(working_data)

# Reset the index of the DataFrame
working_data = working_data.reset_index()

# Convert the 'Date' column to datetime format
working_data['Date'] = pd.to_datetime(working_data['Date'])

# Set the 'Date' column as the index of the DataFrame
working_data = working_data.set_index('Date')

# Perform seasonal decomposition on the 'Weighted_Price' values using a frequency of 60 (assuming daily data)
s = sm.tsa.seasonal_decompose(working_data.Weighted_Price.values, period = 60)