# BTC price prediction with Linear Regression based on BTC Halving #
By Tanut Apiwong.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from datetime import date
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Load the Bitstamp BTC/USD 1-minute CSV into a DataFrame and preview it.
filename = '/kaggle/input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2020-09-14.csv'
df = pd.read_csv(filepath_or_buffer=filename)
df.head(n=5)

## Convert timestamp to Datetime ##

In [None]:
# The Timestamp column is parsed as seconds (unit='s') and stored as datetimes.
df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')

## Set Timestamp as an index of the dataframe ##

In [None]:
# Use the datetimes as the row index (the Timestamp column itself is kept).
df.index = df['Timestamp']
df.head(n=5)

## Find the percentage of missing values ##

In [None]:
# Percentage of rows whose 'Open' value is missing.
df['Open'].isnull().sum() / len(df) * 100

## Resample to daily period and find missing values ##

In [None]:
# Average the data per calendar day, then show the days that still
# contain at least one missing column.
df_day = df.resample('D').mean()
has_gap = df_day.isnull().any(axis=1)
df_day[has_gap]

In [None]:
# Rebuild the daily averages so this cell does not depend on the previous one.
df_day = df.resample('D').mean()

# Print every day that has at least one missing value, one row at a time.
incomplete_days = df_day[df_day.isnull().any(axis=1)]
for day, row in incomplete_days.iterrows():
    print(day)
    print(row)
    print()

# Show the two days printed for comparison: 2015-01-05 and 2015-01-09.
print()
print('Data at 2015-01-05')
print(df_day.loc['2015-01-05'])

print()
print('Data at 2015-01-09')
print(df_day.loc['2015-01-09'])

## Replace the missing values with the average over the surrounding days ##

In [None]:
# Mean of the numeric columns of the un-resampled frame `df` over
# 2015-01-05 .. 2015-01-09; this single row of values stands in for each
# of the three days filled below.
missing_replacement = df.loc['2015-01-05':'2015-01-09'].mean(numeric_only=True)
for gap_day in ('2015-01-06', '2015-01-07', '2015-01-08'):
    df_day.loc[gap_day] = missing_replacement

# Any missing value?
df_day.isnull().sum()

## Make a dataset of each Bitcoin halving ##

In [None]:
# Split the daily series into one frame per halving era.
# Boundaries are the halving dates: 2012-11-28, 2016-07-09 and 2020-05-11.
# The second boundary must be 2016-07-09 (not 2016-09-09), otherwise two
# months of post-halving prices are attributed to the previous era.
# Label slices with .loc include both end points, so each boundary day
# appears in the two neighbouring frames.
df_halving1 = df_day.loc['2011-12-31':'2012-11-28']
df_halving2 = df_day.loc['2012-11-28':'2016-07-09']
df_halving3 = df_day.loc['2016-07-09':'2020-05-11']
# Everything from the third halving onwards is held out for evaluation.
df_test = df_day.loc['2020-05-11':]

## Shape of each dataframe ##

In [None]:
# Shape of the full daily frame followed by the row count (as a 1-tuple)
# of each per-era frame and of the held-out frame.
era_frames = (df_halving1, df_halving2, df_halving3, df_test)
print(df_day.shape, *(frame.index.values.shape for frame in era_frames))


## The 1st Bitcoin halving ##

In [None]:
# Fit a straight line (price vs. date) through the daily weighted price of
# the first halving era.
reg_halving1 = LinearRegression()
X = df_halving1.index.values.reshape(-1, 1)
y = df_halving1.Weighted_Price.values.reshape(-1, 1)
reg_halving1.fit(X, y)

# A straight line is fully described by its two end points, so only the
# first and last date of the era are predicted. The dates are cast to float
# for the model and kept as dates for the x-axis.
x_halving_float1 = [X[0].astype('float'), X[-1].astype('float')]
x_halving_date1 = [X[0], X[-1]]
predict_halving1 = reg_halving1.predict(x_halving_float1)

# Open exactly one figure; a second plt.figure() call would leave an
# empty figure behind.
plt.figure(figsize=(22, 10))
plt.title('Bitcoin Prices Prediction Halving #1')
plt.xlabel('Dates')
plt.ylabel('Prices')
plt.plot(X, y, label='Actual Price')
plt.plot(x_halving_date1, predict_halving1, 'red', label='Predicted Price')
plt.legend()
plt.show()

# Slope and intercept of the fitted line, then the predicted price at the
# start and at the end of the era.
print(reg_halving1.coef_, reg_halving1.intercept_)
print(predict_halving1[0], predict_halving1[1])

## The 2nd Bitcoin halving ##

In [None]:
# Fit a straight line (price vs. date) through the daily weighted price of
# the second halving era.
reg_halving2 = LinearRegression()
X = df_halving2.index.values.reshape(-1, 1)
y = df_halving2.Weighted_Price.values.reshape(-1, 1)
reg_halving2.fit(X, y)

# A straight line is fully described by its two end points, so only the
# first and last date of the era are predicted. The dates are cast to float
# for the model and kept as dates for the x-axis.
x_halving_float2 = [X[0].astype('float'), X[-1].astype('float')]
x_halving_date2 = [X[0], X[-1]]
predict_halving2 = reg_halving2.predict(x_halving_float2)

# Open exactly one figure; a second plt.figure() call would leave an
# empty figure behind.
plt.figure(figsize=(22, 10))
plt.title('Bitcoin Prices Prediction Halving #2')
plt.xlabel('Dates')
plt.ylabel('Prices')
plt.plot(X, y, label='Actual Price')
plt.plot(x_halving_date2, predict_halving2, 'green', label='Predicted Price')
plt.legend()
plt.show()

# Slope and intercept of the fitted line, then the predicted price at the
# start and at the end of the era.
print(reg_halving2.coef_, reg_halving2.intercept_)
print(predict_halving2[0], predict_halving2[1])

## The 3rd Bitcoin halving ##

In [None]:
# Linear trend of the daily weighted price over the third halving era.
X = df_halving3.index.values.reshape(-1, 1)
y = df_halving3.Weighted_Price.values.reshape(-1, 1)
reg_halving3 = LinearRegression()
reg_halving3.fit(X, y)

# Only the era's first and last dates are needed to draw the fitted line:
# once as floats for the model, once as dates for the x-axis.
x_halving_float3 = [X[0].astype('float'), X[-1].astype('float')]
x_halving_date3 = [X[0], X[-1]]
predict_halving3 = reg_halving3.predict(x_halving_float3)

plt.figure(figsize=(22, 10))
plt.title('Bitcoin Prices Prediction Halving #3')
plt.xlabel('Dates')
plt.ylabel('Prices')
plt.plot(X, y, label='Actual Price')
plt.plot(x_halving_date3, predict_halving3, 'blue', label='Predicted Price')
plt.legend()
plt.show()

# Float end points fed to the model, the fitted slope/intercept, and the
# predicted price at each end of the era.
print(x_halving_float3)
print(reg_halving3.coef_, reg_halving3.intercept_)
print(predict_halving3[0], predict_halving3[1])

## Find the 4th Bitcoin halving's COEF ##

In [None]:
# Extrapolate the slope of the fourth era from the slopes fitted for
# eras 1-3, using the era number and its powers up to 4 as features.
# NOTE(review): this gives 4 features for only 3 samples, so the fit is not
# uniquely determined by the data - confirm this is intended.
x = np.arange(1, 4).reshape(-1, 1)
X = np.hstack([x ** power for power in range(1, 5)])
coef = np.array([
    era_model.coef_[0][0]
    for era_model in (reg_halving1, reg_halving2, reg_halving3)
])
y = coef

reg = LinearRegression()
reg.fit(X, y)

# Append the feature row for era 4 and predict all four eras at once.
x_pred = np.vstack([X, [4, 16, 64, 256]])
y_pred = reg.predict(x_pred)

plt.plot(np.arange(1, 5), y_pred, 'red', label='Predicted COEF')
plt.scatter(x, y, label='Actual COEF')
plt.legend()
plt.show()

# The last prediction is the extrapolated slope for era 4.
coef_halving_4 = y_pred[-1]
print(coef_halving_4)

## Find the 4th Bitcoin halving's starting price ##

In [None]:
# Extrapolate the starting price of the fourth era from the fitted
# start-of-era prices of eras 1-3, with the era number and its powers
# up to 4 as features.
x = np.arange(1, 4).reshape(-1, 1)
X = np.hstack([x ** power for power in range(1, 5)])
y = np.array([
    era_prediction[0]
    for era_prediction in (predict_halving1, predict_halving2, predict_halving3)
])

reg = LinearRegression()
reg.fit(X, y)

# Append the feature row for era 4 and predict all four eras at once.
x_pred = np.vstack([X, [4, 16, 64, 256]])
y_pred = reg.predict(x_pred)

plt.plot(np.arange(1, 5), y_pred, 'red', label='Predicted Price')
plt.scatter(x, y, label='Actual Price')
plt.legend()
plt.show()

# The last prediction is the extrapolated starting price for era 4.
predict_start_halving4 = y_pred[-1]
print(predict_start_halving4)

## Making LR pricing model for the 4th Bitcoin halving ##

In [None]:
# Actual daily weighted prices of the held-out period (df_test).
X = df_test.index.values.reshape(-1, 1)
y = df_test.Weighted_Price.values.reshape(-1, 1)
plt.figure(figsize=(22, 10))
plt.title('Bitcoin Prices Prediction Halving #4')
plt.xlabel('Dates')
plt.ylabel('Prices')
plt.plot(X, y, label='Actual Price')

# Build the era-4 line by hand instead of fitting it: the slope is the
# extrapolated coefficient, and the intercept is chosen so that the line
# passes through the extrapolated starting price on the first test date.
reg_halving4 = LinearRegression()
reg_halving4.coef_ = np.array([coef_halving_4])
intercept_halving_4 = predict_start_halving4 - coef_halving_4 * X[0].astype('float')
reg_halving4.intercept_ = np.array([intercept_halving_4])

print(reg_halving4.coef_)
print(reg_halving4.intercept_)

# reshape(-1, 1) brings the prediction back to one column, matching `y`.
predict_halving4 = reg_halving4.predict(X.astype(float)).reshape(-1, 1)
plt.plot(X, predict_halving4, 'orange', label='Predicted Price Halving #4')
plt.legend()
plt.show()

x_halving_date4 = np.array(X)

# Evaluation metrics. The RMSE is derived from the MSE with np.sqrt rather
# than through mean_squared_error(..., squared=False): the `squared`
# keyword is deprecated and absent from recent scikit-learn releases.
mse_halving4 = mean_squared_error(y, predict_halving4)
rmse_halving4 = np.sqrt(mse_halving4)
r2_halving4 = r2_score(y, predict_halving4)

# Displayed as (RMSE, MSE, R2).
rmse_halving4, mse_halving4, r2_halving4

## RMSE: 1063.4555647475888 ##
## MSE: 1130937.7381926132 ##
## R2: -0.07537844862939291 ##

## Plot all together ##

In [None]:
# Full daily price history with the four per-era trend lines on top.
X = df_day.index.values.reshape(-1, 1)
y = df_day.Weighted_Price.values.reshape(-1, 1)

plt.figure(figsize=(22, 10))
plt.title('Bitcoin Prices Prediction')
plt.xlabel('Dates')
plt.ylabel('Prices')
plt.plot(X, y, label='Actual Price')

# One (dates, predicted prices, colour, legend label) entry per era.
trend_lines = (
    (x_halving_date1, predict_halving1, 'red', 'Predicted Price Halving #1'),
    (x_halving_date2, predict_halving2, 'green', 'Predicted Price Halving #2'),
    (x_halving_date3, predict_halving3, 'blue', 'Predicted Price Halving #3'),
    (x_halving_date4, predict_halving4, 'orange', 'Predicted Price Halving #4'),
)
for era_dates, era_prices, colour, legend_label in trend_lines:
    plt.plot(era_dates, era_prices, colour, label=legend_label)

# Vertical black marker from price 0 to 20000.
# NOTE(review): the marker sits on 2020-05-10 while the test split starts
# on 2020-05-11 - confirm which date is intended.
marker_day = date(2020, 5, 10)
plt.plot([marker_day, marker_day], [0, 20000], 'black')

plt.legend()
plt.show()