In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot  as plt

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, classification_report

In [None]:
# Read the CSV at Data/BTC-USD.csv into a DataFrame.
dataset = pd.read_csv("Data/BTC-USD.csv")

# Show (rows, columns), then preview the first five records.
print(dataset.shape)
dataset.head(n=5)

In [None]:
# Summary statistics for the loaded frame.
summary_stats = dataset.describe()
summary_stats

In [None]:
####### Preprocessing #######

In [None]:
#### Scaling ####
# Columns that are min-max scaled.
scaled_columns = ['Open', 'High', 'Low', 'Close', 'Volume']

# Build a brand-new frame from the scaler output instead of assigning into a
# bare `dataset[[...]]` selection: that pattern triggers pandas'
# SettingWithCopyWarning and makes it unclear whether `dataset` is modified.
# `copy=False` is dropped as well so the scaler never writes into its input.
scaler = MinMaxScaler()
scaled_data = pd.DataFrame(
    scaler.fit_transform(dataset[scaled_columns]),
    columns=scaled_columns,
    index=dataset.index,
)

# NOTE(review): the scaler is fit on every row, before the train/test split
# further down, so min/max values from the test rows influence the training
# features. Consider fitting on the training rows only.
scaled_data

In [None]:
# Features: scaled Open/High/Low/Volume. Target: scaled Close.
feature_columns = ['Open', 'High', 'Low', 'Volume']
X = scaled_data[feature_columns]
y = scaled_data['Close']

# shuffle=False keeps the rows in file order: the leading 70% become the
# training split and the trailing 30% the test split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    shuffle=False,
)

# Shapes in the order: X_train, y_train, X_test, y_test.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")

In [None]:
####### Linear Regression #######

In [None]:
#### Training ####
# Fit a linear regression on the training split (`fit` returns the estimator).
linReg = LinearRegression().fit(X_train, y_train)

# One coefficient per feature column, followed by the intercept term.
print("Coefficients: \n", linReg.coef_)
print("Intercept: \n", linReg.intercept_)

In [None]:
#### Testing ####
# Predict the test rows and place the predictions next to the actual values.
y_pred_lin = linReg.predict(X_test)
dfp = pd.DataFrame(
    {
        'Actual_Price': y_test,
        'Predicted_Price': y_pred_lin,
    }
)
dfp.head()

In [None]:
# Evaluate the linear model on the test split.
# `LinearRegression.score` returns the coefficient of determination (R^2).
reg_score = linReg.score(X_test, y_test)
print("Linear Regression Score: ", reg_score)
# Label corrected: the value printed here is the mean absolute error
# (it was previously captioned "Absolute Squared Error").
print("Mean Absolute Error: ", mean_absolute_error(y_test, y_pred_lin))
print("Mean Squared Error: ", mean_squared_error(y_test, y_pred_lin))

In [None]:
# Rebuild the model output by hand: weight each test feature by its learned
# coefficient and add the intercept.
# NOTE: this rebinds `y`, which earlier held the regression target column.
w_open, w_high, w_low, w_volume = linReg.coef_
y = (
    w_open * X_test['Open']
    + w_high * X_test['High']
    + w_low * X_test['Low']
    + w_volume * X_test['Volume']
    + linReg.intercept_
)

In [None]:
#### Plotting ####
# One integer position per test row. np.linspace(0, n, n) spaced the n points
# n/(n-1) apart, so its x values were not row positions.
row_positions = np.arange(y_test.size)

# Explicit figure/axes handles so labels and title attach to this plot.
fig, ax = plt.subplots(1, figsize=(15, 15))
ax.plot(row_positions, y_test, color='orange', label='Actual Data')
ax.plot(row_positions, y, label='Best fit line')
ax.set_xlabel('Test row position')
ax.set_ylabel('Close (min-max scaled)')
ax.set_title('Linear regression: actual vs. fitted Close on the test split')
ax.grid(color='#000000', linestyle='-', linewidth=0.5)
ax.legend(loc="upper left");

In [None]:
# Scatter of the actual values (x) against the predicted values (y) held in `dfp`.
actual_values = dfp['Actual_Price']
predicted_values = dfp['Predicted_Price']
plt.scatter(actual_values, predicted_values)

In [None]:
####### Logistic Regression #######

In [None]:
X_log = scaled_data[['Open', 'High', 'Low', 'Volume']]

# Binary target: 1 when Close is strictly greater than the previous row's
# Close, otherwise 0. The first row has no predecessor (the comparison against
# NaN is False), so it is labelled 0.
# `.astype(int)` replaces `np.where(tmp == False, 0, 1)`: it avoids the
# `== False` comparison and the throwaway `tmp` name that was rebound from a
# boolean Series to an integer array.
close_increased = scaled_data['Close'].gt(scaled_data['Close'].shift())
y_log = close_increased.astype(int).to_numpy()
y_log

In [None]:
# Unshuffled 70/30 split of the classification features and the 0/1 target.
X_train_log, X_test_log, y_train_log, y_test_log = train_test_split(
    X_log,
    y_log,
    test_size=0.3,
    random_state=42,
    shuffle=False,
)

# Shapes in the order: X_train_log, y_train_log, X_test_log, y_test_log.
for split_part in (X_train_log, y_train_log, X_test_log, y_test_log):
    print(split_part.shape)

In [None]:
#### Training ####
logReg = LogisticRegression()
logReg.fit(X_train_log, y_train_log)

# `predict_proba` returns one probability column per class, i.e. a 2-D array
# for this 0/1 target. Metrics and plots that compare predictions element-wise
# with the 1-D `y_test_log` need hard class labels, so `y_pred_log` holds the
# output of `predict`; the probabilities stay available separately.
y_proba_log = logReg.predict_proba(X_test_log)
y_pred_log = logReg.predict(X_test_log)
y_pred_log

In [None]:
# Side-by-side view of the logistic model's actual and predicted 0/1 labels.
# This cell previously rebuilt `dfp` from the linear-regression variables
# (`y_test`, `y_pred_lin`), duplicating the earlier table inside the logistic
# section; `dfp` is no longer reassigned here.
dfp_log = pd.DataFrame(
    {
        'Actual_Direction': y_test_log,
        # Hard labels straight from the model: each column must be 1-D.
        'Predicted_Direction': logReg.predict(X_test_log),
    },
    index=X_test_log.index,
)
dfp_log.head()

In [None]:
#### Testing ####
# The metrics below compare predictions element-wise with the 1-D
# `y_test_log`, so they need hard 0/1 labels of the same shape; take them
# straight from the fitted model.
y_label_log = logReg.predict(X_test_log)

print("Coefficients: \n", logReg.coef_)
# With 0/1 labels the mean squared error equals the fraction of misclassified rows.
print("Mean squared error: ", mean_squared_error(y_test_log, y_label_log))
# classification_report returns a multi-line table; print it on its own lines
# so the column headers stay aligned with the rows.
print("Classification report: ")
print(classification_report(y_test_log, y_label_log))

In [None]:
# Inspect the scaled Close column.
scaled_data.loc[:, 'Close']

In [None]:
# Number of labels in the logistic-regression test split (a 1-D array).
len(y_test_log)

In [None]:
#### Plotting ####
# Positions are derived from the test split instead of the hard-coded 549, and
# np.arange yields integer row positions (np.linspace(0, n, n) does not).
row_positions_log = np.arange(y_test_log.size)

# scatter needs exactly one y value per x value, so plot hard 0/1 labels
# obtained directly from the fitted model.
predicted_labels = logReg.predict(X_test_log)

fig, ax = plt.subplots(1, figsize=(15, 15))
ax.scatter(row_positions_log, y_test_log, color='red', label='Actual Data')
# Both series only take the values 0 and 1, so a different marker keeps
# coinciding points distinguishable.
ax.scatter(row_positions_log, predicted_labels, marker='x', label='Predicted Data')
ax.set_xlabel('Test row position')
ax.set_ylabel('Class label (1 = Close higher than previous row)')
ax.set_title('Logistic regression: actual vs. predicted labels on the test split')
ax.grid(color='#000000', linestyle='-', linewidth=0.5)
ax.legend(loc="upper left");