In [2]:
# Initial Imports
import pandas as pd
import numpy as np
from pathlib import Path
from dotenv import load_dotenv
import time
from joblib import dump,load # Save Models
from numpy import random
import os
from datetime import date, datetime, timedelta
import matplotlib.pyplot as plt

In [4]:
btcusd_6h_historical_csv = Path('df_candles_kraken_btcusd_6h_append_9102.csv')
btcusd_6h_historical = pd.read_csv(btcusd_6h_historical_csv,index_col="time")
btcusd_6h_historical
# make sure rows are every 6hrs, if there is no row-then make one and forward fill data (shrimpy doesn't print candle if there is no tick)

Unnamed: 0_level_0,close
time,Unnamed: 1_level_1
2013-10-06 18:00:00+00:00,122.00
2013-10-07 18:00:00+00:00,123.61
2013-10-08 00:00:00+00:00,124.18
2013-10-09 06:00:00+00:00,123.84
2013-10-10 18:00:00+00:00,125.86
...,...
2020-09-24 00:00:00+00:00,10286.70
2020-09-24 06:00:00+00:00,10410.80
2020-09-24 12:00:00+00:00,10676.40
2020-09-24 18:00:00+00:00,10740.20


In [5]:
# make sure rows are every 6hrs, if there is no row-then make one and forward fill data (shrimpy doesn't print candle if there is no tick)
def calc_feature_dataframe(prices_df):
    # ----------------------- Price Dynamics --------------------------------
    # x(t) = log(S(t))   where S(t) is the price of the instrument
    # ------------ Assumption: Returns of financial instruments are lognormally distributed
    # v(t) = R(t) = dx(t)/dt where v(t) is the velocity of the instrument in the log price space, x(t)
    
    ## cumulative returns as velocity
    ## Log returns as velocity
    ## partials?
    ## Lags?
    ## Technical Indicators
    
    df_features = prices_df
    df_features['returns'] = df_features['close'].pct_change()
    df_features['price_velocity_2'] = df_features['close'].pct_change(2)
    df_features['price_velocity_3'] = df_features['close'].pct_change(3)
    df_features['price_velocity_4'] = df_features['close'].pct_change(4)
    df_features['price_velocity_7'] = df_features['close'].pct_change(7)
    df_features['price_velocity_30'] = df_features['close'].pct_change(30)
    
    df_features['price_acceleration_1'] = df_features['returns'].pct_change(1)
    df_features['price_acceleration_2'] = df_features['price_velocity_2'].pct_change(2)
    df_features['price_acceleration_3'] = df_features['price_velocity_3'].pct_change(3)
    df_features['price_acceleration_4'] = df_features['price_velocity_4'].pct_change(4)
    df_features['price_acceleration_7'] = df_features['price_velocity_7'].pct_change(7)
    df_features['price_acceleration_30'] = df_features['price_velocity_30'].pct_change(30)

    df_features['rolling_mean_velocity_2'] = df_features['returns'].rolling(window=2).mean()
    df_features['rolling_mean_velocity_3'] = df_features['returns'].rolling(window=3).mean()
    df_features['rolling_mean_velocity_4'] = df_features['returns'].rolling(window=4).mean()
    df_features['rolling_mean_velocity_7'] = df_features['returns'].rolling(window=7).mean()
    df_features['rolling_mean_velocity_30'] = df_features['returns'].rolling(window=30).mean()
    
    df_features['rolling_mean_acceleration_2'] = df_features['price_acceleration_1'].rolling(window=2).mean()
    df_features['rolling_mean_acceleration_3'] = df_features['price_acceleration_1'].rolling(window=3).mean()
    df_features['rolling_mean_acceleration_4'] = df_features['price_acceleration_1'].rolling(window=4).mean()
    df_features['rolling_mean_acceleration_7'] = df_features['price_acceleration_1'].rolling(window=7).mean()
    df_features['rolling_mean_acceleration_30'] = df_features['price_acceleration_1'].rolling(window=30).mean()

    
    df_features.dropna(inplace=True)
    return df_features

In [6]:
df_features = calc_feature_dataframe(btcusd_6h_historical)

In [7]:
df_features.isna().sum()

close                           0
returns                         0
price_velocity_2                0
price_velocity_3                0
price_velocity_4                0
price_velocity_7                0
price_velocity_30               0
price_acceleration_1            0
price_acceleration_2            0
price_acceleration_3            0
price_acceleration_4            0
price_acceleration_7            0
price_acceleration_30           0
rolling_mean_velocity_2         0
rolling_mean_velocity_3         0
rolling_mean_velocity_4         0
rolling_mean_velocity_7         0
rolling_mean_velocity_30        0
rolling_mean_acceleration_2     0
rolling_mean_acceleration_3     0
rolling_mean_acceleration_4     0
rolling_mean_acceleration_7     0
rolling_mean_acceleration_30    0
dtype: int64

In [8]:
X = df_features.copy()
X.drop(["close", "returns"], axis=1, inplace=True)
y = df_features["returns"].values.reshape(-1, 1)

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)
scaler = MinMaxScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
# Fit the MinMaxScaler object with the target data Y
scaler.fit(y)

# Scale the target training and testing sets
y_train_scaled = scaler.transform(y_train)
y_test_scaled = scaler.transform(y_test)

## ML Models: Gradient Boost - Baseline Establishment & Feature Evaluation

In [54]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

# How did we choose estimators, learning_rates, depths, subsamples, random_states, n__jobs...?
GBR=GradientBoostingRegressor()
search_grid={'n_estimators':[500,1000,2000],'learning_rate':[.001,0.01,.1],'max_depth':[1,2,4],'subsample':[.5,.75,1],'random_state':[1]}
search=GridSearchCV(estimator=GBR,param_grid=search_grid,scoring='neg_mean_squared_error',n_jobs=1)

In [55]:
search.fit(X_train_scaled,y_train_scaled)
search.best_params_

  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**

{'learning_rate': 0.01,
 'max_depth': 4,
 'n_estimators': 2000,
 'random_state': 1,
 'subsample': 0.5}

In [None]:
# Best Parameters
{'learning_rate': 0.01,
 'max_depth': 4,
 'n_estimators': 2000,
 'random_state': 1,
 'subsample': 0.5}

In [10]:
score_gbr2

NameError: name 'score_gbr2' is not defined

In [None]:
GBR2.fit(X_trained_scaled, y_train_scaled)

In [None]:
# WHAT MATH IS GOING ON HERE? Linear Algebra
predictions = GBR2.predict(X_test_scaled)
predicted_returns = scaler.inverse_transform(predictions.reshape(-1,1))
real_returns = scaler.inverse_transform(y_test_scaled.reshape(-1, 1))

In [None]:
returns_gbr = pd.DataFrame({
    "Real": real_returns.ravel(),
    "y_test": y_test.ravel(),
    "Predicted": predicted_returns.ravel()
})
returns_gbr.head(5)


In [None]:
returns_gbr_final = returns_gbr[['Predicted', 'Real']]
returns_gbr_final.plot(figsize=[10,10])

## GBR Forecast

In [None]:
returns_gbr_final['forecast_class_predicted'] = pd.cut(returns_gbr.Predicted,
                     bins=[returns_gbr['Predicted'].min(), -.05, 0, .05, returns_gbr['Predicted'].max()],
                     labels=["very bear", "bear", "bull", "very bull"])
returns_gbr_final['forecast_class_real'] = pd.cut(returns_gbr.Real,
                     bins=[returns_gbr['Real'].min(), -.05, 0, .05, returns_gbr['Real'].max()],
                     labels=["very bear", "bear", "bull", "very bull"])
returns_gbr_final['Accuracy_Test'] = np.where(returns_gbr_final['forecast_class_predicted'] == returns_gbr_final['forecast_class_real'], True, False)
returns_gbr_final.head()

In [None]:
returns_gbr_final['Accuracy_Test'].value_counts()

In [None]:
accuracy = 198/(198+34)
print(accuracy)