## Using Linear models (from `sklearn.linear_model`) on crypto price trend prediction

In [1]:
# Preliminary code needed for importing from the parent directory
# (so that `from data import series` below resolves).
# Inside a Jupyter kernel, `inspect.getfile(inspect.currentframe())` yields a
# synthetic per-cell name rather than the notebook's path, so the working
# directory is used as the notebook location instead.
# NOTE(review): assumes the kernel's working directory is the notebook's own
# directory (the usual Jupyter behaviour) — confirm for other launchers.
import os,sys,inspect
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
# Keep the parent directory first on the search path, but do not stack a new
# duplicate entry every time this cell is re-run.
if not sys.path or sys.path[0] != parentdir:
    sys.path.insert(0, parentdir)

# Import Karan's data API
from data import series

import numpy as np

# This classifier first converts the target values into {-1, 1} and then treats the problem as a regression task
# (multi-output regression in the multiclass case).
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split

In [2]:
# Load the BTC-USDT pair on 1h candles through the `series` data API.
PAIR = 'BTCUSDT'
CANDLE_INTERVAL = '1h'
btc1h = series.DataSeries(PAIR, CANDLE_INTERVAL)
data = btc1h.getData()
print(data.keys())  # names of the fields returned for the series

# Closing prices are the basis for the labels and indicators built below;
# the printed length is the number of closing prices loaded.
price_close = data['close']
print(len(price_close))

dict_keys(['_', 'open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_asset_volume', 'num_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'])
30673


In [3]:
# Number of earlier closes that each pivot close is compared against.
# NOTE: the feature-building cell aligns `y` using `time_cut - 5`; keep the two
# values in sync if this one is changed.
LOOKBACK = 5

n_frames = len(price_close)
assert n_frames > LOOKBACK, 'need more than LOOKBACK closing prices to build labels'

# Row 0 holds the pivot close and row k the close k timeframes earlier.
# Every row has n_frames - LOOKBACK columns, and column i describes the
# timeframe price_close[i + LOOKBACK].
prev5 = np.stack(
    [price_close[LOOKBACK - lag : n_frames - lag] for lag in range(LOOKBACK + 1)],
    axis = 0
)

# Generate truth values (y): True when the pivot close is strictly above the
# lowest of the LOOKBACK preceding closes, False otherwise.
y = prev5[0, :] > np.amin(prev5[1:, :], axis = 0)
print(y.shape)
# y is already a boolean array, so it can be used directly as a mask.
print('number of times where trend is up: ', y[y].shape)
print('number of times where trend is down: ', y[~y].shape)

(30668,)
number of times where trend is up:  (24655,)
number of times where trend is down:  (6013,)


In [4]:
# Indicators to register on the series, as (name, positional arguments) pairs.
# They are registered in the order listed here.
indicator_specs = [
    ('RSI', (data['close'], 30)),  # 30-timeframe RSI
    ('EMA', (data['close'], 30)),  # 30-timeframe EMA
    # (disabled) ('EMA', (btc1h.getData()['close'], 50))  # 50-timeframe EMA
    ## MFI: https://www.investopedia.com/terms/m/mfi.asp
    ('MFI', (data['high'], data['low'], data['close'], data['volume'], 10)),  # 10-timeframe MFI
    ## MACD: https://www.investopedia.com/terms/m/macd.asp
    ('MACD', (data['close'], 12, 26)),  # fast = 12, slow = 26
]
for indicator_name, indicator_args in indicator_specs:
    btc1h.addIndicator(indicator_name, *indicator_args)

# List the names of the indicators now available on the series.
indicators = btc1h.getIndicators()
for indicator in indicators.keys():
    print(indicator)

RSI
EMA
MFI
MACD


In [5]:
time_cut = 50      # number of leading samples dropped from every indicator series
label_offset = 5   # y[i] labels price_close[i + 5], so y starts 5 samples after the prices

# Each technical indicator forms one column of X.
# NOTE(review): time_cut presumably skips the indicators' warm-up period — confirm.
X = np.column_stack(
    (
        indicators['RSI'][time_cut:],
        indicators['EMA'][time_cut:],
        indicators['MFI'][time_cut:],
    )
)

# Drop the matching leading labels so that row j of X and y_truncate[j] refer to
# the same timeframe (assuming each indicator series is index-aligned with
# price_close — TODO confirm).
y_truncate = y[(time_cut - label_offset):]
assert X.shape[0] == y_truncate.shape[0], 'features and labels are misaligned'

# Split train/test sets. A fixed seed keeps the split, and therefore the
# accuracies reported further down, reproducible across kernel restarts.
# NOTE(review): train_test_split shuffles by default, so test rows are
# interleaved in time with training rows; consider shuffle=False for a
# chronological hold-out.
X_train, X_test, y_train, y_test = train_test_split(X, y_truncate, random_state = 0)
print('train set size:', y_train.shape)
print('test set size:', y_test.shape)

train set size: (22967,)
test set size: (7656,)


## Ridge Classifier

In [6]:
# Fit a ridge classifier with its default settings on the training split.
ridge_clf = RidgeClassifier()
ridge_clf.fit(X_train, y_train)

# Score the fitted model on the data it was trained on and on the held-out data.
ridge_train_score = ridge_clf.score(X_train, y_train)
ridge_test_score = ridge_clf.score(X_test, y_test)

print('Accuracy of Ridge Classifier')
print('training set accuracy:', ridge_train_score)
print('test set accuracy:', ridge_test_score)

Accuracy of Ridge Classifier
training set accuracy: 0.8060260373579483
test set accuracy: 0.8047283176593522


## Logistic Regression

In [7]:
# Fit a logistic regression (seeded with random_state=0) on the training split.
logi_clf = LogisticRegression(random_state=0)
logi_clf.fit(X_train, y_train)

# Score the fitted model on the data it was trained on and on the held-out data.
logi_train_score = logi_clf.score(X_train, y_train)
logi_test_score = logi_clf.score(X_test, y_test)

print('Accuracy of Logistic Regression')
print('training set accuracy:', logi_train_score)
print('test set accuracy:', logi_test_score)

Accuracy of Logistic Regression
training set accuracy: 0.8045891931902295
test set accuracy: 0.8025078369905956


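According to the above results from `Ridge Classifier` & `Logistic Regression`, these **linear** models reach roughly 80% accuracy on both the training and test sets, so at first glance they appear to be working quite well on the price data. Note, however, that about 80.4% of the labels in the full label set (24655 of 30668) are "up", so a baseline that always predicts "up" would score about the same.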