In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score
from sklearn.svm import SVC
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier 
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix , classification_report
import time


In [2]:
df = pd.read_csv('data/BTC.csv')
df

Unnamed: 0,timestamp,open,high,low,close,volume
0,2018-01-01 00:00:00+00:00,13850.49,13921.53,12877.67,13444.88,1.057522e+09
1,2018-01-02 00:00:00+00:00,13444.88,15306.13,12934.16,14754.13,1.956783e+09
2,2018-01-03 00:00:00+00:00,14754.09,15435.01,14579.71,15156.62,1.604207e+09
3,2018-01-04 00:00:00+00:00,15156.49,15408.66,14244.67,15180.08,1.656715e+09
4,2018-01-05 00:00:00+00:00,15180.08,17126.95,14832.36,16954.78,2.283989e+09
...,...,...,...,...,...,...
1915,2023-03-31 00:00:00+00:00,28037.46,28650.47,27541.23,28477.29,1.436740e+09
1916,2023-04-01 00:00:00+00:00,28477.29,28810.95,28265.42,28465.30,5.632994e+08
1917,2023-04-02 00:00:00+00:00,28465.30,28538.36,27880.95,28186.76,5.925811e+08
1918,2023-04-03 00:00:00+00:00,28186.76,28494.64,27290.26,27810.08,1.467725e+09


In [3]:
df['pct_change'] = df['close'].pct_change(periods=3).mul(100)
df['target'] = 0
df.loc[df['pct_change'] > 2, 'target'] = 1
df.loc[df['pct_change'] < -2, 'target'] = -1
df['target'].value_counts()
df.drop(['pct_change'], axis = 1, inplace = True)
# df.drop(['close'], axis=1, inplace=True)
df.fillna(0, inplace=True)

In [4]:
df['target'].value_counts()

 1    671
 0    643
-1    606
Name: target, dtype: int64

In [5]:
result_df = pd.DataFrame(index=df.index)
result_df['timestamp'] = df['timestamp']

result_df['close'] = df['close']
result_df['target'] = df['target']
result_df.fillna(0, inplace=True)

result_df.head()

Unnamed: 0,timestamp,close,target
0,2018-01-01 00:00:00+00:00,13444.88,0
1,2018-01-02 00:00:00+00:00,14754.13,0
2,2018-01-03 00:00:00+00:00,15156.62,0
3,2018-01-04 00:00:00+00:00,15180.08,1
4,2018-01-05 00:00:00+00:00,16954.78,1


In [6]:
result_df['timestamp'] = pd.to_datetime(result_df['timestamp']).dt.date
result_df

Unnamed: 0,timestamp,close,target
0,2018-01-01,13444.88,0
1,2018-01-02,14754.13,0
2,2018-01-03,15156.62,0
3,2018-01-04,15180.08,1
4,2018-01-05,16954.78,1
...,...,...,...
1915,2023-03-31,28477.29,1
1916,2023-04-01,28465.30,0
1917,2023-04-02,28186.76,0
1918,2023-04-03,27810.08,-1


In [7]:
opt_params = pd.DataFrame(index=['chaikin_oscillator','macd_strategy','williams_r','mfi','stockhastic_oscillator','keltner_channels','cci'],
                          columns=['short','long','slow','fast','period','high','low','oversold','overbought','n','m'])
opt_params

Unnamed: 0,short,long,slow,fast,period,high,low,oversold,overbought,n,m
chaikin_oscillator,,,,,,,,,,,
macd_strategy,,,,,,,,,,,
williams_r,,,,,,,,,,,
mfi,,,,,,,,,,,
stockhastic_oscillator,,,,,,,,,,,
keltner_channels,,,,,,,,,,,
cci,,,,,,,,,,,


In [8]:
def chaikin_oscillator(df: pd.DataFrame, result_df: pd.DataFrame, short: int = 3, long: int = 10) -> pd.DataFrame:
    # calculate the Chaikin Oscillator
    adl = ((2 * df['close'] - df['high'] - df['low']) / (df['high'] - df['low'])) * df['volume']
    adl_3_ema = adl.ewm(span=short).mean()
    adl_10_ema = adl.ewm(span=long).mean()
    chaikin_oscillator = adl_3_ema - adl_10_ema
    # Trading signals -> 1 indicates Buy, 0 indicates Hold and -1 indicates Sell
    result_df['Chaikin_oscillator'] = chaikin_oscillator
    return result_df
def data_preprocess(df, indicator_name):
    df.fillna(0, inplace=True)
    X = df[indicator_name]
    y = df['target']
    X = np.reshape(X.values, (-1, 1))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=15, stratify=None, shuffle= False)
    return X_train, X_test, y_train, y_test
def run_ml(X_train, X_test, y_train, y_test, results):
    model = RandomForestClassifier(max_depth= 7, max_features= 'sqrt',n_estimators= 50, n_jobs= -1,
                                   min_samples_leaf= 2, min_samples_split= 10,
                                   random_state= 42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results['Accuracy'] = accuracy
    return results

res = pd.DataFrame()
results = pd.DataFrame({'Accuracy'})
start = time.time()
for short in range(1,20):
    for long in range(short,40):
        result_df = chaikin_oscillator(df, result_df, short, long)
        X_train, X_test, y_train, y_test = data_preprocess(result_df, 'Chaikin_oscillator')
        results = run_ml(X_train, X_test, y_train, y_test, results)
        r = pd.DataFrame({'Short': short, 'Long': long, 'Accuracy': results['Accuracy']}) # type: ignore
        res = pd.concat([r, res])
end = time.time()
print(f"Total Time: {end - start}")

Total Time: 174.48530554771423


In [9]:
res.sort_values(by='Accuracy', ascending=False).head(0)

Unnamed: 0,Short,Long,Accuracy


In [10]:
res.sort_values(by='Accuracy', ascending=False).head(1).Short.values[0]

6

In [11]:
opt_params.loc['chaikin_oscillator', 'short'] = res.sort_values(by='Accuracy', ascending=False).head(1).Short.values[0]
opt_params.loc['chaikin_oscillator', 'long'] = res.sort_values(by='Accuracy', ascending=False).head(1).Long.values[0]
opt_params

Unnamed: 0,short,long,slow,fast,period,high,low,oversold,overbought,n,m
chaikin_oscillator,6.0,39.0,,,,,,,,,
macd_strategy,,,,,,,,,,,
williams_r,,,,,,,,,,,
mfi,,,,,,,,,,,
stockhastic_oscillator,,,,,,,,,,,
keltner_channels,,,,,,,,,,,
cci,,,,,,,,,,,


In [12]:
def macd_strategy(data,result_df, fast_period=12, slow_period=26, signal_period=9):

    ema_fast = df['close'].ewm(span=fast_period, adjust=False).mean()
    ema_slow = df['close'].ewm(span=slow_period, adjust=False).mean()
    macd = ema_fast - ema_slow
    signal = macd.ewm(span=signal_period, adjust=False).mean()
    # Generate trading signals based on the MACD and signal line
    result_df['MACD'] = 0
    result_df['MACD'] = np.where(macd > signal, 1, 0)
    result_df['MACD'] = np.where(macd < signal, -1, result_df['MACD'])
    return result_df
result_df = macd_strategy(df, result_df)

res = pd.DataFrame()
results = pd.DataFrame({'Accuracy'})
start = time.time()
for slow_period in range(1,10,2):
    for fast_period in range(slow_period, 50, 2):
        for signal_period in range(slow_period, 50, 2):
            result_df = macd_strategy(df, result_df, fast_period, slow_period, signal_period)
            X_train, X_test, y_train, y_test = data_preprocess(result_df, 'MACD')
            results = run_ml(X_train, X_test, y_train, y_test, results)
            r = pd.DataFrame({'signal_period': signal_period, 'slow_period': slow_period, 'fast_period':fast_period, 'Accuracy': results['Accuracy']}) # type: ignore
            res = pd.concat([r, res])
end = time.time()
print(f"Total Time: {end - start}")

Total Time: 393.8867335319519


In [13]:
res.sort_values(by='Accuracy', ascending=False).head()

Unnamed: 0,signal_period,slow_period,fast_period,Accuracy
0,3,3,47,0.581597
0,3,3,41,0.579861
0,7,3,5,0.579861
0,5,3,7,0.579861
0,3,3,49,0.579861


In [14]:
opt_params.loc['macd_strategy', 'slow'] = res.sort_values(by='Accuracy', ascending=False).head(1).slow_period.values[0]
opt_params.loc['macd_strategy', 'fast'] = res.sort_values(by='Accuracy', ascending=False).head(1).fast_period.values[0]
opt_params.loc['macd_strategy', 'period'] = res.sort_values(by='Accuracy', ascending=False).head(1).signal_period.values[0]
opt_params

Unnamed: 0,short,long,slow,fast,period,high,low,oversold,overbought,n,m
chaikin_oscillator,6.0,39.0,,,,,,,,,
macd_strategy,,,3.0,47.0,3.0,,,,,,
williams_r,,,,,,,,,,,
mfi,,,,,,,,,,,
stockhastic_oscillator,,,,,,,,,,,
keltner_channels,,,,,,,,,,,
cci,,,,,,,,,,,


In [15]:
def williams_r(df: pd.DataFrame,result_df, period: int = 14, high:int = -80, low:int = -20) -> pd.DataFrame:
    highest_high = df['high'].rolling(window = period).max()
    lowest_low = df['low'].rolling(window = period).min()
    wpr = -100 * (highest_high - df['close']) / (highest_high - lowest_low)
    result_df['williams_percentage'] = np.where(wpr < high, 1, 0)
    result_df['williams_percentage'] = np.where(wpr > low, -1, result_df['williams_percentage'])
    return result_df


res = pd.DataFrame()
results = pd.DataFrame({'Accuracy'})
start = time.time()
for high in range(-100,-60,2):
    for low in range(-40,-10, 2):
        for period in range(1,16):
            result_df = williams_r(df, result_df, period, high, low)
            X_train, X_test, y_train, y_test = data_preprocess(result_df, 'williams_percentage')
            results = run_ml(X_train, X_test, y_train, y_test, results)
            r = pd.DataFrame({'High': high, 'Low': low, 'Period':period, 'Accuracy': results['Accuracy']}) # type: ignore
            res = pd.concat([r, res])
end = time.time()
print(f"Total Time: {end - start}")

Total Time: 744.4945123195648


In [16]:
res.sort_values(by='Accuracy', ascending=False).head(3)

Unnamed: 0,High,Low,Period,Accuracy
0,-62,-32,4,0.71875
0,-70,-32,4,0.717014
0,-72,-28,5,0.715278


In [17]:
res.sort_values(by='Accuracy', ascending=False).tail(3)

Unnamed: 0,High,Low,Period,Accuracy
0,-92,-14,1,0.328125
0,-94,-12,1,0.310764
0,-96,-12,1,0.302083


In [18]:
opt_params.loc['williams_r', 'high'] = res.sort_values(by='Accuracy', ascending=False).head(1).High.values[0]
opt_params.loc['williams_r', 'low'] = res.sort_values(by='Accuracy', ascending=False).head(1).Low.values[0]
opt_params.loc['williams_r', 'period'] = res.sort_values(by='Accuracy', ascending=False).head(1).Period.values[0]
opt_params

Unnamed: 0,short,long,slow,fast,period,high,low,oversold,overbought,n,m
chaikin_oscillator,6.0,39.0,,,,,,,,,
macd_strategy,,,3.0,47.0,3.0,,,,,,
williams_r,,,,,4.0,-62.0,-32.0,,,,
mfi,,,,,,,,,,,
stockhastic_oscillator,,,,,,,,,,,
keltner_channels,,,,,,,,,,,
cci,,,,,,,,,,,


In [19]:
def money_flow_index(df: pd.DataFrame, result_df: pd.DataFrame, period: int = 14) -> pd.DataFrame:

    typical_price = (df['high'] + df['low'] + df['close']) / 3
    raw_money_flow = typical_price * df['volume']
    positive_flow = np.where(typical_price > typical_price.shift(1), raw_money_flow, 0)
    negative_flow = np.where(typical_price < typical_price.shift(1), raw_money_flow, 0)
    positive_mf = pd.Series(positive_flow).rolling(period).sum()
    negative_mf = pd.Series(negative_flow).rolling(period).sum()
    mfi = 100 - (100 / (1 + (positive_mf / negative_mf)))
    mfi_signal = pd.Series(mfi).rolling(period).mean()
    result_df['MFI'] = np.where(mfi > mfi_signal, 1, 0)
    result_df['MFI'] = np.where(mfi < mfi_signal, -1, result_df['MFI'])
    return result_df

res = pd.DataFrame()
results = pd.DataFrame({'Accuracy'})
start = time.time()
for period in range(1,31):
    result_df = money_flow_index(df, result_df, period)
    X_train, X_test, y_train, y_test = data_preprocess(result_df, 'MFI')
    results = run_ml(X_train, X_test, y_train, y_test, results)
    r = pd.DataFrame({'Period':period, 'Accuracy': results['Accuracy']}) # type: ignore
    res = pd.concat([r, res])
end = time.time()
print(f"Total Time: {end - start}")

Total Time: 9.586329936981201


In [20]:
res.sort_values(by='Accuracy', ascending=False).head(3)

Unnamed: 0,Period,Accuracy
0,5,0.510417
0,4,0.501736
0,6,0.496528


In [21]:
res.sort_values(by='Accuracy', ascending=False).tail(3)

Unnamed: 0,Period,Accuracy
0,24,0.385417
0,2,0.380208
0,1,0.293403


In [22]:
opt_params.loc['mfi', 'period'] = res.sort_values(by='Accuracy', ascending=False).head(1).Period.values[0]

In [23]:
def stochastic_oscillator_strategy(df, result_df, period=14, oversold=20, overbought=80):
    highs = df['high']
    lows = df['low']
    closes = df['close']
    stochastic_oscillator = 100 * ((closes - lows.rolling(period).min()) / (highs.rolling(period).max() - lows.rolling(period).min()))
    result_df['Stochastic_oscillator'] = 0
    result_df['Stochastic_oscillator'] = np.where(stochastic_oscillator < oversold, 1, 0)
    result_df['Stochastic_oscillator'] = np.where(stochastic_oscillator > overbought, -1, result_df['Stochastic_oscillator'])
    return result_df
res = pd.DataFrame()
results = pd.DataFrame({'Accuracy'})

start = time.time()
for period in range(1,31,2):
    for oversold in range(1,40,5):
        for overbought in range(60,100,5):
            result_df = stochastic_oscillator_strategy(df, result_df, period, oversold, overbought)
            X_train, X_test, y_train, y_test = data_preprocess(result_df, 'Stochastic_oscillator')
            results = run_ml(X_train, X_test, y_train, y_test, results)
            r = pd.DataFrame({'Oversold': oversold, 'Overbought': overbought, 'Period':period, 'Accuracy': results['Accuracy']}) # type: ignore
            res = pd.concat([r, res])
            
end = time.time()
print(f"Total Time: {end - start}")

Total Time: 191.0252857208252


In [24]:
res.sort_values(by='Accuracy', ascending=False).head(3)

Unnamed: 0,Oversold,Overbought,Period,Accuracy
0,31,75,5,0.699653
0,26,75,5,0.699653
0,36,75,5,0.697917


In [25]:
res.sort_values(by='Accuracy', ascending=False).tail(3)

Unnamed: 0,Oversold,Overbought,Period,Accuracy
0,1,95,3,0.293403
0,1,95,7,0.293403
0,1,95,5,0.293403


In [26]:
opt_params.loc['stockhastic_oscillator', 'period'] = res.sort_values(by='Accuracy', ascending=False).head(1).Period.values[0]
opt_params.loc['stockhastic_oscillator', 'oversold'] = res.sort_values(by='Accuracy', ascending=False).head(1).Oversold.values[0]
opt_params.loc['stockhastic_oscillator', 'overbought'] = res.sort_values(by='Accuracy', ascending=False).head(1).Overbought.values[0]
opt_params

Unnamed: 0,short,long,slow,fast,period,high,low,oversold,overbought,n,m
chaikin_oscillator,6.0,39.0,,,,,,,,,
macd_strategy,,,3.0,47.0,3.0,,,,,,
williams_r,,,,,4.0,-62.0,-32.0,,,,
mfi,,,,,5.0,,,,,,
stockhastic_oscillator,,,,,5.0,,,31.0,75.0,,
keltner_channels,,,,,,,,,,,
cci,,,,,,,,,,,


In [27]:
def keltner_channels(df: pd.DataFrame, result_df: pd.DataFrame, n: int = 10, m: int = 1) -> pd.DataFrame:
    middle_line = df['close'].ewm(span=n).mean()
    tr1 = df['high'] - df['low']
    tr2 = abs(df['high'] - df['close'].shift())
    tr3 = abs(df['low'] - df['close'].shift())
    true_range = pd.DataFrame({'tr1': tr1, 'tr2': tr2, 'tr3': tr3}).max(axis=1)
    upper_band = middle_line + m * true_range.ewm(span=n).mean()
    lower_band = middle_line - m * true_range.ewm(span=n).mean()
    # Generate signals
    result_df['Keltner_channels'] = 0
    result_df.loc[(df['close'] > upper_band), 'Keltner_channels'] = -1  # Sell signal
    result_df.loc[(df['close'] < lower_band), 'Keltner_channels'] = 1  # Buy signal
    return result_df

res = pd.DataFrame()
results = pd.DataFrame({'Accuracy'})
start = time.time()
m_rng = np.arange(1,5,0.1)
for n in range(1,20):
    for m in m_rng:
        result_df = keltner_channels(df, result_df, n, m)
        X_train, X_test, y_train, y_test = data_preprocess(result_df, 'Keltner_channels')
        results = run_ml(X_train, X_test, y_train, y_test, results)
        r = pd.DataFrame({'n': n, 'm': m, 'Accuracy': results['Accuracy']}) # type: ignore
        res = pd.concat([r, res])
end = time.time()
print(f"Total Time: {end - start}")

Total Time: 110.00204253196716


In [28]:
res.sort_values(by='Accuracy', ascending=False).head(3)

Unnamed: 0,n,m,Accuracy
0,13,1.0,0.585069
0,12,1.0,0.583333
0,11,1.0,0.583333


In [29]:
res.sort_values(by='Accuracy', ascending=False).tail(3)

Unnamed: 0,n,m,Accuracy
0,10,4.0,0.293403
0,10,4.1,0.293403
0,1,1.0,0.293403


In [30]:
opt_params.loc['keltner_channels', 'n'] = res.sort_values(by='Accuracy', ascending=False).head(1).n.values[0]
opt_params.loc['keltner_channels', 'm'] = res.sort_values(by='Accuracy', ascending=False).head(1).m.values[0]
opt_params

Unnamed: 0,short,long,slow,fast,period,high,low,oversold,overbought,n,m
chaikin_oscillator,6.0,39.0,,,,,,,,,
macd_strategy,,,3.0,47.0,3.0,,,,,,
williams_r,,,,,4.0,-62.0,-32.0,,,,
mfi,,,,,5.0,,,,,,
stockhastic_oscillator,,,,,5.0,,,31.0,75.0,,
keltner_channels,,,,,,,,,,13.0,1.0
cci,,,,,,,,,,,


In [31]:
def commodity_channel_index(df: pd.DataFrame, result_df: pd.DataFrame, period: int = 20) -> pd.DataFrame:
    typical_price = (df['low'] + df['high'] + df['close']) / 3
    moving_average = typical_price.rolling(window=period).mean()
    mean_deviation = typical_price.rolling(window=period).apply(lambda x: np.fabs(x - x.mean()).mean())
    cci = (typical_price - moving_average) / (0.015 * mean_deviation)

    result_df['CCI'] = np.where(cci > 100, 1, 0)
    result_df['CCI'] = np.where(cci < -100, -1, result_df['CCI'])
    return result_df
res = pd.DataFrame()
results = pd.DataFrame({'Accuracy'})
start = time.time()
for period in range(1, 50,2):
    result_df = commodity_channel_index(df, result_df, period)
    X_train, X_test, y_train, y_test = data_preprocess(result_df, 'CCI')
    results = run_ml(X_train, X_test, y_train, y_test, results)
    r = pd.DataFrame({'Period':period, 'Accuracy': results['Accuracy']}) # type: ignore
    res = pd.concat([r, res])
end = time.time()
print(f"Total Time: {end - start}")

Total Time: 21.46188712120056


In [32]:
res.sort_values(by='Accuracy', ascending=False).head(3)

Unnamed: 0,Period,Accuracy
0,9,0.644097
0,11,0.635417
0,7,0.630208


In [33]:
opt_params.loc['cci', 'period'] = res.sort_values(by='Accuracy', ascending=False).head(1).Period.values[0]
opt_params

Unnamed: 0,short,long,slow,fast,period,high,low,oversold,overbought,n,m
chaikin_oscillator,6.0,39.0,,,,,,,,,
macd_strategy,,,3.0,47.0,3.0,,,,,,
williams_r,,,,,4.0,-62.0,-32.0,,,,
mfi,,,,,5.0,,,,,,
stockhastic_oscillator,,,,,5.0,,,31.0,75.0,,
keltner_channels,,,,,,,,,,13.0,1.0
cci,,,,,9.0,,,,,,


In [34]:
opt_params.to_csv('opt_params.csv')