In [1]:
import pandas as pd
import numpy as np

# Loading dataset

In [2]:
data=pd.read_csv(r"C:\Users\RatanBiswakarmakar\Downloads\binance.csv")

In [3]:
data

Unnamed: 0,datetime,symbol,open,high,low,close,volume
0,2024-09-23 06:20:00,BINANCE:BTCUSD,63523.59,63528.68,63523.59,63528.68,0.00173
1,2024-09-23 06:21:00,BINANCE:BTCUSD,63527.02,63527.02,63519.63,63519.63,0.02417
2,2024-09-23 06:22:00,BINANCE:BTCUSD,63495.11,63763.61,63495.11,63763.61,0.46918
3,2024-09-23 06:23:00,BINANCE:BTCUSD,63767.96,63767.96,63728.52,63729.17,0.21281
4,2024-09-23 06:24:00,BINANCE:BTCUSD,63726.34,63726.34,63681.37,63682.41,0.40335
...,...,...,...,...,...,...,...
9995,2024-09-30 13:57:00,BINANCE:BTCUSD,64337.20,64338.08,64318.21,64318.22,0.06156
9996,2024-09-30 13:58:00,BINANCE:BTCUSD,64281.16,64314.90,64259.71,64295.77,0.36236
9997,2024-09-30 13:59:00,BINANCE:BTCUSD,64294.68,64322.99,64283.54,64316.09,0.13002
9998,2024-09-30 14:00:00,BINANCE:BTCUSD,64297.19,64313.30,64267.61,64312.29,0.09853


In [4]:
#data = df.copy()

# Feature engineering

In [5]:
def calculate_obv(data):
    obv = [0]

    for i in range(1, len(data)):
        if data['close'][i] > data['close'][i-1]:  
            obv.append(obv[-1] + data['volume'][i])
        elif data['close'][i] < data['close'][i-1]:  
            obv.append(obv[-1] - data['volume'][i])
        else:  
            obv.append(obv[-1])

    return obv

data['OBV'] = calculate_obv(data)
data.head()


Unnamed: 0,datetime,symbol,open,high,low,close,volume,OBV
0,2024-09-23 06:20:00,BINANCE:BTCUSD,63523.59,63528.68,63523.59,63528.68,0.00173,0.0
1,2024-09-23 06:21:00,BINANCE:BTCUSD,63527.02,63527.02,63519.63,63519.63,0.02417,-0.02417
2,2024-09-23 06:22:00,BINANCE:BTCUSD,63495.11,63763.61,63495.11,63763.61,0.46918,0.44501
3,2024-09-23 06:23:00,BINANCE:BTCUSD,63767.96,63767.96,63728.52,63729.17,0.21281,0.2322
4,2024-09-23 06:24:00,BINANCE:BTCUSD,63726.34,63726.34,63681.37,63682.41,0.40335,-0.17115


In [6]:
def calculate_rsi(data, period=14):
    delta = data['close'].diff()

    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)

    avg_gain = gain.rolling(window=period, min_periods=1).mean()
    avg_loss = loss.rolling(window=period, min_periods=1).mean()

    rs = avg_gain / avg_loss

    rsi = 100 - (100 / (1 + rs))

    return rsi

data['RSI'] = calculate_rsi(data)
data.head()


Unnamed: 0,datetime,symbol,open,high,low,close,volume,OBV,RSI
0,2024-09-23 06:20:00,BINANCE:BTCUSD,63523.59,63528.68,63523.59,63528.68,0.00173,0.0,
1,2024-09-23 06:21:00,BINANCE:BTCUSD,63527.02,63527.02,63519.63,63519.63,0.02417,-0.02417,0.0
2,2024-09-23 06:22:00,BINANCE:BTCUSD,63495.11,63763.61,63495.11,63763.61,0.46918,0.44501,96.423349
3,2024-09-23 06:23:00,BINANCE:BTCUSD,63767.96,63767.96,63728.52,63729.17,0.21281,0.2322,84.871465
4,2024-09-23 06:24:00,BINANCE:BTCUSD,63726.34,63726.34,63681.37,63682.41,0.40335,-0.17115,72.997636


In [7]:
def calculate_fibonacci_levels(data, period=14):
    rolling_high = data['high'].rolling(window=period, min_periods=1).max()
    rolling_low = data['low'].rolling(window=period, min_periods=1).min()

    data['Fib_23.6%'] = rolling_low + (rolling_high - rolling_low) * 0.236
    data['Fib_38.2%'] = rolling_low + (rolling_high - rolling_low) * 0.382
    data['Fib_50%'] = rolling_low + (rolling_high - rolling_low) * 0.5
    data['Fib_61.8%'] = rolling_low + (rolling_high - rolling_low) * 0.618

    return data

data = calculate_fibonacci_levels(data)
data.head()


Unnamed: 0,datetime,symbol,open,high,low,close,volume,OBV,RSI,Fib_23.6%,Fib_38.2%,Fib_50%,Fib_61.8%
0,2024-09-23 06:20:00,BINANCE:BTCUSD,63523.59,63528.68,63523.59,63528.68,0.00173,0.0,,63524.79124,63525.53438,63526.135,63526.73562
1,2024-09-23 06:21:00,BINANCE:BTCUSD,63527.02,63527.02,63519.63,63519.63,0.02417,-0.02417,0.0,63521.7658,63523.0871,63524.155,63525.2229
2,2024-09-23 06:22:00,BINANCE:BTCUSD,63495.11,63763.61,63495.11,63763.61,0.46918,0.44501,96.423349,63558.476,63597.677,63629.36,63661.043
3,2024-09-23 06:23:00,BINANCE:BTCUSD,63767.96,63767.96,63728.52,63729.17,0.21281,0.2322,84.871465,63559.5026,63599.3387,63631.535,63663.7313
4,2024-09-23 06:24:00,BINANCE:BTCUSD,63726.34,63726.34,63681.37,63682.41,0.40335,-0.17115,72.997636,63559.5026,63599.3387,63631.535,63663.7313


In [8]:
def calculate_stochastic_oscillator(data, k_period=14, d_period=3):
    low_min = data['low'].rolling(window=k_period).min()
    high_max = data['high'].rolling(window=k_period).max()

    data['%K'] = ((data['close'] - low_min) / (high_max - low_min)) * 100

    data['%D'] = data['%K'].rolling(window=d_period).mean()

    return data

data = calculate_stochastic_oscillator(data)
data.head()


Unnamed: 0,datetime,symbol,open,high,low,close,volume,OBV,RSI,Fib_23.6%,Fib_38.2%,Fib_50%,Fib_61.8%,%K,%D
0,2024-09-23 06:20:00,BINANCE:BTCUSD,63523.59,63528.68,63523.59,63528.68,0.00173,0.0,,63524.79124,63525.53438,63526.135,63526.73562,,
1,2024-09-23 06:21:00,BINANCE:BTCUSD,63527.02,63527.02,63519.63,63519.63,0.02417,-0.02417,0.0,63521.7658,63523.0871,63524.155,63525.2229,,
2,2024-09-23 06:22:00,BINANCE:BTCUSD,63495.11,63763.61,63495.11,63763.61,0.46918,0.44501,96.423349,63558.476,63597.677,63629.36,63661.043,,
3,2024-09-23 06:23:00,BINANCE:BTCUSD,63767.96,63767.96,63728.52,63729.17,0.21281,0.2322,84.871465,63559.5026,63599.3387,63631.535,63663.7313,,
4,2024-09-23 06:24:00,BINANCE:BTCUSD,63726.34,63726.34,63681.37,63682.41,0.40335,-0.17115,72.997636,63559.5026,63599.3387,63631.535,63663.7313,,


In [9]:
def calculate_parabolic_sar(data):
    data['SAR'] = 0.0
    data['EP'] = 0.0  
    data['AF'] = 0.02  

    # Initial trend
    uptrend = data['close'][1] > data['close'][0]

    # Initial EP
    if uptrend:
        data.at[0, 'EP'] = data['high'][0]
    else:
        data.at[0, 'EP'] = data['low'][0]

    # Initial SAR
    data.at[1, 'SAR'] = data['EP'][0]

    for i in range(2, len(data)):
        if uptrend:
            data.at[i, 'SAR'] = data['SAR'][i - 1] + data['AF'][i - 1] * (data['EP'][i - 1] - data['SAR'][i - 1])
        else:
            data.at[i, 'SAR'] = data['SAR'][i - 1] - data['AF'][i - 1] * (data['SAR'][i - 1] - data['EP'][i - 1])

        if uptrend:
            data.at[i, 'SAR'] = min(data.at[i, 'SAR'], data['low'][i - 1], data['low'][i])
            new_high = data['high'][i] > data['EP'][i - 1]
            data.at[i, 'EP'] = max(data['high'][i], data['EP'][i - 1])
        else:
            data.at[i, 'SAR'] = max(data.at[i, 'SAR'], data['high'][i - 1], data['high'][i])
            new_low = data['low'][i] < data['EP'][i - 1]
            data.at[i, 'EP'] = min(data['low'][i], data['EP'][i - 1])

        if (uptrend and new_high) or (not uptrend and new_low):
            data.at[i, 'AF'] = min(data['AF'][i - 1] + 0.02, 0.2)
        else:
            if (uptrend and data['low'][i] < data['SAR'][i]) or (not uptrend and data['high'][i] > data['SAR'][i]):
                uptrend = not uptrend
                data.at[i, 'SAR'] = data['EP'][i - 1]
                data.at[i, 'AF'] = 0.02
                data.at[i, 'EP'] = data['high'][i] if uptrend else data['low'][i]
            else:
                data.at[i, 'AF'] = data['AF'][i - 1]
                data.at[i, 'EP'] = data['EP'][i - 1]

    return data

data = calculate_parabolic_sar(data)
data.head()


Unnamed: 0,datetime,symbol,open,high,low,close,volume,OBV,RSI,Fib_23.6%,Fib_38.2%,Fib_50%,Fib_61.8%,%K,%D,SAR,EP,AF
0,2024-09-23 06:20:00,BINANCE:BTCUSD,63523.59,63528.68,63523.59,63528.68,0.00173,0.0,,63524.79124,63525.53438,63526.135,63526.73562,,,0.0,63523.59,0.02
1,2024-09-23 06:21:00,BINANCE:BTCUSD,63527.02,63527.02,63519.63,63519.63,0.02417,-0.02417,0.0,63521.7658,63523.0871,63524.155,63525.2229,,,63523.59,0.0,0.02
2,2024-09-23 06:22:00,BINANCE:BTCUSD,63495.11,63763.61,63495.11,63763.61,0.46918,0.44501,96.423349,63558.476,63597.677,63629.36,63661.043,,,63763.61,0.0,0.02
3,2024-09-23 06:23:00,BINANCE:BTCUSD,63767.96,63767.96,63728.52,63729.17,0.21281,0.2322,84.871465,63559.5026,63599.3387,63631.535,63663.7313,,,63767.96,0.0,0.02
4,2024-09-23 06:24:00,BINANCE:BTCUSD,63726.34,63726.34,63681.37,63682.41,0.40335,-0.17115,72.997636,63559.5026,63599.3387,63631.535,63663.7313,,,63767.96,0.0,0.02


In [10]:
def calculate_adx(data, period=14):
    plus_dm = data['high'].diff()
    minus_dm = data['low'].diff()
    plus_dm[plus_dm < 0] = 0
    minus_dm[minus_dm > 0] = 0
    minus_dm = minus_dm.abs()

    tr1 = data['high'] - data['low']
    tr2 = (data['high'] - data['close'].shift()).abs()
    tr3 = (data['low'] - data['close'].shift()).abs()
    tr = pd.DataFrame({'tr1': tr1, 'tr2': tr2, 'tr3': tr3}).max(axis=1)

    smooth_plus_dm = plus_dm.rolling(window=period).sum()
    smooth_minus_dm = minus_dm.rolling(window=period).sum()
    smooth_tr = tr.rolling(window=period).sum()

    data['+DI'] = (smooth_plus_dm / smooth_tr) * 100
    data['-DI'] = (smooth_minus_dm / smooth_tr) * 100

    dx = (abs(data['+DI'] - data['-DI']) / (data['+DI'] + data['-DI'])) * 100
    data['ADX'] = dx.rolling(window=period).mean()

    return data

data = calculate_adx(data)
data.head()


Unnamed: 0,datetime,symbol,open,high,low,close,volume,OBV,RSI,Fib_23.6%,...,Fib_50%,Fib_61.8%,%K,%D,SAR,EP,AF,+DI,-DI,ADX
0,2024-09-23 06:20:00,BINANCE:BTCUSD,63523.59,63528.68,63523.59,63528.68,0.00173,0.0,,63524.79124,...,63526.135,63526.73562,,,0.0,63523.59,0.02,,,
1,2024-09-23 06:21:00,BINANCE:BTCUSD,63527.02,63527.02,63519.63,63519.63,0.02417,-0.02417,0.0,63521.7658,...,63524.155,63525.2229,,,63523.59,0.0,0.02,,,
2,2024-09-23 06:22:00,BINANCE:BTCUSD,63495.11,63763.61,63495.11,63763.61,0.46918,0.44501,96.423349,63558.476,...,63629.36,63661.043,,,63763.61,0.0,0.02,,,
3,2024-09-23 06:23:00,BINANCE:BTCUSD,63767.96,63767.96,63728.52,63729.17,0.21281,0.2322,84.871465,63559.5026,...,63631.535,63663.7313,,,63767.96,0.0,0.02,,,
4,2024-09-23 06:24:00,BINANCE:BTCUSD,63726.34,63726.34,63681.37,63682.41,0.40335,-0.17115,72.997636,63559.5026,...,63631.535,63663.7313,,,63767.96,0.0,0.02,,,


In [11]:
def calculate_macd(data, short_period=12, long_period=26, signal_period=9):
    short_ema = data['close'].ewm(span=short_period, adjust=False).mean()
    long_ema = data['close'].ewm(span=long_period, adjust=False).mean()

    data['MACD'] = short_ema - long_ema

    data['Signal Line'] = data['MACD'].ewm(span=signal_period, adjust=False).mean()

    data['MACD Histogram'] = data['MACD'] - data['Signal Line']

    return data

data = calculate_macd(data)
data.head()


Unnamed: 0,datetime,symbol,open,high,low,close,volume,OBV,RSI,Fib_23.6%,...,%D,SAR,EP,AF,+DI,-DI,ADX,MACD,Signal Line,MACD Histogram
0,2024-09-23 06:20:00,BINANCE:BTCUSD,63523.59,63528.68,63523.59,63528.68,0.00173,0.0,,63524.79124,...,,0.0,63523.59,0.02,,,,0.0,0.0,0.0
1,2024-09-23 06:21:00,BINANCE:BTCUSD,63527.02,63527.02,63519.63,63519.63,0.02417,-0.02417,0.0,63521.7658,...,,63523.59,0.0,0.02,,,,-0.721937,-0.144387,-0.57755
2,2024-09-23 06:22:00,BINANCE:BTCUSD,63495.11,63763.61,63495.11,63763.61,0.46918,0.44501,96.423349,63558.476,...,,63763.61,0.0,0.02,,,,18.183461,3.521182,14.662279
3,2024-09-23 06:23:00,BINANCE:BTCUSD,63767.96,63767.96,63728.52,63729.17,0.21281,0.2322,84.871465,63559.5026,...,,63767.96,0.0,0.02,,,,30.040814,8.825109,21.215706
4,2024-09-23 06:24:00,BINANCE:BTCUSD,63726.34,63726.34,63681.37,63682.41,0.40335,-0.17115,72.997636,63559.5026,...,,63767.96,0.0,0.02,,,,35.258275,14.111742,21.146533


In [12]:
def calculate_bollinger_bands(data, period=20):
    data['Middle Band'] = data['close'].rolling(window=period).mean()

    std_dev = data['close'].rolling(window=period).std()

    data['Upper Band'] = data['Middle Band'] + (std_dev * 2)
    data['Lower Band'] = data['Middle Band'] - (std_dev * 2)

    return data

data = calculate_bollinger_bands(data)
data.head()


Unnamed: 0,datetime,symbol,open,high,low,close,volume,OBV,RSI,Fib_23.6%,...,AF,+DI,-DI,ADX,MACD,Signal Line,MACD Histogram,Middle Band,Upper Band,Lower Band
0,2024-09-23 06:20:00,BINANCE:BTCUSD,63523.59,63528.68,63523.59,63528.68,0.00173,0.0,,63524.79124,...,0.02,,,,0.0,0.0,0.0,,,
1,2024-09-23 06:21:00,BINANCE:BTCUSD,63527.02,63527.02,63519.63,63519.63,0.02417,-0.02417,0.0,63521.7658,...,0.02,,,,-0.721937,-0.144387,-0.57755,,,
2,2024-09-23 06:22:00,BINANCE:BTCUSD,63495.11,63763.61,63495.11,63763.61,0.46918,0.44501,96.423349,63558.476,...,0.02,,,,18.183461,3.521182,14.662279,,,
3,2024-09-23 06:23:00,BINANCE:BTCUSD,63767.96,63767.96,63728.52,63729.17,0.21281,0.2322,84.871465,63559.5026,...,0.02,,,,30.040814,8.825109,21.215706,,,
4,2024-09-23 06:24:00,BINANCE:BTCUSD,63726.34,63726.34,63681.37,63682.41,0.40335,-0.17115,72.997636,63559.5026,...,0.02,,,,35.258275,14.111742,21.146533,,,


In [13]:
data.columns

Index(['datetime', 'symbol', 'open', 'high', 'low', 'close', 'volume', 'OBV',
       'RSI', 'Fib_23.6%', 'Fib_38.2%', 'Fib_50%', 'Fib_61.8%', '%K', '%D',
       'SAR', 'EP', 'AF', '+DI', '-DI', 'ADX', 'MACD', 'Signal Line',
       'MACD Histogram', 'Middle Band', 'Upper Band', 'Lower Band'],
      dtype='object')

# Create labels Buy or Sell

In [37]:
# Create labels based on future price movements
data['signal'] = 0
data['signal'][data['close'].shift(-1) > data['close']] = 1  # Buy signal
data['signal'][data['close'].shift(-1) < data['close']] = 0 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['signal'] = 0
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-vers

In [39]:
data["signal"]

0       0
1       1
2       0
3       0
4       0
       ..
9990    0
9991    0
9992    1
9993    1
9994    0
Name: signal, Length: 9995, dtype: int64

In [41]:
data.columns

Index(['datetime', 'symbol', 'open', 'high', 'low', 'close', 'volume', 'OBV',
       'RSI', 'Fib_23.6%', 'Fib_38.2%', 'Fib_50%', 'Fib_61.8%', '%K', '%D',
       'SAR', 'EP', 'AF', '+DI', '-DI', 'ADX', 'MACD', 'Signal Line',
       'MACD Histogram', 'Middle Band', 'Upper Band', 'Lower Band',
       'future_return', 'signal'],
      dtype='object')

# Split the data

In [43]:
from sklearn.model_selection import train_test_split

# Define features and labels
X = data.drop(columns=["signal","datetime","symbol"])
y = data['signal']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


# Model

In [45]:
from sklearn.ensemble import RandomForestClassifier

# Create the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

In [156]:
from sklearn.metrics import classification_report, confusion_matrix

# # Predict on the test set
# y_pred = model.predict(X_test)

# # Evaluate the model
# print(classification_report(y_test, y_pred))
# print(confusion_matrix(y_test, y_pred))

In [49]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

In [59]:
data.isnull().sum()

datetime          0
symbol            0
open              0
high              0
low               0
close             0
volume            0
OBV               0
RSI               0
Fib_23.6%         0
Fib_38.2%         0
Fib_50%           0
Fib_61.8%         0
%K                0
%D                0
SAR               0
EP                0
AF                0
+DI               0
-DI               0
ADX               0
MACD              0
Signal Line       0
MACD Histogram    0
Middle Band       0
Upper Band        0
Lower Band        0
future_return     0
signal            0
dtype: int64

In [57]:
data.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.dropna(inplace=True)


In [69]:
# Define the features (excluding 'datetime', 'symbol', 'future_return', 'signal')
features = ['open', 'high', 'low', 'close', 'volume', 'OBV', 'RSI', 'Fib_23.6%', 'Fib_38.2%', 
            'Fib_50%', 'Fib_61.8%', '%K', '%D', 'SAR', 'EP', 'AF', '+DI', '-DI', 'ADX', 'MACD', 
            'Signal Line', 'MACD Histogram', 'Middle Band', 'Upper Band', 'Lower Band']


In [71]:
data.columns

Index(['datetime', 'symbol', 'open', 'high', 'low', 'close', 'volume', 'OBV',
       'RSI', 'Fib_23.6%', 'Fib_38.2%', 'Fib_50%', 'Fib_61.8%', '%K', '%D',
       'SAR', 'EP', 'AF', '+DI', '-DI', 'ADX', 'MACD', 'Signal Line',
       'MACD Histogram', 'Middle Band', 'Upper Band', 'Lower Band',
       'future_return', 'signal'],
      dtype='object')

In [77]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import xgboost as xgb
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC


X = data.drop(columns=["signal","datetime","symbol"])
y = data["signal"]

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=343)

pipeline_lr = Pipeline([('scaler',StandardScaler()),
                    ('classifier',LogisticRegression())
                    ])
pipeline_rf = Pipeline([('scaler',StandardScaler()),
                    ('classifier',RandomForestClassifier(n_estimators=100,min_samples_split=10,max_depth=3))
                    ])
pipeline_xg = Pipeline([('scaler',StandardScaler()),
                    ('classifier',xgb.XGBClassifier(n_estimators=1000,learning_rate=0.09,max_depth=4,gamma=1,min_child_weight=1))
                    ])

pipeline_dt = Pipeline([('scaler',StandardScaler()),
                    ('classifier',DecisionTreeClassifier(criterion="gini",max_depth=4,min_samples_split=5))
                    ])

# pipeline_Bg = Pipeline([('scaler',StandardScaler()),
#     ('classifier',BaggingClassifier(n_estimators=100,max_samples=0.8,max_features=10))
#                     ])

pipeline_Bg = BaggingClassifier(n_estimators=1000,max_samples=0.8,max_features=10)
pipeline_svm = Pipeline([('scaler',StandardScaler()),
                    ('classifier',SVC(kernel="rbf",gamma=0.001))
                    ])
pipeline_ab = Pipeline([('scaler',StandardScaler()),
                    ('classifier',AdaBoostClassifier(n_estimators=100,learning_rate=0.09))
                    ])
# pipeline_gb = Pipeline([('scaler',StandardScaler()),
#                     ('classifier',GradientBoostingClassifier(n_estimators=1000,learning_rate=0.09,max_depth=4,min_samples_split=5))
#                     ])
pipeline_lr.fit(X_train,y_train)
print("Logistic Regression is running")
y_pred_test = pipeline_lr.predict(X_test)
print("Logistic Regression Classification Report Below!!!")
print(classification_report(y_test,y_pred_test))
print("Logistic Regression Pipeline is ended")

print("Random Forest Starts Now!!!!!!!!!!!")
pipeline_rf.fit(X_train,y_train)
print("Random Forestis running")
y_pred_test = pipeline_rf.predict(X_test)
print("Random Forest classification Report Below!!!!")
print(classification_report(y_test,y_pred_test))

print("xgboost is running")
pipeline_xg.fit(X_train,y_train)
y_pred_test =pipeline_xg.predict(X_test)
print("xgboost classification Report Below!!!!")
print(classification_report(y_test,y_pred_test))

print("decision tree is running")
pipeline_dt.fit(X_train,y_train)
y_pred_test = pipeline_dt.predict(X_test)
print("decision classification Report Below!!!!")
print(classification_report(y_test,y_pred_test))

print("Bagging is running")
pipeline_Bg.fit(X_train,y_train)
y_pred_test = pipeline_Bg.predict(X_test)
print("Bagging classification Report Below!!!!")
print(classification_report(y_test,y_pred_test))

print("SVM is running")
pipeline_svm.fit(X_train,y_train)
y_pred_test = pipeline_svm.predict(X_test)
print("SVM classification Report Below!!!!")
print(classification_report(y_test,y_pred_test))

print("Adaboosting is running")
pipeline_ab.fit(X_train,y_train)
y_pred_test = pipeline_ab.predict(X_test)
print("Adaboosting classification Report Below!!!!")
print(classification_report(y_test,y_pred_test))

# print("Gradient is running")
# pipeline_gb.fit(X_train,y_train)
# y_pred_test = pipeline_gb.predict(X_test)
# print("Gradientboosting classification Report Below!!!!")
# print(classification_report(y_test,y_pred_test))

Logistic Regression is running
Logistic Regression Classification Report Below!!!
              precision    recall  f1-score   support

           0       0.63      0.67      0.65       987
           1       0.65      0.60      0.63      1007

    accuracy                           0.64      1994
   macro avg       0.64      0.64      0.64      1994
weighted avg       0.64      0.64      0.64      1994

Logistic Regression Pipeline is ended
Random Forest Starts Now!!!!!!!!!!!
Random Forestis running
Random Forest classification Report Below!!!!
              precision    recall  f1-score   support

           0       0.63      0.69      0.66       987
           1       0.66      0.60      0.63      1007

    accuracy                           0.64      1994
   macro avg       0.65      0.65      0.64      1994
weighted avg       0.65      0.64      0.64      1994

xgboost is running
xgboost classification Report Below!!!!
              precision    recall  f1-score   support

      



Adaboosting classification Report Below!!!!
              precision    recall  f1-score   support

           0       0.63      0.70      0.66       987
           1       0.67      0.59      0.63      1007

    accuracy                           0.64      1994
   macro avg       0.65      0.64      0.64      1994
weighted avg       0.65      0.64      0.64      1994



In [79]:
data.isnull().sum()

datetime          0
symbol            0
open              0
high              0
low               0
close             0
volume            0
OBV               0
RSI               0
Fib_23.6%         0
Fib_38.2%         0
Fib_50%           0
Fib_61.8%         0
%K                0
%D                0
SAR               0
EP                0
AF                0
+DI               0
-DI               0
ADX               0
MACD              0
Signal Line       0
MACD Histogram    0
Middle Band       0
Upper Band        0
Lower Band        0
future_return     0
signal            0
dtype: int64

In [93]:
data.columns

Index(['datetime', 'symbol', 'open', 'high', 'low', 'close', 'volume', 'OBV',
       'RSI', 'Fib_23.6%', 'Fib_38.2%', 'Fib_50%', 'Fib_61.8%', '%K', '%D',
       'SAR', 'EP', 'AF', '+DI', '-DI', 'ADX', 'MACD', 'Signal Line',
       'MACD Histogram', 'Middle Band', 'Upper Band', 'Lower Band',
       'future_return', 'signal'],
      dtype='object')

In [125]:
# Define the function to calculate future return
def calculate_future_return(last_row):
    future_return_value = (last_row['close'] * 1.01) - last_row['close']
    return future_return_value / last_row['close']  # Percentage return

# Create new data by modifying existing values
last_row = data.iloc[-1]
new_data = last_row.copy()

In [127]:
new_data['future_return'] = calculate_future_return(new_data)

# Convert new_data Series back to DataFrame
new_data_df = pd.DataFrame([new_data])



# Calculate future return using the defined function
new_data['future_return'] = calculate_future_return(new_data)
data = pd.concat([data, new_data_df], ignore_index=True)

In [129]:
data.columns

Index(['datetime', 'symbol', 'open', 'high', 'low', 'close', 'volume', 'OBV',
       'RSI', 'Fib_23.6%', 'Fib_38.2%', 'Fib_50%', 'Fib_61.8%', '%K', '%D',
       'SAR', 'EP', 'AF', '+DI', '-DI', 'ADX', 'MACD', 'Signal Line',
       'MACD Histogram', 'Middle Band', 'Upper Band', 'Lower Band',
       'future_return', 'signal'],
      dtype='object')

In [131]:
data.head(2)

Unnamed: 0,datetime,symbol,open,high,low,close,volume,OBV,RSI,Fib_23.6%,...,-DI,ADX,MACD,Signal Line,MACD Histogram,Middle Band,Upper Band,Lower Band,future_return,signal
0,2024-09-23 06:48:00,BINANCE:BTCUSD,63757.02,63757.02,63746.09,63748.11,0.00868,-0.17566,59.293393,63697.97004,...,21.796934,23.025742,43.313107,48.042383,-4.729276,63708.2535,63904.651959,63511.855041,-0.001347,1
1,2024-09-23 06:51:00,BINANCE:BTCUSD,63870.56,63880.18,63783.56,63789.93,0.3825,0.20684,58.483378,63710.39268,...,20.386474,25.495326,43.771851,47.188276,-3.416426,63717.36,63911.017464,63523.702536,-0.001621,0


# generate buy/sell signals on new data.

In [149]:
features = ['open', 'high', 'low', 'close', 'volume', 'OBV',
       'RSI', 'Fib_23.6%', 'Fib_38.2%', 'Fib_50%', 'Fib_61.8%', '%K', '%D',
       'SAR', 'EP', 'AF', '+DI', '-DI', 'ADX', 'MACD', 'Signal Line',
       'MACD Histogram', 'Middle Band', 'Upper Band', 'Lower Band',
       'future_return', 'signal']  # Define your features
X = data[features]
y = np.where(data['future_return'] > 0, 1, -1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the model (e.g., MLPClassifier)
model = MLPClassifier(hidden_layer_sizes=(50,), max_iter=1000, random_state=42)
model.fit(X_train_scaled, y_train)

# Predict on test set
y_pred = model.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Example of creating new data (modifying the last row for prediction)
new_data = X.iloc[-1].copy()  # Copy last row
new_data['open'] *= 1.01  # Adjust open price
new_data['high'] *= 1.01  # Adjust high price
new_data['low'] *= 0.99   # Adjust low price
new_data['close'] *= 1.01  # Adjust close price
new_data['volume'] *= 1.05  # Adjust volume

# Convert new data to a DataFrame and scale it
new_data_df = pd.DataFrame([new_data])
new_data_scaled = scaler.transform(new_data_df)

# Use the trained model to generate a signal on new data
signal = model.predict(new_data_scaled)

# Output the generated signal
print("Generated signal (1=Buy, -1=Sell):", signal)

Accuracy: 99.85%
Generated signal (1=Buy, -1=Sell): [1]


In [165]:
new_data_df.shape

(1, 27)

In [167]:
new_data_scaled

array([[ 3.10783755e-01,  3.40747642e-01, -9.12927536e-01,
         3.24443067e-01,  2.28739179e+00,  2.23822493e-01,
        -1.46874705e+00, -2.70755322e-01, -2.61627827e-01,
        -2.54180513e-01, -2.46672267e-01, -5.40627123e-01,
        -9.37200459e-01, -2.88959424e-01,  0.00000000e+00,
         0.00000000e+00, -1.33366572e+00,  8.17905745e-01,
         8.30277209e-03, -1.76287319e+00, -1.60022954e+00,
        -8.26240162e-01, -2.15667672e-01, -1.88348300e-01,
        -2.41630276e-01,  8.93080024e+00, -9.96745516e-01]])