<a href="https://colab.research.google.com/github/redfear08/bot_trade_ml/blob/main/trading_bot_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Install dependencies
!pip install -q pandas scikit-learn tensorflow kiteconnect

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m771.5/771.5 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.8/247.8 kB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m63.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.6/74.6 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [11]:
# Authentication and data fetching
import pandas as pd
from kiteconnect import KiteConnect
import datetime as dt
import time
import numpy as np

api_key = 'klz728yv89qrljzs'
api_secret = '4vhxunujbp17i8da0y1tiy7ayde4h5o8'
kite = KiteConnect(api_key=api_key)
print("Login URL:", kite.login_url())
request_token = input("Enter request token: ")
data = kite.generate_session(request_token, api_secret=api_secret)
access_token = data["access_token"]
kite.set_access_token(access_token)

# Fetch historical data
def fetch_historical_data(kite, instrument_token, start_date, end_date, interval, csv_filename):
    delta = dt.timedelta(days=60)
    current_date = start_date
    all_data = []

    while current_date < end_date:
        to_date = min(current_date + delta, end_date)
        data = kite.historical_data(instrument_token, current_date, to_date, interval)
        all_data.extend(data)
        current_date = to_date + dt.timedelta(days=1)
        time.sleep(1)  # Avoid hitting API rate limits

    df = pd.DataFrame(all_data)
    df.to_csv(csv_filename, mode='w', index=False, header=True)
    return df

# Fetch historical data
instrument_token = '738561' #INFY
start_date = dt.datetime(2017, 1, 1)
end_date = dt.datetime(2023, 12, 31)
interval = 'minute'
csv_filename = 'historical_data.csv'

df = fetch_historical_data(kite, instrument_token, start_date, end_date, interval, csv_filename)
print(df.head())

Login URL: https://kite.zerodha.com/connect/login?api_key=klz728yv89qrljzs&v=3
Enter request token: rI3J4wbhiSKVoLoKbwKda903z8QHKb83
                       date    open    high     low   close  volume
0 2017-01-02 09:15:00+05:30  511.45  512.70  510.55  510.80   32510
1 2017-01-02 09:16:00+05:30  511.25  511.55  510.95  511.30   15816
2 2017-01-02 09:17:00+05:30  511.45  511.45  507.30  507.30   56062
3 2017-01-02 09:18:00+05:30  507.40  509.80  506.30  509.80   36745
4 2017-01-02 09:19:00+05:30  509.85  510.05  509.35  509.75   22138


In [12]:
# Feature Engineering
def add_features(df):
    df['date'] = pd.to_datetime(df['date'])
    df.set_index('date', inplace=True)
    df['SMA_50'] = df['close'].rolling(window=50).mean()
    df['SMA_200'] = df['close'].rolling(window=200).mean()
    df['RSI'] = calculate_rsi(df, 14)['RSI']
    df['Bollinger_Upper'] = calculate_bollinger_bands(df, 20)['Upper_Band']
    df['Bollinger_Lower'] = calculate_bollinger_bands(df, 20)['Lower_Band']
    df['MACD'] = calculate_macd(df)['MACD']
    df['MACD_Signal'] = calculate_macd(df)['Signal_Line']
    df['Stochastic_%K'] = calculate_stochastic_oscillator(df, 14)['%K']
    df['Stochastic_%D'] = calculate_stochastic_oscillator(df, 14)['%D']
    df['Momentum'] = df['close'] / df['close'].shift(10) - 1
    df['VWAP'] = calculate_vwap(df)['VWAP']
    df['Target'] = np.where(df['close'].shift(-1) > df['close'], 1, 0)
    df.dropna(inplace=True)
    return df

def calculate_rsi(df, window=14):
    delta = df['close'].diff(1)
    gain = (delta.where(delta > 0, 0)).fillna(0)
    loss = (-delta.where(delta < 0, 0)).fillna(0)
    avg_gain = gain.rolling(window=window).mean()
    avg_loss = loss.rolling(window=window).mean()
    rs = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + rs))
    return df

def calculate_bollinger_bands(df, window=20):
    df['SMA'] = df['close'].rolling(window=window).mean()
    df['STD'] = df['close'].rolling(window=window).std()
    df['Upper_Band'] = df['SMA'] + (df['STD'] * 2)
    df['Lower_Band'] = df['SMA'] - (df['STD'] * 2)
    return df

def calculate_macd(df, short_window=12, long_window=26, signal_window=9):
    df['EMA_12'] = df['close'].ewm(span=short_window, adjust=False).mean()
    df['EMA_26'] = df['close'].ewm(span=long_window, adjust=False).mean()
    df['MACD'] = df['EMA_12'] - df['EMA_26']
    df['Signal_Line'] = df['MACD'].ewm(span=signal_window, adjust=False).mean()
    return df

def calculate_stochastic_oscillator(df, window=14):
    df['L14'] = df['low'].rolling(window=window).min()
    df['H14'] = df['high'].rolling(window=window).max()
    df['%K'] = (df['close'] - df['L14']) * 100 / (df['H14'] - df['L14'])
    df['%D'] = df['%K'].rolling(window=3).mean()
    return df

def calculate_vwap(df):
    df['Cumulative_TP_Volume'] = (df['close'] * df['volume']).cumsum()
    df['Cumulative_Volume'] = df['volume'].cumsum()
    df['VWAP'] = df['Cumulative_TP_Volume'] / df['Cumulative_Volume']
    return df

# Add features
df = add_features(df)
print(df.head())

                             open    high     low   close  volume   SMA_50  \
date                                                                         
2017-01-02 12:34:00+05:30  514.10  514.15  513.80  514.00    4783  514.129   
2017-01-02 12:35:00+05:30  514.00  514.00  513.75  513.80    4384  514.121   
2017-01-02 12:36:00+05:30  513.80  514.00  513.70  513.75    5097  514.108   
2017-01-02 12:37:00+05:30  513.75  514.00  513.70  513.70    4546  514.095   
2017-01-02 12:38:00+05:30  513.70  513.75  513.70  513.70    7186  514.081   

                             SMA_200        RSI       SMA       STD  ...  \
date                                                                 ...   
2017-01-02 12:34:00+05:30  512.93875  52.941176  513.9775  0.129244  ...   
2017-01-02 12:35:00+05:30  512.95375  42.857143  513.9650  0.133870  ...   
2017-01-02 12:36:00+05:30  512.96600  40.000000  513.9500  0.140488  ...   
2017-01-02 12:37:00+05:30  512.99800  38.888889  513.9400  0.150962  ... 

In [13]:
# Training Random Forest Model
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

X = df[['SMA_50', 'SMA_200', 'RSI', 'Bollinger_Upper', 'Bollinger_Lower', 'MACD', 'MACD_Signal', 'Stochastic_%K', 'Stochastic_%D', 'Momentum', 'VWAP']]
y = df['Target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Model Accuracy: {accuracy}")

# Save Random Forest Model
import joblib
joblib.dump(rf, 'best_rf_model.joblib')

Random Forest Model Accuracy: 0.5410891672228985


['best_rf_model.joblib']

In [14]:
# Training Neural Network Model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Train Neural Network Model Incrementally
def create_nn_model(input_dim):
    model = Sequential()
    model.add(Dense(64, input_dim=input_dim, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Check if a pre-trained model exists
model_path = 'best_nn_model.h5'
if os.path.exists(model_path):
    nn_model = load_model(model_path)
    print("Loaded pre-trained model.")
else:
    nn_model = create_nn_model(X_train.shape[1])
    print("Created new model.")

# Continue training the model
nn_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1, verbose=1)

loss, accuracy = nn_model.evaluate(X_test, y_test)
print(f"Neural Network Model Accuracy: {accuracy}")

# Save Neural Network Model
nn_model.save(model_path)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Neural Network Model Accuracy: 0.5275668501853943


  saving_api.save_model(


In [1]:
# Backtesting Strategy with Balance Check, Stop Loss, and Target Profit
def backtest_strategy(model, data, initial_balance=100000, stop_loss_pct=0.02, target_profit_pct=0.05):
    balance = initial_balance
    position = None
    buy_price = 0
    trade_log = []

    for index, row in data.iterrows():
        if position is None:
            features = row[['SMA_50', 'SMA_200', 'RSI', 'Bollinger_Upper', 'Bollinger_Lower', 'MACD', 'MACD_Signal', 'Stochastic_%K', 'Stochastic_%D', 'Momentum', 'VWAP']].values.reshape(1, -1)
            prediction = model.predict(features)[0]

            if prediction == 1:
                buy_price = row['close']
                position = 'long'
                trade_log.append((index, 'buy', buy_price))
                print(f"Buying at {buy_price}")

        elif position == 'long':
            current_price = row['close']
            if current_price <= buy_price * (1 - stop_loss_pct):
                balance -= (buy_price - current_price)
                position = None
                trade_log.append((index, 'stop_loss', current_price))
                print(f"Stop loss at {current_price}")
            elif current_price >= buy_price * (1 + target_profit_pct):
                balance += (current_price - buy_price)
                position = None
                trade_log.append((index, 'target_profit', current_price))
                print(f"Target profit at {current_price}")

    return balance, trade_log


In [3]:

# Fetch historical data for backtest

# Authentication and data fetching
import pandas as pd
from kiteconnect import KiteConnect
import datetime as dt
import time
import numpy as np

api_key = 'klz728yv89qrljzs'
api_secret = '4vhxunujbp17i8da0y1tiy7ayde4h5o8'
kite = KiteConnect(api_key=api_key)
print("Login URL:", kite.login_url())
request_token = input("Enter request token: ")
data = kite.generate_session(request_token, api_secret=api_secret)
access_token = data["access_token"]
kite.set_access_token(access_token)

# Fetch historical data
def fetch_historical_data(kite, instrument_token, start_date, end_date, interval, csv_filename):
    delta = dt.timedelta(days=60)
    current_date = start_date
    all_data = []

    while current_date < end_date:
        to_date = min(current_date + delta, end_date)
        data = kite.historical_data(instrument_token, current_date, to_date, interval)
        all_data.extend(data)
        current_date = to_date + dt.timedelta(days=1)
        time.sleep(1)  # Avoid hitting API rate limits

    df = pd.DataFrame(all_data)
    df.to_csv(csv_filename, mode='w', index=False, header=True)
    return df

instrument_token = '738561' #INFY
start_date = dt.datetime(2017, 1, 1)
end_date = dt.datetime(2023, 12, 31)
interval = 'minute'
csv_filename = 'historical_data.csv'

df_backtest = fetch_historical_data(kite, instrument_token, start_date, end_date, interval, 'backtest_data.csv')
df_backtest = add_features(df_backtest)


NameError: name 'fetch_historical_data' is not defined

In [None]:
# Perform Backtesting with Random Forest Model
final_balance, trade_log = backtest_strategy(rf, df_backtest)
print(f"Final Balance (Random Forest): {final_balance}")
print(trade_log)



In [None]:
# Perform Backtesting with Neural Network Model
final_balance, trade_log = backtest_strategy(nn_model, df_backtest)
print(f"Final Balance (Neural Network): {final_balance}")
print(trade_log)