In [9]:
import yfinance as yf
import pandas as pd
import os

# Fetch the complete available price history (period="max") for the
# "HDFCBANK.NS" ticker. The frame is called `sp500` only because the later
# cells refer to it by that name.
sp500 = yf.Ticker("HDFCBANK.NS").history(period="max")

# "Tomorrow" is the close of the following row; "Target" is 1 when that next
# close is strictly higher than the current close and 0 otherwise. The final
# row has no following close (NaN), so its comparison is False and Target is 0.
sp500["Tomorrow"] = sp500["Close"].shift(-1)
sp500["Target"] = (sp500["Close"] < sp500["Tomorrow"]).astype(int)

In [10]:
# Show the frame so the raw price columns can be inspected next to the
# derived "Tomorrow" and "Target" columns.
sp500

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Tomorrow,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1996-01-01 00:00:00+05:30,2.370141,2.370141,2.288007,2.331029,350000,0.0,0.0,2.327118,0
1996-01-02 00:00:00+05:30,2.331029,2.366230,2.307563,2.327118,412000,0.0,0.0,2.334940,1
1996-01-03 00:00:00+05:30,2.327118,2.342762,2.307562,2.334940,284000,0.0,0.0,2.319295,0
1996-01-04 00:00:00+05:30,2.334940,2.331029,2.299740,2.319295,282000,0.0,0.0,2.315384,0
1996-01-05 00:00:00+05:30,2.319295,2.331029,2.307562,2.315384,189000,0.0,0.0,2.288007,0
...,...,...,...,...,...,...,...,...,...
2024-05-06 00:00:00+05:30,1501.496428,1513.828034,1496.711788,1502.137695,14396698,0.0,0.0,1485.859985,0
2024-05-07 00:00:00+05:30,1501.447064,1503.518870,1483.886928,1485.859985,14240301,0.0,0.0,1462.676514,0
2024-05-08 00:00:00+05:30,1475.797271,1476.783799,1460.407450,1462.676514,20843931,0.0,0.0,1428.000000,0
2024-05-09 00:00:00+05:30,1455.080159,1466.721244,1425.829685,1428.000000,23140639,0.0,0.0,1437.900024,1


In [11]:
# Remove the corporate-action columns; they are not used by the later
# feature-engineering and modelling cells.
# errors="ignore" keeps this cell safe to re-run: once the columns are gone,
# executing it again is a no-op instead of raising KeyError.
cols = ["Dividends", "Stock Splits"]
sp500 = sp500.drop(columns=cols, errors="ignore")

# Make sure the index is datetime-typed.
sp500.index = pd.to_datetime(sp500.index)

In [ ]:

# This is where the prediction starts.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score

def backtest(data, model, predictors, start=500, step=250):
    """Evaluate ``model`` with an expanding-window walk-forward backtest.

    For every fold boundary ``i`` in ``range(start, len(data), step)`` the
    model is trained on rows ``[0, i)`` and evaluated on rows ``[i, i + step)``
    by calling ``predict(train, test, predictors, model)``. The per-fold
    results are concatenated in fold order.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows in chronological order; sliced positionally with ``iloc``.
    model : object
        Estimator passed through unchanged to ``predict``.
    predictors : list
        Feature column names passed through unchanged to ``predict``.
    start : int, default 500
        Number of leading rows reserved for the first training window.
    step : int, default 250
        Number of rows in each test fold (the last fold may be shorter).

    Returns
    -------
    pandas.DataFrame
        The concatenation of every fold's ``predict`` output. An empty
        DataFrame is returned when no fold could be evaluated (for example
        when ``data`` has no more than ``start`` rows).
    """
    all_predictions = []
    for i in range(start, data.shape[0], step):
        train = data.iloc[0:i].copy()
        test = data.iloc[i:(i + step)].copy()
        # A fold with an empty training window (start == 0) cannot be fitted.
        if train.shape[0] > 0 and test.shape[0] > 0:
            all_predictions.append(predict(train, test, predictors, model))

    # The return must sit after the loop: returning from inside it would stop
    # the backtest after the very first fold.
    if not all_predictions:
        # pd.concat raises on an empty list, so report "no folds" explicitly.
        return pd.DataFrame()
    return pd.concat(all_predictions)

# Rolling-window features. For each horizon h (a number of rows):
#   Close_Ratio_<h>: the current close divided by the mean close over the last
#                    h rows (the window includes the current row);
#   Trend_<h>:       the sum of Target over the h rows before the current one
#                    (shifted by one row so the current row's own label is
#                    never part of its feature).
horizons = [2, 5, 60, 250]
new_predictors = []

for horizon in horizons:
    # Only "Close" is needed here, so roll over that Series instead of the
    # whole frame (which would also roll over the feature columns added by
    # earlier iterations and discard the result).
    rolling_close_mean = sp500["Close"].rolling(horizon).mean()

    ratio_column = f"Close_Ratio_{horizon}"
    sp500[ratio_column] = sp500["Close"] / rolling_close_mean

    trend_column = f"Trend_{horizon}"
    sp500[trend_column] = sp500["Target"].shift(1).rolling(horizon).sum()

    new_predictors += [ratio_column, trend_column]

# Drop the warm-up rows whose rolling windows are still incomplete. "Tomorrow"
# is excluded from the check so that a missing next-day close alone does not
# remove a row.
sp500 = sp500.dropna(
    subset=[column for column in sp500.columns if column != "Tomorrow"]
)

# random_state is fixed so repeated runs build the same forest.
model = RandomForestClassifier(n_estimators=200, min_samples_split=50, random_state=1)


def predict(train, test, predictors, model, threshold=0.6):
    """Fit ``model`` on ``train`` and return thresholded predictions for ``test``.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Must contain every column in ``predictors`` plus a "Target" column.
    predictors : list
        Names of the feature columns fed to the model.
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict_proba(X)``; column 1 of
        the ``predict_proba`` output is used as the positive-class score.
        Note that ``model`` is fitted in place.
    threshold : float, default 0.6
        Minimum positive-class probability required to predict 1. The default
        reproduces the previously hard-coded cut-off.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``test`` with two columns: "Target" (copied from
        ``test``) and "Predictions" (1.0 where the probability is at least
        ``threshold``, otherwise 0.0).
    """
    model.fit(train[predictors], train["Target"])
    positive_proba = model.predict_proba(test[predictors])[:, 1]
    # One vectorised comparison instead of two in-place masked assignments;
    # the float dtype of the original output is kept.
    labels = (positive_proba >= threshold).astype(float)
    preds = pd.Series(labels, index=test.index, name="Predictions")
    return pd.concat([test["Target"], preds], axis=1)


# Run the walk-forward backtest on the engineered features.
predictions = backtest(sp500, model, new_predictors)

# Report how many 0 / 1 calls were made. This is printed explicitly because a
# bare expression in the middle of a cell is evaluated but never displayed.
print(predictions["Predictions"].value_counts())

# Precision: of the rows predicted as 1, the fraction whose Target is 1.
print(precision_score(predictions["Target"], predictions["Predictions"]))

