In [154]:
import yfinance as yf
import pandas as pd
import ta
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
import lightgbm as lgb


In [155]:
def fetching_data(ticker, period='1y',interval='1d'):
    stocks=yf.Ticker(ticker)
    history=stocks.history(period=period,interval=interval)
    if history.empty:
        print(f"[WARN] No data found for {ticker}")
        return pd.DataFrame()

    if 'Close' not in history.columns or 'Volume' not in history.columns:
        print(f"[WARN] Missing Close/Volume for {ticker}")
        return pd.DataFrame()
    
    history.dropna(subset=['Close', 'Volume'], inplace=True)
    return history


stocks_to_watch= ['RELIANCE.NS','TCS.NS','INFY.NS']
datas={ticker:fetching_data(ticker) for ticker in stocks_to_watch}

In [156]:
print(datas['RELIANCE.NS'].head())

                                  Open         High          Low        Close  \
Date                                                                            
2024-08-14 00:00:00+05:30  1452.797202  1461.185070  1443.019474  1451.109619   
2024-08-16 00:00:00+05:30  1457.859836  1469.523509  1447.337739  1467.339600   
2024-08-19 00:00:00+05:30  1476.602572  1492.339768  1474.610528  1482.479126   
2024-08-20 00:00:00+05:30  1491.268976  1497.942396  1484.097666  1489.999023   
2024-08-21 00:00:00+05:30  1487.309789  1501.876610  1484.072718  1492.713257   

                             Volume  Dividends  Stock Splits  
Date                                                          
2024-08-14 00:00:00+05:30   6267466        0.0           0.0  
2024-08-16 00:00:00+05:30   9416902        0.0           0.0  
2024-08-19 00:00:00+05:30  13797742        5.0           0.0  
2024-08-20 00:00:00+05:30   8411808        0.0           0.0  
2024-08-21 00:00:00+05:30   7769996        0.0        

In [157]:
def adding_indicators(history):

    history=history.copy()

    history['RSI']= ta.momentum.RSIIndicator(history['Close'], window=50).rsi()
    history['50DAY_MA']=history['Close'].rolling(window=50).mean()
    history['200DAY_MA']=history['Close'].rolling(window=200).mean()
    history['MA_Crossover_Signal'] = history['50DAY_MA'] > history['200DAY_MA']
    history.dropna(inplace=True)

    return history

for ticker in stocks_to_watch:
    datas[ticker]=adding_indicators(datas[ticker])
    


In [158]:
print(datas['RELIANCE.NS'].head(10))


                                  Open         High          Low        Close  \
Date                                                                            
2025-06-04 00:00:00+05:30  1409.072238  1420.028480  1400.506474  1417.837280   
2025-06-05 00:00:00+05:30  1422.319369  1448.913107  1417.936848  1436.662109   
2025-06-06 00:00:00+05:30  1435.267636  1445.725866  1431.283548  1437.757690   
2025-06-09 00:00:00+05:30  1451.204075  1451.702086  1439.550666  1443.036743   
2025-06-10 00:00:00+05:30  1444.231840  1448.116350  1430.287533  1432.777588   
2025-06-11 00:00:00+05:30  1440.048587  1462.757814  1437.359255  1443.136230   
2025-06-12 00:00:00+05:30  1447.717920  1451.203997  1428.693925  1435.865234   
2025-06-13 00:00:00+05:30  1418.335338  1429.789592  1408.673974  1422.219849   
2025-06-16 00:00:00+05:30  1421.223780  1436.662121  1418.733726  1432.080444   
2025-06-17 00:00:00+05:30  1442.239809  1442.239809  1420.128170  1425.506592   

                           

In [159]:
def preparing_ml_model_prediction(history):
     
    history=history.copy()

    history['MACD']= ta.trend.MACD(history['Close']).macd()
    history['RSI']= ta.momentum.RSIIndicator(history['Close'], window=14).rsi() 
    history['OBV']=ta.volume.OnBalanceVolumeIndicator(history['Close'],history['Volume']).on_balance_volume()
    history['TARGET']=(history['Close'].shift(-1) > history['Close']).astype(int)

    history.dropna(subset=['RSI','MACD','OBV','TARGET'],inplace=True)
    
    indicators= ['RSI','MACD','OBV']
    x= history[indicators]
    y= history['TARGET']
    return train_test_split(x,y,test_size=0.2,random_state=42,shuffle=False)

X_train, X_test, y_train, y_test=preparing_ml_model_prediction(datas['TCS.NS'])

model=DecisionTreeClassifier()
model.fit(X_train,y_train)
accuracy= accuracy_score(y_test,model.predict(X_test))
print(f"Prediction Accuracy: {accuracy:.2f}")

Prediction Accuracy: 0.67


In [160]:
def preparing_ml_model_prediction(history):

    history = history.dropna(subset=['Close', 'Volume'])

    history['MACD']= ta.trend.MACD(history['Close']).macd()
    history['RSI']= ta.momentum.RSIIndicator(history['Close'], window=14).rsi() 
    history['OBV']=ta.volume.OnBalanceVolumeIndicator(history['Close'],history['Volume']).on_balance_volume()
    history['TARGET']=(history['Close'].shift(-1) > history['Close']).astype(int)
    
    history.dropna(inplace=True)
    
    indicators= ['RSI','MACD','OBV']
    x= history[indicators]
    y= history['TARGET']
    return train_test_split(x,y,test_size=0.2,random_state=42,shuffle=False)

X_train, X_test, y_train, y_test=preparing_ml_model_prediction(datas['TCS.NS'])

model=LogisticRegression()
model.fit(X_train,y_train)

accuracy= accuracy_score(y_test,model.predict(X_test))
print(f"Prediction Accuracy: {accuracy:.2f}")

Prediction Accuracy: 0.67


In [161]:
def preparing_ml_model_prediction(history):

    history = history.dropna(subset=['Close', 'Volume'])

    history['MACD'] = ta.trend.MACD(history['Close']).macd()
    history['RSI'] = ta.momentum.RSIIndicator(history['Close'], window=14).rsi() 
    history['OBV'] = ta.volume.OnBalanceVolumeIndicator(history['Close'], history['Volume']).on_balance_volume()
    history['TARGET'] = (history['Close'].shift(-1) > history['Close']).astype(int)

    history.dropna(inplace=True)
    
    indicators = ['RSI', 'MACD', 'OBV']
    X = history[indicators]
    y = history['TARGET']

    return train_test_split(X, y, test_size=0.2, random_state=42,shuffle=False)

# Use the function and model
X_train, X_test, y_train, y_test = preparing_ml_model_prediction(datas['RELIANCE.NS'])

model = LogisticRegression()
model.fit(X_train, y_train)
accuracy = accuracy_score(y_test, model.predict(X_test))

print(f"Prediction Accuracy: {accuracy:.2f}")


Prediction Accuracy: 0.83


In [162]:
def preparing_ml_model_prediction(history):

    history = history.dropna(subset=['Close', 'Volume'])

    history['MACD'] = ta.trend.MACD(history['Close']).macd()
    history['RSI'] = ta.momentum.RSIIndicator(history['Close'], window=14).rsi() 
    history['OBV'] = ta.volume.OnBalanceVolumeIndicator(history['Close'], history['Volume']).on_balance_volume()
    history['TARGET'] = (history['Close'].shift(-1) > history['Close']).astype(int)

    history.dropna(inplace=True)
    
    indicators = ['RSI', 'MACD', 'OBV']
    X = history[indicators]
    y = history['TARGET']

    return train_test_split(X, y, test_size=0.2, random_state=42,shuffle=False)


model_params={
    "DecisionTreeClassifier": {
        'model': DecisionTreeClassifier(),
        'params':{
            'max_depth': [3, 5, 7, None],
            'min_samples_split': [2, 5]
        }
    },
    "LogisticRegression": {
        'model':LogisticRegression(solver='liblinear'),
        'params': {
            'C': [0.01, 0.1, 1, 10],      
            'penalty': ['l1', 'l2'],     
            'max_iter': [100, 200, 500] 
        }
    },
    "RandomForestClassifier": {
        'model':RandomForestClassifier(),
        'params': {
            'n_estimators': [50, 100],
            'max_depth': [5, 10, None]
        }
    },
    "Xgboost": {
        'model': xgb.XGBClassifier(),
        'params':{
            'n_estimators':[100],
            'learning_rate':[0.1],
            'max_depth':[3],
            'random_state':[42]
        }
    },
    "lightgbm": {
        'model': lgb.LGBMClassifier(),
        'params': {
            'n_estimators': [100],
            'learning_rate': [0.1],
            'max_depth': [3],
            'random_state': [42]
        }
    }
}

scores=[]

for stock, history in datas.items():
    X_train, X_test, y_train, y_test = preparing_ml_model_prediction(history)
     
    for model_name,mp in model_params.items():
        clf= GridSearchCV(mp['model'],mp['params'],cv=3,n_jobs=-1)
        clf.fit(X_train,y_train)

        scores.append({
        'stock': stock,
        'model': model_name,
        'best_cv_score': clf.best_score_,
        'best_params': clf.best_params_,         
        'test_accuracy': clf.score(X_test, y_test)
    })

results_df = pd.DataFrame(scores)
print(results_df)


[LightGBM] [Info] Number of positive: 7, number of negative: 14
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 21, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.333333 -> initscore=-0.693147
[LightGBM] [Info] Start training from score -0.693147
[LightGBM] [Info] Number of positive: 5, number of negative: 16
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 21, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.238095 -> initscore=-1.163151
[LightGBM] [Info] Start training from score -1.163151
[LightGBM] [Info] Number of positive: 8, number of negative: 13
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 21, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.380952 -> initscore=-0.485508
[LightGBM] [Info] Start training from score -0.485508
          stock                   model  best_

In [163]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
import xgboost as xgb
import lightgbm as lgb

# Example dataset (replace with your features)
data = pd.DataFrame({
    'rsi': [25, 40, 35, 50, 28, 70, 65, 20],
    'macd': [1.2, -0.5, 0.3, -0.2, 1.0, -1.0, -0.8, 1.5],
    'volume': [1000, 1500, 1200, 1300, 1100, 1700, 1600, 900],
    'target': [1, 0, 0, 0, 1, 0, 0, 1]  # 1=BUY, 0=NO BUY
})

X = data[['rsi', 'macd', 'volume']]
y = data['target']

# Split train-test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Models dictionary
models = {
    "Decision Tree": {
        'model': DecisionTreeClassifier(),
        'params': {
            'max_depth': [3, 5, None]
        }
    },
    "Logistic Regression": {
        'model': LogisticRegression(max_iter=500),
        'params': {
            'C': [0.1, 1, 10]
        }
    },
    "XGBoost": {
        'model': xgb.XGBClassifier(eval_metric='logloss'),
        'params': {
            'n_estimators': [100],
            'learning_rate': [0.1],
            'max_depth': [3],
            'random_state': [42]
        }
    },
    "LightGBM": {
        'model': lgb.LGBMClassifier(),
        'params': {
            'n_estimators': [100],
            'learning_rate': [0.1],
            'max_depth': [3],
            'random_state': [42]
        }
    }
}

best_model = None
best_score = 0

# Loop through models
for name, mp in models.items():
    grid = GridSearchCV(mp['model'], mp['params'], cv=3, scoring='accuracy')
    grid.fit(X_train, y_train)
    
    score = accuracy_score(y_test, grid.predict(X_test))
    print(f"{name} Accuracy: {score:.4f} | Best Params: {grid.best_params_}")
    
    if score > best_score:
        best_score = score
        best_model = grid.best_estimator_

# Final model prediction for tomorrow
latest_features = [[28, 1.0, 1100]]  # RSI, MACD, Volume
prediction = best_model.predict(latest_features)[0]
print("\nBest Model:", best_model)
print("Prediction for tomorrow:", "BUY" if prediction == 1 else "NO BUY")


Decision Tree Accuracy: 1.0000 | Best Params: {'max_depth': 3}
Logistic Regression Accuracy: 1.0000 | Best Params: {'C': 0.1}
XGBoost Accuracy: 1.0000 | Best Params: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'random_state': 42}
[LightGBM] [Info] Number of positive: 2, number of negative: 2
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 4, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 2, number of negative: 2
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 4, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Info] Number of positive: 2, number of negative: 2
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 4, number of used features: 0
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.

