In [1]:
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib

os.makedirs("models", exist_ok=True)

def load_processed_data(data_folder="processed_data"):
    """Loads all processed stock data into a dictionary of DataFrames."""
    stock_data = {}
    for file in os.listdir(data_folder):
        if file.endswith("_processed.csv"):
            stock_name = file.replace("_processed.csv", "")
            df = pd.read_csv(os.path.join(data_folder, file), index_col="Date", parse_dates=True)
            stock_data[stock_name] = df
    return stock_data

def prepare_data(df):
    """Prepares features and target variables for training."""
    df = df.dropna()  # Remove missing values
    features = df[["Close", "SMA_50", "SMA_200", "EMA_20", "EMA_50", "RSI_14", "MACD", "Bollinger_Upper", "Bollinger_Lower"]]
    target = df["Close"].shift(-1)  # Predict next day's close price
    features, target = features.iloc[:-1], target.dropna()  # Align indices
    return train_test_split(features, target, test_size=0.2, random_state=42)

def train_random_forest(X_train, y_train):
    """Trains a Random Forest model for stock prediction."""
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    return model

def evaluate_model(model, X_test, y_test):
    """Evaluates the trained model and prints metrics."""
    predictions = model.predict(X_test)
    mae = mean_absolute_error(y_test, predictions)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    print(f"MAE: {mae:.2f}, RMSE: {rmse:.2f}")

def main():
    stock_data = load_processed_data()
    for stock, df in stock_data.items():
        print(f"Training model for {stock}...")
        X_train, X_test, y_train, y_test = prepare_data(df)
        model = train_random_forest(X_train, y_train)
        evaluate_model(model, X_test, y_test)
        joblib.dump(model, f"models/{stock}_rf_model.pkl")  
        print(f"Model saved for {stock}.")
    print("All models trained and saved successfully!")

if __name__ == "__main__":
    main()


Training model for AZN.L...
MAE: 113.39, RMSE: 149.93
Model saved for AZN.L.
Training model for BATS.L...
MAE: 21.83, RMSE: 30.14
Model saved for BATS.L.
Training model for BP.L...
MAE: 4.92, RMSE: 6.47
Model saved for BP.L.
Training model for GSK.L...
MAE: 14.58, RMSE: 20.30
Model saved for GSK.L.
Training model for HSBA.L...
MAE: 4.99, RMSE: 6.86
Model saved for HSBA.L.
Training model for LSEG.L...
MAE: 83.17, RMSE: 109.06
Model saved for LSEG.L.
Training model for REL.L...
MAE: 23.16, RMSE: 31.19
Model saved for REL.L.
Training model for RIO.L...
MAE: 70.26, RMSE: 90.35
Model saved for RIO.L.
Training model for SHEL.L...
MAE: 23.23, RMSE: 30.46
Model saved for SHEL.L.
Training model for ULVR.L...
MAE: 32.99, RMSE: 48.96
Model saved for ULVR.L.
All models trained and saved successfully!
