# Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# Load the dataset

In [2]:
df = pd.read_excel("dataset/yahoo_data.xlsx", engine="openpyxl")

# Feature Engineering

In [3]:
# Build a new feature: Simple Moving Average (SMA) 
df['SMA_10'] = df['Close*'].rolling(window=10).mean()
df['SMA_50'] = df['Close*'].rolling(window=50).mean()
df.dropna(inplace=True)
df['Target'] = df['Close*'].shift(-1)
df.dropna(inplace=True)

# Define Features and Target

In [4]:
X = df[['Open', 'High', 'Low', 'Volume', 'SMA_10', 'SMA_50']]
y = df['Target'].values.reshape(-1, 1)

# Feature Scaling

In [5]:
# Scale features
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

# Scale target variable
scaler_y = StandardScaler()
y_scaled = scaler_y.fit_transform(y)

In [6]:
# Split Data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# Model

In [7]:
# Initialize Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model

In [8]:
# Train Model
model.fit(X_train, y_train)

  return fit_method(estimator, *args, **kwargs)


In [9]:
joblib.dump(model, 'stock_model.pkl')

['stock_model.pkl']

# Predicting the Test set results

In [10]:
# Predict on Test Data
y_pred = model.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[ 0.19  0.18]
 [-0.03 -0.03]
 [ 1.56  1.58]
 [ 0.69  0.67]
 [-0.71 -0.69]
 [ 1.08  1.08]
 [ 1.13  1.15]
 [ 1.12  1.12]
 [-0.03 -0.04]
 [-1.02 -1.02]
 [-1.48 -1.49]
 [ 1.27  1.27]
 [ 0.43  0.39]
 [ 1.49  1.48]
 [-0.84 -0.79]
 [ 0.5   0.51]
 [ 1.4   1.39]
 [ 1.37  1.36]
 [ 1.12  1.14]
 [ 0.12  0.1 ]
 [ 1.06  1.07]
 [-1.6  -1.57]
 [-1.08 -1.09]
 [-0.86 -0.9 ]
 [-2.14 -2.3 ]
 [ 1.4   1.36]
 [ 0.9   0.93]
 [ 1.32  1.33]
 [-0.3  -0.31]
 [ 1.66  1.65]
 [-0.97 -0.92]
 [-1.9  -1.94]
 [-0.74 -0.73]
 [ 1.52  1.53]
 [-0.56 -0.54]
 [-1.14 -1.15]
 [ 0.08  0.11]
 [ 1.21  1.13]
 [-0.43 -0.42]
 [-0.72 -0.72]
 [-1.28 -1.27]
 [ 1.4   1.4 ]
 [ 0.29  0.31]
 [ 0.33  0.33]
 [-1.41 -1.44]
 [ 0.68  0.59]
 [-1.15 -1.18]
 [ 0.68  0.63]
 [ 1.05  1.05]
 [ 1.26  1.24]
 [ 0.21  0.3 ]
 [-0.78 -0.77]
 [-1.38 -1.39]
 [-0.91 -0.93]
 [-1.27 -1.32]
 [ 0.43  0.43]
 [ 1.15  1.18]
 [-0.72 -0.72]
 [-0.22 -0.24]
 [ 1.04  1.06]
 [ 1.14  1.16]
 [ 1.07  1.06]
 [-0.68 -0.67]
 [ 1.72  1.7 ]
 [ 1.5   1.5 ]
 [-0.75 -0.92]
 [-1.15 -1

# Model Evaluation

In [11]:
# Evaluate Performance
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R² Score: {r2}")


Mean Absolute Error (MAE): 0.027704412196790994
Mean Squared Error (MSE): 0.002611220171480381
R² Score: 0.9975883427146481
