In [5]:
!pip install -r requirements.txt

import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor  # Import Random Forest Regressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import plotly.graph_objs as go
from sklearn.ensemble import RandomForestRegressor
from joblib import dump

# Define stock data retrieval parameters
start_date = '2000-01-01'
end_date = '2024-02-01'
stock_symbol = 'MSFT'

# Download stock data
data = yf.download(stock_symbol, start=start_date, end=end_date)

# Reset index and drop missing values
data.reset_index(inplace=True)
data.dropna(inplace=True)

# Calculate moving averages (keep for potential features)
data['MA100'] = data['Close'].rolling(100).mean()
data['MA200'] = data['Close'].rolling(200).mean()

# Assuming the feature is Close price and target is future Close price
# For simplicity, we're predicting the next day's close price based on the current day's close price
# This is a simplification and might not lead to accurate predictions

# Split data into train and test sets
train_size = int(len(data) * 0.8)
train_data = data[:train_size]
test_data = data[train_size:]

# Scale data
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data[['Close']])

def create_dataset_rf(data, look_back=1):
    X, Y = [], []
    for i in range(len(data) - look_back-1):  # Adjust for -1 to account for predicting the next value
        X.append(data[i:(i + look_back), 0])
        Y.append(data[i + look_back, 0])  # Predicting the next value
    return np.array(X), np.array(Y)

# Adjusting the create_dataset function for Random Forest
x_train, y_train = create_dataset_rf(data_scaled[:train_size])
x_test, y_test = create_dataset_rf(data_scaled[train_size:])

# Initialize and train the Random Forest Regressor
model_rf = RandomForestRegressor(n_estimators=100, random_state=42)
model_rf.fit(x_train, y_train)

dump(model_rf, 'randomforest_model.pkl')

# Predictions
y_pred_train = model_rf.predict(x_train)
y_pred_test = model_rf.predict(x_test)

# Inverse transform the predictions and actual values for evaluation
y_pred_test_inv = scaler.inverse_transform(y_pred_test.reshape(-1, 1))
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))

# Calculate evaluation metrics
mae = mean_absolute_error(y_test_inv, y_pred_test_inv)
mse = mean_squared_error(y_test_inv, y_pred_test_inv)
print("Random Forest Mean Absolute Error (MAE):", mae)
print("Random Forest Mean Squared Error (MSE):", mse)

# Plotting original vs predicted prices for test set
fig4 = go.Figure()
fig4.add_trace(go.Scatter(x=np.arange(len(y_test)), y=y_test_inv.flatten(), mode='lines', name='Original Price'))
fig4.add_trace(go.Scatter(x=np.arange(len(y_test)), y=y_pred_test_inv.flatten(), mode='lines', name='Predicted Price'))
fig4.update_layout(title='Random Forest - Original vs Predicted Prices',
                   xaxis_title='Time',
                   yaxis_title='Price')
fig4.show()




[*********************100%%**********************]  1 of 1 completed


Random Forest Mean Absolute Error (MAE): 127.8364718494259
Random Forest Mean Squared Error (MSE): 21187.519175981306
