# Baseline Model with Real Stock Data

In [4]:
# Import libraries for data acquisition, manipulation, and modeling
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Download historical stock data for Apple Inc.
ticker = "AAPL"
data = yf.download(ticker, start="2020-01-01", end="2025-01-01")

# Check the available columns in the data
print("Columns available:", data.columns)

# Use 'Adj Close' if available, otherwise fall back to 'Close'
price_column = 'Adj Close' if 'Adj Close' in data.columns else 'Close'

# Feature Engineering: Create technical indicators
data['Return'] = data[price_column].pct_change()
data['MA20'] = data[price_column].rolling(window=20).mean()       # 20-day moving average
data['Volatility'] = data['Return'].rolling(window=20).std()      # 20-day volatility
data.dropna(inplace=True)

# Define market trend: bullish if daily return > 0, bearish otherwise
data['Trend'] = np.where(data['Return'] > 0, 'bullish', 'bearish')

# Features and target variable
features = ['MA20', 'Volatility']
X = data[features]
y = data['Trend']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Initialize the baseline Random Forest model
baseline_rf = RandomForestClassifier(n_estimators=100, random_state=42)
baseline_rf.fit(X_train, y_train)

# Make predictions and evaluate the baseline model
predictions = baseline_rf.predict(X_test)
print("Baseline Model Accuracy:", accuracy_score(y_test, predictions))
print("Baseline Classification Report:\n", classification_report(y_test, predictions))


[*********************100%***********************]  1 of 1 completed


Columns available: MultiIndex([( 'Close', 'AAPL'),
            (  'High', 'AAPL'),
            (   'Low', 'AAPL'),
            (  'Open', 'AAPL'),
            ('Volume', 'AAPL')],
           names=['Price', 'Ticker'])
Baseline Model Accuracy: 0.44516129032258067
Baseline Classification Report:
               precision    recall  f1-score   support

     bearish       0.39      0.38      0.39       141
     bullish       0.49      0.50      0.49       169

    accuracy                           0.45       310
   macro avg       0.44      0.44      0.44       310
weighted avg       0.44      0.45      0.44       310



#  Hyperparameter Tuning with GridSearchCV

In [6]:
# Import GridSearchCV for hyperparameter tuning
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer

# Define a grid of hyperparameters to search
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Setup GridSearchCV with cross-validation
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring=make_scorer(accuracy_score),
    n_jobs=-1  # Utilize all available cores
)

# Execute grid search on the training data
grid_search.fit(X_train, y_train)

# Retrieve and display the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Evaluate the tuned model on the test set
best_rf = grid_search.best_estimator_
predictions_advanced = best_rf.predict(X_test)
accuracy_advanced = accuracy_score(y_test, predictions_advanced)
print("Advanced Model Accuracy:", accuracy_advanced)
print("Advanced Classification Report:\n", classification_report(y_test, predictions_advanced))


Best Hyperparameters: {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 100}
Advanced Model Accuracy: 0.45806451612903226
Advanced Classification Report:
               precision    recall  f1-score   support

     bearish       0.40      0.37      0.38       141
     bullish       0.50      0.53      0.52       169

    accuracy                           0.46       310
   macro avg       0.45      0.45      0.45       310
weighted avg       0.45      0.46      0.46       310

