# Basic Random Forest for Market Trend Prediction

In [4]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Download data
ticker = "AAPL"
data = yf.download(ticker, start="2020-01-01", end="2022-01-01")

# Use 'Adj Close' if available, else fallback to 'Close'
price_col = 'Adj Close' if 'Adj Close' in data.columns else 'Close'

# Check if your chosen price_col exists to be safe
if price_col not in data.columns:
    raise ValueError(f"{price_col} is not in dataframe columns {data.columns}")

# Feature engineering
data['Return'] = data[price_col].pct_change()
data['MA20'] = data[price_col].rolling(window=20).mean()
data['Volatility'] = data['Return'].rolling(window=20).std()

data.dropna(inplace=True)

# Define trend labels
data['Trend'] = np.where(data['Return'] > 0, 'bullish', 'bearish')

# Features and labels
features = ['MA20', 'Volatility']
X = data[features]
y = data['Trend']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Random forest classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predictions and evaluation
predictions = rf.predict(X_test)
print("Basic Random Forest Accuracy:", accuracy_score(y_test, predictions))
print("Classification Report:\n", classification_report(y_test, predictions))


[*********************100%***********************]  1 of 1 completed


Basic Random Forest Accuracy: 0.4672131147540984
Classification Report:
               precision    recall  f1-score   support

     bearish       0.49      0.42      0.45        64
     bullish       0.45      0.52      0.48        58

    accuracy                           0.47       122
   macro avg       0.47      0.47      0.47       122
weighted avg       0.47      0.47      0.47       122



# Advanced Random Forest Techniques

In [6]:
# Import additional libraries for advanced tuning
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer

# Define parameter grid for hyperparameter tuning
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Setup GridSearchCV with accuracy as the scoring metric
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring=make_scorer(accuracy_score)
)
grid_search.fit(X_train, y_train)

# Retrieve the best estimator
best_rf = grid_search.best_estimator_
print("Best Hyperparameters:", grid_search.best_params_)

# Evaluate the tuned model
predictions_advanced = best_rf.predict(X_test)
accuracy_advanced = accuracy_score(y_test, predictions_advanced)
print("Advanced Random Forest Accuracy:", accuracy_advanced)
print("Advanced Classification Report:\n", classification_report(y_test, predictions_advanced))

# Analyze feature importance
feature_importances = pd.Series(best_rf.feature_importances_, index=features)
print("Feature Importances:\n", feature_importances.sort_values(ascending=False))


Best Hyperparameters: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 100}
Advanced Random Forest Accuracy: 0.4426229508196721
Advanced Classification Report:
               precision    recall  f1-score   support

     bearish       0.46      0.34      0.39        64
     bullish       0.43      0.55      0.48        58

    accuracy                           0.44       122
   macro avg       0.45      0.45      0.44       122
weighted avg       0.45      0.44      0.44       122

Feature Importances:
 Volatility    0.521065
MA20          0.478935
dtype: float64
