In [1]:
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, roc_auc_score
import numpy as np

# Load data
data = yf.download("AAPL", start="2010-01-01", end="2020-12-31")

# Feature Engineering
data['MA10'] = data['Close'].rolling(window=10).mean()
data['MA50'] = data['Close'].rolling(window=50).mean()
data['Volume_MA10'] = data['Volume'].rolling(window=10).mean()
data['Daily_Change'] = data['Close'].pct_change()
data['Daily_Change_MA10'] = data['Daily_Change'].rolling(window=10).mean()

# Target variable
data['Target'] = (data['Close'].shift(-1) > data['Close']).astype(int)

# Drop NaN values
data = data.dropna()

# Define predictors and target
predictors = ['Close', 'MA10', 'MA50', 'Volume', 'Volume_MA10', 'Daily_Change', 'Daily_Change_MA10']
X = data[predictors]
y = data['Target']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model initialization and training
model = RandomForestClassifier(n_estimators=100, min_samples_split=50, random_state=42)
model.fit(X_train, y_train)

# Predictions and evaluations
predictions = model.predict(X_test)
proba_predictions = model.predict_proba(X_test)[:, 1]

# Performance metrics
print(classification_report(y_test, predictions))
print(f"ROC AUC Score: {roc_auc_score(y_test, proba_predictions)}")

# Cross-validation
cv_scores = cross_val_score(model, X, y, cv=5, scoring='roc_auc')
print(f"Average ROC AUC Score from CV: {np.mean(cv_scores)}")


[*********************100%%**********************]  1 of 1 completed


              precision    recall  f1-score   support

           0       0.48      0.37      0.42       257
           1       0.53      0.64      0.58       287

    accuracy                           0.51       544
   macro avg       0.51      0.51      0.50       544
weighted avg       0.51      0.51      0.51       544

ROC AUC Score: 0.5152591548150055
Average ROC AUC Score from CV: 0.4523231372994642
