In [2]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import yfinance as yf

In [10]:
# Download daily AAPL prices. auto_adjust=False keeps the separate 'Adj Close'
# column that the log-return calculation in the next cell reads.
data = yf.download('AAPL', start='2020-01-01', end='2023-01-01', auto_adjust=False)

# Some yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker. Flatten to plain field names so data['Adj Close'] is a Series
# regardless of the installed version.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# A failed download can come back as an empty frame instead of raising; stop
# here rather than letting later cells fail with a less obvious error.
if data.empty:
    raise RuntimeError(
        "yfinance returned no rows for AAPL; check network access and the date range."
    )

[*********************100%***********************]  1 of 1 completed


In [None]:
# Daily log return: ln(P_t / P_{t-1}) on the adjusted close.
data['log_return'] = np.log(data['Adj Close'].div(data['Adj Close'].shift(1)))

# Rolling 21-row standard deviation of log returns, scaled by sqrt(252).
data['volatility'] = data['log_return'].rolling(21).std().mul(np.sqrt(252))

# Drop every row that contains a NaN (this includes the warm-up rows of the
# shift and the rolling window).
data = data.dropna(axis=0, how='any')

In [None]:
# Lagged copies (1 and 2 rows back) of the return and volatility series.
# Column order is return_lag1, return_lag2, vol_lag1, vol_lag2.
for source_col, prefix in (('log_return', 'return'), ('volatility', 'vol')):
    for lag in (1, 2):
        data[f'{prefix}_lag{lag}'] = data[source_col].shift(lag)

# The shifts leave NaNs in the first rows; remove any row containing a NaN.
data = data.dropna(axis=0, how='any')

In [None]:
# Build the feature matrix, target and train/test split, then fit the forest.
# NOTE(review): no earlier cell defines X, y or the split, so they are created
# here. The feature/target choice (lagged returns and volatilities predicting
# 'volatility') is assumed from the lag columns built in the previous cell —
# confirm it matches the intended setup.
feature_cols = ['return_lag1', 'return_lag2', 'vol_lag1', 'vol_lag2']
X = data[feature_cols]
y = data['volatility']

# shuffle=False keeps the split chronological (first 80% train, last 20% test).
# A shuffled split would place neighbouring days, whose rolling windows overlap,
# on both sides of the split and inflate the test scores.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# random_state pins the forest's bootstrap/feature sampling for reproducibility.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)

# RMSE is the square root of the mean squared error; R2 comes from r2_score.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("RMSE:", rmse)
print("R2 Score:", r2)

In [None]:
# Feed the last row of the feature matrix to the model and report its output.
# NOTE(review): the last row of X presumably holds features for the most recent
# date already in the dataset, so this may be a prediction for that date rather
# than for the following day — confirm how X and the target are aligned.
latest_input = X.tail(1)
latest_prediction = model.predict(latest_input)
predicted_vol = latest_prediction[0]
print("Predicted Next-Day Volatility:", predicted_vol)