In [61]:
import yfinance as yf
import numpy as np
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import root_mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from ta import momentum, trend

In [62]:
df = yf.download('^FTSE', start='2010-01-01', end='2020-12-31')
df.columns = ['Close', 'High', 'Low', 'Open', 'Volume']
df['RSI'] = momentum.rsi(df['Close'])
df['MACD'] = trend.macd(df['Close'])
df['Up'] = trend.aroon_up(df['High'], df['Low'])
df['Down'] = trend.aroon_down(df['High'], df['Low'])
df['Aroon'] = df['Up'] - df['Down']
df['Close'] = df['Close'].shift(1)
df['Close'] = df['Close'].shift(1)
df['LogRet'] = np.log(df['Close'] / df['Close'].shift(1))
df = df[::-1].dropna()
df.drop(['Close', 'High', 'Low', 'Open', 'Volume', 'Up', 'Down'], axis=1, inplace=True)
df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,RSI,MACD,Aroon,LogRet
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-12-30,58.411260,70.587833,68.0,0.000969
2020-12-29,62.627047,72.944843,68.0,0.012314
2020-12-24,56.352561,70.116917,28.0,-0.017443
2020-12-23,55.922214,75.730491,28.0,-0.003349
2020-12-21,50.164952,82.435403,28.0,-0.003018
...,...,...,...,...
2010-02-12,36.700776,-85.448701,-80.0,0.003944
2010-02-11,38.131180,-88.743582,-80.0,0.003822
2010-02-10,34.447727,-93.592779,-80.0,0.006185
2010-02-09,31.868558,-95.417131,-80.0,-0.015373


In [63]:
# Features and target
X = df[['Aroon', 'RSI', 'MACD']]
y = df['LogRet']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [64]:
# Linear Regression
lr = LinearRegression()
lr.fit(X_train_scaled, y_train)
y_pred = lr.predict(X_test_scaled)

print("Linear Regression RMSE:", root_mean_squared_error(y_test, y_pred))

Linear Regression RMSE: 0.011340349293498194


In [65]:
# Gradient Boosting Regressor
gbr = GradientBoostingRegressor(random_state=42)
gbr.fit(X_train_scaled, y_train)
y_pred = gbr.predict(X_test_scaled)

print("Gradient Boosting Regressor RMSE:", root_mean_squared_error(y_test, y_pred))

Gradient Boosting Regressor RMSE: 0.011139275292018741


In [66]:
# Bayesian Ridge Regression
br = BayesianRidge()
br.fit(X_train_scaled, y_train)
y_pred = br.predict(X_test_scaled)

print("Bayesian Ridge Regression RMSE:", root_mean_squared_error(y_test, y_pred))

Bayesian Ridge Regression RMSE: 0.011344763127851903


In [67]:
# Define cross-validation strategy
cv = KFold(n_splits=5, shuffle=False)

# Helper to compute RMSE
def rmse_cv(model, X, y):
    neg_mse = cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=cv)
    rmse = np.sqrt(-neg_mse)
    return rmse.mean()

# Models wrapped with scaling where needed
models = {
    "Linear Regression": Pipeline([("scaler", StandardScaler()), ("reg", LinearRegression())]),
    "Gradient Boosting": Pipeline([("scaler", StandardScaler()), ("reg", GradientBoostingRegressor(random_state=42))]),
    "Bayesian Ridge": Pipeline([("scaler", StandardScaler()), ("reg", BayesianRidge())])
}

# Evaluate
for name, model in models.items():
    score = rmse_cv(model, X, y)
    print(f"{name} RMSE: {score:.4f}")

Linear Regression RMSE: 0.0098
Gradient Boosting RMSE: 0.0097
Bayesian Ridge RMSE: 0.0097
