In [1]:
import pandas as pd
import numpy as np
 
from statsmodels.api import OLS, add_constant
import pandas_datareader.data as web
import yfinance as yf
from sklearn.linear_model import LinearRegression

In [2]:
# Instrument and sample window used throughout the notebook
ticker = 'AAPL'
start_date, end_date = '2000-01-01', '2023-05-01'

# Modelling knobs:
#   train_portion      - fraction of rows (from the start) used to fit the model
#   decision_threshold - gap between the rolling averages needed to raise a signal
train_portion, decision_threshold = 0.7, 0.01

In [3]:
# Download the daily Fama-French 5-factor (2x3) dataset for the sample window.
# Entry [0] of the reader result is the table used here; it is rescaled by
# 1/100 in the same expression so re-running the cell cannot rescale twice.
# NOTE(review): the source values are presumably quoted in percent - confirm.
ff_factor = 'F-F_Research_Data_5_Factors_2x3_daily'
ff_factor_data = (
    web.DataReader(ff_factor, 'famafrench', start=start_date, end=end_date)[0]
    / 100
)

ff_factor_data.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-03,-0.0071,-0.0006,-0.0142,-0.015,-0.0063,0.00021
2000-01-04,-0.0406,0.0033,0.0206,0.0047,0.0145,0.00021
2000-01-05,-0.0009,0.0033,0.0016,0.0041,0.0111,0.00021
2000-01-06,-0.0073,-0.0004,0.0126,0.0065,0.0121,0.00021
2000-01-07,0.0321,-0.0093,-0.0142,-0.0088,-0.0096,0.00021


In [4]:
# Getting the stock price data.
# auto_adjust=False is passed explicitly so that the 'Adj Close' column is
# present: recent yfinance releases default to auto_adjust=True, which drops it.
ticker_data = yf.download(
    ticker,
    start=start_date,
    end=end_date,
    interval='1d',
    auto_adjust=False,
)

# On recent yfinance releases a single-ticker download can come back with
# two-level columns (field, ticker); in that case selecting 'Adj Close' yields
# a one-column DataFrame, which is collapsed to a plain Series here.
adj_close = ticker_data['Adj Close']
if isinstance(adj_close, pd.DataFrame):
    adj_close = adj_close.squeeze(axis=1)

# Simple daily returns; the first row has no previous price, so it is NaN and dropped.
daily_return = adj_close.pct_change().dropna()
# The Series name becomes the column name when it is merged with the factor table.
daily_return.name = 'Returns'

daily_return.head()

[*********************100%***********************]  1 of 1 completed


Date
2000-01-04   -0.084310
2000-01-05    0.014633
2000-01-06   -0.086538
2000-01-07    0.047369
2000-01-10   -0.017588
Name: Returns, dtype: float64

In [5]:
# Combine factors and stock returns on the shared 'Date' key.
# The default (inner) join keeps only dates present in both inputs.
ff_data = pd.merge(ff_factor_data, daily_return, on='Date')

ff_data.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-01-04,-0.0406,0.0033,0.0206,0.0047,0.0145,0.00021,-0.08431
2000-01-05,-0.0009,0.0033,0.0016,0.0041,0.0111,0.00021,0.014633
2000-01-06,-0.0073,-0.0004,0.0126,0.0065,0.0121,0.00021,-0.086538
2000-01-07,0.0321,-0.0093,-0.0142,-0.0088,-0.0096,0.00021,0.047369
2000-01-10,0.0176,0.005,-0.0153,-0.0198,-0.0024,0.00021,-0.017588


In [6]:
# Target: the stock's daily return. Features: every other column of ff_data.
# NOTE(review): 'RF' stays in as a regressor and 'Returns' is the raw (not
# excess) return - confirm this is the intended specification.
Y = ff_data['Returns']
X = ff_data.drop(columns='Returns')

In [7]:
# Positional split without shuffling: the first `split` rows are used for
# fitting, the remaining rows for evaluation.
split = int(len(X) * train_portion)
X_train, X_test = X.iloc[:split], X.iloc[split:]
Y_train, Y_test = Y.iloc[:split], Y.iloc[split:]

In [8]:
# Fit an ordinary least-squares model (with intercept) on the training rows,
# then score the held-out rows.
lr_model = LinearRegression(fit_intercept=True).fit(X_train, Y_train)
predictions = lr_model.predict(X_test)

In [9]:
# Put the held-out returns and the model's predictions side by side for comparison
results = Y_test.to_frame().assign(Prediction=predictions)
results.head()

Unnamed: 0_level_0,Returns,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-04-13,0.014488,0.017544
2016-04-14,0.000535,0.002411
2016-04-15,-0.020071,-0.000871
2016-04-18,-0.021575,0.009504
2016-04-19,-0.005303,-0.003454


In [10]:
# Computing the rolling weekly average (5 rows per window); the leading rows
# that do not yet have a full window are NaN and are dropped.
window = 5
rolling_w_avg = results.rolling(window).mean().dropna()

# Construct the signals on a copy, so that adding the Buy/Sell columns does not
# also modify rolling_w_avg (plain assignment would only create an alias).
# Buy  = 1.0 when the rolling prediction exceeds the rolling return by more
#        than decision_threshold, else 0.0.
# Sell = 1.0 when the rolling return exceeds the rolling prediction by more
#        than decision_threshold, else 0.0.
signals = rolling_w_avg.copy()
signals['Buy'] = np.where(signals['Prediction'] > signals['Returns'] + decision_threshold, 1.0, 0.0)
signals['Sell'] = np.where(signals['Returns'] > signals['Prediction'] + decision_threshold, 1.0, 0.0)
signals


Unnamed: 0_level_0,Returns,Prediction,Buy,Sell
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-04-19,-0.006385,0.005027,1.0,0.0
2016-04-20,-0.008871,0.002098,1.0,0.0
2016-04-21,-0.011144,0.002123,1.0,0.0
2016-04-22,-0.007677,0.000708,0.0,0.0
2016-04-25,-0.004497,-0.001221,0.0,0.0
...,...,...,...,...
2023-03-27,0.001164,0.004588,0.0,0.0
2023-03-28,-0.002020,-0.002373,0.0,0.0
2023-03-29,0.003758,0.006265,0.0,0.0
2023-03-30,0.004343,0.004644,0.0,0.0
