In [1]:
import pandas as pd
import numpy as np
import json

from scipy.optimize import minimize
 
from statsmodels.api import OLS, add_constant
import pandas_datareader.data as web
import yfinance as yf
from sklearn.linear_model import LinearRegression

In [2]:
# --- Notebook configuration ---------------------------------------------
# Stock universe and the sample window used for the downloads.
tickers = ['AAPL', 'MSFT', 'TSLA', 'META', 'AMZN']
start_date, end_date = '2000-01-01', '2023-05-02'

# Investment set-up; passed through unchanged to the exported JSON.
initial_capital_invested = 0.8

# Signal construction:
#   testing_date       - first date of the held-out (test) period
#   decision_threshold - gap required between the rolling predicted and the
#                        rolling realised return before a signal is emitted
#   transaction_amount - magnitude of each buy (+) / sell (-) decision
testing_date = '2023-03-01'
decision_threshold = 0.001
transaction_amount = 1

#### Preparing the return data

In [3]:
# Build a table of daily returns with one column per ticker.
# Start from an empty frame indexed by every calendar day in the sample
# window; each merge below (default inner join) then keeps only the dates
# present in every series merged so far.
returns = pd.DataFrame(pd.date_range(start_date, end_date), columns=['Date']).set_index('Date')

for ticker in tickers:
  # Daily bars for this ticker over the sample window
  ticker_data = yf.download(ticker, start_date, end_date, interval='1d')

  # Day-over-day percentage change of the adjusted close; the first row has
  # no previous price to compare with and is dropped
  adj_close = ticker_data['Adj Close']
  daily_return = adj_close.pct_change().dropna()
  daily_return.name = ticker  # becomes the column label after the merge

  returns = returns.merge(daily_return, on='Date')

returns.tail()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,AAPL,MSFT,TSLA,META,AMZN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-04-25,-0.009436,-0.022536,-0.011566,-0.024625,-0.034272
2023-04-26,-6.1e-05,0.072435,-0.04307,0.008913,0.023496
2023-04-27,0.028395,0.032028,0.041886,0.139255,0.046104
2023-04-28,0.007541,0.007972,0.025719,0.007378,-0.039792
2023-05-01,-0.00053,-0.005533,-0.015093,0.011901,-0.032243


#### Find the maximum sharpe ratio portfolio

In [4]:
# Defining key functions for the optimizer
# Calculate the sharpe ratio from returns data
def get_sr_info(weights, returns):
  """Return ``[mean return, volatility, Sharpe ratio]`` for a weighted portfolio.

  weights : sequence of portfolio weights, one per column of ``returns``
  returns : DataFrame of per-period returns, one column per asset

  The ratio is the plain mean/volatility quotient of ``returns`` as given;
  no risk-free rate is subtracted and nothing is annualised here.
  """
  w = np.array(weights)
  mean_by_asset = returns.mean()
  cov_matrix = returns.cov()

  portfolio_return = np.sum(mean_by_asset * w)
  portfolio_variance = np.dot(w.T, np.dot(cov_matrix, w))
  portfolio_vol = np.sqrt(portfolio_variance)

  return [portfolio_return, portfolio_vol, portfolio_return / portfolio_vol]

# minimize negative Sharpe Ratio
def neg_sr(weights):
  """Objective for the optimizer: the Sharpe ratio of ``weights``, negated.

  Reads the notebook-level ``returns`` frame. Minimising this value is the
  same as maximising the Sharpe ratio.
  """
  _, _, sharpe = get_sr_info(weights, returns)
  return -1 * sharpe

# check allocation sums to 1
def check_weight_sum(weights):
  """Equality-constraint residual: zero exactly when the weights sum to 1."""
  total = np.sum(weights)
  return total - 1

In [5]:
# Inputs for the portfolio optimizer
n_assets = len(tickers)

# Equality constraint: the optimizer must drive check_weight_sum(weights) to zero
cons = {'type': 'eq', 'fun': check_weight_sum}

# Every weight is confined to the interval [0, 1]
bounds = tuple((0, 1) for _ in range(n_assets))

# Starting point: the same weight in every asset
guess = [1.0 / n_assets] * n_assets

In [6]:
# Maximise the Sharpe ratio by minimising its negative, subject to the weight
# bounds and the sum-to-one constraint.
opt_results = minimize(neg_sr, guess, method='SLSQP', bounds=bounds, constraints=cons)

# The solver reports failure through the result object rather than by raising,
# so stop here instead of silently carrying weights from a failed run into the
# export.
if not opt_results.success:
  raise RuntimeError(f'Portfolio optimization failed: {opt_results.message}')

opt_weights = opt_results.x
opt_weights

array([0.20490851, 0.45125632, 0.26578041, 0.01260961, 0.06544515])

#### Use the Fama French model for a trading algorithm

In [7]:
# Download the daily Fama-French 5-factor research data set
ff_factor = 'F-F_Research_Data_5_Factors_2x3_daily'
ff_tables = web.DataReader(ff_factor, 'famafrench', start=start_date, end=end_date)

# Keep the first table and divide every value by 100
# (presumably the source quotes values in percent - confirm against the data set description)
ff_factor_data = ff_tables[0].div(100)

ff_factor_data.tail()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-03-27,0.0027,0.0051,0.0102,-0.0028,0.0035,0.00016
2023-03-28,-0.0017,-0.0003,0.0074,0.0008,0.0055,0.00016
2023-03-29,0.0139,-0.0034,-0.005,-0.009,-0.0054,0.00016
2023-03-30,0.0051,-0.0061,-0.0059,0.002,-0.0009,0.00016
2023-03-31,0.0153,0.0051,-0.0077,-0.0046,-0.0076,0.00016


In [8]:
# Excess return per stock: subtract the RF column from every return column,
# aligned row by row on the date index
excess_return = returns.sub(ff_factor_data['RF'], axis=0)

# Join factors and excess returns on Date, then discard any row that still
# contains a missing value
ff_data = ff_factor_data.merge(excess_return, on='Date').dropna(axis=0)

ff_data.tail()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF,AAPL,MSFT,TSLA,META,AMZN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-03-27,0.0027,0.0051,0.0102,-0.0028,0.0035,0.00016,-0.012453,-0.015094,0.007193,-0.015548,-0.001077
2023-03-28,-0.0017,-0.0003,0.0074,0.0008,0.0055,0.00016,-0.00414,-0.004321,-0.013819,-0.010809,-0.00832
2023-03-29,0.0139,-0.0034,-0.005,-0.009,-0.0054,0.00016,0.019631,0.019024,0.02463,0.023111,0.030794
2023-03-30,0.0051,-0.0061,-0.0059,0.002,-0.0009,0.00016,0.00973,0.01246,0.007061,0.011966,0.017296
2023-03-31,0.0153,0.0051,-0.0077,-0.0046,-0.0076,0.00016,0.015484,0.014802,0.062212,0.019567,0.012487


In [9]:
# Regressors (X): the five factor columns.
# Targets (Y): whatever remains once the factor columns and RF are removed,
# i.e. the per-ticker excess-return columns.
factor_cols = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
X = ff_data[factor_cols]
Y = ff_data.drop(columns=factor_cols + ['RF'])

In [10]:
# Chronological split: rows dated before testing_date are used for fitting,
# rows dated on or after it are held out for testing
X_train, X_test = X.loc[X.index < testing_date], X.loc[X.index >= testing_date]
Y_train, Y_test = Y.loc[Y.index < testing_date], Y.loc[Y.index >= testing_date]

In [11]:
# Fit a linear regression (with intercept) of the stock excess returns on the
# factor returns, using the training rows only
lr_model = LinearRegression(fit_intercept=True).fit(X_train, Y_train)

# Predict over the test rows, then transpose so the result can be indexed
# per target column (predictions[i]) in the next cell
predictions = lr_model.predict(X_test).T

In [12]:
# Put the realised test-period values and the model predictions side by side
results = Y_test.copy()

# Row `ind` of `predictions` is stored under the ind-th ticker's name
for ind, ticker in enumerate(tickers):
  results[ticker + '_Pred'] = predictions[ind]

results.head()

Unnamed: 0_level_0,AAPL,MSFT,TSLA,META,AMZN,AAPL_Pred,MSFT_Pred,TSLA_Pred,META_Pred,AMZN_Pred
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-03-01,-0.014406,-0.012789,-0.014452,-0.008849,-0.022021,-0.007109,-0.009736,-0.011776,-0.01269,-0.013817
2023-03-02,0.003969,0.019493,-0.058699,0.006241,-0.000594,0.010756,0.010502,0.00979,0.007757,0.008444
2023-03-03,0.03493,0.016486,0.035932,0.061262,0.029906,0.019675,0.022438,0.032889,0.026289,0.026296
2023-03-06,0.018379,0.006029,-0.020282,-0.002049,-0.012278,0.003967,0.007316,-0.006481,0.002005,0.005816
2023-03-07,-0.014656,-0.010749,-0.031634,-0.002269,-0.002293,-0.015471,-0.016953,-0.008812,-0.012152,-0.012045


In [14]:
# Rolling mean over `window` rows of both realised and predicted columns;
# rows without a full window are NaN and are dropped
window = 5
rolling_w_avg = results.rolling(window).mean().dropna()

# Signal per ticker and date:
#   buy  (+transaction_amount) when rolling prediction > rolling return + threshold
#   sell (-transaction_amount) when rolling prediction < rolling return - threshold
#   0 otherwise
signals = rolling_w_avg.copy()
decision = pd.DataFrame(index=signals.index)

for ticker in tickers:
  predicted = signals[ticker + '_Pred']
  realised = signals[ticker]

  buy_mask = predicted > realised + decision_threshold
  sell_mask = predicted < realised - decision_threshold

  signals[ticker + '_Buy'] = np.where(buy_mask, 1.0 * transaction_amount, 0.0)
  signals[ticker + '_Sell'] = np.where(sell_mask, -1.0 * transaction_amount, 0.0)

  # Net decision: with a non-negative threshold the buy and sell conditions
  # cannot both hold, so the sum is +amount, -amount or 0
  decision[ticker] = signals[ticker + '_Buy'] + signals[ticker + '_Sell']

signals.tail()

Unnamed: 0_level_0,AAPL,MSFT,TSLA,META,AMZN,AAPL_Pred,MSFT_Pred,TSLA_Pred,META_Pred,AMZN_Pred,AAPL_Buy,AAPL_Sell,MSFT_Buy,MSFT_Sell,TSLA_Buy,TSLA_Sell,META_Buy,META_Sell,AMZN_Buy,AMZN_Sell
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2023-03-27,0.001004,0.002944,0.009678,0.005004,0.000641,0.002184,0.003016,0.003794,0.003502,0.003851,1.0,0.0,0.0,0.0,0.0,-1.0,0.0,-1.0,1.0,0.0
2023-03-28,-0.00218,0.000973,-0.008694,-0.001524,-0.006927,-0.00104,-0.001545,-0.004182,-0.003339,-0.002894,1.0,0.0,0.0,-1.0,1.0,0.0,0.0,-1.0,1.0,0.0
2023-03-29,0.003598,0.005898,0.002773,0.005455,0.003061,0.004225,0.004853,0.008027,0.004995,0.004847,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,0.0,1.0,0.0
2023-03-30,0.004183,0.004478,0.003098,0.003406,0.006532,0.004276,0.004632,0.006245,0.003561,0.003238,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0
2023-03-31,0.00565,0.005374,0.017455,0.005657,0.010236,0.007147,0.008125,0.013873,0.009166,0.008839,1.0,0.0,1.0,0.0,0.0,-1.0,1.0,0.0,0.0,-1.0


In [15]:
# Move the Date index into an ordinary column so it is part of each exported
# record. Guarded so the cell can safely be re-run: resetting the index a
# second time would add a spurious 'index' column that then leaks into the
# JSON output.
if 'Date' not in decision.columns:
  decision = decision.reset_index()

# Store dates as text so they can be serialised to JSON
decision['Date'] = decision['Date'].astype(str)
decision.tail()

Unnamed: 0,Date,AAPL,MSFT,TSLA,META,AMZN
14,2023-03-27,1.0,0.0,-1.0,-1.0,1.0
15,2023-03-28,1.0,-1.0,1.0,-1.0,1.0
16,2023-03-29,0.0,-1.0,1.0,0.0,1.0
17,2023-03-30,0.0,0.0,1.0,0.0,-1.0
18,2023-03-31,1.0,1.0,-1.0,1.0,-1.0


#### Export decisions as a JSON file

In [17]:
# Export the run parameters and the per-date decisions as a JSON file
out_file = 'fama_french.json'

# Run metadata stored next to the decisions
info = {'tickers' : tickers,
        'initial_capital_invested' : initial_capital_invested,
        'initial_weight' : opt_weights.tolist(),
        'start_date' : testing_date,
        'end_date' : end_date}

# One dict per row. The orient is spelled out in full because the short
# alias 'r' is deprecated and is rejected by newer pandas releases.
decision_records = decision.to_dict('records')

with open(out_file, mode='w') as f:
    json.dump({'info' : info, 'decision' : decision_records}, fp=f, indent=2)

  json.dump({'info' : info, 'decision' : decision.to_dict('r')}, fp=f, indent=2)
