In [1]:
import pandas as pd
import scipy.optimize as sco
import numpy as np
import pandas_datareader as web
from scipy import stats
from sklearn.linear_model import LinearRegression
import statsmodels.api as smf
import urllib.request
import zipfile

In [2]:
def get_fama_french():
    """Download the Fama-French research factors and return the monthly table.

    The zipped CSV is fetched from Ken French's data library, extracted into
    the current working directory, and the leading block of rows (everything
    before the first row containing a null) is parsed.

    Returns:
        pandas.DataFrame: factor values divided by 100 (percent -> decimal),
        indexed by month-end timestamps.

    Side effects:
        Writes ``fama_french.zip`` and the archive's contents into the current
        working directory.
    """
    # Web url
    ff_url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"

    # Download the file and save it
    urllib.request.urlretrieve(ff_url, 'fama_french.zip')

    # Extract the file data; the context manager closes the archive even if
    # extraction raises.
    with zipfile.ZipFile('fama_french.zip', 'r') as zip_file:
        zip_file.extractall()

    ff_factors = pd.read_csv('F-F_Research_Data_Factors.csv', skiprows = 3, index_col = 0)

    # Position of the first row that contains a null: everything before it is
    # the leading table. ``Series.nonzero()`` no longer exists in pandas >= 1.0
    # and ``any(1)`` (positional axis) is rejected by pandas >= 2.0, so use the
    # keyword axis and numpy instead.
    null_positions = np.flatnonzero(ff_factors.isnull().any(axis=1).to_numpy())
    # ``nrows=None`` reads the whole file when no null row is present.
    ff_row = int(null_positions[0]) if null_positions.size else None

    # Read the csv file again, this time stopping before the first null row so
    # the columns are parsed as numbers.
    ff_factors = pd.read_csv('F-F_Research_Data_Factors.csv', skiprows = 3, nrows = ff_row, index_col = 0)

    # Format the date index (YYYYMM -> first day of that month)
    ff_factors.index = pd.to_datetime(ff_factors.index, format= '%Y%m')

    # Format dates to end of month
    ff_factors.index = ff_factors.index + pd.offsets.MonthEnd()

    # Convert from percent to decimal
    ff_factors = ff_factors / 100
    return ff_factors

In [3]:
def get_return_data(price_data, period = "M"):
    """Turn a price history into simple period-over-period returns.

    Args:
        price_data: pandas Series or DataFrame of prices with a datetime-like
            index (required by ``resample``).
        period: resampling rule passed to ``resample``; the last price in each
            bucket is used.

    Returns:
        pandas.DataFrame: percentage changes between consecutive resampled
        prices, with the first (undefined) row dropped.
    """
    # Last observed price in every resampling bucket
    period_end_prices = price_data.resample(period).last()

    # Percentage change between buckets; the first row has no predecessor, so
    # it is dropped.
    period_returns = period_end_prices.pct_change().iloc[1:]

    # Always hand back a DataFrame, even when a Series was supplied
    return pd.DataFrame(period_returns)

In [4]:
def get_mu_Q(price_data):
    """Estimate asset expected returns and covariance from a 3-factor regression.

    Asset returns from ``get_return_data`` are regressed on the first three
    columns returned by ``get_fama_french`` over the dates both share.

    Args:
        price_data: price history accepted by ``get_return_data``.

    Returns:
        tuple: ``(mu, Q)`` where ``mu`` is a 1-D array with one expected
        return per asset and ``Q`` is the square asset covariance matrix
        ``beta @ F @ beta.T + D``.

    Raises:
        ValueError: if the returns and the factors share no dates, or if there
            are too few shared observations to estimate the residual variance.
    """
    # Asset returns at the default frequency of get_return_data.
    # NOTE(review): despite the name, the risk-free rate is never subtracted
    # here (the factor table is cut to its first three columns below), so these
    # are raw returns unless price_data already encodes excess returns -- confirm.
    exRets = get_return_data(price_data)

    # Keep only the first three factor columns
    ff_data = get_fama_french().iloc[:, 0:3]

    # Align both tables on the dates they have in common so every regression
    # row pairs a return with the factors of the same period, even when one
    # source has gaps or a shorter history.
    common_dates = exRets.index.intersection(ff_data.index)
    if len(common_dates) == 0:
        raise ValueError("price data and factor data share no dates")
    exRets = exRets.loc[common_dates]
    ff_data = ff_data.loc[common_dates]

    n_obs, p = ff_data.shape
    if n_obs - 1 - p <= 0:
        raise ValueError("not enough observations to estimate the residual variance")

    X = ff_data.to_numpy()
    Y = exRets.to_numpy()

    # Calculate the factor expected return from historical data using the geometric mean
    factor_means = np.asarray(stats.gmean(X + 1, axis=0)) - 1

    # Calculate the factor covariance matrix
    F = ff_data.cov()

    # Determine alpha and beta. LinearRegression fits the intercept itself, so
    # no constant column is appended to X (which also avoids writing into a
    # slice of the factor DataFrame).
    model = LinearRegression().fit(X, Y)
    alpha = model.intercept_   # one intercept per asset
    beta = model.coef_         # rows are assets, columns are factors

    # Calculate the residuals
    epsilon = Y - model.predict(X)

    # Calculate the residual variance with "N - p - 1" degrees of freedom
    sigmaEp = np.sum(epsilon**2, axis=0) / (n_obs - 1 - p)

    # Calculate the asset expected returns implied by the factor means
    mu = alpha + beta @ factor_means

    # Diagonal matrix of residual variances
    D = np.diag(sigmaEp)

    # Calculate the asset covariance matrix
    Q = beta @ F.to_numpy() @ beta.T + D

    return mu, Q

In [5]:
def portfolio_volatility(weights, mean_returns, cov_matrix, periods_per_year=252):
    """Annualised standard deviation of a weighted portfolio.

    Args:
        weights: 1-D array-like of portfolio weights.
        mean_returns: unused; kept so the signature matches the ``args`` tuple
            that ``min_variance`` hands to the optimiser.
        cov_matrix: per-period covariance matrix of the asset returns.
        periods_per_year: number of return periods in a year used to annualise
            the result. Defaults to 252 (the previously hard-coded value); pass
            12 when ``cov_matrix`` was estimated from monthly returns.

    Returns:
        float: ``sqrt(w' * cov * w) * sqrt(periods_per_year)``.
    """
    weights = np.asarray(weights)
    period_variance = np.dot(weights.T, np.dot(cov_matrix, weights))
    return np.sqrt(period_variance) * np.sqrt(periods_per_year)

In [6]:
def min_variance(mean_returns, cov_matrix):
    """Find the weights that minimise ``portfolio_volatility``.

    The search starts from equal weights, keeps every weight within [0, 1],
    and requires the weights to sum to 1.

    Args:
        mean_returns: per-asset expected returns; its length sets the number
            of assets and it is forwarded to ``portfolio_volatility``.
        cov_matrix: asset covariance matrix forwarded to
            ``portfolio_volatility``.

    Returns:
        The result object returned by ``scipy.optimize.minimize`` (SLSQP).
    """
    n_assets = len(mean_returns)

    # Equal-weight starting point
    initial_weights = n_assets * [1. / n_assets]

    # Equality constraint: the weights must add up to one
    fully_invested = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}

    # Each weight is limited to the interval [0, 1]
    weight_bounds = tuple((0.0, 1.0) for _ in range(n_assets))

    return sco.minimize(
        portfolio_volatility,
        initial_weights,
        args=(mean_returns, cov_matrix),
        method='SLSQP',
        bounds=weight_bounds,
        constraints=fully_invested,
    )

In [7]:
# Load the price table from CSV, using its 'Date' column as the index
price_data = pd.read_csv("../Data/sp500df.csv", index_col='Date')
# Convert the index to datetimes: get_mu_Q -> get_return_data calls .resample(),
# which needs a datetime-like index
price_data.index = pd.to_datetime(price_data.index)
# Estimate the expected-return vector (mu) and covariance matrix (Q) from the factor model
mu, Q = get_mu_Q(price_data)
# Run the minimum-volatility optimisation; as the cell's last expression, the
# optimiser result object is what gets displayed
min_variance(mu,Q)

  app.launch_new_instance()


     fun: 0.2239686993097294
     jac: array([0.45309086, 0.51591033, 0.77172032, 0.22328914, 0.22366494,
       0.47084977, 0.52518821, 0.31703656, 0.45536122, 0.37977513,
       0.31338509, 0.61604965, 1.36776608, 0.39130559, 0.24591867,
       0.52818402, 0.44658568, 0.23624467, 0.33763205, 0.29346129,
       0.53812006, 0.87478714, 0.39981923, 0.3974089 , 0.52134684,
       0.35900068, 0.33384814, 0.64182082, 0.31798623, 0.46201585,
       0.23770774, 0.51628045, 0.51302525, 0.49952434, 0.4481214 ,
       0.42943227, 3.91689822, 0.5331063 , 0.66629291, 1.07792547,
       0.41299752, 0.22337252, 0.44461599, 0.53413233, 0.37749874,
       0.27812668, 0.50435287, 0.59075624, 0.54673038, 0.30291635,
       0.50574864, 0.87205822, 0.6109745 , 0.22552505, 0.37973019,
       0.24861998, 0.22361251, 0.48491574, 0.58850474, 0.32587798,
       0.37652555, 0.25398145, 0.22355611, 0.25892802, 0.93297519,
       0.66278133, 0.31840171, 0.82713431, 0.38760139, 0.44965598,
       0.38750638, 0.46