In [2]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import pypfopt
from pypfopt.expected_returns import mean_historical_return
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models

# Show the kernel's working directory: the relative "Data/..." paths used when
# loading the CSV files below are resolved against it.
os.getcwd()

'/Users/edvardohlstrom/Documents/UCLouvain/Big data in finance'

In [51]:
def read_clean_data(path, start="1970-01-01", end="2022-12-31"):
    """Load a portfolio-returns CSV and return it as decimal returns.

    Rows containing any null value are dropped, the sample is restricted to
    ``start <= Date <= end`` (both inclusive), the ``Date`` column is removed
    and the remaining columns are converted to float and multiplied by 0.01.

    Parameters
    ----------
    path : str
        Location of the CSV file. When the path contains the substring
        ``"Daily"`` the ``Date`` column is parsed as ``YYYYMMDD``; otherwise
        it is parsed as ``YYYYMM``.
    start, end : str
        Inclusive bounds of the sample period, in any form pandas can compare
        with a datetime column. The defaults select 1970 through 2022.

    Returns
    -------
    pandas.DataFrame
        One column per portfolio, without the ``Date`` column. The index is
        whatever survived the row filtering (it is not reset).

    Raises
    ------
    ValueError
        Propagated from ``astype(float)`` / ``to_datetime`` when a value
        cannot be converted.
    """
    df = pd.read_csv(path, low_memory=False)
    df = df.dropna(axis=0)  # removes rows with null values

    # Daily files carry YYYYMMDD dates, all other files YYYYMM.
    date_format = "%Y%m%d" if "Daily" in path else "%Y%m"
    dates = pd.to_datetime(df['Date'], format=date_format)
    in_sample = (dates >= start) & (dates <= end)

    # The dates are only needed for filtering, so they are kept in a separate
    # Series instead of being written back into a filtered frame (which can
    # trigger pandas' SettingWithCopyWarning).
    df = df.loc[in_sample].drop(columns='Date')
    df = df.astype(float)
    df = df * 0.01

    # Sanity check 1: the float conversion must not have produced NaNs.
    if df.isnull().values.any():
        print("DataFrame contains null values")

    # Sanity check 2: every remaining column must be numeric.
    non_numeric_columns = df.select_dtypes(exclude=['number']).columns
    if len(non_numeric_columns) > 0:
        print("DataFrame contain non decimal values")

    print( "Read and cleaned data from path: ", path)
    return df

# Select the dataset to analyse: exactly one of the calls below should be
# active. Paths containing "Daily" are parsed as daily data by
# read_clean_data; the others are parsed as monthly data.
df = read_clean_data("Data/10_Industry_Portfolios.csv")
#df = read_clean_data("Data/10_Industry_Portfolios_Daily.csv")
#df = read_clean_data("Data/25_Portfolios_5x5.CSV")
#df = read_clean_data("Data/25_Portfolios_5x5_Daily.csv")
#df = read_clean_data("Data/100_Portfolios_10x10.CSV")
#df = read_clean_data("Data/100_Portfolios_10x10_Daily.csv")
#df = read_clean_data("Data/48_Industry_Portfolios.CSV")
#df = read_clean_data("Data/48_Industry_Portfolios_Daily.csv")


Read and cleaned data from path:  Data/10_Industry_Portfolios.csv


In [48]:
#daily
# Rolling-window, out-of-sample evaluation of the minimum-variance portfolio.
# Assumes `df` holds DAILY decimal returns (one of the *_Daily.csv files) --
# if `df` has fewer than `idx_e` rows no window fits and `min_var` stays empty.
idx_s = 0          # first row of the first estimation window
idx_e = 2526       # end (exclusive) of the first estimation window, ~10 years at 252 days/year
# NOTE(review): 105 rows is closer to five months than six at 252 trading
# days/year (half a year would be ~126) -- confirm the intended step.
idx_overlap = 105  # rows the window rolls forward = length of each out-of-sample block
periods_per_year = 252  # annualisation factor for daily returns

# Roll until the end of the sample: only keep windows whose out-of-sample
# block is complete.
num_of_windows = max(0, (len(df) - idx_e) // idx_overlap)
min_var = []  # one [annualised mean, annualised volatility, Sharpe ratio] per window

for i in range(num_of_windows):
    offset = i * idx_overlap

    # Estimate the minimum-variance weights on the estimation window.
    estimation = df.iloc[(idx_s + offset):(idx_e + offset)]
    mu = mean_historical_return(estimation, returns_data=True, compounding=False,
                                frequency=periods_per_year)
    S = risk_models.sample_cov(estimation, returns_data=True, frequency=periods_per_year)
    ef = EfficientFrontier(mu, S)
    ef.min_volatility()

    # Apply them to the block that immediately follows. The portfolio return
    # of each period is the weighted SUM over assets (matrix product); taking
    # statistics of the element-wise product would mix assets and periods.
    out_of_sample = df.iloc[(idx_e + offset):(idx_e + offset + idx_overlap)]
    portfolio_returns = out_of_sample.to_numpy() @ ef.weights

    mean_annual = np.mean(portfolio_returns) * periods_per_year
    vol_annual = np.std(portfolio_returns) * np.sqrt(periods_per_year)
    sharpe = mean_annual / vol_annual  # risk-free rate taken as zero
    min_var.append([mean_annual, vol_annual, sharpe])



In [58]:
#monthly
# Rolling-window, out-of-sample evaluation of the minimum-variance portfolio.
# Assumes `df` holds MONTHLY decimal returns.
idx_s = 0        # first row of the first estimation window
idx_e = 120      # end (exclusive) of the first estimation window: 10 years of months
idx_overlap = 6  # rows the window rolls forward = length of each out-of-sample block
periods_per_year = 12  # annualisation factor for monthly returns

# Roll until the end of the sample: only keep windows whose out-of-sample
# block is complete (e.g. 86 windows for 636 monthly rows).
num_of_windows = max(0, (len(df) - idx_e) // idx_overlap)
min_var = []  # one [annualised mean, annualised volatility, Sharpe ratio] per window

for i in range(num_of_windows):
    offset = i * idx_overlap

    # Estimate the minimum-variance weights on the estimation window.
    estimation = df.iloc[(idx_s + offset):(idx_e + offset)]
    mu = mean_historical_return(estimation, returns_data=True, compounding=False,
                                frequency=periods_per_year)
    S = risk_models.sample_cov(estimation, returns_data=True, frequency=periods_per_year)
    ef = EfficientFrontier(mu, S)
    ef.min_volatility()

    # Apply them to the six months that immediately follow. The portfolio
    # return of each month is the weighted SUM over assets (matrix product);
    # taking statistics of the element-wise product would mix assets and months.
    out_of_sample = df.iloc[(idx_e + offset):(idx_e + offset + idx_overlap)]
    portfolio_returns = out_of_sample.to_numpy() @ ef.weights

    mean_annual = np.mean(portfolio_returns) * periods_per_year
    vol_annual = np.std(portfolio_returns) * np.sqrt(periods_per_year)
    sharpe = mean_annual / vol_annual  # risk-free rate taken as zero
    min_var.append([mean_annual, vol_annual, sharpe])


Compare the in-sample and out-of-sample performance of three portfolio strategies: the sample mean-variance portfolio, the sample minimum-variance portfolio and the equally weighted portfolio. For the in-sample performance, compute the three portfolios on all returns directly. For the out-of-sample performance, implement a rolling-window approach: use an estimation window of 10 years, compute the out-of-sample performance on the next six months, and then roll the window forward by six months until the end of the sample is reached. Report the performance in terms of annualized mean, volatility and Sharpe ratio.

Discuss your results and explain why you obtain such results. What is the impact of the return frequency (daily or monthly)? How stable are the portfolio weights over time (e.g. by computing the turnover or plotting boxplots)?

Sample mean-variance portfolio

In [52]:

# In-sample inputs estimated on the full sample in `df`: arithmetic
# (compounding=False) mean returns and the sample covariance matrix.
# frequency=12 annualises under the assumption that `df` holds monthly
# returns -- adjust it if a daily dataset is loaded.
mu = mean_historical_return(df, returns_data=True, compounding=False, frequency=12)
S = risk_models.sample_cov(df, returns_data=True, frequency=12)


In [53]:
# Max Sharpe portfolio (in-sample, estimated on the full sample)
# NOTE(review): the Sharpe ratio printed below matches
# (return - 0.02) / volatility, so this pypfopt version appears to apply a
# 2% risk-free rate by default -- confirm, and keep it consistent with any
# out-of-sample Sharpe ratios it is compared against.
ef = EfficientFrontier(mu, S)
ef.max_sharpe()
ef.portfolio_performance(verbose=True)

Expected annual return: 12.1%
Annual volatility: 13.2%
Sharpe Ratio: 0.77


(0.12144871655779223, 0.13192098940806246, 0.7690111862638297)

In [54]:
# Sample minimum-variance portfolio (in-sample, estimated on the full sample)
ef = EfficientFrontier(mu, S)
ef.min_volatility() 
ef.portfolio_performance(verbose=True)
# Display the optimised weights, one entry per column of `df`.
ef.weights

Expected annual return: 11.4%
Annual volatility: 12.7%
Sharpe Ratio: 0.74


array([0.15513921, 0.        , 0.        , 0.01260474, 0.        ,
       0.18241837, 0.        , 0.16860603, 0.48123164, 0.        ])

In [None]:
# sample mean-variance portfolio
# TODO: not implemented yet -- check whether it coincides with the max-Sharpe portfolio

In [57]:
# equally weighted portfolio
ef = EfficientFrontier(mu, S)

# Give every asset the weight 1/N, with N taken from the loaded dataset, so
# the weights sum to one for any number of portfolios (not just 10).
my_list = df.columns.to_list()
equal_weight = 1 / len(my_list)
my_dict = {k: equal_weight for k in my_list}

ef.set_weights(my_dict)
ef.portfolio_performance(verbose=True)



Expected annual return: 11.9%
Annual volatility: 15.2%
Sharpe Ratio: 0.65


(0.1185569811320755, 0.1518092285364946, 0.6492160067092536)