# A Scalable Gradient-Based Optimization Framework for Sparse Minimum-Variance Portfolio Selection

This file provides general instructions on how to run the Grid-FW method (in Python).


In [None]:
"""
Uncomment the next five lines if you want to specify the number of threads used.
"""
# import os
# os.environ["OMP_NUM_THREADS"]     = "1"
# os.environ["MKL_NUM_THREADS"]     = "1"
# os.environ["OPENBLAS_NUM_THREADS"]= "1"
# os.environ["NUMEXPR_NUM_THREADS"] = "1"

Load the required packages, including portfolio.py, where the Grid-FW method is implemented.

In [None]:
import portfolio as pf # Loads our portfolio.py
import pandas as pd
import numpy as np
import time

# Table 1

The following code produces the results in Table 1 of the paper.

In [None]:
"""
Specify the paths to the two datasets
"""
file31 = "./data/example1/31assets.csv"
file85 = "./data/example1/85assets.csv"

# Read both CSV files (no header row) and turn each one into a NumPy array
# holding a covariance matrix. The files are read in the order 31, then 85.
Sigma31, Sigma85 = (
    pd.read_csv(path, header=None).to_numpy() for path in (file31, file85)
)

# Parameters of the Grid-FW method:
#   n : number of epochs
#   m : number of FW steps in each epoch
n, m = 500, 10

# Portfolio sizes k to try: 1, 2, ..., 10.
k_list = range(1, 11)

# Run Grid-FW for every k in k_list, first on the p = 31 dataset and then on
# the p = 85 dataset, printing a banner before each dataset's results.
rule = "\n---------------------------------------------"
datasets = (
    ("     Results for the dataset with p = 31", Sigma31),
    ("     Results for the dataset with p = 85", Sigma85),
)

for title, cov in datasets:
    print(rule)
    print(title)
    print(rule)

    for k in k_list:
        # Only the grid_fw call itself is timed (process CPU time).
        start = time.process_time()
        result = pf.grid_fw(cov, k, maxepoch=n, maxiter=m)
        elapsed = time.process_time() - start

        print('\n----------- k =', k, '------------')
        print("Best model: ", result['model_best'])
        print("Final model:", result['model_final'])
        print("Best var:  ", pf.sci(result['var_best']))
        print("Final var:  ", pf.sci(result['var_final']))
        # Total FW steps = epochs actually run times steps per epoch.
        print("No of FW steps: ", result['nepoch']*m)
        print("Time:", round(elapsed, 2), " sec")


## Example 1 in Table 2
The following code produces the Example 1 results in Table 2 of the paper. Change the file path to select one of the three datasets.

In [None]:

"""
Specify the path to the dataset: uncomment only one of three options
"""
file = "./data/example1/89assets.csv"
#file = "./data/example1/98assets.csv"
#file = "./data/example1/225assets.csv"

# Read the CSV (no header row) into a NumPy array holding the covariance
# matrix; p is its number of rows, i.e. the number of assets.
Sigma = pd.read_csv(file, header=None).to_numpy()
p = Sigma.shape[0]

# Parameters of the Grid-FW method:
#   n : number of epochs
#   m : number of FW steps in each epoch
n, m = 500, 10

# Portfolio sizes k: 10%, 25% and 50% of p, truncated to an integer.
k_list = [int(fraction*p) for fraction in (0.10, 0.25, 0.50)]

# Largest and smallest eigenvalues of Sigma via the power method, plus the
# largest and smallest asset variances (the diagonal of Sigma holds the
# variances of the assets).
# The second call is applied to the shifted matrix eta_1*I - Sigma; its result
# is subtracted from eta_1 to recover the smallest eigenvalue of Sigma.
eta_1, _ = pf.power_method(Sigma)
shifted_top, _ = pf.power_method(eta_1*np.identity(p) - Sigma)
eta_p = eta_1 - shifted_top
v = np.diag(Sigma)

text = f"""
Dataset path:         {file}
Largest eigenvalue:   {eta_1}
Smallest eigenvalue:  {eta_p}
Max variance:         {np.max(v)}
Min variance:         {np.min(v)}
"""

pf.print_in_box(text)

# Run Grid-FW once per k, keeping the best model, its variance and the CPU
# time spent inside grid_fw.
model_var_list = []
time_list = []
print("Executing .......")
for k in k_list:
    start = time.process_time()
    result = pf.grid_fw(Sigma, k, maxepoch=n, maxiter=m)
    time_list.append(time.process_time() - start)
    model_var_list.append((result['model_best'], result['var_best']))
print("....... done!")

print("\n---------------------------------------------")
print("     Results for the dataset with p = ", p)
print("---------------------------------------------")

# The three lists are filled in lockstep above, so they can be walked together.
for size, (model, variance), elapsed in zip(k_list, model_var_list, time_list):
    print("\nSize k   :", size)
    print("Variance :", variance)
    print("Time     :", round(elapsed, 4), "sec")
    print("Model    :\n", model)

## Example 2 in Table 2
The following code produces the Example 2 results in Table 2 of the paper. Change the file path to select one of the three datasets.

In [None]:
"""
Specify the path to the dataset: uncomment only one of three options
"""
file = "./data/example2/2000-2007.csv"
# file = "./data/example2/2008-2015.csv"
# file = "./data/example2/2016-2023.csv"

# Read the CSV (no header row) into a NumPy array holding the covariance
# matrix; p is its number of rows, i.e. the number of assets.
Sigma = pd.read_csv(file, header=None).to_numpy()
p = Sigma.shape[0]

# Parameters of the Grid-FW method:
#   n : number of epochs
#   m : number of FW steps in each epoch
n, m = 500, 10

# Portfolio sizes k: 10%, 25% and 50% of p, truncated to an integer.
k_list = [int(fraction*p) for fraction in (0.10, 0.25, 0.50)]

# Largest and smallest eigenvalues of Sigma via the power method, plus the
# largest and smallest asset variances (the diagonal of Sigma holds the
# variances of the assets).
# The second call is applied to the shifted matrix eta_1*I - Sigma; its result
# is subtracted from eta_1 to recover the smallest eigenvalue of Sigma.
eta_1, _ = pf.power_method(Sigma)
shifted_top, _ = pf.power_method(eta_1*np.identity(p) - Sigma)
eta_p = eta_1 - shifted_top
v = np.diag(Sigma)

text = f"""
Dataset path:         {file}
Largest eigenvalue:   {eta_1}
Smallest eigenvalue:  {eta_p}
Max variance:         {np.max(v)}
Min variance:         {np.min(v)}
"""
pf.print_in_box(text)


# Run Grid-FW once per k, keeping the best model, its variance and the CPU
# time spent inside grid_fw.
model_var_list = []
time_list = []
print("Executing .......")
for k in k_list:
    start = time.process_time()
    result = pf.grid_fw(Sigma, k, maxepoch=n, maxiter=m)
    time_list.append(time.process_time() - start)
    model_var_list.append((result['model_best'], result['var_best']))

print("....... done!")
print("\n---------------------------------------------")
print("     Results for the dataset with p = ", p)
print("---------------------------------------------")

# The three lists are filled in lockstep above, so they can be walked together.
for size, (model, variance), elapsed in zip(k_list, model_var_list, time_list):
    print("\nSize k   :", size)
    print("Variance :", variance)
    print("Time     :", round(elapsed, 4), "sec")
    print("Model    :\n", model)

## Example 3 in Table 2
The following code produces the Example 3 results in Table 2 of the paper. Change the file path to select one of the three datasets. Note that the third synthetic dataset, with p = 3000, used in this example is NOT included in the supplementary material due to the 100 MB upload limit on submissions.

In [None]:
"""
Specify the path to the dataset: uncomment only one of the options (the third dataset, with p = 3000, is not included)
"""
file = "./data/example3/1000assets.csv"
# file = "./data/example3/2000assets.csv"

# Read the CSV (no header row) into a NumPy array holding the covariance
# matrix; p is its number of rows, i.e. the number of assets.
Sigma = pd.read_csv(file, header=None).to_numpy()
p = Sigma.shape[0]

# Parameters of the Grid-FW method:
#   n : number of epochs
#   m : number of FW steps in each epoch
n, m = 500, 10

# Portfolio sizes k: 10%, 25% and 50% of p, truncated to an integer.
k_list = [int(fraction*p) for fraction in (0.10, 0.25, 0.50)]

#%%
# Largest and smallest eigenvalues of Sigma via the power method, plus the
# largest and smallest asset variances (the diagonal of Sigma holds the
# variances of the assets).
# The second call is applied to the shifted matrix eta_1*I - Sigma; its result
# is subtracted from eta_1 to recover the smallest eigenvalue of Sigma.
eta_1, _ = pf.power_method(Sigma)
shifted_top, _ = pf.power_method(eta_1*np.identity(p) - Sigma)
eta_p = eta_1 - shifted_top
v = np.diag(Sigma)

text = f"""
Dataset path:         {file}
Largest eigenvalue:   {eta_1}
Smallest eigenvalue:  {eta_p}
Max variance:         {np.max(v)}
Min variance:         {np.min(v)}
"""
pf.print_in_box(text)


# Run Grid-FW once per k, keeping the best model, its variance and the CPU
# time spent inside grid_fw.
model_var_list = []
time_list = []

print("Executing .......")
for k in k_list:
    start = time.process_time()
    result = pf.grid_fw(Sigma, k, maxepoch=n, maxiter=m)
    time_list.append(time.process_time() - start)
    model_var_list.append((result['model_best'], result['var_best']))
print("....... done!")
print("\n---------------------------------------------")
print("     Results for the dataset with p = ", p)
print("---------------------------------------------")

# The three lists are filled in lockstep above, so they can be walked together.
for size, (model, variance), elapsed in zip(k_list, model_var_list, time_list):
    print("\nSize k   :", size)
    print("Variance :", variance)
    print("Time     :", round(elapsed, 4), "sec")
    # Printing the selected model is left disabled in this example;
    # uncomment the next line to show it.
    #print("Model    :\n", model)