In [16]:
import numpy as np
import pandas as pd
from cvxpy import * 

# Define the functions

In [35]:
def get_data(path='../../../data/monthly.csv'):
    '''
    Get stock data for performing optimization.
    Assumes data is in a t-by-n format with t months of observations
    along the rows and n stocks to choose from in columns.
    Assumes no missing values.

    Args:
        path: location of the CSV file to read. The first column is used
              as the index and parsed as dates. Defaults to the project's
              monthly returns file so existing callers are unaffected.

    Returns:
         Monthly returns data in a t-by-n formatted dataframe with
         a time-formatted index
    '''
    # index_col=0 + parse_dates=True turns the first column into a
    # date-typed index, which the year-based slicing downstream relies on.
    df = pd.read_csv(path, parse_dates=True, index_col=0)
    return(df)

In [18]:
def train_test_split(data, start_year):
    '''
    Split the data up into training and test periods.

    The training window covers the six calendar years
    start_year .. start_year + 5 (both ends inclusive, because label
    slicing on a date index includes the end label). The test window is
    the single calendar year that follows, start_year + 6.

    Args:
        data: a t periods by n stocks data-frame with a date-time index
        start_year: an integer indicating the start year for the training period

    Returns:
        tuple of training and testing data as pandas dataframes

    Usage:
        training1, testing1 = train_test_split(stocks, 1986)
    '''
    train_first = str(start_year)
    train_last = str(start_year + 5)
    test_year = str(start_year + 6)

    # Use .loc for row selection by date string: plain data['1992'] is a
    # column lookup on a DataFrame and is not a reliable way to pick rows.
    training = data.loc[train_first:train_last]
    testing = data.loc[test_year]
    return(training, testing)

In [19]:
def ret_cov_est(training):
    '''
    Estimate the returns and covariance for the training period.

    Args:
        training: a t periods by n stocks data-frame (or 2-d array) with
                  one row per observation and one column per stock

    Returns:
        A tuple with a n-by-1 matrix of estimated returns
        and a n-by-n matrix of estimated covariance. n is the number of stocks.
    '''
    values = np.asarray(training, dtype=float)

    # Explicit axis=0 so we always get one mean per stock (column),
    # regardless of whether the input is a DataFrame or a plain array.
    r_hat = np.asmatrix(values.mean(axis=0)).T

    # rowvar=False: columns are the variables (stocks), rows the observations.
    Sigma = np.asmatrix(np.cov(values, rowvar=False))
    return(r_hat, Sigma)

In [20]:
def get_equal_weights(data):
    '''
    Build the naive portfolio that puts the same weight on every stock.

    Args:
        data: a t-by-n dataframe or matrix; only its column count
              (n, the number of stocks) is used

    Returns:
        a n-by-1 matrix in which every entry equals 1/n
    '''
    num_stocks = data.shape[1]
    column_of_ones = np.asmatrix(np.ones((num_stocks, 1)))
    return column_of_ones / num_stocks

In [30]:
def optimal_portfolio(r_hat, Sigma, tau, mu = None, sigma_2_hat = None):
    '''
    (1) For a given estimated return floor, get the weights that
    minimize the variance with an l1 norm of the weights. OR
    (2) For a given acceptable risk level, get the weights that maximize
    the returns with an l1 norm of the weights.

    Exactly one of mu / sigma_2_hat must be supplied; it selects the mode.
    In both modes the weights are constrained to be non-negative and to
    sum to one.

    NOTE(review): because every feasible x satisfies x >= 0 and
    sum(x) == 1, norm(x, 1) is equal to 1 for all feasible points, so the
    tau term only shifts the objective by a constant under these
    constraints. Confirm whether the long-only constraint or the penalty
    is the intended source of sparsity.

    Args:
        r_hat: estimated returns as a n-by-1 matrix
        Sigma: estimated covariance as a n-by-n matrix
        tau: tuning parameter multiplying the l1 norm of the weights
        mu: the minimum return that the portfolio must beat
            (selects variance minimization)
        sigma_2_hat: the maximum variance the portfolio may have
            (selects return maximization)

    Returns:
        optimal weights as a n-by-1 matrix, rounded to 4 decimals and
        re-normalized to sum to one

    Raises:
        ValueError: if neither or both of mu and sigma_2_hat are given
        RuntimeError: if the solver returns no solution, or if every
            weight rounds to zero
    '''

    # Validate the mode first so a bad call fails before any solver
    # objects are constructed.
    minimize_variance = (mu is not None) and (sigma_2_hat is None)
    maximize_returns = (sigma_2_hat is not None) and (mu is None)
    if not (minimize_variance or maximize_returns):
        raise ValueError('Please enter arguments for one of the following: mu or sigma_2_hat')

    # Define the variables
    n = r_hat.shape[0]
    x = Variable(n)
    ret = r_hat.T*x
    risk = quad_form(x, Sigma)

    # Define the problem
    if minimize_variance:
        # Minimize variance for given returns threshold
        print('Minimizing variance')
        objective = Minimize(risk + tau*norm(x, 1))
        constraints = [sum_entries(x) == 1, x >= 0, ret >= mu]
    else:
        # Maximize Returns for a given variance threshold
        print('Maximizing returns')
        objective = Maximize(ret - tau*norm(x, 1))
        constraints = [sum_entries(x) == 1, x >= 0, risk <= sigma_2_hat]

    # Solve the problem
    prob = Problem(objective, constraints)
    prob.solve()

    # No value on the variable means there are no weights to return;
    # surface that explicitly instead of failing inside the rounding below.
    if x.value is None:
        raise RuntimeError(
            'Optimization did not return a solution (status: {0})'.format(prob.status))

    # Force an n-by-1 column regardless of the container the solver returns.
    weights = np.asarray(x.value, dtype=float).reshape(-1, 1)

    # Handling rounding of x's: drop numerical noise, and clip the small
    # negative values that solver tolerance can leave despite x >= 0.
    weights = np.clip(np.around(weights, decimals = 4), 0, None)
    total = weights.sum()
    if total <= 0:
        raise RuntimeError('All portfolio weights rounded to zero')
    optimal_x = np.asmatrix(weights / total)

    # Print message about how many stocks were chosen
    chosen = int(np.count_nonzero(optimal_x > 0))
    chosen_p = np.around(100.0 * chosen / n, decimals = 2)
    print('{0} stocks ({1}%)'.format(chosen, chosen_p))

    return(optimal_x)

In [31]:
def risk_return(x, data):
    '''
    Calculate the return and risk for a portfolio with a given set of weights.

    The return is the weighted mean of the per-stock mean returns in
    `data`; the risk is the portfolio variance x' * Sigma * x, where Sigma
    is the sample covariance of the columns of `data`.

    Args:
        x: weights to be applied for the portfolio as a n-by-1 matrix
           (any array-like with n entries is accepted and treated as a column)
        data: a t-by-n pandas dataframe or numpy matrix

    Returns:
        A tuple of two floats in the order (return, risk)
    '''
    values = np.asarray(data, dtype=float)
    weights = np.asmatrix(np.asarray(x, dtype=float).reshape(-1, 1))

    # Explicit axis=0: one mean per stock (column). Without the axis a
    # plain array/matrix input would collapse to a single scalar.
    r_hat = np.asmatrix(values.mean(axis=0)).T
    Sigma = np.asmatrix(np.cov(values, rowvar=False))

    ret = (r_hat.T * weights)[0,0]
    risk = (weights.T * Sigma * weights)[0,0]
    return(ret, risk)

In [32]:
def format_results(results, test, strategy, tau, num_stocks,
                   first_year=1986, test_offset=6, windows_per_period=5):
    '''
    Format the results of the risk, returns into a data-frame
    for plotting later

    Args:
        results: w-by-2 numpy array/matrix with one row per rolling window;
                 column 0 is the return, column 1 is the risk
        test: 1 or 0 depending on whether we are talking about in or
                out of sample performance
        strategy:
            2: returns maximized
            1: variance minimized
            0: equal weight
        tau: the tau parameter used in the simulation
        num_stocks: the number of stocks selected for the portfolio;
                either a single number applied to every row, or one value
                per window (a w-by-1 matrix is flattened)
        first_year: training start year of the first window (default 1986)
        test_offset: number of years between a window's training start
                year and its evaluation year (default 6)
        windows_per_period: how many consecutive windows share one
                period label (default 5)

    Returns:
        Dataframe with the following columns:
            return: the return for the portfolio
            risk: the risk for the portfolio
            strategy: the strategy code passed in (0 equal weight,
                        1 variance minimized, 2 returns maximized)
            year: if training, the start year of the training period,
                    if testing the evaluation year
            test: binary, 1 for out-of-sample, 0 for in-sample
            period: 1-based label of the group of `windows_per_period`
                        consecutive windows the row belongs to
                        (1 to 5 for the default 25 windows)
            tau: the tau parameter from the optimization
            num_stocks: the number of stocks selected for the portfolio
    '''
    df = pd.DataFrame(np.asarray(results), columns = ['return', 'risk'])

    # Derive the year range from the number of rows instead of assuming 25.
    n_windows = df.shape[0]
    years = np.arange(first_year, first_year + n_windows)

    # A w-by-1 matrix must become a flat vector to be a well-formed column;
    # a scalar is left as a scalar so it broadcasts to every row.
    stock_counts = np.asarray(num_stocks)
    if stock_counts.ndim == 0:
        stock_counts = stock_counts.item()
    else:
        stock_counts = stock_counts.ravel()

    df['strategy'] = strategy
    df['year'] = years + test_offset if test == 1 else years
    df['test'] = test
    df['period'] = np.arange(n_windows) // windows_per_period + 1
    df['tau'] = tau
    df['num_stocks'] = stock_counts
    return(df)

# Run the dang thing

In [33]:
# Set constants
START_YEAR = 1986   # training start year of the first rolling window
TAU = 1             # l1 tuning parameter passed to optimal_portfolio
N_WINDOWS = 25      # number of rolling windows (start years 1986 .. 2010);
                    # must agree with the years assumed by format_results

# Create empty matrices to store the results.
# Each res_* row holds (return, risk) for one window; num_* holds the
# number of stocks with a positive weight in that window.
res_train_equal = np.asmatrix(np.zeros(shape = (N_WINDOWS, 2)))
res_train_min_var = np.asmatrix(np.zeros(shape = (N_WINDOWS, 2)))
res_train_max_ret = np.asmatrix(np.zeros(shape = (N_WINDOWS, 2)))
res_test_equal = np.asmatrix(np.zeros(shape = (N_WINDOWS, 2)))
res_test_min_var = np.asmatrix(np.zeros(shape = (N_WINDOWS, 2)))
res_test_max_ret = np.asmatrix(np.zeros(shape = (N_WINDOWS, 2)))
num_min_var = np.asmatrix(np.zeros(shape = (N_WINDOWS, 1)))
num_max_ret = np.asmatrix(np.zeros(shape = (N_WINDOWS, 1)))

# Get stocks
stocks = get_data()

# Calculate the equal weights vector
x_equal = get_equal_weights(stocks)

for i in range(N_WINDOWS):

    # Set the period starting point
    start_year = START_YEAR + i

    # Get train, test split
    training, testing = train_test_split(stocks, start_year)

    # 1. Calculate the return, risk for naive approach
    res_train_equal[i,:] = risk_return(x_equal, training)
    res_test_equal[i,:] = risk_return(x_equal, testing)

    # 2. Estimate the returns, covariance from the training data
    r_hat, Sigma = ret_cov_est(training)

    # 3. Get the optimized weights
    # Note: the naive portfolio's in-sample return (column 0) and risk
    # (column 1) are used as the thresholds for the two optimizations.
    x_min_var = optimal_portfolio(r_hat = r_hat, Sigma = Sigma, tau = TAU, mu = res_train_equal[i,0])
    x_max_ret = optimal_portfolio(r_hat = r_hat, Sigma = Sigma, tau = TAU, sigma_2_hat = res_train_equal[i,1])

    # 4. Calculate the return, risk for the optimized approaches
    res_train_min_var[i,:] = risk_return(x_min_var, training)
    res_test_min_var[i,:] = risk_return(x_min_var, testing)
    res_train_max_ret[i,:] = risk_return(x_max_ret, training)
    res_test_max_ret[i,:] = risk_return(x_max_ret, testing)

    # Store the number of stocks selected for record keeping purposes.
    # np.sum gives a scalar count; the builtin sum over a matrix would
    # yield a 1x1 matrix instead.
    num_min_var[i,0] = np.sum(x_min_var > 0)
    num_max_ret[i,0] = np.sum(x_max_ret > 0)

Minimizing variance
27 stocks (8.63%)
Maximizing returns
16 stocks (5.11%)
Minimizing variance
25 stocks (7.99%)
Maximizing returns
16 stocks (5.11%)
Minimizing variance
40 stocks (12.78%)
Maximizing returns
23 stocks (7.35%)
Minimizing variance
38 stocks (12.14%)
Maximizing returns
27 stocks (8.63%)
Minimizing variance
33 stocks (10.54%)
Maximizing returns
24 stocks (7.67%)
Minimizing variance
47 stocks (15.02%)
Maximizing returns
30 stocks (9.58%)
Minimizing variance
48 stocks (15.34%)
Maximizing returns
30 stocks (9.58%)
Minimizing variance
38 stocks (12.14%)
Maximizing returns
27 stocks (8.63%)
Minimizing variance
34 stocks (10.86%)
Maximizing returns
23 stocks (7.35%)
Minimizing variance
35 stocks (11.18%)
Maximizing returns
18 stocks (5.75%)
Minimizing variance
39 stocks (12.46%)
Maximizing returns
17 stocks (5.43%)
Minimizing variance
32 stocks (10.22%)
Maximizing returns
14 stocks (4.47%)
Minimizing variance
34 stocks (10.86%)
Maximizing returns
16 stocks (5.11%)
Minimizing var

In [34]:
# Format and write the results.
# format_results arguments are (results, test, strategy, tau, num_stocks):
#   test:     0 = in-sample (training), 1 = out-of-sample (testing)
#   strategy: 0 = equal weight, 1 = variance minimized, 2 = returns maximized
# TAU is passed through (rather than a literal) so the recorded tau always
# matches the value actually used in the optimization.
result_frames = [
    format_results(res_train_equal, 0, 0, TAU, stocks.shape[1]),
    format_results(res_train_min_var, 0, 1, TAU, num_min_var),
    format_results(res_train_max_ret, 0, 2, TAU, num_max_ret),

    format_results(res_test_equal, 1, 0, TAU, stocks.shape[1]),
    format_results(res_test_min_var, 1, 1, TAU, num_min_var),
    format_results(res_test_max_ret, 1, 2, TAU, num_max_ret),
]
results = pd.concat(result_frames)
results.to_csv('../../../data/results.csv', index=False)