![windowitz](windowitz.png)

## EE378A Final Project, Spring 2016
### Leighton Barnes, Raghav Subramaniam, Sahaana Suri

In [None]:
import sys
import os

import numpy as np
from scipy import io

import matplotlib.pyplot as plt
%matplotlib notebook

import cvxpy

## Get Test Data

Note: `test.mat` should contain the test data (not supplied by us).

In [None]:
# Read the test set; loadmat returns a dict keyed by MATLAB variable name.
mat_dict = io.loadmat('test.mat')

test_op = mat_dict['test_op']
test_hi = mat_dict['test_hi']
test_lo = mat_dict['test_lo']
test_cl = mat_dict['test_cl']
test_vol = mat_dict['test_vol']

# The simulation later reads these matrices one row per day (test_*[t, :]),
# so the first axis is days and the second is stocks.
numDays, numStocks = test_cl.shape

# Turn each entry of the 'test_stocks' row into a plain string by trimming
# the printed form of the array.
# NOTE(review): the [3:-2] trim presumably targets a Python 2 style repr such
# as "[u'ABC']"; without the u prefix it would drop the first letter -- confirm.
all_stocks = [np.array_str(mat_dict['test_stocks'][0][i])[3:-2]
              for i in range(numStocks)]

## Get Train Data

Note: `portfolio.mat` contains the train data (supplied by us).

In [None]:
# Read the training set (this rebinds mat_dict, replacing the test-set dict).
mat_dict = io.loadmat('portfolio.mat')

train_hi = mat_dict['train_hi']
train_lo = mat_dict['train_lo']
train_vol = mat_dict['train_vol']

# make data compatible with our algorithms: close and open prices are kept
# transposed, so the first axis is the one later indexed with fundStocks and
# the second axis is the training day.
train_cl = mat_dict['train_cl'].T
train_op = mat_dict['train_op'].T

# Number of training days = length of the second axis after the transpose.
_, numTrain = train_cl.shape

## Choose Stocks

In [None]:
# Hand-picked stock indices, grouped by the source they were taken from.
# NOTE(review): the group names look like mutual-fund tickers whose holdings
# inspired each list -- confirm with the authors.
fsdax = [455, 47, 322, 216, 190, 269, 106, 389]
fscsx = [304, 197, 456, 279, 338, 112, 7, 227]
fbsox = [456, 279, 118, 6, 227, 179, 178]
fbiox = [84, 60, 192]
the_sahaana_special = [467, 457, 469, 486, 283, 221, 256, 34, 227, 187]

# Merge the groups; np.unique both de-duplicates (several indices appear in
# more than one group) and sorts, giving a stable index array that is used to
# select rows of the price matrices.
fundStocks = np.unique(fsdax + fscsx + fbsox + fbiox + the_sahaana_special)
numFundStocks = len(fundStocks)

# Parenthesized single-argument form: same output on Python 2, valid on Python 3.
print(fundStocks)

## Windowitz Algorithm

In [None]:
# params
window_size = 250      # look-back length (in days) used by windowitz for its statistics
update_interval = 125  # windowitz re-optimizes only on days divisible by this
mu = 1e-2              # weight of the covariance (quadratic) term in the objective
eta = 30               # weight of the 2-norm distance to the current portfolio

# build initial p matrix: day-over-day relative change of the training close
# prices of the selected stocks. Column 0 has no previous day and stays 0;
# so does every entry where either of the two closes involved is NaN or inf.
fund_train_cl = train_cl[fundStocks, :]  # fancy-index (copy) once, not per day
prev_cl = fund_train_cl[:, :-1]
curr_cl = fund_train_cl[:, 1:]
valid = np.logical_and(np.isfinite(prev_cl), np.isfinite(curr_cl))

p_init = np.zeros([numFundStocks, numTrain])
# p_init[:, 1:] is a view, so the masked assignment writes into p_init itself.
p_init[:, 1:][valid] = (curr_cl[valid] - prev_cl[valid]) / prev_cl[valid]

# matrices to cache test data
cls = np.zeros([numStocks, numDays])
ops = np.zeros([numStocks, numDays])

def _simple_returns(prices):
    """Return the column-to-column relative change of a 2-D price matrix.

    The result has the same shape as ``prices``. Column 0 is all zeros;
    column j holds (prices[:, j] - prices[:, j-1]) / prices[:, j-1] wherever
    both prices are finite, and 0 everywhere else.
    """
    returns = np.zeros(prices.shape)
    prev, curr = prices[:, :-1], prices[:, 1:]
    valid = np.logical_and(np.isfinite(prev), np.isfinite(curr))
    # returns[:, 1:] is a view, so this writes into `returns` itself.
    returns[:, 1:][valid] = (curr[valid] - prev[valid]) / prev[valid]
    return returns


def windowitz(t, portfolio_t, hi, lo, cl, op, stocks, vol):
    """Windowed Markowitz-style portfolio update for day ``t``.

    Every call caches ``cl`` and ``op`` into column ``t`` of the module-level
    ``cls`` / ``ops`` matrices. On days where ``t`` is a multiple of
    ``update_interval`` a new allocation over ``fundStocks`` is optimized;
    on all other days the current portfolio is returned unchanged.

    Parameters:
        t: integer day index.
        portfolio_t: current portfolio vector over all stocks.
        cl, op: price vectors supplied by the caller for this call.
        hi, lo, stocks, vol: accepted for interface compatibility; unused.

    Returns:
        (portfolio, hold_flag): the portfolio vector over all stocks and a
        boolean vector that is all False when a new allocation was computed
        and all True when the current portfolio is kept.
    """
    # save data
    cls[:, t] = cl.T
    ops[:, t] = op.T

    # `==` rather than `is`: identity of ints is an implementation detail.
    if t % update_interval != 0:
        return portfolio_t, np.ones(numStocks, dtype=bool)

    # build p matrix from the closes cached in columns 0 .. t-1
    p = _simple_returns(cls[fundStocks, :t])

    # compute statistics over the trailing window of train + test returns
    # NOTE(review): the slice end t+numTrain-1 is exclusive, so the newest
    # return column is left out and the window holds window_size-1 columns;
    # kept as in the original -- confirm this is intended.
    window = np.hstack([p_init, p])[:, t+numTrain-window_size:t+numTrain-1]
    pbar = np.mean(window, axis=1)
    sigma = np.cov(window)

    # set up and solve markowitz problem: maximize expected return, penalize
    # variance (mu) and distance from the current holdings (eta)
    b_opt = cvxpy.Variable(numFundStocks)
    objective = cvxpy.Minimize(-pbar*b_opt + mu*cvxpy.quad_form(b_opt, sigma)
                + eta*cvxpy.pnorm(b_opt - portfolio_t[fundStocks], 2))
    constraints = [cvxpy.sum_entries(b_opt) >= 0.1, cvxpy.sum_entries(b_opt) <= 1.0]
    problem = cvxpy.Problem(objective, constraints)
    problem.solve(verbose=False, solver='ECOS')

    # If the solver produced no solution, keep the current portfolio instead
    # of propagating an unusable value into the simulation.
    if b_opt.value is None:
        return portfolio_t, np.ones(numStocks, dtype=bool)

    portfolio = np.zeros(numStocks)
    # Flatten so the assignment works whether the solver value comes back as
    # a column matrix or a 1-D array.
    portfolio[fundStocks] = np.asarray(b_opt.value).ravel()

    # rescale so the weights sum to exactly 1
    portfolio /= np.sum(portfolio)

    return portfolio, np.zeros(numStocks, dtype=bool)

## Framework Code

In [None]:
def rebalance(value_vec, value_realizable, portfolio_dst, cost_rate):
    """Simulate rebalancing the active holdings to a target portfolio.

    The total transaction cost C is defined implicitly by

        sum(cost_rate * abs(portfolio_dst * (value_realizable - C)
                            - value_vec)) = C

    and is approximated here by a fixed number of fixed-point iterations
    starting from C = 0 (the original authors note the error shrinks like
    cost_rate**t after t iterations, by the Banach contraction theorem).

    Parameters:
        value_vec: current value of each active stock.
        value_realizable: total cash-realizable value.
        portfolio_dst: desired portfolio after rebalancing.
        cost_rate: transaction cost ratio (the caller passes 0.0005).

    Returns:
        (new_value_vec, trans_cost): the value vector after rebalancing and
        the total transaction cost.
    """
    n_iterations = 7
    trans_cost = 0
    for _ in range(n_iterations):
        # holdings we would end up with if the current cost estimate were exact
        target_values = portfolio_dst * (value_realizable - trans_cost)
        trans_cost = np.sum(cost_rate * np.abs(target_values - value_vec))

    new_value_vec = portfolio_dst * (value_realizable - trans_cost)
    return new_value_vec, trans_cost

In [None]:
# Bookkeeping arrays filled in by the simulation loop.
value_op_seq = np.zeros(numDays) # total value before open of each day
value_cl_seq = np.zeros(numDays) # total value before close of each day 
portfolio_seq = np.zeros((numStocks, numDays)) # portfolio before open of each day
# No close price is known yet; np.nan (the np.NaN alias is gone in NumPy 2.0).
last_close_price = np.full(numStocks, np.nan)
# Same output as the Python 2 statement `print numDays, numStocks`, but also
# valid on Python 3.
print('%d %d' % (numDays, numStocks))

In [None]:
# Initialization: before the first day only the first open price is known;
# every other input starts as NaN. Each vector gets its own array.
# (np.nan is used because the np.NaN alias was removed in NumPy 2.0.)
op = test_op[0,:]
cl = np.full(numStocks, np.nan)
hi = np.full(numStocks, np.nan)
lo = np.full(numStocks, np.nan)
vol = np.full(numStocks, np.nan)
value_op_seq[0] = 1 # initially we have one unit of cash

In [None]:
for t in range(numDays):
    # Progress indicator: day indices on one line, separated by spaces
    # (written via sys.stdout so it runs on both Python 2 and Python 3).
    sys.stdout.write('%d ' % t)
    
    # At the beginning of day t, we use all information up to the open 
    # price of day t to determine the capital allocation at the end of 
    # day t.
    
    # Your function should be placed here. Note that the input vector may
    # contain NaNs (at Day 1, or for some inactive stocks). 
    new_portfolio, hold_flag = windowitz(t, portfolio_seq[:,t], hi, lo, cl, op, all_stocks, vol)
    
    # Check whether your output is valid: the weights of active stocks that
    # are not flagged as "hold" must not sum to more than 1.
    isActive = np.isfinite(op)
    noHold = np.logical_and(isActive, np.logical_not(hold_flag))
    if np.sum(new_portfolio[noHold]) > 1 + 1e-8:
        sys.stderr.write('The output portfolio is not valid! {}'.format(np.sum(new_portfolio[noHold])))
        
    # The value of our portfolio at the end of Day t before paying transaction costs
    cl = test_cl[t,:]
    value_vec = value_op_seq[t] * portfolio_seq[:,t]
    growth = cl[isActive] / last_close_price[isActive]-1
    # No previous close (or a NaN price) means no measurable growth.
    growth[np.isnan(growth)] = 0
    revenue_vec = value_vec[isActive] * growth
    value_vec[isActive] = value_vec[isActive] + revenue_vec
    value_cl_seq[t] = value_op_seq[t] + np.sum(revenue_vec)
        

    # At the end of Day t, we use the close price of day t to adjust our 
    # portfolio to the desired percentage.
    if t <= numDays-2:
        nonActive = np.logical_not(isActive)
        # Value held in inactive stocks cannot be traded today.
        value_realizable = value_cl_seq[t] - np.sum(value_vec[nonActive])
        new_value_vec, trans_cost = rebalance(value_vec[isActive], value_realizable, \
                                              new_portfolio[isActive], 0.0005)
        value_op_seq[t+1] = value_cl_seq[t] - trans_cost
        value_vec[isActive] = new_value_vec
        portfolio_seq[:,t+1] = value_vec / value_op_seq[t+1]
        
    last_close_price[isActive] = cl[isActive]
    
    # Update information: tomorrow's open plus today's high / low / volume.
    if t <= numDays-2:
        op = test_op[t+1,:]
        # Fixed: the high was previously read from test_op (open prices).
        hi = test_hi[t,:]
        lo = test_lo[t,:]
        vol = test_vol[t,:]

## Results

In [None]:
# Plot the portfolio value recorded before each day's open.
fig = plt.figure()
plt.plot(value_op_seq)
plt.title('Portfolio Return over Time')
plt.ylabel('Return')
plt.xlabel('Time')

# Daily log returns, then annualized mean and Sharpe ratio using 252 days
# per year.
return_seq = np.log(value_op_seq[1:] / value_op_seq[:-1])
mean_return = np.mean(return_seq)
annual_return = 252 * mean_return
sharpe = np.sqrt(252) * mean_return / np.std(return_seq)
# Same output as the Python 2 statement `print annual_return, sharpe`, but
# also valid on Python 3.
print('%s %s' % (annual_return, sharpe))