In [1]:
import sys
sys.path.append('..')
import numpy as np
from util import load_pkl
from scipy.optimize import minimize

yfinance: pandas_datareader support is deprecated & semi-broken so will be removed in a future verison. Just use yfinance.


In [2]:
data_dir = f'../processed_data_128'
S = load_pkl(f'{data_dir}/S.pkl')
mu = np.load(f'{data_dir}/mu.npy')


with open(f'{data_dir}/final_tickers.txt', 'r') as f:
    final_tickers = f.read().splitlines()

In [3]:
mu

array([ 3.02263866e-02,  4.92291624e-02, -2.71182315e-02,  5.07348537e-02,
       -8.44006018e-03,  4.07393282e-02,  3.00530171e-02, -1.39917359e-02,
       -5.38466164e-02, -1.46288345e-02, -4.73387965e-02, -1.42292776e-02,
       -2.53899737e-02,  4.17080172e-02,  3.65502728e-02,  8.37732226e-03,
        4.56942974e-02, -3.66876943e-03,  9.24082208e-04, -5.78993393e-02,
        3.86363183e-02,  3.49098751e-02,  6.80459049e-02,  3.02982789e-02,
       -2.91474696e-02,  4.89236529e-03,  8.25409203e-02,  3.78631171e-02,
        2.47420565e-02, -3.86056410e-02,  3.45765686e-02,  2.94842486e-02,
        1.05730387e-01, -1.45236880e-03, -4.89740238e-04,  1.62529163e-02,
        8.05673458e-02,  4.15521921e-02,  2.80172611e-02,  3.57483659e-02,
        1.21003835e-02,  8.05896817e-02, -2.46954031e-02, -3.10626013e-02,
        6.18454625e-02,  4.80541968e-03,  5.17139469e-02,  3.01001608e-02,
       -3.31812638e-02,  2.49933673e-02,  5.47943723e-02,  2.87696074e-02,
        7.27659671e-02,  

In [4]:
import numpy as np

def negate_matrix_elements_based_on_vector(vector, matrix):
    n = len(vector)
    
    # Ensure matrix is n x n
    if matrix.shape != (n, n):
        raise ValueError("Matrix must be of shape n x n where n is the length of the vector.")
    
    # Convert input list to NumPy array for easier manipulation
    vector = np.array(vector)
    matrix = np.array(matrix)
    
    # Loop through each element in the vector
    for i in range(n):
        if vector[i] < 0:
            # Negate the ith row, excluding the diagonal element
            matrix[i, :] *= -1
            matrix[i, i] *= -1
            
            # Negate the ith column, excluding the diagonal element
            matrix[:, i] *= -1
            matrix[i, i] *= -1
    
    return matrix

# Example usage
example_vector = [1, -2, 3]
example_matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])

resulting_matrix = negate_matrix_elements_based_on_vector(example_vector, example_matrix)
print("Modified Matrix:")
print(resulting_matrix)


Modified Matrix:
[[ 1 -2  3]
 [-4  5 -6]
 [ 7 -8  9]]


In [13]:
def portfolio_volatility_log_return(weights, returns, covariance, allow_short=False):
    if allow_short:
        negated_covariance = negate_matrix_elements_based_on_vector(returns, covariance)
        return np.sqrt(np.dot(weights.T, np.dot(negated_covariance, weights)))
    else:
        return np.sqrt(np.dot(weights.T, np.dot(covariance, weights)))

def portfolio_log_return(weights, returns, allow_short=False):
    return np.sum(np.abs(returns)*weights) if allow_short else np.sum(returns*weights)

def portfolio_volatility(weights, returns, covariance_log_returns, allow_short=False):
    if allow_short:
        covariance_log_returns = negate_matrix_elements_based_on_vector(returns, covariance_log_returns)

    covariance_returns = np.exp(covariance_log_returns) - 1
    return np.sqrt(np.dot(weights.T, np.dot(covariance_returns, weights)))

def portfolio_return(weights, log_returns, allow_short=False):
    returns = np.exp(log_returns) - 1

    return np.sum(np.abs(returns)*weights) if allow_short else np.sum(returns*weights)

def min_func_sharpe(weights, returns, covariance, risk_free_rate, allow_short=False):
    portfolio_ret = portfolio_log_return(weights, returns, allow_short)
    portfolio_vol = portfolio_volatility_log_return(weights, returns, covariance, allow_short=allow_short)
    sharpe_ratio = (portfolio_ret - risk_free_rate) / portfolio_vol
    #return -sharpe_ratio # Negate Sharpe ratio because we minimize the function
    return - (portfolio_ret - 2 * portfolio_vol)



def optimize_portfolio(returns, covariance, risk_free_rate, allow_short=False):
    num_assets = len(returns)
    args = (returns, covariance, risk_free_rate)

    # Define constraints
    def constraint_sum(weights):
        return np.sum(weights) - 1
    
    constraints = [{'type': 'eq', 'fun': constraint_sum}]

    bounds = tuple((0.0, 0.20) for _ in range(num_assets))

    # Perform optimization
    def objective(weights):
        return min_func_sharpe(weights, returns, covariance, risk_free_rate, allow_short)
    
    iteration = [0]  # mutable container to store iteration count
    def callback(weights):
        iteration[0] += 1
        
        print(f"Iteration: {iteration[0]}, value: {objective(weights)}")

    # Initial guess (equal weights)
    initial_guess = num_assets * [1. / num_assets]

    # Perform optimization
    result = minimize(objective, initial_guess, 
                      method='SLSQP', bounds=bounds, constraints=constraints, callback=callback, options={'maxiter': 100})

    return result

In [31]:
INTEREST_RATE = 0.0497    # Current interest rate accessible for USD
ANNUAL_TRADING_DAYS = 252
MAX_RISK = 0.08


riskfree_log_return = np.log(1 + INTEREST_RATE) * 128 / ANNUAL_TRADING_DAYS
raw_weights = optimize_portfolio(mu, S, 0, allow_short=False)

Iteration: 1, value: 0.14677906806166452
Iteration: 2, value: 0.055189582544997645
Iteration: 3, value: 0.01118995673360726
Iteration: 4, value: 0.033592244593108944
Iteration: 5, value: -0.05817059442196103
Iteration: 6, value: -0.07037510515047905
Iteration: 7, value: -0.057439960613356905
Iteration: 8, value: -0.07884582271261621
Iteration: 9, value: -0.07990608133580622
Iteration: 10, value: -0.08014860453591435
Iteration: 11, value: -0.08022407953059683
Iteration: 12, value: -0.08023239416531416
Iteration: 13, value: -0.08023519973952714
Iteration: 14, value: -0.08023519973952714


In [33]:
raw_weights

 message: Optimization terminated successfully
 success: True
  status: 0
     fun: -0.08023519973952714
       x: [ 1.940e-16  0.000e+00 ...  0.000e+00  0.000e+00]
     nit: 14
     jac: [ 7.964e-02  1.170e-01 ...  6.787e-03  0.000e+00]
    nfev: 9217
    njev: 14

In [34]:
raw_weights_x = raw_weights.x

In [35]:
np.sum(raw_weights_x)

np.float64(1.00000000000011)

In [36]:
for index, ticker_name in enumerate(final_tickers):
    weight = raw_weights_x[index]
    if weight > 1e-6:
        print(f'index: {index} {ticker_name}: weight {weight} exp profit: {mu[index]}, variance: {S[ticker_name][ticker_name]}')

period = 128
print(f'expected return in {period} trading days: {portfolio_return(raw_weights_x, mu)}')
print(f'volatility of the return in {period} trading days: {portfolio_volatility(raw_weights_x, mu, S, allow_short=False)}')


index: 26 RHM.DE: weight 0.00045816341320039104 exp profit: 0.08254092034931508, variance: 0.12186831324011194
index: 114 STAN.L: weight 0.020908582521131154 exp profit: 0.10753872067152981, variance: 0.046424944433962965
index: 249 CTRA: weight 0.039519755753076265 exp profit: 0.0823195190471262, variance: 0.045261781356950734
index: 282 LLY: weight 0.07781250813894872 exp profit: 0.13854621875019543, variance: 0.03339536619421872
index: 327 GILD: weight 0.1031254634158085 exp profit: 0.14760081464247574, variance: 0.027106525124908597
index: 369 JKHY: weight 0.19313278505683784 exp profit: 0.1667522366518087, variance: 0.012110325496136502
index: 448 NVR: weight 0.2 exp profit: 0.2800504865596504, variance: 0.03267963158469925
index: 522 SMCI: weight 0.060949519590224766 exp profit: 0.2237866353715016, variance: 0.2854106102093247
index: 555 UNH: weight 0.09980738811120897 exp profit: 0.16713727884488241, variance: 0.01637004039107848
index: 562 VRTX: weight 0.02219160376176045 exp p

In [239]:
raw_weights_2 = optimize_portfolio(mu, S, riskfree_log_return, allow_short=True)
raw_weights_2_x = raw_weights_2.x

Iteration: 1, value: 0.1367563423042089
Iteration: 2, value: 0.03209919641236683
Iteration: 3, value: 0.20473358881393505
Iteration: 4, value: 0.19952818560087798
Iteration: 5, value: -0.002190249585423368
Iteration: 6, value: -0.04829069908752448
Iteration: 7, value: -0.05040468763414131
Iteration: 8, value: -0.04754132767329883
Iteration: 9, value: -0.05082016466109278
Iteration: 10, value: -0.05178309568144489
Iteration: 11, value: -0.052279730116660775
Iteration: 12, value: -0.05248179852138063
Iteration: 13, value: -0.052545873494688186
Iteration: 14, value: -0.0525883365871162
Iteration: 15, value: -0.052600491317548057
Iteration: 16, value: -0.05260395865504357
Iteration: 17, value: -0.05260647802896956
Iteration: 18, value: -0.05260647802896956


In [240]:
raw_weights_2

 message: Optimization terminated successfully
 success: True
  status: 0
     fun: -0.05260647802896956
       x: [ 4.244e-16  0.000e+00 ...  0.000e+00  0.000e+00]
     nit: 18
     jac: [ 6.831e-02  9.132e-02 ...  2.669e-02  1.186e-01]
    nfev: 11130
    njev: 18

In [241]:
raw_weights_2_x

array([4.24429322e-16, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.65818772e-16,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.30363597e-17,
       1.41593073e-17, 0.00000000e+00, 0.00000000e+00, 9.17079943e-16,
       5.67361217e-16, 0.00000000e+00, 1.53176253e-17, 2.59083322e-16,
       2.22817702e-16, 4.27323341e-16, 0.00000000e+00, 9.67946787e-17,
       2.09405151e-16, 0.00000000e+00, 0.00000000e+00, 4.13292720e-16,
       9.50786903e-17, 4.87668810e-16, 0.00000000e+00, 1.79730103e-18,
       0.00000000e+00, 5.99865300e-17, 1.24492919e-16, 2.23011823e-16,
       3.90008417e-16, 2.47891453e-16, 6.32413044e-17, 9.15620402e-17,
       0.00000000e+00, 5.72276797e-16, 0.00000000e+00, 2.33671252e-18,
       1.77301921e-16, 2.51641190e-17, 3.62672193e-16, 1.35196618e-16,
       0.00000000e+00, 1.30313980e-16, 0.00000000e+00, 0.00000000e+00,
       6.14700673e-16, 8.79974803e-17, 2.41751515e-16, 1.47680889e-15,
      

In [242]:
np.sum(raw_weights_2_x)

1.0000000000000666

In [243]:

for index, ticker_name in enumerate(final_tickers):
    weight = raw_weights_2_x[index]
    if abs(weight) > 1e-2:
        print(f'index: {index} {ticker_name}: weight {weight} exp profit: {mu[index]}, variance: {S[ticker_name][ticker_name]}')

period = 128
print(f'expected return in {period} trading days: {portfolio_return(raw_weights_2_x, mu)}')
print(f'volatility of the return in {period} trading days: {portfolio_volatility(raw_weights_2_x, S)}')

index: 115 STAN.L: weight 0.06940932033704353 exp profit: 0.10238089417476481, variance: 0.04586061437233113
index: 158 AMGN: weight 0.07431243214457392 exp profit: 0.09861566943664779, variance: 0.017899267574992047
index: 189 BIIB: weight 0.042900139778976665 exp profit: 0.1988882007703651, variance: 0.07774654204843033
index: 249 COST: weight 0.03558101407191769 exp profit: 0.08356320316462489, variance: 0.02061973562158828
index: 250 CTRA: weight 0.05635014005671591 exp profit: 0.07433539701457556, variance: 0.0484500600172913
index: 283 LLY: weight 0.03948522028075085 exp profit: 0.11654037549544001, variance: 0.032870287324877165
index: 328 GILD: weight 0.1454264331821711 exp profit: 0.15744309058179476, variance: 0.026643804311949626
index: 341 HOLX: weight 0.05758084586182881 exp profit: 0.19164896779780063, variance: 0.03358620853875189
index: 370 JKHY: weight 0.1526600094065125 exp profit: 0.13122678600033347, variance: 0.011048992099250242
index: 410 MCK: weight 0.0723219528

In [232]:

def adjust_weights(weights, threshold=0.01, tolerance=1e-6):
    weights = np.array(weights)
    
    # Ensure the sum of absolute values of weights is 1
    abs_sum = np.sum(np.abs(weights))
    if abs_sum != 1:
        weights = weights / abs_sum

    run = 0
    while True:
        print(run)
        run += 1
        # Identify weights with absolute values below the threshold
        below_threshold = np.abs(weights) < threshold
        if not np.any(below_threshold):
            break

        # Find the minimal non-zero weight among those that are below the threshold
        invalid_weights = (np.abs(weights) < threshold) & (np.abs(weights) > tolerance)
        if np.any(invalid_weights):
            min_nonzero_weight = np.min(np.abs(weights[invalid_weights]))
            min_index = np.where(np.abs(weights) == min_nonzero_weight)[0][0]
        else:
            break


        # Set the minimal weight to zero
        min_value = weights[min_index]
        weights[min_index] = 0

        # Compute the deficit or surplus
        deficit = np.abs(min_value)

        # Spread this deficit or surplus equally among the remaining stocks (i.e., the ones with abs(weights) >= threshold)
        valid_weights = np.abs(weights) >= np.abs(min_value)
        adjustment = deficit / np.sum(valid_weights)
        
        # Adjust only the valid weights
        weights[valid_weights] += np.sign(weights[valid_weights]) * adjustment
        abs_sum = np.sum(np.abs(weights))
        print(abs_sum)
    return weights

In [67]:
adjusted_weights = adjust_weights(raw_weights_2)

0
1.0
1
1.0
2
0.9999999999999999
3
0.9999999999999999
4
0.9999999999999998
5
0.9999999999999999
6
0.9999999999999998
7
0.9999999999999999
8
0.9999999999999999
9
0.9999999999999998
10
1.0
11
0.9999999999999999
12
0.9999999999999999
13
0.9999999999999998
14
0.9999999999999999
15
0.9999999999999998
16
0.9999999999999999
17
0.9999999999999998
18
0.9999999999999997
19
0.9999999999999998
20
0.9999999999999998
21
0.9999999999999998
22
0.9999999999999998
23
0.9999999999999997
24
0.9999999999999998
25
0.9999999999999999
26
0.9999999999999999
27
0.9999999999999999
28
0.9999999999999999
29
0.9999999999999998
30
0.9999999999999998
31
1.0
32
0.9999999999999999
33
0.9999999999999998
34
0.9999999999999998
35
0.9999999999999998
36
0.9999999999999998
37
0.9999999999999999
38
0.9999999999999998
39
0.9999999999999999
40
0.9999999999999998
41
0.9999999999999998
42
0.9999999999999998
43
0.9999999999999999
44
0.9999999999999998
45
1.0
46
1.0
47
0.9999999999999998
48
1.0
49
0.9999999999999998
50
0.9999999999

In [69]:
for index, ticker_name in enumerate(final_tickers):
    weight = adjusted_weights[index]
    if weight != 0:
        print(f'index: {index} {ticker_name}: weight {weight} exp profit: {mu[index]}, variance: {S[ticker_name][ticker_name]}')

period = 128
print(f'expected return in {period} trading days: {portfolio_return(adjusted_weights, mu)}')
print(f'volatility of the return in {period} trading days: {portfolio_volatility(adjusted_weights, S)}')

index: 4 BAYN.DE: weight -0.010198953037516107 exp profit: 0.007934831103408728, variance: 0.05636878735389428
index: 7 BNR.DE: weight -0.01101896111033495 exp profit: -0.03431060380591024, variance: 0.045209597037622236
index: 12 DHL.DE: weight -0.010134905612508772 exp profit: -0.03535062656654784, variance: 0.06701840444315421
index: 24 PAH3.DE: weight -0.010111352816098186 exp profit: -0.023091050822275003, variance: 0.06473236879799919
index: 32 VOW3.DE: weight 0.012337935872867533 exp profit: 0.09479460852776339, variance: 0.05621442528413051
index: 37 ANTO.L: weight 0.010206852045053118 exp profit: 0.061976812827905146, variance: 0.07287534278567234
index: 53 CCH.L: weight 0.011355477232114391 exp profit: 0.07659219244254027, variance: 0.06142059283075459
index: 62 FCIT.L: weight 0.011194642645061786 exp profit: 0.07376576718726192, variance: 0.01737241474577895
index: 64 FRAS.L: weight 0.01316192934588307 exp profit: 0.21625988767808835, variance: 0.08884769741107183
index: 66 