In [80]:
import numpy as np
import pandas as pd
import random
from pandas_datareader import data

In [100]:

def get_data(tickers):
    portfolio = pd.DataFrame()
    
    for t in tickers:
        portfolio[t] = data.DataReader(t, 'stooq', start='2019-02-01')['Close']
        
    portfolio.columns = tickers
    returns = np.log(portfolio / portfolio.shift(1))
    
    port_return = np.array(returns.mean() * 252)
    port_risk = returns.cov()
    return portfolio, port_return, port_risk



def generate_weights(inputs, population):
    n_assets = len(inputs.columns)
    array = np.empty((population, (n_assets + 2)))
    weights = []
    
    for i in range(0, population):
        weighting = np.random.random(n_assets)
        weighting /= np.sum(weighting)
        weights.append(weighting)
    weights = np.array(weights)
    
    for i in range(0, n_assets):
       array[:, i] = weights[:, i]
       
    return array, n_assets



def fitness_func(weights, x1, x2, n_assets, riskFree):
    fitness = []
    
    for i in range(0, len(weights)):
        w_return = (weights[i, 0:n_assets] * x1) 
        w_risk = np.sqrt(np.dot(weights[i, 0:n_assets].T, np.dot(x2, weights[i, 0:n_assets]))) * np.sqrt(252)
        score = ((np.sum(w_return) * 100) - riskFree) / (np.sum(w_risk) * 100)
        fitness.append(score)
        
    fitness = np.array(fitness).reshape(len(weights))
    weights[:, n_assets] = fitness
    
    return weights



def elitism(elitism_rate, fit_func_res, n_assets):
    sorted_ff = fit_func_res[fit_func_res[:, n_assets].argsort()]
    elite_w = int(len(sorted_ff) * elitism_rate)
    elite_results = sorted_ff[-elite_w:]
    non_elite_results = sorted_ff[:-elite_w] 
    
    return elite_results, non_elite_results



def selection(parents, n_assets):     
    sol_len = int(len(parents) / 2)
    if (sol_len % 2) != 0: sol_len = sol_len + 1
    crossover_gen = np.empty((0, (n_assets + 2)))  
    
    for i in range(0, sol_len):
        parents[:, (n_assets + 1)] = np.cumsum(parents[:, n_assets]).reshape(len(parents))
        rand = random.randint(0, abs(int(sum(parents[:, n_assets]))))
        
        for i in range(0, len(parents)): nearest_val = min(parents[i:, (n_assets + 1)], key = lambda x: abs(x - rand))
        val = np.where(parents == nearest_val)
        index = val[0][0]
        
        next_gen = parents[index].reshape(1, (n_assets + 2))
        
        crossover_gen = np.append(crossover_gen, next_gen, axis = 0) 
        parents = np.delete(parents, (val[0]), 0)
        
    non_crossover_gen = crossover_gen.copy()
    
    return crossover_gen, non_crossover_gen



def crossover(probability, weights, assets):   
    for i in range(0, int((len(weights))/2), 2): 
        gen1, gen2 = weights[i], weights[i+1]
        gen1, gen2 = uni_co(gen1, gen2, assets, probability)
        weights[i], weights[i+1] = gen1, gen2
        
    weights = normalise(weights, assets)
    
    return weights
    


def uni_co(gen1, gen2, assets, crossover_rate):
    prob = np.random.normal(1, 1, assets)
    
    for i in range(0, len(prob)):
        if prob[i] > crossover_rate:
            gen1[i], gen2[i] = gen2[i], gen1[i]  
            
    return gen1, gen2



def mutation(probability, generation, assets): 
    weight_n = len(generation) * ((np.shape(generation)[1]) - 2)
    mutate_gens = int(weight_n * probability)
    
    if (mutate_gens >= 1):
        for i in range(0, mutate_gens):
            rand_pos_x, rand_pos_y = random.randint(0, (len(generation) - 1)), random.randint(0, (assets - 1))
            mu_gen = generation[rand_pos_x][rand_pos_y]
            mutated_ind = mu_gen * np.random.normal(0,1)
            generation[rand_pos_x][rand_pos_y] = abs(mutated_ind)
            generation = normalise(generation, assets)
        return generation
    else:
        return generation



def normalise(generation, assets):
    for i in range(0, len(generation)):
        generation[i][0:assets] /= np.sum(generation[i][0:assets])
    return generation



def next_gen(elites, children, no_cross_parents):
    weights = np.vstack((elites, children, no_cross_parents))
    return weights 



def optimal_solution(generations, assets):
    optimal_weights = generations[generations[:, (assets + 1)].argsort()]
    return optimal_weights[0]



def avg_gen_result(weights, n_assets):
    average = round(np.mean(weights[:, n_assets]), 2)
    return average



def genetic_algorithm(tickers, risk_free_rate, population, generations, crossover_rate, mutation_rate, elite_rate):
    weights, port_return, port_risk = get_data(tickers)
    weights, n_assets = generate_weights(weights, population)
    
    for i in range(0, generations):
        results = fitness_func(weights, port_return, port_risk, n_assets, risk_free_rate)
        
        elites, parents = elitism(elite_rate, results, n_assets)
        parents, no_cross_parents = selection(parents, n_assets)
        children = crossover(crossover_rate, parents, n_assets)
        children = mutation(mutation_rate, children, n_assets) 
        
        weights = next_gen(elites, children, no_cross_parents)
        
        avg_res = avg_gen_result(weights, n_assets)
        print('Generation', i, ': Average Sharpe Ratio of', avg_res, 'from', len(weights), 'chromosomes')
        
    opt_solution = optimal_solution(weights, n_assets)
    
    return opt_solution, port_return

In [101]:
tickers = ['AAPL', 'TSLA', 'MSFT', 'GOOG', 'IBM']
population = 150
risk_free_rate = 2
generations = 40
crossover_rate = 0.4
mutation_rate = 0.01
elite_rate = 0.25

# Run Function and Return Optimal Weights

optimal_weights, port_return = genetic_algorithm(tickers, risk_free_rate, population, generations, crossover_rate, mutation_rate, elite_rate)
optimal_weights

Generation 0 : Average Sharpe Ratio of -1.48 from 149 chromosomes
Generation 1 : Average Sharpe Ratio of -1.46 from 149 chromosomes
Generation 2 : Average Sharpe Ratio of -1.44 from 149 chromosomes
Generation 3 : Average Sharpe Ratio of -1.43 from 149 chromosomes
Generation 4 : Average Sharpe Ratio of -1.42 from 149 chromosomes
Generation 5 : Average Sharpe Ratio of -1.41 from 149 chromosomes
Generation 6 : Average Sharpe Ratio of -1.39 from 149 chromosomes
Generation 7 : Average Sharpe Ratio of -1.39 from 149 chromosomes
Generation 8 : Average Sharpe Ratio of -1.38 from 149 chromosomes
Generation 9 : Average Sharpe Ratio of -1.37 from 149 chromosomes
Generation 10 : Average Sharpe Ratio of -1.37 from 149 chromosomes
Generation 11 : Average Sharpe Ratio of -1.36 from 149 chromosomes
Generation 12 : Average Sharpe Ratio of -1.35 from 149 chromosomes
Generation 13 : Average Sharpe Ratio of -1.34 from 149 chromosomes
Generation 14 : Average Sharpe Ratio of -1.32 from 149 chromosomes
Gener

array([ 2.44029712e-02,  9.83167982e-02,  4.46291503e-02,  1.38842446e-01,
        6.93808635e-01, -8.50231260e-01, -1.39764505e+02])

In [102]:
print(port_return)
print(optimal_weights)

[-0.52360888 -0.96745381 -0.42027478 -0.34827696 -0.06787802]
[ 2.44029712e-02  9.83167982e-02  4.46291503e-02  1.38842446e-01
  6.93808635e-01 -8.50231260e-01 -1.39764505e+02]


# TEST AREA

In [85]:
df = pd.read_csv("dataset_assetclass.csv")
portfolio = df.transpose()
portfolio.columns = df.asset_class
returns = portfolio.iloc[0]

#print(returns)
port_return = np.array(returns.mean() * 252)
#print(port_return)
t = df.cov()
#port_risk = returns.cov()
print(t)

                        cost          mean      variance         cov_1  \
cost           189384.196170  8.512937e+00 -6.790213e-02 -1.855101e-02   
mean                8.512937  7.812559e-04 -2.729229e-06 -1.877905e-06   
variance           -0.067902 -2.729229e-06  5.808498e-07  2.893129e-08   
cov_1              -0.018551 -1.877905e-06  2.893129e-08  5.305698e-08   
cov_2               0.007128  1.725313e-06  4.357790e-08 -2.054863e-08   
cov_3               0.000658 -5.947278e-07  4.402920e-08 -9.188499e-09   
cov_4              -0.002495 -2.385450e-07  1.625472e-08  1.033096e-08   
cov_5              -0.020158  1.730020e-06  3.353650e-09 -2.622791e-08   
cov_6              -0.008024 -1.382571e-06 -2.240504e-08 -7.898919e-09   
cov_7               0.006660  6.206871e-07 -6.256608e-08  2.624541e-08   
cov_8               0.006798 -1.983419e-06 -6.533948e-08  1.531375e-08   
cov_9               0.021154  8.618647e-07  2.524217e-07 -3.542177e-08   
cov_10             -0.009019 -1.462311

In [76]:
portfolio

asset_class,Property 2,Property 3,Property 4,Property 5,Property 6,Property 7,Property 8,Property 9,Property 10,Property 11,Property 12,Property 13,Property 14,Property 15,Property 16,Property 17,Property 18,Property 19,Property 20
cost,598.186,188.194,462.446,145.515,47.696,359.134,145.327,529.357,252.338,77.716,1163.668,210.739,60.384,239.439,347.984,140.362,1811.446,72.415,374.098
mean,0.060713,0.012152,0.029049,0.035675,-0.006948,0.017258,-0.009848,0.009658,0.022583,-0.002926,0.077504,-0.004551,-0.003068,0.065,0.000559,0.02782,0.06875,0.000443,0.035818
variance,0.001186,0.000315,0.000105,0.000892,0.000233,0.000769,0.000457,0.002449,0.000743,0.001108,0.000292,0.001488,0.002453,0.002066,0.000345,0.000464,0.000636,0.002063,0.000754
asset_class,Property 2,Property 3,Property 4,Property 5,Property 6,Property 7,Property 8,Property 9,Property 10,Property 11,Property 12,Property 13,Property 14,Property 15,Property 16,Property 17,Property 18,Property 19,Property 20
cov_1,-0.000092,0.000057,0.000002,-0.000395,0.000173,0.000304,0.00012,-0.000099,0.000441,0.000258,-0.000081,-0.000094,0.000659,0.000095,-0.000065,-0.000011,0.000067,-0.000056,0.000067
cov_2,0.001186,-0.000141,0.000033,0.000246,-0.000064,-0.000078,-0.000116,-0.00002,-0.000538,0.000178,-0.000009,0.000267,0.000242,-0.000152,-0.000094,0.000033,-0.000076,-0.000182,-0.000228
cov_3,-0.000141,0.000315,-0.000022,-0.000121,0.000042,-0.000104,-0.000104,0.000329,-0.000037,-0.000158,-0.00013,-0.000049,-0.000203,0.000084,-0.000099,-0.000193,0.000064,0.000451,0.000007
cov_4,0.000033,-0.000022,0.000105,0.000028,-0.000079,-0.000052,0.0,-0.00004,0.000047,-0.000057,0.000002,-0.00007,0.000356,-0.000101,-0.000012,0.000034,0.000001,-0.000004,-0.00004
cov_5,0.000246,-0.000121,0.000028,0.000892,-0.000099,-0.000183,-0.000174,0.000041,-0.000001,0.000121,0.000056,0.000092,-0.000054,0.000146,-0.000046,-0.000097,-0.000342,-0.000519,-0.000125
cov_6,-0.000064,0.000042,-0.000079,-0.000099,0.000233,-0.00007,0.000097,-0.000029,-0.00002,0.000075,-0.000088,0.000228,-0.000296,-0.000056,0.000036,0.000045,0.000032,0.000149,-0.000065
