In [1]:
import multiprocessing
import random

import numpy as np
import pandas as pd
from deap import algorithms

from preprocessing import preprocess_data
from ga_setup import setup_ga
from strategy_visualizer import print_strategy, plot_strategy, simulate_strategy, simulate_best_possible_strategy

In [2]:
# Load the input CSV (presumably daily WIG index quotes, judging by the file
# name) and build the technical-analysis feature table from it.
# preprocess_data returns three values; the third one is discarded here.
# NOTE(review): binding `_` in IPython shadows the built-in "last output"
# variable for the rest of the session — consider a descriptive throwaway name.
stock_data, ta_features, _ = preprocess_data("./data/wig_d.csv")
# Bare last expression: the notebook renders the feature table below the cell.
ta_features

(1918, 95)
(1918, 110)
(1918, 115)
(1918, 123)


Unnamed: 0,volume_adi,volume_obv,volume_cmf,volume_fi,volume_em,volume_sma_em,volume_vpt,volume_vwap,volume_mfi,volume_nvi,...,GENERATED_WMAIndicator,GENERATED_AverageTrueRange,GENERATED_BollingerBands,GENERATED_DonchianChannel,GENERATED_KeltnerChannel,GENERATED_UlcerIndex,GENERATED_ChaikinMoneyFlowIndicator,GENERATED_EaseOfMovementIndicator,GENERATED_ForceIndexIndicator,GENERATED_MFIIndicator
765,0.078530,0.227485,0.367481,0.753449,0.819195,0.868265,0.549035,0.423765,0.468638,0.748485,...,0.353072,0.055609,0.063591,0.140701,0.217313,0.248456,0.545043,0.842860,0.686081,0.705340
766,0.070480,0.211336,0.334660,0.739340,0.817595,0.869404,0.540150,0.420771,0.470568,0.748485,...,0.351682,0.073916,0.061127,0.140330,0.218489,0.245642,0.541020,0.844584,0.678201,0.715920
767,0.077493,0.196443,0.424477,0.737530,0.804600,0.868892,0.537933,0.418520,0.452014,0.743019,...,0.350219,0.070645,0.058839,0.128402,0.217205,0.242949,0.521714,0.843667,0.678298,0.646265
768,0.084637,0.206373,0.510529,0.744939,0.825589,0.870605,0.541580,0.417205,0.454049,0.756484,...,0.349145,0.051581,0.057902,0.128340,0.212909,0.240989,0.677137,0.840134,0.685057,0.631447
769,0.087190,0.217087,0.480599,0.750667,0.824306,0.870822,0.544806,0.415680,0.452514,0.756484,...,0.348397,0.037163,0.057108,0.128153,0.208271,0.239082,0.586317,0.840337,0.691071,0.623883
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2278,0.957557,0.898551,0.828649,0.993424,0.871783,0.991989,0.811603,0.312019,0.712306,0.412539,...,0.071749,0.240135,0.827167,0.968082,0.786507,0.993369,0.893190,0.261899,1.000000,0.764434
2279,0.966771,0.873507,0.867073,0.955853,0.829428,0.982671,0.809756,0.323694,0.730872,0.410112,...,0.082767,0.235282,0.817432,0.978399,0.774162,0.994726,0.828025,0.266802,0.987088,0.774359
2280,0.967693,0.846309,0.823694,0.914818,0.819299,0.980124,0.802191,0.335261,0.638523,0.410112,...,0.093392,0.245592,0.804941,0.988767,0.766405,0.996290,0.788168,0.251898,0.968144,0.697097
2281,0.990571,0.882841,0.885726,0.910447,0.817958,0.979381,0.814606,0.347676,0.717372,0.410112,...,0.104165,0.220263,0.790637,0.999741,0.754129,0.997813,0.766877,0.238246,0.972904,0.775921


In [3]:
# Genetic algorithm parameters:
POPULATION_SIZE = 100  # handed to setup_ga (presumably the number of individuals)
N_GENERATIONS = 500  # passed to eaSimple as `ngen`
# Handed to setup_ga; presumably the tournament size expressed as a fraction
# of the population — confirm in ga_setup.
TOURNAMENT_SIZE_POPULATION_RATIO = 0.1
CROSSOVER_PROBABILITY = 0.5  # passed to eaSimple as `cxpb`
MUTATION_PROBABILITY = 0.2  # passed to eaSimple as `mutpb`

# Simulation parameters:
INITIAL_MONEY = 1000
COMMISSION = 0.0001 # 0.01%

# Reproducibility:
# The evolutionary run is stochastic, so seed the random number generators of
# this (main) process once, before the population is created, to make
# "Restart Kernel -> Run All" repeatable.
# NOTE(review): worker processes started later by multiprocessing are not
# re-seeded here; this is sufficient only if the work they do is deterministic.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

In [4]:
# Build the genetic-algorithm components from the loaded data and the
# constants configured earlier. setup_ga returns four objects: the initial
# population, the DEAP toolbox, a statistics object and a hall of fame; all
# four are consumed by the eaSimple call in the next cell.
population, toolbox, stats, hall_of_fame = setup_ga(
    stock_data,
    ta_features,
    POPULATION_SIZE,
    TOURNAMENT_SIZE_POPULATION_RATIO,
    INITIAL_MONEY,
    COMMISSION,
)
# Register a process pool's `map` under the toolbox alias "map" so that DEAP
# can spread its mapped work (presumably fitness evaluation) over worker
# processes. Pool() with no arguments uses the default number of workers.
# NOTE(review): the pool is never closed or joined anywhere in the visible
# notebook, so worker processes stay alive until the kernel exits; re-running
# this cell creates an additional pool each time.
pool = multiprocessing.Pool()
toolbox.register("map", pool.map)

In [5]:
# Run DEAP's simple evolutionary algorithm for N_GENERATIONS generations.
# With verbose=True the per-generation statistics are printed as the run
# progresses, and the hall of fame is updated along the way.
#
# eaSimple returns a (population, logbook) pair. Unpack it so that
# `final_population` really is the final population (previously it was bound
# to the whole tuple) and the per-generation statistics stay available in
# `logbook`.
final_population, logbook = algorithms.eaSimple(
    population,
    toolbox,
    ngen=N_GENERATIONS,
    cxpb=CROSSOVER_PROBABILITY,
    mutpb=MUTATION_PROBABILITY,
    stats=stats,
    halloffame=hall_of_fame,
    verbose=True,
)

gen	nevals	avg           	std           	min           	max           
0  	100   	[968.84420879]	[147.87454299]	[560.45032623]	[1521.2102225]
1  	63    	[1094.69034859]	[162.3536908] 	[713.07792264]	[1521.2102225]
2  	62    	[1251.54879352]	[190.73740605]	[855.17660031]	[1639.54221278]
3  	64    	[1421.28380263]	[208.27316685]	[850.86171129]	[1815.17779364]
4  	48    	[1570.93276448]	[175.33930491]	[993.10196898]	[1878.70422692]
5  	54    	[1561.36272193]	[293.71420121]	[800.20785635]	[1881.90897586]
6  	60    	[1625.0539836] 	[300.81103906]	[715.30093694]	[1887.01796179]
7  	60    	[1612.61583134]	[336.3168883] 	[792.48392541]	[1964.87158311]
8  	56    	[1832.68753355]	[162.05303336]	[1245.04063707]	[2036.19320252]
9  	65    	[1864.62082437]	[167.80376871]	[1239.37857251]	[2038.63810787]
10 	59    	[1895.73172735]	[163.23647895]	[1314.7294049] 	[2050.40741398]
11 	67    	[1944.89711981]	[158.49736645]	[1315.37356008]	[2082.02804763]
12 	66    	[1966.5420982] 	[161.28100276]	[1286.6927

KeyboardInterrupt: 

In [None]:
# Take the first hall-of-fame entry as the strategy to inspect.
best_strategy = hall_of_fame[0]

# The individual is cut into two halves: the first is used as the buy weights,
# the second as the sell weights.
buy_strategy, sell_strategy = np.array_split(best_strategy, indices_or_sections=2)

# One row per feature column, paired positionally with its two weights
# (zip stops at the shortest of the three sequences).
pd.DataFrame(
    data=zip(ta_features.columns, buy_strategy, sell_strategy),
    columns=["feature", "buy strategy weight", "sell strategy weight"],
)

In [None]:
# Report the selected strategy using the same data and simulation settings as
# the GA run (the output format is defined by strategy_visualizer.print_strategy).
print_strategy(stock_data, ta_features, best_strategy, INITIAL_MONEY, COMMISSION)

In [None]:
# Replay the selected strategy on the data; simulate_strategy yields seven
# series that are forwarded unchanged to the plotting helper.
(
    dates,
    close_prices,
    x_buy,
    y_buy,
    x_sell,
    y_sell,
    sum_money_and_money_in_shares,
) = simulate_strategy(stock_data, ta_features, best_strategy, INITIAL_MONEY, COMMISSION)

plot_strategy(
    dates,
    close_prices,
    x_buy,
    y_buy,
    x_sell,
    y_sell,
    sum_money_and_money_in_shares,
)

# Keep the last value of the series for the comparison at the end of the notebook.
best_strategy_final_money = sum_money_and_money_in_shares[-1]

In [None]:
# Same simulation and plot for the reference "best possible" strategy, which
# needs only the stock data. This rebinds the seven series names used for the
# evolved strategy's plot.
(
    dates,
    close_prices,
    x_buy,
    y_buy,
    x_sell,
    y_sell,
    sum_money_and_money_in_shares,
) = simulate_best_possible_strategy(stock_data)

plot_strategy(
    dates,
    close_prices,
    x_buy,
    y_buy,
    x_sell,
    y_sell,
    sum_money_and_money_in_shares,
)

# Keep the last value of the series as the reference for the final comparison.
best_possible_strategy_final_money = sum_money_and_money_in_shares[-1]

In [None]:
# Final money of the evolved strategy expressed as a percentage of the final
# money of the best possible strategy (both values come from the two
# simulation cells above).
print(f'{best_strategy_final_money / best_possible_strategy_final_money * 100}%')