<a href="https://colab.research.google.com/github/tluxxx/weekly-pattern-in-stock-markets/blob/main/weekly_patterns_(part_3_walk_forward_testing_N_weeks_cycle_with_optimized_pattern).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Exploiting weekly patterns in stock market for trading
#### part III: walk forward testing of N-weeks cycles and N*M-weeks training periods with GA-optimized patterns within the cycles

This series is based on a strategy-idea originally generated by Thomas Gebert (Kurzfrist-Strategien fuer Anleger, Boersenbuchverlag, 2019).
The original strategy was tested and expanded.  Further modifications are investigated. The modifications in this part are as follows:
1.   implementing walk forward test methods for a 16 weeks cycle (GA-optimized pattern) with a predefined length of the training period (M); length_train_period = M * 16 weeks
2.   extending the analyses to variable lengths of the training period (by changing M) for the 16 weeks cycle (GA-optimized pattern)
3.   extending the analyses to variable cycle lengths (N) and variable training periods (M) (GA-optimized weekly pattern for a cycle of N weeks and a training period of N*M weeks)



# 1. Preparation and Data Upload

In [None]:
# preparation
from google.colab import drive
drive.mount("/content/gdrive")

!pip install pygad

In [None]:
import sys
# Add the project folder on the mounted Google Drive to the module search path,
# so that modules stored there can be imported (presumably helpers_pattern_01.py -- verify).
sys.path.append('/content/gdrive/MyDrive/Colab Notebooks/weekly_pattern')

In [None]:
# importing all standard modules
import pandas as pd
import numpy as np
import yfinance as yf
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import datetime as dt
from tqdm.notebook import tqdm
import pygad

In [None]:
# importing all customized modules
# The explicit reload makes edits to helpers_pattern_01 take effect without restarting the runtime.
import importlib                           # codeblock required for reimporting of modules (e.g. in case of changes in the modules source code)
import helpers_pattern_01                  # element of codeblock as above
importlib.reload(helpers_pattern_01)       # element of codeblock as above

# Star import: the following names are used in this notebook without being defined or
# imported anywhere else in it, so they presumably come from this module (verify):
#   transforming_daily_weekly_open, pos_weekly, pnl_calculation_basic,
#   pnl_calculation, pnl_plotting
from helpers_pattern_01 import *

In [None]:
# Download the daily price history for the '^GDAXI' ticker (DAX index on Yahoo Finance)
# directly via yfinance; start_date / end_date delimit the requested period.
start_date = '2000-01-01'
end_date = '2024-03-02'
price = yf.download(
    '^GDAXI',
    start=start_date,
    end=end_date,
)

[*********************100%%**********************]  1 of 1 completed


# 2. Walk-Forward-Optimization of the weekly pattern within a 16-weeks cycle


*   trading-times: Mondays at open (9:00)
*   GA via PyGAD-module
*   General Preparations




In [None]:
# Weekly data from daily data, with two labels added per week:
#   week_type: (week_nb + shift) modulo n, i.e. the slot of the week inside its cycle
#   cycle_nb : week_nb / n truncated to an integer, i.e. the number of the cycle
n = 16       # cycle length in weeks
shift = 0    # offset added to the week number before the modulo
price_w = transforming_daily_weekly_open(price)
week_number = price_w['week_nb']
price_w['week_type'] = (week_number + shift) % n
price_w['cycle_nb'] = (week_number / n).astype(int)
# highest cycle number present in the data (upper bound of the walk-forward loops)
max_cycle = price_w['cycle_nb'].max()

In [None]:
# GA specific section
## fitness functions handed to PyGAD --> strategy result --> to be maximised
def opt_pattern1_train(ga_instance, pos, pos_idx):
  """Fitness function for the walk-forward training window.

  Operates on the notebook-level frame `price_w_train`, which must be bound
  before the GA is run: the candidate pattern `pos` is applied to that frame
  via `pos_weekly` (shift fixed at 0) and the value that
  `pnl_calculation_basic` returns for the frame is handed back as fitness.

  ga_instance, pos_idx: part of the callback signature, not used here.
  pos: candidate pattern proposed by the GA.
  """
  training_frame = price_w_train
  pos_weekly(training_frame, pos, shift=0)
  fitness = pnl_calculation_basic(training_frame)
  return fitness

def opt_pattern1(ga_instance, pos, pos_idx):
  """Fitness function working on the notebook-level frame `price_w_run_opt`.

  The candidate pattern `pos` is applied to that frame via `pos_weekly`
  (shift fixed at 0); the value returned by `pnl_calculation_basic` for the
  frame is the fitness.

  ga_instance, pos_idx: part of the callback signature, not used here.

  NOTE(review): `price_w_run_opt` is not assigned in the visible cells of
  this notebook and all visible GA instances use `opt_pattern1_train`, so
  this function looks unused here -- confirm before relying on it.
  """
  full_frame = price_w_run_opt
  pos_weekly(full_frame, pos, shift=0)
  fitness = pnl_calculation_basic(full_frame)
  return fitness

## 2.1 walk forward: n = 16 weeks and fixed m = {number of cycles in training period}

In [None]:
# definitions/preparations
m = 6  # number of cycles in training period
# one row per traded cycle: cycle number, GA fitness on the training window,
# strategy PnL inside the traded cycle, pattern that was applied
results1 = pd.DataFrame(columns=['cycle_nb', 'ga_fittnes', 'pnl_curent_cycle', 'pattern'])
price_w_test = price_w.copy()
price_w_test['pos'] = np.zeros(len(price_w_test), dtype=int)

# walk forward: for every cycle from m onwards the pattern is optimised on the
# m preceding cycles and then applied to the current cycle
for cycle in tqdm(range(m, max_cycle + 1)):
  # preparing and filtering data set for training cycles (cycle - m ... cycle - 1)
  train_cycles = list(range(cycle - m, cycle))
  price_w_train = price_w[price_w['cycle_nb'].isin(train_cycles)].copy()
  # definition of a new GA-instance (its fitness function reads the global price_w_train)
  ga_instance = pygad.GA(num_generations=150,
                         num_parents_mating=4,
                         fitness_func=opt_pattern1_train,
                         sol_per_pop=8,
                         num_genes=n,
                         gene_type=int,
                         gene_space=[-1, 0, 1],
                         mutation_percent_genes=10,
                         suppress_warnings=True)

  # running the GA across the training cycle data set and calculating the optimised pattern (=pos)
  ga_instance.run()
  pos, strat, strat_idx = ga_instance.best_solution()
  # setting the positions in the current test-cycle: a week of week_type k gets pos[k]
  pos_dict = dict(enumerate(pos))
  in_cycle = price_w_test['cycle_nb'] == cycle
  price_w_test.loc[in_cycle, 'pos'] = price_w_test.loc[in_cycle, 'week_type'].map(pos_dict)

  # calculation the pnl in the current cycle and recording results of current cycle
  price_w_current_cycle = price_w_test.loc[in_cycle]
  pnl_1 = pnl_calculation(price_w_current_cycle)
  # .iloc[-1] picks the last row by position; a plain [-1] on a Series only works through
  # the positional fallback for non-integer indexes, which recent pandas versions deprecate
  pnl_current_cycle = pnl_1['pnl_strategy'].iloc[-1]

  results1.loc[len(results1.index)] = [cycle, strat, pnl_current_cycle, list(pos)]

  0%|          | 0/73 [00:00<?, ?it/s]

In [None]:
# filtering out the first training-period (system not actively trading) and plotting
# The walk-forward loop starts trading in cycle m (trained on the m cycles before it),
# so cycle m itself belongs to the traded period and is kept (>= m, not > m).
price_w_test_real = price_w_test[price_w_test['cycle_nb'] >= m]
pnl_wf = pnl_calculation(price_w_test_real)
title = '16 weeks cycle - walk forward optimisation of trading pattern using GA X <br><sup> training cycle length = ' + str(m) + ' cycles </sup>'
pnl_plotting(pnl_wf, title)
# last value of each PnL column; .iloc[-1] selects by position independent of the index labels
x1, x2, x3 = pnl_wf['pnl_b&h'].iloc[-1], pnl_wf['pnl_strategy'].iloc[-1], pnl_wf['pnl_strategy2'].iloc[-1]
print('**********************************************************************************************************')
print(f'Results (optimized pattern, walk forward):  Buy and Hold:  {x1:.2f}  Strategy:  {x2:.2f}  Strategy2:  {x3:.2f}')

**********************************************************************************************************
Results (optimized pattern, walk forward):  Buy and Hold:  3.66  Strategy:  7.50  Strategy2:  3.57


In [None]:
# display the per-cycle results table (cycle number, GA fitness, PnL of the cycle, pattern)
results1

Unnamed: 0,cycle_nb,ga_fittnes,pnl_curent_cycle,pattern
0,6,2.637611,1.000360,"[1, 1, -1, -1, -1, 1, -1, -1, -1, -1, 1, 1, -1..."
1,7,2.893481,1.148671,"[1, -1, 1, -1, -1, 1, -1, -1, -1, -1, 1, 1, 1,..."
2,8,3.359677,1.239315,"[1, 1, 1, -1, -1, -1, 1, -1, -1, -1, 1, 1, -1,..."
3,9,4.287807,1.156299,"[1, 1, 1, -1, -1, -1, -1, 1, -1, -1, 1, -1, -1..."
4,10,5.500739,1.146447,"[1, 1, 1, -1, -1, -1, -1, 1, -1, -1, 1, -1, 1,..."
...,...,...,...,...
68,74,1.977023,1.071779,"[1, 1, -1, -1, -1, 1, 1, 1, 1, -1, 1, -1, -1, ..."
69,75,1.960213,0.956674,"[1, -1, -1, -1, -1, 1, 1, 1, 1, -1, 1, 1, -1, ..."
70,76,2.132087,0.963614,"[1, -1, -1, -1, -1, 1, 1, 1, 1, -1, 1, 1, -1, ..."
71,77,2.092407,0.942156,"[1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, 1, 1, ..."


## 2.2 walk forward: one fixed m but multiple repetitions to check consistency
This section was introduced to verify the consistency of the method. Since the GA steps are random by nature, repeating the complete walk-forward run several times may yield a distribution of final results (here: the strategy return) rather than one fixed value. In this section the order of magnitude of the spread of this distribution is analyzed.

In [None]:
# checking for consistency of the results
m = 6                   # number of cycles in training period
nb_repetitions = 10     # number of repetition to check consistency
shift = 0
# one row per repetition: final buy-and-hold, strategy and strategy2 values
results1a = pd.DataFrame(data=[], columns=['n', 'm', 'repetition', 'buh', 'strategy' , 'strategy2'])
price_w_test = price_w.copy()
price_w_test['pos'] = np.zeros(len(price_w_test), dtype=int)

for rep in tqdm(range(nb_repetitions), leave=False):
  # one complete walk-forward run per repetition; the positions of every cycle >= m are
  # overwritten below, so price_w_test can be reused from one repetition to the next
  for cycle in tqdm(range(m, max_cycle + 1), leave=False):
    # preparing and filtering data set from training cycles (cycle - m ... cycle - 1)
    train_cycles = list(range(cycle - m, cycle))
    price_w_train = price_w[price_w['cycle_nb'].isin(train_cycles)].copy()
    price_w_train['pos'] = np.zeros(len(price_w_train), dtype=int)
    # definition of a new GA-instance (its fitness function reads the global price_w_train)
    ga_instance = pygad.GA(num_generations=120,
                          num_parents_mating=4,
                          fitness_func=opt_pattern1_train,
                          sol_per_pop=8,
                          num_genes=n,
                          gene_type=int,
                          gene_space=[-1, 0, 1],
                          mutation_percent_genes=10,
                          suppress_warnings=True)

    # running the GA across the training cycle data set and calculating the optimised pattern (=pos)
    ga_instance.run()
    pos, strat, strat_idx = ga_instance.best_solution()
    # setting the positions in the current cycle: a week of week_type k gets pos[k]
    pos_dict = dict(enumerate(pos))
    in_cycle = price_w_test['cycle_nb'] == cycle
    price_w_test.loc[in_cycle, 'pos'] = price_w_test.loc[in_cycle, 'week_type'].map(pos_dict)

  # trading starts with cycle m (first cycle that received a pattern), so it is kept (>= m)
  price_w_test_real = price_w_test[price_w_test['cycle_nb'] >= m]
  pnl_wf = pnl_calculation(price_w_test_real)
  # last value of each PnL column; .iloc[-1] selects by position independent of the index labels
  x1, x2, x3 = pnl_wf['pnl_b&h'].iloc[-1], pnl_wf['pnl_strategy'].iloc[-1], pnl_wf['pnl_strategy2'].iloc[-1]

  results1a.loc[len(results1a.index)] = [n, m, rep, x1, x2, x3]
  print(f'Results (optimized pattern, walk forward method):  rep:  {rep}   B&H: {x1:.2f}  Strategy:  {x2:.2f}   Strategy2:  {x3:.2f}')


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/73 [00:00<?, ?it/s]

Results (optimized pattern, walk forward method):  rep:  0   B&H: 3.66  Strategy:  7.49   Strategy2:  3.5643822137009495


  0%|          | 0/73 [00:00<?, ?it/s]

Results (optimized pattern, walk forward method):  rep:  1   B&H: 3.66  Strategy:  7.41   Strategy2:  3.55331546254543


  0%|          | 0/73 [00:00<?, ?it/s]

Results (optimized pattern, walk forward method):  rep:  2   B&H: 3.66  Strategy:  7.49   Strategy2:  3.5643822137009495


  0%|          | 0/73 [00:00<?, ?it/s]

Results (optimized pattern, walk forward method):  rep:  3   B&H: 3.66  Strategy:  7.49   Strategy2:  3.5643822137009495


  0%|          | 0/73 [00:00<?, ?it/s]

Results (optimized pattern, walk forward method):  rep:  4   B&H: 3.66  Strategy:  7.37   Strategy2:  3.548695696707629


  0%|          | 0/73 [00:00<?, ?it/s]

Results (optimized pattern, walk forward method):  rep:  5   B&H: 3.66  Strategy:  7.49   Strategy2:  3.5643822137009495


  0%|          | 0/73 [00:00<?, ?it/s]

Results (optimized pattern, walk forward method):  rep:  6   B&H: 3.66  Strategy:  7.54   Strategy2:  3.5718961258825113


  0%|          | 0/73 [00:00<?, ?it/s]

Results (optimized pattern, walk forward method):  rep:  7   B&H: 3.66  Strategy:  7.31   Strategy2:  3.540872066451966


  0%|          | 0/73 [00:00<?, ?it/s]

Results (optimized pattern, walk forward method):  rep:  8   B&H: 3.66  Strategy:  7.49   Strategy2:  3.5643822137009495


  0%|          | 0/73 [00:00<?, ?it/s]

Results (optimized pattern, walk forward method):  rep:  9   B&H: 3.66  Strategy:  7.60   Strategy2:  3.5790754574201307


In [None]:
# mean and standard deviation of the final results over all repetitions stored in results1a
# (the printed labels are German: "Mittelwerte" = means, "Standardabw." = standard deviations)
strategy_runs, strategy2_runs = results1a.strategy, results1a.strategy2
print(f'Abweichung - Mittelwerte:  1. Strategy: {strategy_runs.mean():.2f} 2. Strategy2: {strategy2_runs.mean():.2f}')
print(f'Abweichung - Standardabw.: 1. Strategy: {strategy_runs.std():.2f}  2. Strategy2: {strategy2_runs.std():.2f}')

Abweichung - Mittelwerte:  1. Strategy: 7.47 2. Strategy2: 3.56
Abweichung - Standardabw.: 1. Strategy: 0.08  2. Strategy2: 0.01


In [None]:
# fitness plot of the GA instance currently bound to `ga_instance`
# (when the notebook is run top to bottom, that is the instance created last in the preceding loop)
fig = ga_instance.plot_fitness()

In [None]:
# export of results to excel file (one sheet per frame)
# NOTE(review): the folder is spelled 'Weekly Pattern' here, while the module search path and
# the other export in this notebook use 'weekly_pattern' -- confirm the intended target folder.
# NOTE(review): when run top to bottom, price_w_test and pnl_wf have been reassigned by the
# consistency check (section 2.2) before this cell runs, whereas results1 stems from section 2.1.
export_path = r'/content/gdrive/MyDrive/Colab Notebooks/Weekly Pattern/16_weeks_cycle_walk_forward_optimized (data).xlsx'
sheets = {
  "weekly data": price_w_test,
  "PnL_opt": pnl_wf,
  'Pattern in Cycles': results1,
}
with pd.ExcelWriter(export_path) as writer:
  for sheet_name, frame in sheets.items():
    frame.to_excel(writer, sheet_name=sheet_name)

# 4. walk forward: N = {cycle length}, M = {number of cycles in training period}; N = [4, ..., 32], M = [4, ..., 28]

*   settings as in chapter 3.1
*   investigation into influence of  N and M

In [None]:
# Calculation for different combination of N (cycle-length) and M (length of training-period)

# definitions/preparations
n_list = [4, 8, 12, 16, 20, 24, 28, 32]
m_list = [4, 6, 8, 12, 14, 16, 20, 28]
shift = 0

# one row per (n, m) combination: final buy-and-hold, strategy and strategy2 values
results2 = pd.DataFrame(data=[], columns=['n', 'm', 'buh', 'strategy' , 'strategy2'])
price_w = transforming_daily_weekly_open(price)

# looping through combinations
for n in tqdm(n_list, leave = False):
  # relabel the weeks for the current cycle length
  price_w['week_type'] = (price_w['week_nb'] + shift) % n
  price_w['cycle_nb'] = (price_w['week_nb'] /n).astype(int)
  max_cycle = price_w['cycle_nb'].max()
  for m in tqdm(m_list, leave=False):
    price_w_test = price_w.copy()
    price_w_test['pos'] = np.zeros(len(price_w_test), dtype=int)
    # walk forward testing
    for cycle in tqdm(range(m, max_cycle + 1), leave=False):
      # preparing and filtering data set from training cycles (cycle - m ... cycle - 1)
      train_cycles = list(range(cycle - m, cycle))
      price_w_train = price_w[price_w['cycle_nb'].isin(train_cycles)].copy()
      price_w_train['pos'] = np.zeros(len(price_w_train), dtype=int)
      # definition of a new GA-instance (its fitness function reads the global price_w_train)
      ga_instance = pygad.GA(num_generations=120,
                            num_parents_mating=4,
                            fitness_func=opt_pattern1_train,
                            sol_per_pop=8,
                            num_genes=n,
                            gene_type=int,
                            gene_space=[-1, 0, 1],
                            mutation_percent_genes=10,
                            suppress_warnings=True)

      # running the GA across the training cycle data set and calculating the optimised pattern (=pos)
      ga_instance.run()
      pos, strat, strat_idx = ga_instance.best_solution()
      # setting the positions in the current cycle: a week of week_type k gets pos[k]
      pos_dict = dict(enumerate(pos))
      in_cycle = price_w_test['cycle_nb'] == cycle
      price_w_test.loc[in_cycle, 'pos'] = price_w_test.loc[in_cycle, 'week_type'].map(pos_dict)

    # trading starts with cycle m (first cycle that received a pattern), so it is kept (>= m)
    price_w_test_real = price_w_test[price_w_test['cycle_nb'] >= m]
    pnl_wf = pnl_calculation(price_w_test_real)
    # last value of each PnL column; .iloc[-1] selects by position independent of the index labels
    x1, x2, x3 = pnl_wf['pnl_b&h'].iloc[-1], pnl_wf['pnl_strategy'].iloc[-1], pnl_wf['pnl_strategy2'].iloc[-1]
    results2.loc[len(results2.index)] = [n, m, x1, x2, x3]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/312 [00:00<?, ?it/s]

  0%|          | 0/310 [00:00<?, ?it/s]

  0%|          | 0/308 [00:00<?, ?it/s]

  0%|          | 0/304 [00:00<?, ?it/s]

  0%|          | 0/302 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/296 [00:00<?, ?it/s]

  0%|          | 0/288 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/154 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/146 [00:00<?, ?it/s]

  0%|          | 0/144 [00:00<?, ?it/s]

  0%|          | 0/142 [00:00<?, ?it/s]

  0%|          | 0/138 [00:00<?, ?it/s]

  0%|          | 0/130 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/102 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/98 [00:00<?, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

  0%|          | 0/90 [00:00<?, ?it/s]

  0%|          | 0/86 [00:00<?, ?it/s]

  0%|          | 0/78 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/73 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

  0%|          | 0/67 [00:00<?, ?it/s]

  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/59 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/60 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/48 [00:00<?, ?it/s]

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/49 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/39 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/42 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

In [None]:
# display the grid-search results table (one row per combination of n and m)
results2

Unnamed: 0,n,m,buh,strategy,strategy2
0,4.0,4.0,2.491135,0.410512,0.735963
1,4.0,6.0,2.376571,0.298264,0.403056
2,4.0,8.0,2.393713,0.627835,1.142578
3,4.0,12.0,2.705079,0.883361,1.477275
4,4.0,14.0,2.861320,0.734863,1.295130
...,...,...,...,...,...
59,32.0,12.0,2.164116,1.847467,1.982588
60,32.0,14.0,4.359178,7.365564,3.265827
61,32.0,16.0,2.960402,3.094857,2.369887
62,32.0,20.0,2.478087,2.861498,2.237839


In [None]:
# export of the (n, m) grid results to an excel file with a single sheet named "data"
export_path = r'/content/gdrive/MyDrive/Colab Notebooks/weekly_pattern/N_weeks_cycle_walk_forward_optimized (data).xlsx'
with pd.ExcelWriter(export_path) as writer:
  results2.to_excel(writer, sheet_name="data")

In [None]:
# plotting of results: strategy PnL over the cycle length N, one point per (N, M) pair, coloured by M
title = 'PnL depending on length of weekly cycle and length of training period <br><sup>length training period = M * cycle length </sup>'
fig = px.scatter(results2, x='n', y='strategy', color='m', template='plotly_dark')
fig.update_layout(
  title=title,
  autosize=False,
  width=800,
  height=400,
  xaxis_title='length of cycle in weeks (N)',
  yaxis_title='PnL',
)
fig.show()

In [None]:
# strategy PnL over the number of training cycles M, one point per (N, M) pair, coloured by N
title = 'PnL depending on length of weekly cycle and length of training period <br><sup>length of training period = M * cycle length </sup>'
fig = px.scatter(results2, x='m', y='strategy', color='n', template='plotly_dark')
fig.update_layout(
  title=title,
  autosize=False,
  width=800,
  height=400,
  xaxis_title='M',
  yaxis_title='PnL',
)
fig.show()

In [None]:
# box plot of the strategy PnL per cycle length N (all individual points shown)
title = 'PnL depending on length of weekly cycle and length of training period <br><sup>length of training period = M * cycle length </sup>'
fig = px.box(results2, x='n', y='strategy', points='all', template='plotly_dark')
fig.update_layout(
  title=title,
  autosize=False,
  width=800,
  height=400,
  xaxis_title='length of cycle in weeks (N)',
  yaxis_title='PnL',
)
fig.show()

In [None]:
# box plot of the strategy PnL per number of training cycles M (all individual points shown)
title = 'PnL depending on length of weekly cycle and length of training period <br><sup>length of training period = M * cycle length </sup>'
fig = px.box(results2, x='m', y='strategy', points='all', template='plotly_dark')
fig.update_layout(
  title=title,
  autosize=False,
  width=800,
  height=400,
  xaxis_title='M',
  yaxis_title='PnL',
)
fig.show()