In [None]:
import Live_Strategy_Filter
import Config
import numpy as np
import pandas as pd
pd.set_option('display.float_format', lambda x: '%.5f' % x)
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

import copy
import time
from pymongo import MongoClient
client=MongoClient(Config.DB_Hostname,Config.DB_Port)
import multiprocessing

class Live_Strategy_Simulator:
    
    def __init__(self, parameters, simulation_parameters, analysis_start_date):
        
        '''
        parameters = {"start_date" : "2022-04-22",
                      "end_date" : "2023-10-13", 
                      "prediction_date" : '2023-10-13'}
        
        simulation_parameters = {"rolling_periods" : [50, 100, 150, -1], 
                                   "zero_rtd_flag" : [True, False],
                                   "outlier_flag": [True, False], 
                                   "train_sample_size" : [20, 40, 50],
                                   "test_sample_size" : [5, 10, 20],
                                   "maximize_returns" : [True, False],
                                   "top_n" : [1, 2, 3]}
                                   
        analysis_start_date = {10 : "2023-06-09", 20 : "2023-05-26", 30 : "2023-05-12", 40 : "2023-04-27"}
        '''
        
        self.parameters = copy.deepcopy(parameters)
        self.simulation_parameters = copy.deepcopy(simulation_parameters)
        self.analysis_start_date = copy.deepcopy(analysis_start_date)
        
        self.parameter_map = {}
        self._create_simulation_parameter_map()
        
    def _create_simulation_parameter_map(self):
        
        identifier = 1
        
        for rolling_periods in self.simulation_parameters["rolling_periods"]:
            
            for zero_rtd_flag in self.simulation_parameters["zero_rtd_flag"]:
                
                        for outlier_flag in self.simulation_parameters["outlier_flag"]:
                        
                                self.parameter_map[identifier] = {"rolling_periods" : rolling_periods, 
                                                                  "zero_rtd_flag" : zero_rtd_flag, 
                                                                  "outlier_flag" : outlier_flag,
                                                                  "train_sample_size" : None,
                                                                  "test_sample_size" : None,
                                                                  "maximize_returns" : None}
                                identifier+=1
    
    def _initial_simulation(self):
        '''
        Run ``Live_Strategy_Filter.Simulate`` once per ``parameter_map`` entry.

        Each job receives its own deep copy of the simulation settings and of
        ``self.parameters`` together with the entry's identifier. Jobs are
        dispatched through a ``multiprocessing.Pool``; the results are
        concatenated (presumably one DataFrame per job -- confirm against
        ``Live_Strategy_Filter``), written to ``sim_res.csv``, sorted and
        returned.
        '''
        jobs = [{"simulation_parameters": copy.deepcopy(macro_settings),
                 "parameters": copy.deepcopy(self.parameters),
                 "identifier" : identifier}
                for identifier, macro_settings in self.parameter_map.items()]

        with multiprocessing.Pool() as pool:
            frames = pool.map(Live_Strategy_Filter.Simulate, iterable = jobs)

        sim_results = pd.concat(frames)
        sim_results.to_csv('sim_res.csv')

        # To reuse a cached run instead of re-simulating:
        # sim_results = pd.read_csv('sim_res.csv').drop(columns = ['Unnamed: 0'])

        sort_columns = ['date', 'identifier', 'current_week_limit', 'next_week_limit',
                        'rtd_threshold_lower', 'rtd_threshold_upper']
        sim_results.sort_values(sort_columns, inplace=True)
        print("Initial Simulation Complete")
        return sim_results

    def _calc_portfolio_performance(self, temp_sim, split_dates):

        if split_dates is None:
            
            returns = temp_sim.sort_values('date').groupby('date').mean().returns
            drawdown = copy.deepcopy(returns)
            drawdown[drawdown > 0] = 0
            return {"average_returns" : np.mean(returns),
                    "absolute_return_volatility" : np.std(returns),
                    "average_MDD" : np.mean(drawdown)}
        
        else:
            split_dates = sorted(split_dates)
            average_returns_list = []
            average_MDD_list =[]

            for i in range(len(split_dates)):

                if i==0:
                    temp = temp_sim[temp_sim.date <= split_dates[i]]
                else:
                    temp = temp_sim[(temp_sim.date > split_dates[i-1]) & (temp_sim.date <= split_dates[i])]

                cumulative_returns = (1 + temp.groupby('date').mean().returns).cumprod()
                average_MDD = min(-1 + cumulative_returns/cumulative_returns.cummax().apply(lambda x: max(1, x)))
                average_returns = cumulative_returns.tolist()[-1]**(1/len(temp.date.unique())) - 1
                average_returns_list.append(average_returns)
                average_MDD_list.append(average_MDD)

            return {"average_returns" : np.mean(average_returns_list),
                    "absolute_return_volatility" : np.std(average_returns_list),
                    "average_MDD" : np.mean(average_MDD_list)}

    def _filter_best_micro_params(self, simulation, maximize_returns = True, top_n = 1):

        sim = copy.deepcopy(simulation)
        current_week_limit_list = []
        next_week_limit_list = []
        rtd_threshold_lower_list = []
        rtd_threshold_upper_list = []

        average_returns = []
        absolute_return_volatility = []
        average_MDD = []
    
        performance = pd.DataFrame()
        date_list = sorted(sim.date.unique())
        split_dates = None

        for current_week_limit in sorted(sim.current_week_limit.unique().tolist()):
            for next_week_limit in sorted(sim.next_week_limit.unique().tolist()):
                for rtd_threshold_lower, rtd_threshold_upper in sorted(set((zip(sim.rtd_threshold_lower, sim.rtd_threshold_upper)))):

                    current_week_limit_list.append(current_week_limit)
                    next_week_limit_list.append(next_week_limit)
                    rtd_threshold_lower_list.append(rtd_threshold_lower)
                    rtd_threshold_upper_list.append(rtd_threshold_upper)

                    temp_sim = sim[(sim.current_week_limit == current_week_limit) & 
                                   (sim.next_week_limit == next_week_limit) & 
                                   (sim.rtd_threshold_lower == rtd_threshold_lower) &
                                   (sim.rtd_threshold_upper == rtd_threshold_upper)]

                    perf = self._calc_portfolio_performance(temp_sim, split_dates)
                    average_returns.append(perf["average_returns"])
                    absolute_return_volatility.append(perf["absolute_return_volatility"])
                    average_MDD.append(perf["average_MDD"])

        performance["current_week_limit"] = current_week_limit_list
        performance["next_week_limit"] = next_week_limit_list
        performance["rtd_threshold_lower"] = rtd_threshold_lower_list
        performance["rtd_threshold_upper"] = rtd_threshold_upper_list
        performance["average_returns"] = average_returns
        performance["absolute_return_volatility"] = absolute_return_volatility
        performance["average_MDD"] = average_MDD        
        performance["sharpe"] = np.where(performance.absolute_return_volatility == 0, 
                                         np.where(performance.average_returns == 0, 0, np.where(performance.average_returns>0, np.inf, -np.inf)), 
                                         performance.average_returns / performance.absolute_return_volatility)

        performance = performance[performance.average_MDD >= performance.average_MDD.quantile(0.1)]
        performance = performance[performance.sharpe >= performance.sharpe.quantile(0.9)]

        if maximize_returns:
            performance.sort_values(['average_returns', 'absolute_return_volatility', 'current_week_limit', 
                                     'next_week_limit', 'rtd_threshold_lower', 'rtd_threshold_upper', 'average_MDD'], 
                                    ascending = [False, True, False, False, False, True, False], inplace = True)

        else:
            performance.sort_values(['absolute_return_volatility', 'average_returns', 'current_week_limit', 
                                     'next_week_limit', 'rtd_threshold_lower', 'rtd_threshold_upper', 'average_MDD'], 
                                    ascending = [True, False, False, False, False, True, False], inplace = True)
        
        return performance.head(top_n).to_dict('records')
        '''
        {'current_week_limit': 1,
         'next_week_limit': 1,
         'rtd_threshold_lower': 0,
         'rtd_threshold_upper': 100,
         'average_returns': 0.045103822092706025,
         'absolute_return_volatility': 0.011403502790589215,
         'average_MDD': -0.23581863551409965,
         'sharpe': 3.955260319656179}
        '''

    def _date_simulator(self, date_list, sim, macro_parameter):

        main_sim = []
        previous_train_date = None
        previous_train_index = None

        current_train_date = None
        current_train_index = None

        current_test_date = None
        current_test_index = None

        while current_test_index != len(date_list):

            if previous_train_index is None:
                previous_train_index = 0
                current_train_index = macro_parameter["train_sample_size"]
            else:
                previous_train_index += macro_parameter["test_sample_size"]
                current_train_index += macro_parameter["test_sample_size"]

            if len(date_list) - (current_train_index + macro_parameter["test_sample_size"]) - 1 < 0.5 * macro_parameter["test_sample_size"]:
                current_test_index = len(date_list)

            else:
                current_test_index = current_train_index + macro_parameter["test_sample_size"]

            previous_train_date = date_list[previous_train_index] #include in train
            current_train_date = date_list[current_train_index-1] #include in train exclude in test
            current_test_date = date_list[current_test_index-1] #include in test

            #train
            temp_sim = sim[(sim.date >= previous_train_date) & (sim.date <= current_train_date)]
            train_params_list = self._filter_best_micro_params(temp_sim, 
                                                          maximize_returns = macro_parameter["maximize_returns"], 
                                                          top_n = macro_parameter["top_n"])
            
            main_sim_temp = None
            for train_params in train_params_list:
                
                temp_sim = sim[(sim.date > current_train_date) & (sim.date <= current_test_date) & 
                                    (sim.current_week_limit == train_params["current_week_limit"]) &
                                    (sim.next_week_limit == train_params["next_week_limit"]) &
                                    (sim.rtd_threshold_lower == train_params["rtd_threshold_lower"]) &
                                    (sim.rtd_threshold_upper == train_params["rtd_threshold_upper"])]
                
                if main_sim_temp is None:
                    main_sim_temp = copy.deepcopy(temp_sim)
                else:
                    dummy_date_list = main_sim_temp[main_sim_temp.underlying == "DUMMY"].date.unique()
                    main_sim_temp = pd.concat([main_sim_temp[~main_sim_temp.date.isin(dummy_date_list)], temp_sim[temp_sim.date.isin(dummy_date_list)]])                    
                    
            main_sim.append(main_sim_temp)

        main_sim = pd.concat(main_sim).sort_values('date')
        
        #create splitting dates for performance evaluation
        i = -1
        split_dates = []
        date_list_temp = sorted(main_sim.date.unique())
        while i != len(date_list_temp) - 1:
            i = min(i+5, len(date_list_temp) - 1)
            split_dates.append(date_list_temp[i])

        #evaluate performance and return
        return self._calc_portfolio_performance(main_sim, split_dates)
    
    def Hyperparameter_Tuner(self):
        '''
        Evaluate every hyperparameter combination and store the best one.

        Steps:
        1. Run ``_initial_simulation`` to obtain the simulation rows for every
           identifier in ``parameter_map``.
        2. For each identifier and each combination of train_sample_size,
           test_sample_size, maximize_returns and top_n, run
           ``_date_simulator`` on the rows dated on or after
           ``analysis_start_date[train_sample_size]`` and record the
           performance together with the parameters used.
        3. Write the full table to ``performance_summary.csv`` (before the
           sharpe column is added and before any filtering).
        4. Add a sharpe column, keep rows with ``average_MDD`` at or above the
           median, then rows with sharpe at or above the 90% quantile, and
           sort by return (desc), volatility (asc), drawdown (desc), top_n (asc).
        5. Replace the records for ``prediction_date`` in the Mongo collection
           ``Strategy.Profit_Maximizing_Hyperparams`` with the top row. When no
           row survives the filters the collection is left untouched, because
           ``insert_many`` rejects an empty list and deleting first would leave
           the date without any record.

        Raises KeyError when ``analysis_start_date`` has no entry for one of
        the configured ``train_sample_size`` values.

        Returns the filtered and sorted summary DataFrame.
        '''
        sim_results = self._initial_simulation()
        sim_results.sort_values(['date', 'identifier'], inplace = True)
        performance_summary = []

        for identifier, base_parameters in self.parameter_map.items():

            start = time.time()
            print(f"Identifier : {identifier}")
            sim = sim_results[sim_results.identifier == identifier]
            macro_parameter = copy.deepcopy(base_parameters)
            # NOTE(review): window positions come from all dates of this
            # identifier, while the rows handed to _date_simulator start at the
            # analysis start date -- confirm this mismatch is intended.
            date_list = sorted(sim.date.unique())

            for train_sample_size in self.simulation_parameters["train_sample_size"]:

                # Depends only on the training size, so filter once per size.
                analysis_sim = sim[sim.date >= self.analysis_start_date[train_sample_size]]

                for test_sample_size in self.simulation_parameters["test_sample_size"]:

                    for maximize_returns in self.simulation_parameters["maximize_returns"]:

                        for top_n in self.simulation_parameters["top_n"]:

                            macro_parameter["train_sample_size"] = train_sample_size
                            macro_parameter["test_sample_size"] = test_sample_size
                            macro_parameter["maximize_returns"] = maximize_returns
                            macro_parameter["top_n"] = top_n

                            perf = self._date_simulator(date_list, analysis_sim, macro_parameter)
                            # update() copies the current values, so reusing
                            # macro_parameter in the next round is safe.
                            perf.update(macro_parameter)
                            performance_summary.append(perf)

            print(f"Took {(time.time() - start)/60} minutes")

        performance_summary = pd.DataFrame(performance_summary)
        performance_summary["date"] = self.parameters['prediction_date']
        performance_summary.to_csv('performance_summary.csv')

        performance_summary["sharpe"] = np.where(performance_summary.absolute_return_volatility == 0,
                                         np.where(performance_summary.average_returns == 0, 0, np.where(performance_summary.average_returns>0, np.inf, -np.inf)),
                                         performance_summary.average_returns / performance_summary.absolute_return_volatility)

        performance_summary = performance_summary[performance_summary.average_MDD >= performance_summary.average_MDD.quantile(0.5)]
        performance_summary = performance_summary[performance_summary.sharpe >= performance_summary.sharpe.quantile(0.9)]
        performance_summary = performance_summary.sort_values(['average_returns', 'absolute_return_volatility', 'average_MDD', 'top_n'],
                                                              ascending = [False, True, False, True])

        best_records = performance_summary.head(1).to_dict('records')
        if not best_records:
            # Keep whatever is stored for this date rather than deleting it
            # and then failing on an empty insert.
            print("No hyperparameter combination passed the filters; Strategy.Profit_Maximizing_Hyperparams left unchanged")
            return performance_summary

        collection = client.Strategy.Profit_Maximizing_Hyperparams
        print(collection.delete_many({"date" : self.parameters["prediction_date"]}).deleted_count, f" records deleted Strategy.Profit_Maximizing_Hyperparams")
        print(len(collection.insert_many(best_records).inserted_ids), f" records inserted for Strategy.Profit_Maximizing_Hyperparams")

        return performance_summary

In [None]:
# Run configuration. This dict is handed unchanged to every
# Live_Strategy_Filter.Simulate job; "prediction_date" is also the date under
# which the tuning result is written to the summary and to MongoDB.
parameters = {"start_date" : "2022-04-22",
              "end_date" : "2023-12-29", 
              "prediction_date" : "2023-12-29"}

# Search grid. rolling_periods / zero_rtd_flag / outlier_flag define the
# simulation runs (one identifier per combination); the remaining keys are
# swept inside Hyperparameter_Tuner for each of those runs.
simulation_parameters = {"rolling_periods" : [150, 200, 250, -1], 
                         "zero_rtd_flag" : [True, False],
                         "outlier_flag": [True, False], 
                         "train_sample_size" : [10, 20, 30, 40],
                         "test_sample_size" : [5, 10],
                         "maximize_returns" : [True, False], 
                         "top_n" : [1, 2, 3]}

# Keyed by train_sample_size: simulation rows dated before the mapped value are
# left out of the walk-forward for that training size. Every train_sample_size
# listed above needs an entry here, otherwise the tuner raises KeyError.
analysis_start_date = {10 : "2023-06-09", 20 : "2023-05-26", 30 : "2023-05-12", 40 : "2023-04-27"}

# NOTE: the simulator instance is bound to the module-level name `self`;
# the following cells refer to it by that name.
self = Live_Strategy_Simulator(parameters, simulation_parameters, analysis_start_date)

In [None]:
# Run the complete tuning pass and print how long it took, in seconds.
start = time.time()
performance_summary = self.Hyperparameter_Tuner()
elapsed_seconds = time.time() - start
print(elapsed_seconds)

In [None]:
# Re-rank the unfiltered results that Hyperparameter_Tuner wrote to
# performance_summary.csv, without re-running the simulation. The file was
# saved with its index, which comes back as the 'Unnamed: 0' column.
performance_summary = pd.read_csv('performance_summary.csv').drop(columns = ['Unnamed: 0'])

mean_returns = performance_summary.average_returns
volatility = performance_summary.absolute_return_volatility

# Zero volatility would divide by zero: use 0 / +inf / -inf by sign of the return.
zero_volatility_sharpe = np.where(mean_returns == 0, 0, np.where(mean_returns>0, np.inf, -np.inf))
performance_summary["sharpe"] = np.where(volatility == 0,
                                         zero_volatility_sharpe,
                                         mean_returns / volatility)

# Keep the better half by drawdown, then the top 5% of those by sharpe.
# NOTE: Hyperparameter_Tuner uses the 0.9 sharpe quantile; this cell uses 0.95.
drawdown_floor = performance_summary.average_MDD.quantile(0.5)
performance_summary = performance_summary[performance_summary.average_MDD >= drawdown_floor]
sharpe_floor = performance_summary.sharpe.quantile(0.95)
performance_summary = performance_summary[performance_summary.sharpe >= sharpe_floor]

performance_summary = performance_summary.sort_values(
    ['average_returns', 'absolute_return_volatility', 'average_MDD', 'top_n'],
    ascending = [False, True, False, True])


In [None]:
# Notebook display of the current performance_summary table.
performance_summary

In [None]:
# Second notebook display of performance_summary (same expression as the cell before).
performance_summary

In [None]:
# Store the top-ranked row in Strategy.Profit_Maximizing_Hyperparams and print
# how many documents were inserted.
# NOTE(review): unlike Hyperparameter_Tuner, this cell does not delete existing
# records for the same date first -- confirm that a second record is intended.
best_record = performance_summary.head(1).to_dict('records')
insert_result = client.Strategy.Profit_Maximizing_Hyperparams.insert_many(best_record)
print(len(insert_result.inserted_ids))
