In [None]:
# Ask IPython to display the value of every top-level expression in a cell,
# not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Inject a CSS rule that sets elements with the `container` class to 90% width.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [None]:
pip install baostock

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import baostock as bs
import backtrader as bt
from datetime import datetime, time
import copy

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 500)

In [None]:
def get_modified_data_from_local(data_dir='all_data', start_date='2022-04-01', split_date='2022-07-01'):
    """Load per-stock CSV files and split each into a train and a test frame.

    Every ``*.csv`` file in ``data_dir`` is read, its ``time`` column is parsed
    to datetimes, and rows dated before ``start_date`` are discarded. Rows
    dated before ``split_date`` go to the train frame, the rest to the test
    frame. Stocks with no rows on one side of the split are left out of that
    side's dictionary.

    Parameters
    ----------
    data_dir : str
        Directory holding one CSV per stock; the file name minus ``.csv`` is
        used as the dictionary key.
    start_date : str
        First calendar date (inclusive) to keep.
    split_date : str
        First calendar date (inclusive) of the test set.

    Returns
    -------
    tuple[dict, dict]
        ``(ohlc_data_train, ohlc_data_test)``, each mapping stock name to a
        DataFrame.
    """
    start = pd.to_datetime(start_date).date()
    split = pd.to_datetime(split_date).date()

    ohlc_data_train = {}
    ohlc_data_test = {}
    # Sorted so the resulting dict order (and hence the order in which feeds
    # are later added to a backtest) does not depend on the file system.
    for file_name in sorted(os.listdir(data_dir)):
        # Skip anything that is not a CSV (checkpoint folders, notes, ...).
        if not file_name.endswith('.csv'):
            continue

        df = pd.read_csv(os.path.join(data_dir, file_name))
        df['time'] = pd.to_datetime(df['time'])
        day = df['time'].dt.date
        in_range = day >= start

        # .copy() gives each split its own data, so later in-place edits by
        # callers do not operate on a view of `df`.
        df_train = df[in_range & (day < split)].copy()
        df_test = df[in_range & (day >= split)].copy()

        stock = file_name.replace('.csv', '')
        # Only keep non-empty frames; this replaces the deep copy + delete pass.
        if len(df_train) > 0:
            ohlc_data_train[stock] = df_train
        if len(df_test) > 0:
            ohlc_data_test[stock] = df_test

    return ohlc_data_train, ohlc_data_test

def get_composition(date):
    """Fetch the ZZ500 index constituents from baostock for ``date``.

    Opens its own baostock session, runs ``query_zz500_stocks`` and always
    logs out again, even if the query or the row iteration raises.

    Parameters
    ----------
    date : str
        Date passed straight through to ``bs.query_zz500_stocks``.

    Returns
    -------
    pandas.DataFrame
        One row per constituent, with the columns reported by the query
        (``rs.fields``). Empty if the query returned an error code.
    """
    lg = bs.login()

    print('login respond error_code:'+lg.error_code)
    print('login respond error_msg:'+lg.error_msg)

    try:
        rs = bs.query_zz500_stocks(date)
        print('query_zz500 error_code:'+rs.error_code)
        print('query_zz500 error_msg:'+rs.error_msg)

        zz500_stocks = []
        # `and` short-circuits, so rows are not requested after a failed query.
        while rs.error_code == '0' and rs.next():
            zz500_stocks.append(rs.get_row_data())
        return pd.DataFrame(zz500_stocks, columns=rs.fields)
    finally:
        # Release the session regardless of how the query went.
        bs.logout()
    
def data_stock(code, start, end, freq):
    """Download K-line bars for one stock from baostock as a DataFrame.

    The caller must already be logged in to baostock; this function neither
    logs in nor out.

    Parameters
    ----------
    code : str
        Stock code passed to ``bs.query_history_k_data_plus``.
    start, end : str
        Date range passed as ``start_date`` / ``end_date``.
    freq : str
        Bar frequency passed as ``frequency``. The requested field list
        includes ``time``, so this is presumably meant for intraday
        frequencies - TODO confirm.

    Returns
    -------
    pandas.DataFrame
        Indexed by the parsed ``time`` column; price, volume, amount and
        adjustflag columns are converted to float.
    """
    rs = bs.query_history_k_data_plus(
        code,
        "time,code,open,high,low,close,volume,amount,adjustflag",
        start_date=start, end_date=end,
        frequency=freq, adjustflag="3")
    print('query_history_k_data_plus respond error_code:'+rs.error_code)
    print('query_history_k_data_plus respond error_msg:'+rs.error_msg)

    data_list = []
    # `and` short-circuits, so rows are not requested after a failed query.
    while rs.error_code == '0' and rs.next():
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields)

    numeric_columns = ['open', 'high', 'close', 'low', 'volume', 'amount', 'adjustflag']
    result = result.astype({column: float for column in numeric_columns})

    # baostock documents the minute-bar `time` field as YYYYMMDDHHMMSSsss
    # (17 characters). Keeping the first 14 characters lets the strict format
    # below parse both that form and a plain 14-character timestamp.
    result['time'] = pd.to_datetime(result['time'].str[:14], format='%Y%m%d%H%M%S')
    result.set_index('time', inplace=True)

    return result

def calculate_rsi_divergence(data, window=14):
    """Add an ``RSI_Divergence`` column to ``data`` (in place) and return it.

    RSI is computed from simple rolling means of the positive and negative
    one-bar changes of ``close``. The divergence is the relative distance of
    RSI from its own rolling mean: ``(RSI - mean(RSI)) / mean(RSI)``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``close`` column. Modified in place.
    window : int
        Rolling window used for the gain/loss averages and for the RSI mean.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with ``RSI_Divergence`` added.
    """
    # First diff is NaN; treating it as "no change" matches fillna(0).
    delta = data['close'].diff(1).fillna(0)

    # Vectorised split into gains and (positive) losses.
    gains = delta.where(delta > 0, 0.0)
    losses = (-delta).where(delta < 0, 0.0)

    avg_gain = gains.rolling(window=window).mean()
    avg_loss = losses.rolling(window=window).mean()

    # A zero average loss gives an infinite ratio, which yields RSI = 100.
    rsi = 100 - (100 / (1 + (avg_gain / avg_loss)))
    rsi_average = rsi.rolling(window=window).mean()

    # Intermediates stay local, so no scratch columns need to be dropped.
    data['RSI_Divergence'] = (rsi - rsi_average) / rsi_average

    return data

def calculate_bollinger_Zscore(data, window=20):
    """Add a ``BB_Zvalue`` column to ``data`` (in place) and return ``data``.

    ``BB_Zvalue`` is the distance of ``close`` from its rolling mean, expressed
    in units of its rolling standard deviation over ``window`` rows. The
    scratch columns ``MA`` and ``StdDev`` are created and then removed.
    """
    rolling_close = data['close'].rolling(window=window)
    data['MA'] = rolling_close.mean()
    data['StdDev'] = rolling_close.std()

    data['BB_Zvalue'] = data['close'].sub(data['MA']).div(data['StdDev'])

    # Remove the scratch columns again so only the z-score is left behind.
    for scratch in ('MA', 'StdDev'):
        del data[scratch]

    return data

def calculate_williams_percent_r(data, window=14):
    """Add a ``Williams_%R`` column to ``data`` (in place) and return ``data``.

    For each row the value is
    ``(rolling max of high - close) / (rolling max of high - rolling min of low) * -100``
    over the last ``window`` rows.
    """
    window_top = data['high'].rolling(window=window).max()
    window_bottom = data['low'].rolling(window=window).min()

    distance_from_top = window_top - data['close']
    window_range = window_top - window_bottom
    data['Williams_%R'] = distance_from_top.div(window_range).mul(-100)
    return data

def calculate_volume_relative_to_average(data, window=20):
    """Add a ``Volume_Average`` column to ``data`` (in place) and return ``data``.

    ``Volume_Average`` is the rolling mean of ``volume`` over ``window`` rows.
    A ``Volume_Ratio`` column (volume divided by that average) is computed as
    well but removed again before returning.
    """
    volume = data['volume']
    data['Volume_Average'] = volume.rolling(window=window).mean()

    # NOTE(review): the ratio is computed and immediately discarded, so only
    # the raw rolling average survives. Given the function name, the intent
    # may have been to keep the ratio instead - confirm before changing,
    # since other code in this notebook reads `Volume_Average`.
    data['Volume_Ratio'] = volume.div(data['Volume_Average'])
    del data['Volume_Ratio']

    return data

def calculate_rate_of_change(data, window=1):
    """Add a ``ROC`` column to ``data`` (in place) and return ``data``.

    ``ROC`` is the percentage change of ``close`` relative to its value
    ``window`` rows earlier.
    """
    earlier_close = data['close'].shift(window)
    change = data['close'] - earlier_close
    data['ROC'] = change.div(earlier_close).mul(100)
    return data

def data_modification(stock, df):
    """Attach all indicator columns to ``df`` and lag them by one row.

    Runs the five indicator helpers in sequence, then shifts every indicator
    column down by one row, so each row carries the indicator values computed
    from the previous row. ``stock`` is accepted but not used.
    """
    indicator_steps = (
        calculate_rsi_divergence,
        calculate_bollinger_Zscore,
        calculate_williams_percent_r,
        calculate_volume_relative_to_average,
        calculate_rate_of_change,
    )
    for add_indicator in indicator_steps:
        df = add_indicator(df)

    lagged_columns = ['RSI_Divergence', 'BB_Zvalue', 'Williams_%R', 'Volume_Average', 'ROC']
    df[lagged_columns] = df[lagged_columns].shift(1)

    return df

def generate_modified_stock_data(start, end, composition_date, freq):
    """Download bars for every index constituent and attach lagged indicators.

    Parameters
    ----------
    start, end : str
        Date range forwarded to ``data_stock``.
    composition_date : str
        Date forwarded to ``get_composition`` to obtain the constituent list.
    freq : str
        Bar frequency forwarded to ``data_stock``.

    Returns
    -------
    dict
        Maps stock code to its indicator-enriched DataFrame, restricted to
        rows dated 2022-07-01 or later. Stocks whose download or processing
        fails are reported on stdout and left out of the result.
    """
    # get_composition logs in and out on its own, so it must run before the
    # session used for the k-data queries is opened. Otherwise its logout
    # would end that session before any bars are requested.
    composition_data = get_composition(composition_date)
    stocks_id = list(composition_data['code'])

    # Compare calendar dates with a calendar date. `index.date` yields
    # datetime.date objects, which do not order against a pandas Timestamp.
    test_start = pd.to_datetime('2022-07-01').date()  # for testing data

    ohlc_data = {}
    bs.login()
    try:
        for stock in stocks_id:
            try:
                stock_data = pd.DataFrame(data_stock(stock, start, end, freq))
                stock_data = data_modification(stock, stock_data)
                # Store only fully processed frames, so a failure cannot
                # leave a half-prepared entry behind.
                ohlc_data[stock] = stock_data[stock_data.index.date >= test_start]
            except Exception as exc:
                # Best effort per stock, but surface the actual cause.
                print(f'Unable to download data for stock: {stock} ({exc})')
    finally:
        bs.logout()

    return ohlc_data

In [None]:
# Load the per-stock frames from the local CSV files, split into train and test dicts.
ohlc_data_train, ohlc_data_test = get_modified_data_from_local()

In [None]:
class CustomPandasData(bt.feeds.PandasData):
    """Backtrader pandas feed extended with this notebook's indicator columns.

    Declares five extra lines (``RSI_Divergence``, ``BB_Zvalue``, ``Williams``,
    ``Volume_Average``, ``ROC``) in addition to those of ``bt.feeds.PandasData``,
    and maps line names to DataFrame column names through ``params``.
    """

    # Column names of the DataFrames fed to this class. Not referenced
    # anywhere else in this notebook; presumably informational - TODO confirm.
    cols = [
        'time', 'open', 'high', 'low', 'close', 'volume', 'amount',
        'adjustflag', 'RSI_Divergence', 'BB_Zvalue', 'Williams',
        'Volume_Average', 'ROC'
    ]
    
    # Extra lines exposed on the feed, read by the strategies as e.g. `d.ROC`.
    lines = ('RSI_Divergence', 'BB_Zvalue', 'Williams', 'Volume_Average', 'ROC')

    # (name, DataFrame column) pairs. The Williams column is expected under the
    # name 'Williams'; the backtest helpers rename 'Williams_%R' to match.
    # NOTE(review): 'time', 'amount' and 'adjustflag' have no matching entry in
    # `lines` above - confirm whether these params have any effect.
    params = (
        ('time', None),
        ('open', 'open'),
        ('high', 'high'),
        ('low', 'low'),
        ('close', 'close'),
        ('volume', 'volume'),
        ('amount', 'amount'),
        ('adjustflag', 'adjustflag'),
        ('RSI_Divergence', 'RSI_Divergence'),
        ('BB_Zvalue', 'BB_Zvalue'),
        ('Williams', 'Williams'),
        ('Volume_Average', 'Volume_Average'),
        ('ROC', 'ROC'),
    )

In [None]:
class SelectStocks(bt.Strategy):
    """Rank all feeds by five indicators and hold the best-ranked ones intraday.

    While flat (``buy_signal == -1``) each bar ranks every feed on each
    indicator in ascending order, averages the five ranks, and buys the
    ``top_n`` feeds with the lowest average rank at equal weight. While
    invested (``buy_signal == 1``) every open position is closed on the bar
    whose time equals ``closing_time``.

    Params
    ------
    closing_time : datetime.time
        Bar time at which open positions are closed.
    top_n : int
        Number of best-ranked feeds to buy (default 100).
    """

    params = {
        'closing_time': time(14, 30, 0),
        'top_n': 100,
    }

    def __init__(self):
        self.stocks_selected = []
        self.RSI = {}
        self.BB = {}
        self.WR = {}
        self.VA = {}
        self.ROC = {}
        self.buy_signal = -1  # -1: flat, looking to buy; 1: invested

        # Keep a per-feed handle on each indicator line, keyed by feed name.
        for d in self.datas:
            self.RSI[d._name] = d.RSI_Divergence
            self.BB[d._name] = d.BB_Zvalue
            self.WR[d._name] = d.Williams
            self.VA[d._name] = d.Volume_Average
            self.ROC[d._name] = d.ROC

    def _rank_indicator(self, indicator):
        """Return ``{stock: rank}`` (1 = smallest current value) for one indicator.

        NaN values are ranked last. Without that, ``sorted`` compares NaN
        against numbers and the resulting order depends on input arrangement.
        """
        current = {d._name: indicator[d._name][0] for d in self.datas}
        ordered = sorted(
            current.items(),
            key=lambda item: (math.isnan(item[1]), item[1]),
        )
        return {stock: position + 1 for position, (stock, _) in enumerate(ordered)}

    def next(self):
        if self.buy_signal == -1:
            indicators = (self.RSI, self.BB, self.WR, self.VA, self.ROC)

            # Sum the per-indicator ranks. Dict insertion order follows the
            # first indicator's ranking, which decides ties in the stable
            # sort below.
            rank_totals = {}
            for indicator in indicators:
                for stock, rank in self._rank_indicator(indicator).items():
                    rank_totals[stock] = rank_totals.get(stock, 0) + rank

            # Average rank for each stock across all indicators.
            average_rank = {
                stock: total / len(indicators)
                for stock, total in rank_totals.items()
            }
            sorted_stocks_avg_rank = sorted(average_rank.items(), key=lambda x: x[1])

            self.stocks_selected = [
                stock for stock, _ in sorted_stocks_avg_rank[:self.params.top_n]
            ]

            self.buy_stocks()

        elif (self.buy_signal == 1) and (self.datas[0].datetime.time() == self.params.closing_time):
            for d in self.datas:
                if self.getposition(data=d).size != 0:
                    self.close(data=d)
                    self.buy_signal = -1

    def buy_stocks(self):
        """Target an equal portfolio weight for every selected feed."""
        # Nothing selected (no feeds, or top_n == 0): avoid dividing by zero.
        if not self.stocks_selected:
            return

        weight = 1 / len(self.stocks_selected)
        selected = set(self.stocks_selected)
        for d in self.datas:
            if d._name in selected:
                self.order_target_percent(data=d, target=weight)
                self.buy_signal = 1

In [None]:
def backtest_selectstock(ohlc_data):
    """Run the ``SelectStocks`` strategy over ``ohlc_data`` with 100000 cash.

    Parameters
    ----------
    ohlc_data : dict
        Maps stock name to a DataFrame. Each frame is normalised IN PLACE:
        a ``time`` column, if present, is parsed and becomes the index, and
        ``Williams_%R`` is renamed to ``Williams``. Frames already normalised
        by an earlier call are accepted unchanged, so the function can be
        re-run on the same dict.

    Returns
    -------
    tuple
        ``(returns, positions, transactions)`` from the PyFolio analyzer.
    """
    cerebro = bt.Cerebro()
    for stock, dataframe in ohlc_data.items():
        # Only move `time` into the index if that has not happened yet. A
        # second run on the same frames would otherwise fail with KeyError.
        if 'time' in dataframe.columns:
            dataframe['time'] = pd.to_datetime(dataframe['time'])
            dataframe.set_index('time', inplace=True)
        dataframe.rename(columns={'Williams_%R': 'Williams'}, inplace=True)
        data_feed = CustomPandasData(dataname=dataframe)
        cerebro.adddata(data_feed, name=stock)

    cerebro.addstrategy(SelectStocks)

    cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')

    cerebro.broker.setcash(100000)
    results = cerebro.run()

    pyfoliozer = results[0].analyzers.getbyname('pyfolio')
    returns, positions, transactions, _ = pyfoliozer.get_pf_items()

    return (returns, positions, transactions)

In [None]:
class HoldStrategy(bt.Strategy):
    """Buy every feed at equal weight at one timestamp and close at another.

    On the bar whose date and time equal ``entry_date`` / ``entry_time`` an
    equal target weight is ordered for every feed. On the bar whose date and
    time equal ``exit_date`` / ``exit_time`` every open position is closed.
    """

    params = {
        'entry_time': time(10, 0, 0),
        'exit_time': time(14, 30, 0),
        'entry_date': pd.to_datetime('2022-04-07').date(),
        'exit_date': pd.to_datetime('2022-06-30').date(),
    }

    def __init__(self, **kwargs):
        self.buy_signal = -1
        # Every feed handed to the strategy is part of the portfolio.
        self.stocks_selected = [feed._name for feed in self.datas]
        self.entry_time = kwargs.get("entry_time", self.params.entry_time)
        self.exit_time = kwargs.get("exit_time", self.params.exit_time)

    def next(self):
        # The first feed's timestamp is the one compared with the schedule.
        clock = self.datas[0].datetime
        bar_time = clock.time()
        bar_date = clock.date()

        is_entry_bar = (bar_time == self.params.entry_time) and (bar_date == self.params.entry_date)
        if is_entry_bar:
            self.buy_stocks()

        is_exit_bar = (bar_time == self.params.exit_time) and (bar_date == self.params.exit_date)
        if is_exit_bar:
            for feed in self.datas:
                if self.getposition(data=feed).size == 0:
                    continue
                self.close(data=feed)
                self.buy_signal = -1

    def buy_stocks(self):
        """Order an equal target weight for each selected feed."""
        weight = 1 / len(self.stocks_selected)
        for feed in self.datas:
            if feed._name not in self.stocks_selected:
                continue
            self.order_target_percent(data=feed, target=weight)
            self.buy_signal = 1

In [None]:
def backtest_holdstrategy(ohlc_data, entry_date, exit_date):
    """Run the ``HoldStrategy`` benchmark over ``ohlc_data`` with 100000 cash.

    Parameters
    ----------
    ohlc_data : dict
        Maps stock name to a DataFrame. Frames may still carry ``time`` as a
        column or already have it as their index; both are accepted. The
        input frames are not modified.
    entry_date, exit_date : str or date-like
        Converted to ``datetime.date`` and passed to ``HoldStrategy`` as its
        ``entry_date`` / ``exit_date`` parameters.

    Returns
    -------
    tuple
        ``(returns, positions, transactions)`` from the PyFolio analyzer.
    """
    cerebro = bt.Cerebro()

    for stock, dataframe in ohlc_data.items():
        prepared = dataframe
        # Do not rely on another backtest having already moved `time` into
        # the index; normalise here on a copy so this function works on its
        # own and leaves the caller's frames untouched.
        if 'time' in prepared.columns:
            prepared = prepared.assign(time=pd.to_datetime(prepared['time'])).set_index('time')
        prepared = prepared.rename(columns={'Williams_%R': 'Williams'})
        data_feed = CustomPandasData(dataname=prepared)
        cerebro.adddata(data_feed, name=stock)

    params = {
        'entry_date': pd.to_datetime(entry_date).date(),
        'exit_date': pd.to_datetime(exit_date).date(),
    }
    cerebro.addstrategy(HoldStrategy, **params)

    cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')

    cerebro.broker.setcash(100000)

    results = cerebro.run()

    pyfoliozer = results[0].analyzers.getbyname('pyfolio')
    returns, positions, transactions, _ = pyfoliozer.get_pf_items()

    return (returns, positions, transactions)

In [None]:
result = backtest_selectstock(ohlc_data_train)
# Sum of the per-period returns (not compounded). `.iloc[-1]` takes the last
# element by position; a bare `[-1]` on a Series with a non-integer index is
# a deprecated positional fallback in recent pandas.
print('Overall return: ', result[0].cumsum().iloc[-1])

In [None]:
# Third element of the tuple returned by the backtest above: the transactions.
result[2] #transactions

In [None]:
result = backtest_holdstrategy(ohlc_data_train, '2022-04-07', '2022-06-30')
# Sum of the per-period returns (not compounded). `.iloc[-1]` takes the last
# element by position; a bare `[-1]` on a Series with a non-integer index is
# a deprecated positional fallback in recent pandas.
print('Overall return: ', result[0].cumsum().iloc[-1])

In [None]:
# Third element of the tuple returned by the backtest above: the transactions.
result[2] #transactions

In [None]:
result1 = backtest_selectstock(ohlc_data_test)
# Use the test split loaded above; no notebook-level `ohlc_data` is defined,
# so the previous argument raised NameError on a fresh kernel.
# NOTE(review): 2022-07-31 is a Sunday, so HoldStrategy's exit condition
# (exact date and time match) presumably never fires and positions stay open
# to the end of the data - confirm whether 2022-07-29 was intended.
result2 = backtest_holdstrategy(ohlc_data_test, '2022-07-01', '2022-07-31')