In [7]:
!pip install backtrader

Defaulting to user installation because normal site-packages is not writeable
Collecting backtrader
  Downloading backtrader-1.9.78.123-py2.py3-none-any.whl (419 kB)
     ---------------------------------------- 0.0/419.5 kB ? eta -:--:--
     ------------------------------------  409.6/419.5 kB 12.9 MB/s eta 0:00:01
     -------------------------------------- 419.5/419.5 kB 8.9 MB/s eta 0:00:00
Installing collected packages: backtrader
Successfully installed backtrader-1.9.78.123




In [8]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import scipy.stats as st

import backtrader as bt
import backtrader.feeds as btfeeds
import backtrader.indicators as btind
import backtrader.analyzers as btanalyzers

import math
import datetime
import pytz

# from ipynb.fs.full.Task1 import get_files
# from ipynb.fs.full.Task2 import find_suitable_pairs
from BackTrader import *
# from BackTrader import PandasData, PairTradingStrategy
# from BackTrader import CommInfoFloat, DataAnalyzer, CashValueAnalyzer, OrderAnalyzer

In [9]:
# import backtrader as bt
import backtrader.feeds as btfeeds
import backtrader.indicators as btind
import backtrader.analyzers as btanalyzers
from enum import Enum
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib

import math
import datetime

#This is the data feed to be fed into the backtesting lib
class PandasData(btfeeds.PandasData):
    params = (
        # Possible values for datetime (must always be present)
        #  None : datetime is the "index" in the Pandas Dataframe
        #  -1 : autodetect position or case-wise equal name
        #  >= 0 : numeric index to the colum in the pandas dataframe
        #  string : column name (as index) in the pandas dataframe
        ('datetime', None),

        ('open', 'Open'),
        ('high', 'High'),
        ('low', 'Low'),
        ('close', 'Adj Close'),
        ('openinterest', None),
    )
    
class Log(btind.Indicator):
    """Calculates log."""
    lines = ('log',)
    
    def next(self):
        self.l.log[0] = math.log(self.data[0])

class OLSSlopeIntercept(btind.PeriodN):
    """Calculates a linear regression using OLS."""
    _mindatas = 2  # ensure at least 2 data feeds are passed

    lines = ('slope', 'intercept',)
    params = (
        ('period', 10),
    )

    def next(self):
        p0 = pd.Series(self.data0.get(size=self.p.period))
        p1 = pd.Series(self.data1.get(size=self.p.period))
        p1 = sm.add_constant(p1)
        intercept, slope = sm.OLS(p0, p1).fit().params

        self.lines.slope[0] = slope
        self.lines.intercept[0] = intercept

class OLSSpread(btind.PeriodN):
    """Calculates the z-score of the OLS spread."""
    _mindatas = 2  # ensure at least 2 data feeds are passed
    lines = ('slope', 'spread', 'spread_mean', 'spread_std', 'zscore',)
    params = (('period', 10),)

    def __init__(self):
        data0_log = Log(self.data0)
        data1_log = Log(self.data1)
        slint = OLSSlopeIntercept(data0_log, data1_log, period=self.p.period)

        spread = data0_log - (slint.slope * data1_log + slint.intercept)
        self.l.spread = spread
        self.l.slope = slint.slope

        self.l.spread_mean = bt.ind.SMA(spread, period=self.p.period)
        self.l.spread_std = bt.ind.StdDev(spread, period=self.p.period)
        self.l.zscore = (spread - self.l.spread_mean) / self.l.spread_std
        
class Status(Enum):
    LONG = 1
    SHORT = 2
    NONE = 3
    
class PairTradingStrategy(bt.Strategy):
    """Basic pair trading strategy."""
    
    # These are just default values, pass in these values while initialising strategy
    params = dict(
        period=100,
        order_pct1=0.1,
        order_pct2=0.1,
        printout=True,
        upper=2,
        lower=2,
        symbol1="SYMBOL1",
        symbol2="SYMBOL2"
    )

    def log(self, txt, dt=None):
        if self.p.printout:
            dt = dt or self.data.datetime[0]
            dt = bt.num2date(dt)
            print('%s, %s' % (dt.isoformat(), txt))

    def notify_order(self, order):
        if order.status in [bt.Order.Submitted, bt.Order.Accepted]:
            return  # Await further notifications

        if order.status == order.Completed:
            if order.isbuy():
                buytxt = 'BUY COMPLETE {}, size = {:.2f}, price = {:.2f}'.format(
                    order.data._name, order.executed.size, order.executed.price)
                self.log(buytxt, order.executed.dt)
            else:
                selltxt = 'SELL COMPLETE {}, size = {:.2f}, price = {:.2f}'.format(
                    order.data._name, order.executed.size, order.executed.price)
                self.log(selltxt, order.executed.dt)

        elif order.status in [order.Expired, order.Canceled, order.Margin]:
            self.log('%s ,' % order.Status[order.status])
            pass  # Simply log

        # Allow new orders
        self.orderid = None

    def __init__(self):
        # To control operation entries
        self.orderid = None
        self.order_pct1 = self.p.order_pct1
        self.order_pct2 = self.p.order_pct2
        self.upper = self.p.upper
        self.lower = self.p.lower
        
        self.transform = OLSSpread(self.data0, self.data1, period=self.p.period)

        self.spread = self.transform.spread
        self.zscore = self.transform.zscore
        self.slope = self.transform.slope
        
        self.status = Status.NONE
        self.symbol1 = self.p.symbol1
        self.symbol2 = self.p.symbol2

    def next(self):
        if self.orderid:
            return  # if an order is active, no new orders are allowed
        
        if self.zscore[0] > self.upper and self.status != Status.SHORT:
            # Short sell stock 1
            self.order_target_percent(data=self.data0, target=-self.order_pct1)
            # Buy stock 2
            self.order_target_percent(data=self.data1, target=self.order_pct2)
            
            self.status = Status.SHORT

        elif self.zscore[0] < self.lower and self.status != Status.LONG:
            # Short sell stock 2
            self.order_target_percent(data=self.data1, target=-self.order_pct2)
            # Buy stock 1
            self.order_target_percent(data=self.data0, target=self.order_pct1)
                     
            self.status = Status.LONG
        
#         # Sample strategy does not close position when zscore reaches zero again, 
#         # only reverses position when it swings the other way
#         # uncomment below if desired behaviour is to close at z-score = 0
#         elif self.zscore[0] <= 0 and self.status == Status.SHORT:
#             # Close position
#             self.order_target_percent(data=self.data1, target=0)
#             self.order_target_percent(data=self.data0, target=0)
#             self.status = Status.NONE
        
#         elif self.zscore[0] >= 0 and self.status == Status.LONG:
#             # Close position
#             self.order_target_percent(data=self.data1, target=0)
#             self.order_target_percent(data=self.data0, target=0)
#             self.status = Status.NONE

    def stop(self):
        if self.p.printout:
            print('==================================================')
            print('Starting Value - %.2f' % self.broker.startingcash)
            print('Ending   Value - %.2f' % self.broker.getvalue())
            print('==================================================')


# This this can be added to Cerebro allow fractional shares (for this like crypto)
# By default backtrader doesnt allow that.
class CommInfoFloat(bt.CommInfoBase):
    """Commission schema that keeps size as float."""
    params = (
        ('stocklike', True),
        ('commtype', bt.CommInfoBase.COMM_PERC),
        ('percabs', True),
      )
    
    def getsize(self, price, cash):
        if not self._stocklike:
            return self.p.leverage * (cash / self.get_margin(price))

        return self.p.leverage * (cash / price)


class DataAnalyzer(bt.analyzers.Analyzer):
    """Analyzer to extract OHLCV."""
    def create_analysis(self):
        self.rets0 = {}
        self.rets1 = {}

    def next(self):
        self.rets0[self.strategy.datetime.datetime()] = [
            self.data0.open[0],
            self.data0.high[0],
            self.data0.low[0],
            self.data0.close[0],
            self.data0.volume[0]
        ]
        self.rets1[self.strategy.datetime.datetime()] = [
            self.data1.open[0],
            self.data1.high[0],
            self.data1.low[0],
            self.data1.close[0],
            self.data1.volume[0]
        ]

    def get_analysis(self):
        return self.rets0, self.rets1

class CashValueAnalyzer(bt.analyzers.Analyzer):
    """Analyzer to extract cash and value."""
    def create_analysis(self):
        self.rets = {}

    def notify_cashvalue(self, cash, value):
        self.rets[self.strategy.datetime.datetime()] = self.strategy.broker.getvalue()

    def get_analysis(self):
        return self.rets
    
class OrderAnalyzer(bt.analyzers.Analyzer):
    """Analyzer to extract order price, size, value, and paid commission."""
    def create_analysis(self):
        self.rets0 = {}
        self.rets1 = {}

    def notify_order(self, order):
        if order.status == order.Completed:
            if order.data._name == self.strategy.symbol1:
                rets = self.rets0
            else:
                rets = self.rets1
            rets[self.strategy.datetime.datetime()] = (
                order.executed.price,
                order.executed.size,
                -order.executed.size * order.executed.price,
                order.executed.comm
            )

    def get_analysis(self):
        return self.rets0, self.rets1

In [10]:
def calculate_volatility(values):
    values = pd.Series(values)
    log_return = np.log(values/values.shift())
    volatility = log_return.std()*252**.5
    return volatility*100

In [50]:
start_date = '2021-01-01'
end_date = '2023-09-29'

In [51]:
import yfinance as yf
CSI300 = yf.download("000300.SS", start=start_date, end=end_date)
CSI300 = CSI300.drop(columns=['Volume'])
CSI300

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-03-11,5024.560059,5138.410156,5020.580078,5128.220215,5128.220215
2021-03-12,5153.669922,5153.669922,5086.819824,5146.379883,5146.379883
2021-03-15,5116.120117,5120.879883,4992.399902,5035.540039,5035.540039
2021-03-16,5054.410156,5084.310059,5009.950195,5079.359863,5079.359863
2021-03-17,5062.770020,5123.549805,5020.129883,5100.859863,5100.859863
...,...,...,...,...,...
2023-09-22,3672.639893,3740.419922,3664.770020,3738.929932,3738.929932
2023-09-25,3738.060059,3738.060059,3711.199951,3714.600098,3714.600098
2023-09-26,3711.709961,3723.850098,3692.620117,3692.889893,3692.889893
2023-09-27,3694.959961,3731.139893,3694.389893,3700.500000,3700.500000


In [52]:
class BuyAndHold(bt.Strategy):
    def start(self):
        self.val_start = self.broker.get_cash()  # keep the starting cash

    def nextstart(self):
        # Buy all the available cash
        self.order_target_value(target=self.broker.get_cash())

    def stop(self):
        pass
#         # calculate the actual returns
#         self.roi = (self.broker.get_value() / self.val_start) - 1.0
#         print('ROI:        {:.2f}%'.format(100.0 * self.roi))

In [53]:
cerebro = bt.Cerebro()
csi300_datafeed = PandasData(dataname=CSI300, name="CSI300")
cerebro.adddata(csi300_datafeed)
cerebro.addstrategy(BuyAndHold)
cerebro.addanalyzer(btanalyzers.DrawDown, _name='drawdown')
cerebro.addanalyzer(btanalyzers.SharpeRatio_A, _name='sharpe')
cerebro.addanalyzer(btanalyzers.Returns, _name='returns')
cerebro.broker.setcash(100000)
res = cerebro.run()[0]
print("Sharpe Ratio: ", res.analyzers.sharpe.get_analysis()["sharperatio"])
print("Annualised Return: ", res.analyzers.returns.get_analysis()["rnorm100"])
print("Max Drawdown: ", res.analyzers.drawdown.get_analysis()["max"]["drawdown"])
print("Annualised Volatilty: ", calculate_volatility(CSI300["Close"]))
# print()
# pairs.at[indexes[index], "SharpeRatio"] = res.analyzers.sharpe.get_analysis()["sharperatio"]
#         pairs.at[indexes[index], "Annualised Return(%)"] = res.analyzers.returns.get_analysis()["rnorm100"]
#         pairs.at[indexes[index], "Max Drawdown(%)"] = res.analyzers.drawdown.get_analysis()["max"]["drawdown"]
#         pairs.at[indexes[index], "Annualised Volatility(%)"] = volatility

Sharpe Ratio:  -1.3755446845973969
Annualised Return:  -12.335249833833736
Max Drawdown:  33.62545741806995
Annualised Volatilty:  17.395054583234607


In [18]:
PRICE_DIR = "individual_data"
STOCK_PRICE = {}

In [162]:
# Adjust these params for backtesting
PARAMS = {
    "CASH" : 100000,
    "UPPER" : st.norm.ppf(1 - 0.05 / 2),
    "LOWER" : -st.norm.ppf(1 - 0.05 / 2),
    "ORDER_PCT1" : 0.5, # Essentially order size (how much pct of current portfolio you want to spend for stock)
    "ORDER_PCT2" : 0.5,
    "PERIOD" : 100, # Look back period for doing rolling OLS and z-score calc.
    "COMMPERC": 0.005 # 0.5%
}

def run_backtest(stock1_name, stock2_name, stock1_df, stock2_df, PARAMS):
    stock1_datafeed = PandasData(dataname=stock1_df, name=stock1_name)
    stock2_datafeed = PandasData(dataname=stock2_df, name=stock2_name)
    print(stock1_datafeed)
    
    cerebro = bt.Cerebro()

    cerebro.adddata(stock1_datafeed)
    cerebro.adddata(stock2_datafeed)

    cerebro.addstrategy(PairTradingStrategy, 
                        period=PARAMS["PERIOD"], 
                        upper=PARAMS["UPPER"], 
                        lower=PARAMS["LOWER"], 
                        order_pct1=PARAMS["ORDER_PCT1"],
                        order_pct2=PARAMS["ORDER_PCT2"],
                        symbol1=stock1_name,
                        symbol2=stock2_name,
                        printout=False)

    cerebro.addanalyzer(btanalyzers.DrawDown, _name='drawdown')
    cerebro.addanalyzer(btanalyzers.SharpeRatio_A, _name='sharpe')
    cerebro.addanalyzer(btanalyzers.TradeAnalyzer, _name='ta')
    cerebro.addanalyzer(btanalyzers.Returns, _name='returns')
#     cerebro.addanalyzer(btanalyzers.LogReturnsRolling, _name='logrollingreturns')
    cerebro.addanalyzer(btanalyzers.AnnualReturn, _name='annualreturn')
#     cerebro.addanalyzer(btanalyzers.PositionsValue, _name='positionvalues')
    
    # Below are custom analyzers defined by the sample code, can uncomment if you need more detailed info
#     cerebro.addanalyzer(DataAnalyzer)
    cerebro.addanalyzer(CashValueAnalyzer)
    cerebro.addanalyzer(OrderAnalyzer)

    cerebro.broker.setcash(PARAMS["CASH"])
    comminfo = CommInfoFloat(commission=PARAMS["COMMPERC"])
    cerebro.broker.addcommissioninfo(comminfo)

    backtest_res = cerebro.run()[0]
    return backtest_res

In [150]:
filtered_pairs = pd.read_pickle("Data/filtered_pairs_dict.pickle")

In [151]:
filtered_pairs

{'KMeans':                                                    pair        pvalue  \
 0     (Communication Services-Communication Services...  1.673093e-29   
 2     (Communication Services-Communication Services...  0.000000e+00   
 4     (Communication Services-Communication Services...  1.037380e-19   
 5     (Communication Services-Communication Services...  4.969905e-11   
 6     (Communication Services-Communication Services...  7.538697e-20   
 ...                                                 ...           ...   
 8755  (Industry-Industry - 603806.SH.csv, Industry-I...  1.801551e-29   
 8756  (Industry-Industry - 603806.SH.csv, Materials-...  8.249050e-11   
 8757  (Industry-Industry - 688303.SH.csv, Industry-I...  3.371344e-28   
 8758  (Industry-Industry - 688303.SH.csv, Materials-...  2.869351e-28   
 8759  (Industry-Industry - 688599.SH.csv, Materials-...  2.239281e-19   
 
       hurst_exp  half_life  avg_cross_count  
 0      0.001769  13.908661       137.980328  
 2    

In [152]:
start_test = '2023-01-01'
end_test = '2023-09-28'

In [163]:
from datetime import datetime

In [171]:
TESTING_START = datetime(2023, 1, 1)
TESTING_END = datetime(2023, 9, 28)
backtest_results_raw = {}

In [198]:
STOCK_PRICE = {}

In [199]:
def read_stock_price(stock_name):
    stock_name = stock_name.replace("-", "/", 1)
    if stock_name in STOCK_PRICE:
        return STOCK_PRICE[stock_name]
    file_path = PRICE_DIR + "/" + stock_name
    df = pd.read_csv(file_path)
    df = df.drop(0)
    df = df.ffill()
    dates = pd.to_datetime(df['Date'], format="%Y%m%d", utc=True)
    dates = dates.dt.strftime("%Y-%m-%d")
    df['Date'] = dates
    dates = pd.to_datetime(dates)
    
#     print(dates)
#     print(df['Date'])
    df.index = dates
    df = df.loc[df.index >= TESTING_START]
    df = df.loc[df.index <= TESTING_END]
    STOCK_PRICE[stock_name] = df
    
    df.rename(columns={'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close', 'adj close': 'Adj Close'}, inplace=True)
    return df

In [200]:
for pair_strat, pairs in filtered_pairs.items():
    print("Doing Backtesting for ", pair_strat)
    pairs["SharpeRatio"] = np.nan
    pairs["Annualised Return(%)"] = np.nan
    pairs["Max Drawdown(%)"] = np.nan
    pairs["Annualised Volatility(%)"] = np.nan
    raw_results = {}
    indexes = pairs.index
    for (index, (stock1, stock2)) in enumerate(pairs["pair"]):
        print("Backtesting for ", (stock1, stock2))
        stock1_df = read_stock_price(stock1)
        stock2_df = read_stock_price(stock2)
        print(stock1_df)
        print(stock2_df)
        if stock1_df.isnull().values.any() or stock2_df.isnull().values.any():
            print(f'{stock1} or {stock2} has null values')
            continue
        res = run_backtest(stock1, stock2, stock1_df, stock2_df, PARAMS)
        portfolio_value = res.analyzers.cashvalueanalyzer.get_analysis()
        volatility = calculate_volatility(portfolio_value.values())
        orders = res.analyzers.orderanalyzer.get_analysis()
        
        # Raw results store the portfolio value at each date, and also the specific orders which were made during backtesting
        raw_results[(stock1, stock2)] = (portfolio_value, orders)        
        
        pairs.at[indexes[index], "SharpeRatio"] = res.analyzers.sharpe.get_analysis()["sharperatio"]
        pairs.at[indexes[index], "Annualised Return(%)"] = res.analyzers.returns.get_analysis()["rnorm100"]
        pairs.at[indexes[index], "Max Drawdown(%)"] = res.analyzers.drawdown.get_analysis()["max"]["drawdown"]
        pairs.at[indexes[index], "Annualised Volatility(%)"] = volatility
    backtest_results_raw[pair_strat] = raw_results

        

Doing Backtesting for  KMeans
Backtesting for  ('Communication Services-Communication Services - 002027.SZ.csv', 'Communication Services-Communication Services - 002555.SZ.csv')
            Unnamed: 0    ts_code  trade_date  Open  High   Low  Close  \
Date                                                                     
2023-09-27       26221  002027.SZ    20230927  7.29  7.43  7.26   7.36   
2023-09-26       26222  002027.SZ    20230926  7.30  7.34  7.25   7.32   
2023-09-25       26223  002027.SZ    20230925  7.34  7.36  7.24   7.27   
2023-09-22       26224  002027.SZ    20230922  7.15  7.35  7.12   7.34   
2023-09-21       26225  002027.SZ    20230921  7.23  7.25  7.12   7.16   
...                ...        ...         ...   ...   ...   ...    ...   
2023-01-09       26397  002027.SZ    20230109  6.89  7.14  6.87   7.07   
2023-01-06       26398  002027.SZ    20230106  7.41  7.46  6.81   6.88   
2023-01-05       26399  002027.SZ    20230105  6.96  7.44  6.92   7.39   
2023-01-

IndexError: array assignment index out of range

In [148]:
import pickle
pickle.dump(filtered_pairs, open('results_df.pickle', 'wb'))
pickle.dump(backtest_results_raw, open('results_raw.pickle', 'wb'))

In [3]:
import pickle

In [4]:
test = pickle.load(open('results_df.pickle', 'rb'))

In [16]:
kmeans = test['KMeans']

In [17]:
kmeans[kmeans['SharpeRatio'] > 1.5]

Unnamed: 0,pair,pvalue,hurst_exp,half_life,avg_cross_count,SharpeRatio,Annualised Return(%),Max Drawdown(%),Annualised Volatility(%)
0,"(comms-comms - AMX.csv, comms-comms - BCE.csv)",8.992734999999999e-26,-0.000929,30.519382,128.002649,11.243254,11.23529,11.035461,13.068065
2,"(comms-comms - AMX.csv, comms-comms - CMCSA.csv)",1.4471029999999998e-19,-0.000383,90.858701,134.344371,13.375081,12.274141,9.653723,11.78634
4,"(comms-comms - AMX.csv, comms-comms - RELX.csv)",2.478852e-24,-0.006052,117.074734,132.508609,1.822888,4.72245,13.467652,12.423609
15,"(comms-comms - AMX.csv, conscycl-conscycl - OR...",7.279650000000001e-17,0.003845,139.771779,124.331126,2014.955957,5.991797,15.086648,13.910517
16,"(comms-comms - AMX.csv, conscycl-conscycl - SB...",2.775241e-22,0.002339,106.314157,129.337748,3.963369,11.460155,18.258898,11.769534
17,"(comms-comms - AMX.csv, conscycl-conscycl - TJ...",1.305448e-18,-0.005652,90.469991,130.005298,11.65209,6.838383,25.062733,13.613701
32,"(comms-comms - AMX.csv, consdef-consdef - PEP....",5.229755e-21,0.002759,111.792638,129.337748,2.499694,4.841371,10.288249,11.710815
33,"(comms-comms - AMX.csv, consdef-consdef - PG.csv)",5.582688000000001e-23,0.001859,121.056678,127.668874,5.204245,3.821809,12.90775,12.638612
38,"(comms-comms - AMX.csv, consdef-consdef - WMT....",1.246935e-19,0.002352,196.287837,134.344371,3.466247,15.455565,9.971369,13.403827
44,"(comms-comms - AMX.csv, financials-financials ...",4.38598e-15,0.00192,106.710282,125.666225,2.55154,11.012015,12.845052,12.19067


In [9]:
dbscan = test['DBSCAN']

In [14]:
len(dbscan)

166

In [13]:
dbscan[dbscan['SharpeRatio'] > 1.5]

Unnamed: 0,pair,pvalue,hurst_exp,half_life,avg_cross_count,SharpeRatio,Annualised Return(%),Max Drawdown(%),Annualised Volatility(%)
190,"(comms-comms - ATVI.csv, comms-comms - BCE.csv)",2.266747e-10,0.004259,112.705866,133.009272,9.623299,9.97948,13.946644,15.951186
201,"(comms-comms - ATVI.csv, comms-comms - TMUS.csv)",1.102617e-10,0.00521,109.79136,133.343046,3.952211,6.118078,18.602669,15.112959
203,"(comms-comms - ATVI.csv, comms-comms - VOD.csv)",1.428656e-10,0.003393,110.542321,131.006623,56.944142,8.021765,15.44538,17.71853
221,"(comms-comms - ATVI.csv, consdef-consdef - BTI...",1.281428e-11,0.00356,111.617257,133.009272,1.928518,14.346694,14.200463,19.606089
223,"(comms-comms - ATVI.csv, consdef-consdef - CL....",5.404995e-11,0.00418,112.925701,130.672848,4.422399,3.809133,10.39519,13.942325
232,"(comms-comms - ATVI.csv, consdef-consdef - MO....",7.143599e-11,0.004094,114.34055,131.340397,3.375018,3.502285,20.015622,19.56708
235,"(comms-comms - ATVI.csv, consdef-consdef - PM....",3.817536e-11,0.003547,114.136814,131.340397,10.373932,2.408854,13.757356,15.602441
242,"(comms-comms - ATVI.csv, energy-energy - COP.csv)",4.66446e-11,0.003355,114.312446,131.006623,2.521776,13.911065,19.533474,20.772526
245,"(comms-comms - ATVI.csv, energy-energy - EOG.csv)",3.207535e-11,0.003358,114.614842,128.670199,4.632911,6.898688,23.659646,25.104729
246,"(comms-comms - ATVI.csv, energy-energy - EPD.csv)",3.814314e-11,0.003466,115.111201,130.672848,1.883972,8.602894,14.249602,17.229318


In [6]:
test[test["SharpeRatio"] > 1.5]

KeyError: 'SharpeRatio'

In [None]:
# def read_stock_price(file_path: str):
#     df = pd.read_csv(file_path, header=[0, 1]).drop(0)
#     # Skipping files with null values for now
#     if df.isnull().values.any():
#         print(f'{file_path} has null values')
#         return None
#     dates = pd.to_datetime(df.iloc[:, 0], utc=True).dt.date
#     dates = pd.to_datetime(dates)
#     dates.name = "Date"
#     df.index = dates
#     return df

In [None]:
# prices = {}
# files = get_files(path="Data", extension="csv")
# for file_name, file_path in files.items():
#     df = read_stock_price(file_path)
#     if df is not None:
#         prices[file_name] = df

In [None]:
# results = {}
# for file, stock_price in prices.items():
#     print(f"Getting results for {file}")
#     testing_prices = stock_price.loc[stock_price.index < TESTING_CUTOFF]
#     testing_prices = testing_prices.iloc[:, testing_prices.columns.get_level_values(1) == "Adj Close"]
    
#     stocks = testing_prices.columns.get_level_values(0)
#     testing_prices.columns = stocks
#     pairs = pd.Series([(stocks[i], stocks[j]) for i in range(len(stocks)) for j in range(i+1, len(stocks))])
#     results[file] = find_suitable_pairs(testing_prices, pairs)

In [None]:
# backtest_res = {}
# for file, (all_pairs_df, filtered_pairs_df) in results.items():
#     for pair in filtered_pairs_df['pair']:
#         print(f"Doing backtesting for: {pair}")
#         stock_prices = prices[file].loc[stock_price.index > TESTING_CUTOFF]
#         res = run_backtest(pair[0], pair[1], stock_prices[pair[0]], stock_prices[pair[1]], PARAMS)
#         backtest_res[file] = res
#         res.analyzers.sharpe.print()
#         res.analyzers.returns.print()
#         print('Ending test\n')
#         print('\n')
        