Since all of our data files are .txt, we will convert them into .csv and delete the original .txt files before we push the data into our database.

In [1]:
# Path of the SQLite database file; later cells open it with sqlite3.connect()
# and hand it to the backtester.
DATABASE_PATH = 'data/MarketHistoricalData.db'

In [2]:
import csv, pathlib, os

def csv_converter(dir_path, delimiter=','):
    """Convert every ``.txt`` file directly inside ``dir_path`` to ``.csv``.

    Each ``<name>.txt`` is parsed with the ``csv`` module using ``delimiter``
    and rewritten as ``<name>.csv`` (default csv dialect) next to it. The
    original ``.txt`` is deleted once its ``.csv`` has been written.

    Args:
        dir_path: Directory to scan (not recursive) for ``*.txt`` files.
        delimiter: Field separator used by the input files.
    """
    # Take a snapshot of the listing so the directory is not being modified
    # while it is still being iterated.
    for txt_path in sorted(pathlib.Path(dir_path).glob("*.txt")):
        csv_path = txt_path.with_suffix(".csv")

        # newline="" lets the csv module handle line endings itself: without
        # it, newlines embedded in quoted fields are altered on read and an
        # extra "\r" is emitted per row on platforms that translate "\n".
        with txt_path.open(newline="") as txtfile, \
                csv_path.open(mode="w", newline="") as csvfile:
            reader = csv.reader(txtfile, delimiter=delimiter)
            csv.writer(csvfile).writerows(reader)

        # Only drop the source after its converted copy exists, so a failure
        # part-way through never removes data that was not converted.
        txt_path.unlink()

# Convert the NASDAQ price files; the NYSE call is kept below but commented out.
csv_converter("data/nasdaq_stocks/")
# csv_converter("data/nyse_stocks/")

The ticker file has a variable number of spaces between the ticker and the company name, so we replace that run of spaces with a single comma before converting the file to .csv.

In [3]:

def fix_spacing(file_path):
    """Rewrite a whitespace-separated ticker file as two-column CSV, in place.

    Every non-blank line is split on its first run of whitespace into
    ``ticker`` and ``name`` and written back as ``ticker,name``. Names that
    themselves contain a comma are quoted so the row still has two fields.

    Not idempotent: running it on an already converted file would split the
    company name at its first space, so call it once per raw file.

    Args:
        file_path: Path of the ticker listing to rewrite.
    """
    import csv  # local import so this cell does not rely on an earlier one

    rows = []
    with open(file_path, "r") as f:
        for line in f:
            # strip() drops the trailing newline that split(maxsplit=1) would
            # otherwise leave attached to the name (which produced a blank
            # line after every row when the rows were joined with "\n").
            fields = line.strip().split(maxsplit=1)
            if fields:  # ignore blank lines
                rows.append(fields)

    # newline="" + explicit lineterminator: one "\n" per row on every platform,
    # including after the last row.
    with open(file_path, 'w', newline='') as f:
        csv.writer(f, lineterminator="\n").writerows(rows)

# fix_spacing("nasdaq_stock_tickers.txt")
# fix_spacing("nyse_stock_tickers.txt")
# csv_converter(".") #convert into csv

We will now create tables for all of our stocks 

In [4]:
import pandas as pd 
# Raw header label -> column name used throughout the rest of the notebook.
new_names = {'<TICKER>': 'Ticker', '<NAME>': 'Name'}

# Read each exchange's ticker listing, then apply the header mapping.
nasdaq_tickers = pd.read_csv('data/nasdaq_stock_tickers.csv')
nasdaq_tickers = nasdaq_tickers.rename(columns=new_names)

nyse_tickers = pd.read_csv('data/nyse_stock_tickers.csv')
nyse_tickers = nyse_tickers.rename(columns=new_names)

# Preview the first rows of both listings, one after the other.
print(nasdaq_tickers.head(), nyse_tickers.head(), sep="\n")

     Ticker                   Name
0   AACG.US  ATA CREATIVITY GLOBAL
1   AACI.US   ARMADA ACQUISITION I
2  AACIU.US   ARMADA ACQUISITION I
3  AACIW.US   ARMADA ACQUISITION I
4   AADI.US        AADI BIOSCIENCE
      Ticker                  Name
0       A.US  AGILENT TECHNOLOGIES
1      AA.US                 ALCOA
2   AAC-U.US      ARES ACQUISITION
3  AAC-WS.US      ARES ACQUISITION
4     AAC.US      ARES ACQUISITION


In [5]:
import os

def find_blanks(tickers, file_path):
    """Clean the header of every ticker CSV and delete files that hold no data.

    For each value in ``tickers['Ticker']`` the file
    ``{file_path}{ticker.lower()}.csv`` is read; its column labels have the
    ``<`` / ``>`` characters removed and are title-cased (``<TICKER>`` ->
    ``Ticker``), and the file is rewritten in place.

    A file is deleted only when pandas reports it as empty. A missing file is
    reported, and any other failure is reported with the file left on disk,
    so an unrelated error can never destroy price data.

    Args:
        tickers: DataFrame with a ``Ticker`` column.
        file_path: Directory prefix (including the trailing separator) that is
            concatenated with the lower-cased ticker to form the file name.
    """
    for ticker in tickers.loc[:, 'Ticker']:
        csv_path = f'{file_path}{ticker.lower()}.csv'
        try:
            ticker_df = pd.read_csv(csv_path)
            cleaned_df = ticker_df.rename(
                columns=lambda column_name: (
                    column_name.replace("<", "").replace(">", "").title()
                )
            )
            cleaned_df.to_csv(csv_path, index=False)
        except FileNotFoundError:
            print(f"{ticker} file doesn't exist")
        except pd.errors.EmptyDataError:
            # Raised by read_csv when the file has no columns to parse.
            try:
                os.remove(csv_path)
                print(f"No data in {ticker}")
            except OSError as exc:
                print(f"Could not delete empty file for {ticker}: {exc}")
        except Exception as exc:
            # Stay best-effort across thousands of files, but keep the file:
            # a parse or permission error is not evidence that it is empty.
            print(f"Skipping {ticker}: {exc!r}")

# find_blanks(nasdaq_tickers, 'data/nasdaq_stocks/')
# find_blanks(nyse_tickers, 'data/nyse_stocks/')

Since the historical data for each stock comes in different lengths, we will first find the earliest date across all stocks so that we can make all the time series the same length.

In [6]:
## Finding the lowest date 
from utils import _int_to_datetime
from datetime import datetime 

def find_lowest_date(tickers, file_path):
    """Return the earliest first-row date among the tickers' CSV files.

    Each file ``{file_path}{ticker.lower()}.csv`` is read and the ``Date``
    value of its first row is converted with ``_int_to_datetime``. The search
    starts from the current time, so only dates before "now" can win.

    Args:
        tickers: DataFrame with a ``Ticker`` column.
        file_path: Directory prefix concatenated with the lower-cased ticker.

    Returns:
        Tuple ``(date, ticker)`` for the earliest date found; the ticker is
        ``''`` when no file starts before the current time.
    """
    earliest = datetime.now()
    earliest_ticker = ''

    for symbol in tickers.loc[:, 'Ticker']:
        frame = pd.read_csv(f'{file_path}{symbol.lower()}.csv')
        first_date = _int_to_datetime(frame.loc[0, 'Date'])

        # Strict comparison: on a tie the ticker seen first is kept.
        if first_date < earliest:
            earliest, earliest_ticker = first_date, symbol

    return earliest, earliest_ticker

# print(find_lowest_date(nasdaq_tickers, 'nasdaq_stocks/'))
# print(find_lowest_date(nyse_tickers, 'nyse_stocks/'))

So we have found that the lowest date on the nasdaq is (in YYYY-MM-DD) 1970-01-02 from 'AEP.US'
and on the NYSE is 1962-01-02 from 'GE.US'

We will now make all the time series the same length 

In [7]:
# GE's price history, read once at cell level; normalize_dates uses its 'Date'
# column as the reference calendar for padding the other tickers.
ge_timeseries =pd.read_csv("data/nyse_stocks/ge.us.csv")

def normalize_dates(tickers, file_path, reference=None):
    """Left-pad each ticker's CSV so it starts on the reference series' first date.

    For every ticker, the first ``Date`` in its file is located in the
    reference series. All earlier reference dates are inserted as placeholder
    rows (``Ticker`` = the ticker, ``Per`` = ``'D'``, every other column
    ``-1``) and the file is rewritten in place.

    A ticker whose first date does not occur in the reference series is
    reported and left untouched (previously it was silently treated as if it
    were already aligned). Files that already start on the reference's first
    date are not rewritten.

    Args:
        tickers: DataFrame with a ``Ticker`` column.
        file_path: Directory prefix concatenated with the lower-cased ticker.
        reference: DataFrame with a ``Date`` column providing the full
            calendar. Defaults to the notebook-level ``ge_timeseries``.
    """
    if reference is None:
        reference = ge_timeseries

    reference_dates = reference['Date'].to_numpy()

    for ticker in tickers.loc[:, 'Ticker']:
        csv_path = f'{file_path}{ticker.lower()}.csv'
        ticker_df = pd.read_csv(csv_path)
        first_date = ticker_df.loc[0, 'Date']

        matches = reference_dates == first_date
        if not matches.any():
            # argmax/idxmax on an all-False mask would return position 0 and
            # hide the mismatch, so check explicitly.
            print(f"{ticker}: first date {first_date} not found in reference "
                  f"series; file left unchanged")
            continue

        # Zero-based row position in the reference (two less than the row
        # number shown when the CSV is opened in Excel).
        n_missing = int(matches.argmax())
        if n_missing == 0:
            continue  # already starts on the reference's first date

        # Integer -1 placeholders keep integer columns integral after concat.
        padding = pd.DataFrame(-1, index=range(n_missing), columns=reference.columns)
        padding['Date'] = reference_dates[:n_missing]
        padding['Ticker'] = ticker
        padding['Per'] = 'D'

        normalized_df = pd.concat([padding, ticker_df], ignore_index=True)
        normalized_df.to_csv(csv_path, index=False)

# normalize_dates(nasdaq_tickers, 'data/nasdaq_stocks/')
# normalize_dates(nyse_tickers, 'data/nyse_stocks/')

Pushing data into SQLite database

In [8]:
import sqlite3
from utils import _ticker_to_table_name

# Notebook-wide connection (and a cursor on it) to the SQLite file at
# DATABASE_PATH; later cells write their tables through `conn`.
conn = sqlite3.connect(DATABASE_PATH)
cursor = conn.cursor()


def push_table_to_db(tickers, file_path):
    """Load each ticker's CSV into its own table of the SQLite database.

    The table name comes from ``_ticker_to_table_name(ticker)`` and the data
    is written through the notebook-level ``conn``. An existing table of the
    same name is replaced, so the function can be re-run safely.

    The explicit ``CREATE TABLE`` that used to precede the write was removed:
    it raised as soon as a table already existed, and
    ``to_sql(if_exists='replace')`` dropped the table it created anyway, so
    its declared column types never took effect. Column types are, as before,
    the ones pandas infers from the CSV.

    Args:
        tickers: DataFrame with a ``Ticker`` column.
        file_path: Directory prefix concatenated with the lower-cased ticker.
    """
    for ticker in tickers.loc[:, 'Ticker']:
        ticker_df = pd.read_csv(f'{file_path}{ticker.lower()}.csv')
        table_name = _ticker_to_table_name(ticker)

        # 'replace' drops and recreates the table before inserting the rows.
        ticker_df.to_sql(table_name, conn, if_exists='replace', index=False)

        

# push_table_to_db(nasdaq_tickers, 'data/nasdaq_stocks/')
# push_table_to_db(nyse_tickers, 'data/nyse_stocks/')

Let's validate our data and make sure all dates are included. Once that is done, we can compute the parameters of the strategy

In [9]:
from momentum_strategy import QuantitativeMomentum

# NOTE(review): the ticker lists are re-read here WITHOUT the column rename
# applied when they were first loaded, so these frames carry the raw CSV
# header. Confirm which column name QuantitativeMomentum expects.
nasdaq_tickers = pd.read_csv('data/nasdaq_stock_tickers.csv')
nyse_tickers = pd.read_csv('data/nyse_stock_tickers.csv')

# One combined universe: NASDAQ rows followed by NYSE rows, re-indexed from 0.
universe = pd.concat([nasdaq_tickers, nyse_tickers], ignore_index=True)

# Start with the default parameters for the strategy as described by the book.
# DATABASE_PATH is used instead of repeating the path literal so every cell
# talks to the same database file.
strategy = QuantitativeMomentum(
    DATABASE_PATH,
    tickers=universe
)

# tickers_missing_dates = strategy.validate_data()

# Compare by value: `x is []` is always False because a new list object is
# never identical to an existing one.
# assert tickers_missing_dates == []




Then we can compute the parameters needed for our strategy

In [10]:
# strategy.compute_parameters() # Using default parameters, uploads everything to SQL 

Then we can finally see the results of our strategy 

In [11]:
from backtester import Backtester

# Backtester bound to the same database file the rest of the notebook uses.
backtester = Backtester(database=DATABASE_PATH)

# equity_timeseries = backtester.backtest(strategy)
# equity_timeseries
# 33666383.07106178

In [12]:
# Computing columns for different strategy variations

# strategy = QuantitativeMomentum(
#     'data/MarketHistoricalData.db',
#     tickers=universe,
#     lottery_window=1,
#     look_back=1
# )

# strategy.compute_parameters()

# strategy = QuantitativeMomentum(
#     'data/MarketHistoricalData.db',
#     tickers=universe,
#     lottery_window=3,
#     look_back=60
# )

# strategy.compute_parameters()

# strategy = QuantitativeMomentum(
#     'data/MarketHistoricalData.db',
#     tickers=universe,
#     lottery_window=6,
#     look_back=36
# )

# strategy.compute_parameters()


In [13]:
from utils import _strategy_table_convention
# Computing strategies 

from itertools import product  # local to this cell; only the sweep needs it

# Each backtest in this sweep can take hours (see the progress output below),
# so an interrupted run is expensive to repeat. Set this to True to skip any
# parameter combination whose result table is already in the database.
# The default (False) recomputes and replaces every table, as before.
SKIP_EXISTING_RESULTS = False

# Same iteration order as four nested loops: the last list varies fastest.
parameter_grid = product(
    [12, 36, 60],         # look_back
    [1, 3, 6, 9, 12],     # lottery_window
    [1, 3, 6, 9, 12],     # rebalance
    [25, 50, 100, 200],   # firms_held
)

for look_back, lottery_window, rebalance, firms_held in parameter_grid:
    results_table = _strategy_table_convention(
        look_back=look_back,
        lottery_window=lottery_window,
        rebalance=rebalance,
        firms_held=firms_held
    )

    if SKIP_EXISTING_RESULTS and conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
        (results_table,),
    ).fetchone():
        continue

    # DATABASE_PATH instead of a repeated literal keeps the strategy and the
    # backtester pointed at the same file.
    strategy = QuantitativeMomentum(
        database_name=DATABASE_PATH,
        tickers=universe,
        look_back=look_back,
        lottery_window=lottery_window,
        firms_held=firms_held
    )

    equity_timeseries = backtester.backtest(
        strategy=strategy,
        rebalance_period=rebalance
    )

    equity_timeseries.to_sql(
        results_table,
        conn,
        if_exists='replace',
        index=False
    )

                

100%|██████████| 13258/13258 [2:15:20<00:00,  1.63it/s]  
100%|██████████| 13258/13258 [3:47:09<00:00,  1.03s/it]  
100%|██████████| 13258/13258 [4:04:57<00:00,  1.11s/it]  
100%|██████████| 13258/13258 [4:38:17<00:00,  1.26s/it]  
100%|██████████| 13258/13258 [1:17:12<00:00,  2.86it/s]  
100%|██████████| 13258/13258 [1:21:39<00:00,  2.71it/s]  
 14%|█▍        | 1835/13258 [12:05<24:41,  7.71it/s]   