#### Description of the file

##### 0 - Setup

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tqdm
import sklearn
import statsmodels as sm
import numba
from pathlib import Path

In [2]:
# Project directories, expressed relative to the notebook's working directory.
DIR_DATA = Path("..", "data")
DIR_RESULTS = Path("..", "results")
DIR_STOCK_DATA = DIR_DATA.joinpath("stock_data")

# Metadata tables for listed and delisted symbols (both live under DIR_DATA).
PATH_META = DIR_DATA.joinpath("metadata_listed.csv")
PATH_META_DELISTED = DIR_DATA.joinpath("metadata_delisted.csv")

In [3]:
# Read the listed-symbol metadata table and keep the raw frame untouched;
# later cells work on the independent copy `df_target`.
df_meta = pd.read_csv(filepath_or_buffer=PATH_META)
df_target = df_meta.copy(deep=True)

In [4]:
# Read the delisted-symbol metadata table and keep the raw frame untouched;
# later cells work on the independent copy `df_target_delisted`.
df_meta_delisted = pd.read_csv(filepath_or_buffer=PATH_META_DELISTED)
df_target_delisted = df_meta_delisted.copy(deep=True)

In [5]:
SAMPLE_SEED = 0  # fixed seed so Restart & Run All draws the same subset of symbols
p = 0.1  # probability that any given symbol is loaded


def load_sampled_stocks(symbols, keep_prob, rng, desc):
    """Load a random subset of per-symbol price CSVs from DIR_STOCK_DATA.

    Each symbol is kept independently with probability ``keep_prob``. For a
    kept symbol, ``DIR_STOCK_DATA / f"{symbol}.csv"`` is read and its index is
    replaced by a two-level MultiIndex ``(ticker, date)``.

    NOTE: the level named ``'date'`` holds the CSV's positional row index
    (``df_stock.index`` after a plain ``read_csv``), not calendar dates; any
    date column in the CSV stays a regular column. The name is kept for
    compatibility with code that may rely on it.

    Parameters
    ----------
    symbols : sized iterable of symbol names (e.g. a ``Symbol`` column).
    keep_prob : float in [0, 1], probability of loading each symbol.
    rng : numpy.random.Generator used for the keep/skip draw.
    desc : str, label shown on the tqdm progress bar.

    Returns
    -------
    (frames, skipped) : list of loaded DataFrames, and a list of
        ``(symbol, error_repr)`` pairs for sampled symbols whose file could
        not be read.
    """
    frames = []
    skipped = []
    for symbol in tqdm.tqdm(symbols, total=len(symbols), desc=desc):
        # One Bernoulli(keep_prob) draw per symbol decides whether it is loaded.
        if rng.binomial(1, keep_prob) == 0:
            continue

        path_stock = DIR_STOCK_DATA / f"{symbol}.csv"
        try:
            df_stock = pd.read_csv(path_stock)
        except (OSError, ValueError) as exc:
            # Best-effort load: missing/unreadable files (OSError) and empty,
            # malformed or undecodable CSVs (ValueError subclasses) are
            # skipped, but recorded instead of being silently dropped.
            skipped.append((symbol, repr(exc)))
            continue

        df_stock.index = pd.MultiIndex.from_product(
            [[symbol], df_stock.index], names=['ticker', 'date']
        )
        frames.append(df_stock)
    return frames, skipped


rng = np.random.default_rng(SAMPLE_SEED)

stocks_listed, skipped_listed = load_sampled_stocks(
    df_target["Symbol"], p, rng, "Loading listed stocks"
)
stocks_delisted, skipped_delisted = load_sampled_stocks(
    df_target_delisted["Symbol"], p, rng, "Loading delisted stocks"
)

all_stocks = stocks_listed + stocks_delisted
skipped_symbols = skipped_listed + skipped_delisted
print(f"Loaded {len(all_stocks)} symbols; skipped {len(skipped_symbols)} unreadable files")

if not all_stocks:
    # pd.concat([]) would raise an opaque "No objects to concatenate".
    raise ValueError(
        f"No stock files were loaded from {DIR_STOCK_DATA} "
        f"(p={p}, skipped={len(skipped_symbols)})"
    )

df_stocks = pd.concat(all_stocks)

Loading listed stocks: 100%|██████████| 5088/5088 [00:25<00:00, 196.94it/s]
Loading delisted stocks: 100%|██████████| 9720/9720 [00:27<00:00, 354.40it/s]


In [6]:
# Spot-check: display ten randomly chosen rows of the combined frame.
df_stocks.sample(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,ticker,date,open,high,low,close,adj_close,volume
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
TBI,2334,TBI,2003-07-10,7.5,7.5,7.17,7.17,7.17,152600.0
ATLC,3686,ATLC,2013-12-16,3.64,3.64,3.64,3.64,3.64,0.0
LKQ,4021,LKQ,2019-09-26,31.48,31.709999,31.290001,31.43,31.43,1701700.0
DECK,4654,DECK,2012-04-09,63.779999,64.519997,63.509998,64.010002,64.010002,1369500.0
TSH,16,TSH,1995-05-11,11.625,11.625,11.625,11.625009,11.625009,5800.0
EF,2067,EF,1998-07-01,21.5625,21.5625,21.5625,42.324613,42.324613,34400.0
DFG,583,DFG,1999-02-25,51.6875,51.6875,51.6875,54.851059,54.851059,288500.0
SILI,5527,SILI,1994-11-18,12.5,12.5,12.5,10.449765,10.449765,545.0
GUL,6920,GUL,1997-03-05,9.6875,9.6875,9.6875,164.609422,164.609422,7700.0
MGU,840,MGU,2008-12-29,11.82,11.82,11.0,5.462299,5.462299,181700.0
