In [2]:
import numpy as np
from numpy.linalg import norm
import pandas as pd
import itertools
import random
from matplotlib import pyplot as plt
from pair_trading_foundations.data_generation import ExecutePairTrading, generate_training_data
random.seed(23)
import pstats
from tqdm.notebook import tqdm
tqdm.pandas()

In [2]:
data = pd.read_csv('Data/sp500_full_20181231_to_20231229.csv')

In [3]:
set(data['GICS Sector'])

{'Communication Services',
 'Consumer Discretionary',
 'Consumer Staples',
 'Energy',
 'Financials',
 'Health Care',
 'Industrials',
 'Information Technology',
 'Materials',
 'Real Estate',
 'Utilities'}

# Draft below

## Get Combinations

In [None]:
tickers = list(set(data.Ticker))
combinations = list(itertools.combinations(tickers, 2))

In [None]:
len(combinations)

In [3]:
data=pd.read_csv("Data/Training/pair_features1.csv")

In [7]:
data[data.pnls.notnull()].shape

(1259000, 21)

# Initiate data tables to store the generated results

In [None]:
recorded_info_tb = pd.DataFrame(columns=[
    'ticker1', 
    'ticker2',
    'target_date',
    'spread',
    'spread_mean',
    'spread_std',
    'abs_spread_mean_l28',
    'abs_spread_std_l28',
    'spread_normed'
])

features_tb = pd.DataFrame(columns=[
    'ticker1', 
    'ticker2',
    'target_date',
    'same_sector_flag',
    'same_sub_industry_flag',
    'cos_sim',
    'corr_coef',
    'abs_spread_normed_max',
    'abs_spread_normed_90th',
    'abs_spread_normed_75th',
    'abs_spread_normed_median',
    'abs_spread_normed_l7_avg',
    'abs_spread_normed_l14_avg'
])

labels_tb = pd.DataFrame(columns=[
    'total_pnl',
    'total_pnl_l28_mean_std'
])

## Generate

In [4]:
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker,GICS Sector,GICS Sub-Industry
0,2018-12-31,190.339996,191.649994,188.5,190.539993,155.818451,1804400.0,MMM,Industrials,Industrial Conglomerates
1,2019-01-02,187.820007,190.990005,186.699997,190.949997,156.153717,2475200.0,MMM,Industrials,Industrial Conglomerates
2,2019-01-03,188.279999,188.279999,182.889999,183.759995,150.273972,3358200.0,MMM,Industrials,Industrial Conglomerates
3,2019-01-04,186.75,191.979996,186.029999,191.320007,156.456284,2995100.0,MMM,Industrials,Industrial Conglomerates
4,2019-01-07,191.360001,192.300003,188.660004,190.880005,156.096466,2162200.0,MMM,Industrials,Industrial Conglomerates


In [5]:
ticker1 = 'META'
ticker2 = 'WAT'

In [6]:
# Get a list of unique dates for later use
all_dates = data['Date'].unique()

In [7]:
# Flag indicating whether the two tickers are from the same sector
same_sector_flag = data[data.Ticker==ticker1]['GICS Sector'].values[0] == data[data.Ticker==ticker2]['GICS Sector'].values[0]
same_sub_industry_flag = data[data.Ticker==ticker1]['GICS Sub-Industry'].values[0] == data[data.Ticker==ticker2]['GICS Sub-Industry'].values[0]

In [8]:
vec1_full = data[['Ticker','Date','Close']][data.Ticker==ticker1].reset_index(drop=True)
vec2_full = data[['Ticker','Date','Close']][data.Ticker==ticker2].reset_index(drop=True)

In [9]:
# # Number of days in the data
# num_days_total = len(vec1_full)

In [10]:
# # Keep 500 days for training and 120 days for label calculation
# possible_indices_to_sample = list(range(500, num_days_total-119))

In [46]:
# sampled_indices = random.choices(possible_indices_to_sample, k=100)

In [47]:
df = pd.merge(vec1_full,vec2_full,on='Date',how='left',suffixes=['_P1','_P2'])
df.head()

Unnamed: 0,Ticker_P1,Date,Close_P1,Ticker_P2,Close_P2
0,META,2018-12-31,131.089996,WAT,188.649994
1,META,2019-01-02,135.679993,WAT,183.410004
2,META,2019-01-03,131.740005,WAT,176.339996
3,META,2019-01-04,137.949997,WAT,179.779999
4,META,2019-01-07,138.050003,WAT,187.240005


In [48]:
# # Absolute value of the difference of the two stocks
# abs_spread = abs(vec1_sub1 - vec2_sub1)
# abs_spread_mean = np.mean(abs_spread)
# abs_spread_std = np.std(abs_spread)

# # Sometimes, historical data might be too strict to get a signal for trade in
# abs_spread_mean_l28 = np.mean(abs_spread[-28:])
# abs_spread_std_l28 = np.std(abs_spread[-28:])

df['abs_spread'] = (df['Close_P1'] - df['Close_P2']).abs()
df['abs_spread_mean'] = df.rolling(500).abs_spread.mean()
df['abs_spread_std'] = df.rolling(500).abs_spread.std()
df['abs_spread_mean_l28'] = df.rolling(28).abs_spread.mean()
df['abs_spread_std_l28'] = df.rolling(28).abs_spread.std()
df['spread_normed'] = (df['abs_spread']-df['abs_spread_mean'])/df['abs_spread_std']
df['abs_spread_normed_max'] = df.spread_normed.abs().rolling(500).max()
df['abs_spread_normed_90th'] = df.spread_normed.abs().rolling(500).quantile(0.9)
df['abs_spread_normed_75th'] = df.spread_normed.abs().rolling(500).quantile(0.75)
df['abs_spread_normed_median'] = df.spread_normed.abs().rolling(500).median()
df['abs_spread_normed_l7_avg'] = df.spread_normed.abs().rolling(7).mean()
df['abs_spread_normed_l14_avg'] = df.spread_normed.abs().rolling(14).mean()
df.tail()


# abs_spread_normed_max = max(abs(spread_normed))
# abs_spread_normed_90th = np.percentile(abs(spread_normed),90)
# abs_spread_normed_75th = np.percentile(abs(spread_normed),75)
# abs_spread_normed_median = np.percentile(abs(spread_normed),50)

# # latest 7 day/14 day avg normalized spread
# ## These could help predict whether a trading signal will appear
# abs_spread_normed_l7_avg = abs(np.mean(spread_normed[-7:]))
# abs_spread_normed_l14_avg = abs(np.mean(spread_normed[-14:]))

Unnamed: 0,Ticker_P1,Date,Close_P1,Ticker_P2,Close_P2,abs_spread,abs_spread_mean,abs_spread_std,abs_spread_mean_l28,abs_spread_std_l28,spread_normed,abs_spread_normed_max,abs_spread_normed_90th,abs_spread_normed_75th,abs_spread_normed_median,abs_spread_normed_l7_avg,abs_spread_normed_l14_avg
1254,META,2023-12-22,353.390015,WAT,329.73999,23.650024,103.764481,66.817611,43.008214,20.422297,-1.199002,3.943149,2.868902,2.276608,1.331487,1.242128,1.157295
1255,META,2023-12-26,354.829987,WAT,331.970001,22.859985,103.754441,66.829412,41.051785,19.588654,-1.210462,3.943149,2.868902,2.276608,1.331487,1.218459,1.169034
1256,META,2023-12-27,357.829987,WAT,333.040009,24.789978,103.74872,66.836062,39.588213,19.200496,-1.181379,3.943149,2.868902,2.276608,1.331487,1.197023,1.172867
1257,META,2023-12-28,358.320007,WAT,332.769989,25.550018,103.727321,66.859426,38.0725,18.539125,-1.169279,3.943149,2.868902,2.276608,1.331487,1.189942,1.181351
1258,META,2023-12-29,353.959991,WAT,329.230011,24.72998,103.72468,66.862525,36.342856,17.366422,-1.18145,3.943149,2.868902,2.276608,1.331487,1.182664,1.198456


In [49]:
# Cosine sim
def cos_sim(rs,df):
    rows = df.loc[rs.index]
    vec1_sub1 = rows['Close_P1']
    vec2_sub1 = rows['Close_P2']
    return np.dot(vec1_sub1, vec2_sub1) / (norm(vec1_sub1) * norm(vec2_sub1))

# Correlation coef
def corr_coef(rs,df):
    rows = df.loc[rs.index]
    vec1_sub1 = rows['Close_P1']
    vec2_sub1 = rows['Close_P2']
    return np.corrcoef(vec1_sub1, vec2_sub1)[0, 1]

df['cos_sim'] = df['Close_P1'].rolling(500).progress_apply(cos_sim, args=(df,))
df['corr_coef'] = df['Close_P1'].rolling(500).progress_apply(corr_coef, args=(df,))

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [50]:
df.columns

Index(['Ticker_P1', 'Date', 'Close_P1', 'Ticker_P2', 'Close_P2', 'abs_spread',
       'abs_spread_mean', 'abs_spread_std', 'abs_spread_mean_l28',
       'abs_spread_std_l28', 'spread_normed', 'abs_spread_normed_max',
       'abs_spread_normed_90th', 'abs_spread_normed_75th',
       'abs_spread_normed_median', 'abs_spread_normed_l7_avg',
       'abs_spread_normed_l14_avg', 'cos_sim', 'corr_coef'],
      dtype='object')

In [51]:
pnls = []
for idx in range(df.shape[0]):
    if (idx < 501) | (idx > df.shape[0]-119):
        pnls.append(np.nan)
    else:
        previous_row = df.loc[idx-1]
        result=ExecutePairTrading(
                        previous_row.abs_spread_mean,
                        previous_row.abs_spread_std,
                        entry_signal=1.5
                    ).execute(
                        vec1=df.loc[idx:(idx+120)]['Close_P1'].values,
                        vec2=df.loc[idx:(idx+120)]['Close_P2'].values,
                        dates=df.loc[idx:(idx+120)]['Date'].values
                    )
        pnls.append(result.final_pl_pct)

In [52]:
df['pnls_oldmethod'] = pnls

In [53]:
df.columns

Index(['Ticker_P1', 'Date', 'Close_P1', 'Ticker_P2', 'Close_P2', 'abs_spread',
       'abs_spread_mean', 'abs_spread_std', 'abs_spread_mean_l28',
       'abs_spread_std_l28', 'spread_normed', 'abs_spread_normed_max',
       'abs_spread_normed_90th', 'abs_spread_normed_75th',
       'abs_spread_normed_median', 'abs_spread_normed_l7_avg',
       'abs_spread_normed_l14_avg', 'cos_sim', 'corr_coef', 'pnls_oldmethod'],
      dtype='object')

# Trading scoring - new

In [20]:
df['forward_abs_spread'] = df['abs_spread'].shift(-1)
df['entry_thresh'] = df['abs_spread_mean'] + 1.5*df['abs_spread_std']
df['exit_thresh'] = df['abs_spread_mean'] + 0.1*df['abs_spread_std']

In [21]:
df['entry_signal'] = np.where(df['forward_abs_spread']>=df['entry_thresh'],1,0)
df['exit_signal'] = np.where(df['forward_abs_spread']<=df['exit_thresh'],1,0)

In [22]:
df['stock1_current_Close'] = df['Close_P1'].shift(-1)
df['stock2_current_Close'] = df['Close_P2'].shift(-1)
df['stock1_7_days_ago_Close'] = df['Close_P1'].shift(7)
df['stock2_7_days_ago_Close'] = df['Close_P2'].shift(7)
df['pct_delta_1'] = (df['stock1_current_Close']/df['stock1_7_days_ago_Close']) - 1
df['pct_delta_2'] = (df['stock2_current_Close']/df['stock2_7_days_ago_Close']) - 1
df['long_P1'] = np.where(df.pct_delta_1 >= df.pct_delta_2,0,1)

In [23]:
def compute_pnl(rs,df):
    rows = df.loc[rs.index].iloc[::-1]    
    long_P1 = rows.long_P1.iloc[-1]

    rows['long_entry'] = np.where(rows.entry_signal==1, np.where(long_P1==1,rows.Close_P1,rows.Close_P2),np.nan)
    rows['long_exit'] = np.where(rows.exit_signal==1, np.where(long_P1==1,rows.Close_P1,rows.Close_P2),np.nan)
    rows['short_entry'] = np.where(rows.entry_signal==1, np.where(long_P1!=1,rows.Close_P1,rows.Close_P2),np.nan)
    rows['short_exit'] = np.where(rows.exit_signal==1, np.where(long_P1!=1,rows.Close_P1,rows.Close_P2),np.nan)

    rows['exit_signal_sum'] = rows.groupby('entry_signal').exit_signal.cumsum()

    rows['long_entry'] = rows['long_entry'].ffill()
    rows['short_entry'] = rows['short_entry'].ffill()

    rows['long_entry'] = np.where(rows['exit_signal_sum']==1,rows['long_entry'],np.nan)

    rows['long_pnl'] = (rows.long_exit - rows.long_entry)/rows.long_entry
    rows['short_pnl'] = (rows.short_entry - rows.short_exit)/rows.short_entry

    if rows[rows.exit_signal_sum==1].shape[0]>0:
        pnl = rows[rows.exit_signal_sum==1].long_pnl.cumsum().iloc[-1]+rows[rows.exit_signal_sum==1].short_pnl.cumsum().iloc[-1]
    else:
        pnl = np.nan
        
#     if rows.Date.iloc[-1]=='2014-10-06':
#         print(rows)
#         print(rows[rows.exit_signal_sum==1].long_pnl.cumsum().iloc[-1],rows[rows.exit_signal_sum==1].short_pnl.cumsum().iloc[-1])
#         print(pnl)
    return pnl

In [24]:
df['pnl'] = df['Close_P1'].iloc[::-1].rolling(120).progress_apply(compute_pnl, args=(df,))

0it [00:00, ?it/s]

In [25]:
df[['Ticker_P1', 'Ticker_P2', 'Date', 'long_P1', 'entry_signal', 'exit_signal','pnl', 'pnls_oldmethod']][df.pnl.notnull()].head(10)

Unnamed: 0,Ticker_P1,Ticker_P2,Date,long_P1,entry_signal,exit_signal,pnl,pnls_oldmethod
944,META,WAT,2022-09-29,1,0,0,-0.541385,0.222324
945,META,WAT,2022-09-30,1,0,0,-0.541385,0.225471
946,META,WAT,2022-10-03,1,1,0,-0.541385,0.209287
947,META,WAT,2022-10-04,1,1,0,-0.541385,0.207035
948,META,WAT,2022-10-05,1,1,0,-0.541385,-0.215832
949,META,WAT,2022-10-06,1,0,0,-0.541385,0.227778
950,META,WAT,2022-10-07,1,0,0,-0.541385,0.267214
951,META,WAT,2022-10-10,1,0,0,-0.541385,0.275657
952,META,WAT,2022-10-11,1,0,0,-0.541385,-0.26009
953,META,WAT,2022-10-12,1,0,0,-0.541385,-0.248056


In [44]:
df.columns

Index(['Ticker_P1', 'Date', 'Close_P1', 'Ticker_P2', 'Close_P2', 'abs_spread',
       'abs_spread_mean', 'abs_spread_std', 'abs_spread_mean_l28',
       'abs_spread_std_l28', 'spread_normed', 'abs_spread_normed_max',
       'abs_spread_normed_90th', 'abs_spread_normed_75th',
       'abs_spread_normed_median', 'abs_spread_normed_l7_avg',
       'abs_spread_normed_l14_avg', 'cos_sim', 'corr_coef', 'pnls_oldmethod',
       'forward_abs_spread', 'entry_thresh', 'exit_thresh', 'entry_signal',
       'exit_signal', 'stock1_current_Close', 'stock2_current_Close',
       'stock1_7_days_ago_Close', 'stock2_7_days_ago_Close', 'pct_delta_1',
       'pct_delta_2', 'long_P1', 'pnl'],
      dtype='object')

In [43]:
final = pd.DataFrame(columns=df.columns)

pd.concat([final,df], ignore_index=True)

  pd.concat([final,df], ignore_index=True)


Unnamed: 0,Ticker_P1,Date,Close_P1,Ticker_P2,Close_P2,abs_spread,abs_spread_mean,abs_spread_std,abs_spread_mean_l28,abs_spread_std_l28,...,entry_signal,exit_signal,stock1_current_Close,stock2_current_Close,stock1_7_days_ago_Close,stock2_7_days_ago_Close,pct_delta_1,pct_delta_2,long_P1,pnl
0,META,2018-12-31,131.089996,WAT,188.649994,57.559998,,,,,...,0,0,135.679993,183.410004,,,,,1,
1,META,2019-01-02,135.679993,WAT,183.410004,47.730011,,,,,...,0,0,131.740005,176.339996,,,,,1,
2,META,2019-01-03,131.740005,WAT,176.339996,44.599991,,,,,...,0,0,137.949997,179.779999,,,,,1,
3,META,2019-01-04,137.949997,WAT,179.779999,41.830002,,,,,...,0,0,138.050003,187.240005,,,,,1,
4,META,2019-01-07,138.050003,WAT,187.240005,49.190002,,,,,...,0,0,142.529999,192.389999,,,,,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1254,META,2023-12-22,353.390015,WAT,329.739990,23.650024,103.764481,66.817611,43.008214,20.422297,...,0,1,354.829987,331.970001,334.739990,312.130005,0.060017,0.063563,1,
1255,META,2023-12-26,354.829987,WAT,331.970001,22.859985,103.754441,66.829412,41.051785,19.588654,...,0,1,357.829987,333.040009,333.170013,321.339996,0.074016,0.036410,0,
1256,META,2023-12-27,357.829987,WAT,333.040009,24.789978,103.748720,66.836062,39.588213,19.200496,...,0,1,358.320007,332.769989,334.920013,320.140015,0.069867,0.039451,0,
1257,META,2023-12-28,358.320007,WAT,332.769989,25.550018,103.727321,66.859426,38.072500,18.539125,...,0,1,353.959991,329.230011,344.619995,322.279999,0.027102,0.021565,0,


In [None]:
# df['cos_sim'] = 
# df.dropna()[['Close_P1','Close_P2']].rolling(500,axis=0).apply(lambda x: x.Close_P1.sum())

# Trading scoring - old

In [29]:
target_date = '2022-09-29'
np.where(df.Date==target_date)[0][0]

944

In [30]:
# Assuming we are scoring for the pair on 2022-09-23
abs_spread_mean = df['abs_spread_mean'][df.Date=='2022-09-29'].values[0]
abs_spread_std = df['abs_spread_std'][df.Date=='2022-09-29'].values[0]

# look forward 120 days
idx = np.where(df.Date==target_date)[0][0]
vec1 = df['Close_P1'][(idx+1):(idx+121)].values
vec2 = df['Close_P2'][(idx+1):(idx+121)].values
dates = df['Date'][(idx+1):(idx+121)].values

In [31]:
result=ExecutePairTrading(
                        abs_spread_mean,
                        abs_spread_std,
                        entry_signal=1.5
                    ).execute(
                        vec1=vec1,
                        vec2=vec2,
                        dates=dates
                    )

In [32]:
result.trade_execution_table

Unnamed: 0,exit_idx,entry_idx,stock1_price_entry,stock1_price_exit,stock2_price_entry,stock2_price_exit,long_stock_1,pnl,entry_dates,exit_dates
0,119,3,138.979996,204.279999,290.839996,298.170013,True,22.232445,2022-10-05,2023-03-23


In [None]:
base_fund = 100
split = 0.5

entry_thresh = abs_spread_mean + 1.5*abs_spread_std
exit_thresh = abs_spread_mean + 0.1*abs_spread_std

In [None]:
# absolute spread of the look forward set
abs_spread = abs(np.array(vec1) - np.array(vec2))

In [None]:
# entry_signals = np.array([0, 1, 1, 1, 0, 0, 1, 1, 0, 0])
# exit_signals =  np.array([1, 0, 0, 0, 1, 0, 0, 0, 0, 1])


# Calculate at which position did the signals appear
entry_signals = np.array([0]+[1 if abs_spread[i-1] >= entry_thresh else 0 for i in range(1, len(abs_spread))])
exit_signals = np.array([0]+[1 if abs_spread[i-1] <= exit_thresh else 0 for i in range(1, len(abs_spread))])

In [None]:
entry_positions = np.where(entry_signals == 1)[0]
exit_positions = np.where(exit_signals == 1)[0]
pairs = []

for entry_pos in entry_positions:
    # Find the first exit position that is greater than the entry position
    next_exit_pos = exit_positions[exit_positions > entry_pos]
    if next_exit_pos.size > 0:
        exit_pos = next_exit_pos[0]
    else:
        # Default exit position if no exit signal is found after the entry signal
        exit_pos = len(entry_signals) - 1
    pairs.append((entry_pos, exit_pos))

In [None]:
# Storing in a temporary table
temp_tb = pd.DataFrame(pairs)
temp_tb.columns = ['entry_idx', 'exit_idx']
temp_tb = temp_tb.groupby('exit_idx').min().reset_index()

In [None]:
temp_tb

When vectorizing the code above, be mindful of scenarios where there are multiple entry and exits during the 120 day window

In [None]:
# At each position, calculate each stocks relative pct delta compated to l7. Short the one with larger pct delta
def long_stock1_flag(stock1, stock2, idx):
    """
    This function is a ultility function to determine which stock to long/short given an entry signal.
    It:
        1. Takes the prices of two stocks, and the position where the entry signal appears 
        2. Calculate the percentage deltas between the current price and the price 7 days ago (or the earliest record) for each stock

    Then we will tell the ago to short the one with higher percentage delta and long the other.

    The function returns a boolean on whether we should long the stock 1.
    """
    stock1_current = stock1[idx]
    stock1_ref = stock1[max(0, idx-7)]

    stock2_current = stock2[idx]
    stock2_ref = stock2[max(0, idx-7)]

    pct_delta_1 = (stock1_current/stock1_ref) - 1
    pct_delta_2 = (stock2_current/stock2_ref) - 1

    if pct_delta_1 >= pct_delta_2:
        return False
    else:
        return True

In [None]:
# get the price for each stock when the entry and exit signal appears
temp_tb['stock1_price_entry'] = vec1[temp_tb['entry_idx']] 
temp_tb['stock1_price_exit'] = vec1[temp_tb['exit_idx']] 
temp_tb['stock2_price_entry'] = vec2[temp_tb['entry_idx']] 
temp_tb['stock2_price_exit'] = vec2[temp_tb['exit_idx']] 

# calculate whether we should long each stock
temp_tb['long_stock_1'] = [long_stock1_flag(vec1, vec2, x) for x in temp_tb.entry_idx]

temp_tb

In [None]:
pnls = []
for row in range(temp_tb.shape[0]):
    long_pnl=0
    short_pnl=0
    if temp_tb.long_stock_1[row]:
        # calculate pnl when we long stock 1 and short stock 2
        long_pnl = base_fund * split * ((temp_tb.stock1_price_exit.values[row] - temp_tb.stock1_price_entry.values[row])/temp_tb.stock1_price_entry.values[row])
        short_pnl = base_fund * (1-split) * ((temp_tb.stock2_price_entry.values[row] - temp_tb.stock2_price_exit.values[row])/temp_tb.stock2_price_entry.values[row])
    else:
        # calculate pnl when we long stock 2 and short stock 1
        long_pnl = base_fund * (1-split) * ((temp_tb.stock2_price_exit.values[row] - temp_tb.stock2_price_entry.values[row])/temp_tb.stock2_price_entry.values[row])
        short_pnl = base_fund * (split) * ((temp_tb.stock1_price_entry.values[row] - temp_tb.stock1_price_exit.values[row])/temp_tb.stock1_price_entry.values[row])
    pnls.append(long_pnl+short_pnl)
temp_tb['pnl'] = pnls

In [None]:
temp_tb

# Testing the module

In [None]:
plt.plot(vec1, label='Stock1', color='red')
plt.plot(vec2, label='Stock2', color='green')
plt.plot(abs(vec1 - vec2), label='Absolute Spread')
plt.axvline(x=3, color='red', linestyle='--', label='Vertical Line')
plt.axvline(x=119, color='red', linestyle='--', label='Vertical Line')

In [None]:
with cProfile.Profile() as pr:
    recorded_info_tb, features_tb, labels_tb = generate_training_data(
        # data=data[data['GICS Sector'].isin(['Information Technology'])],
        data=data,
        training_len=500,
        test_len=120,
        sample_size_per_pair=2
    )

stats = pstats.Stats(pr)
stats.sort_stats(pstats.SortKey.TIME)
# Now you have two options, either print the data or save it as a file
stats.print_stats() # Print The Stats
stats.dump_stats("File/path.prof") # Saves the data in a file, can me used to see the data visually
features_tb
labels_tb

recorded_info_tb.to_csv('Data/all_recorded_info_tb.csv', index=False)
features_tb.to_csv('Data/all_features_tb.csv', index=False)
labels_tb.to_csv('Data/all_labels_tb.csv', index=False)