In [1]:
import cProfile
import itertools
import pstats
import random
from pathlib import Path

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from numpy.linalg import norm

from pair_trading_foundations.data_generation import ExecutePairTrading, generate_training_data

# Seed Python's global `random` generator once, after all imports, for reproducible runs.
random.seed(23)

In [2]:
# Load the raw price table (the file name suggests S&P 500 data from 2018-12-31 to 2023-12-29).
sp500_csv = 'Data/sp500_full_20181231_to_20231229.csv'
data = pd.read_csv(sp500_csv)

In [3]:
# Count rows per ticker and keep only the tickers that have the maximum row
# count, so every retained ticker has the same number of observations.
value_count_tb = data.groupby('Ticker').size().reset_index(name='Count')
stock_to_keep = value_count_tb.loc[
    value_count_tb['Count'] == value_count_tb['Count'].max(), 'Ticker'
]
data = data[data.Ticker.isin(stock_to_keep)]

# Draw the 10 tickers from a dedicated generator seeded with the notebook seed (23)
# rather than from the global `random` state. The global state advances on every
# draw, so re-running this cell used to produce a different sample each time; a
# fresh Random(23) yields the same tickers on every run.
sampled_tickers = random.Random(23).sample(list(stock_to_keep.values), 10)

# Two working subsets: the whole Information Technology sector, and the random sample.
data_tech = data[data['GICS Sector'].isin(['Information Technology'])]
data_sampled = data[data['Ticker'].isin(sampled_tickers)]

In [4]:
# data_sampled = data_sampled[['Date', 'Ticker','Close', 'GICS Sector', 'GICS Sub-Industry']]

In [5]:
# Rough per-pair cost in seconds. The divisor 45 matches the "45 stock pairs
# detected" run; 15 and 30 are presumably the observed wall-clock seconds — confirm.
features_secs_per_pair = 15/45
features_and_labels_secs_per_pair = 30/45
print(f"Used around {features_secs_per_pair}s per pair for features of full history")
print(f"Used around {features_and_labels_secs_per_pair}s per pair for features and labels of full history")

Used around 0.3333333333333333s per pair for features of full history
Used around 0.6666666666666666s per pair for features and labels of full history


In [8]:
# Back-of-envelope runtime in minutes: 19900 pairs (equals 200 choose 2; presumably
# a planned 200-ticker run — confirm) at ~0.67 s per pair, divided by 60.
(19900*0.67)/60

222.21666666666667

In [9]:
# Converts 222 to units of 60; presumably the ~222-minute estimate expressed in hours.
222/60

3.7

In [11]:
# How many pairs fit in one hour (60*60 seconds) at ~0.67 s per pair.
(60*60)/0.67

5373.134328358208

In [6]:
# Build the feature, label and PnL-metadata tables for the sampled tickers
# (500-observation training window, 120-observation test window).
features_tb, labels_tb, pnl_metadata_tb = generate_training_data(
    data=data_sampled,
    training_len=500,
    test_len=120,
    calculate_label=True,
    verbose=False,
)

45 stock pairs detected
Took 0.011698246002197266 to initilize. Entering ticker pair loop


  features_tb = pd.concat(
  labels_tb = pd.concat(
  pnl_metadata_tb = pd.concat(


Took 28.26302409172058 to finish


In [16]:
# Rows with no missing values only, then the per-row trade execution tables.
pnl_metadata_tb.dropna().loc[:, 'trade_executions']

685        exit_idx  entry_idx  stock1_price_entry  st...
686        exit_idx  entry_idx  stock1_price_entry  st...
687        exit_idx  entry_idx  stock1_price_entry  st...
688        exit_idx  entry_idx  stock1_price_entry  st...
689        exit_idx  entry_idx  stock1_price_entry  st...
                              ...                        
1136       exit_idx  entry_idx  stock1_price_entry  st...
1137       exit_idx  entry_idx  stock1_price_entry  st...
1138       exit_idx  entry_idx  stock1_price_entry  st...
1139       exit_idx  entry_idx  stock1_price_entry  st...
1140       exit_idx  entry_idx  stock1_price_entry  st...
Name: trade_executions, Length: 11340, dtype: object

# Get the tech sector only

In [None]:
# Profile the tech-sector data generation to see where the time is spent.
# NOTE(review): the other calls in this notebook unpack the result as
# (features_tb, labels_tb, pnl_metadata_tb) and do not pass `sample_size_per_pair`;
# confirm that this unpack order and keyword still match generate_training_data.
with cProfile.Profile() as pr:
    recorded_info_tb, features_tb, labels_tb = generate_training_data(
        data=data_tech,
        training_len=500,
        test_len=120,
        sample_size_per_pair=10,
    )

# sort_stats returns the Stats object itself, so creation and sorting can be chained.
stats = pstats.Stats(pr).sort_stats(pstats.SortKey.TIME)
# The report can either be printed (here) or saved to a file.
stats.print_stats()  # Print the stats

In [None]:
# Save the profile for later inspection. dump_stats only opens the file for
# writing, so create the target directory first; otherwise a missing "logs"
# folder raises FileNotFoundError.
profile_path = Path("logs/path.prof")
profile_path.parent.mkdir(parents=True, exist_ok=True)
stats.dump_stats(profile_path)  # Saves the data

In [None]:
# Inner-join the three tech-sector tables on the pair/date key so that only
# rows present in all of them survive.
merge_keys = ['ticker1', 'ticker2', 'target_date']
combined = (
    recorded_info_tb
    .merge(features_tb, how='inner', on=merge_keys)
    .merge(labels_tb, how='inner', on=merge_keys)
)

In [None]:
# Persist the merged tech-sector table; index=False keeps the row index out of the CSV.
combined.to_csv('Data/tech_sector_pairs.csv', index=False)

In [None]:
combined

# Get sampled combinations (ticker pairs)

In [None]:
# Estimated minutes for 124750 pairs (equals 500 choose 2) at ~0.0131 s each.
# NOTE(review): where the per-pair timing comes from is not recorded here — confirm.
(0.013090133666992188 * 124750)/60

In [None]:
# Total for 19900 pairs (200 choose 2) at 0.28 per pair — presumably seconds
# from a newer per-pair timing; confirm.
19900*0.28

In [None]:
# 5572 equals 19900*0.28; dividing by 60 presumably converts seconds to minutes.
5572/60

In [None]:
# Regenerate the feature, label and PnL-metadata tables for the sampled tickers,
# this time requesting both labels and features explicitly.
features_tb, labels_tb, pnl_metadata_tb = generate_training_data(
    data=data_sampled,
    training_len=500,
    test_len=120,
    calculate_label=True,
    calculate_feature=True,
)

In [None]:
# Inner-join the three tables produced for the sampled tickers and save them.
# The original merged `recorded_info_tb`, which the sampled-data run never assigns
# (it returns features_tb, labels_tb, pnl_metadata_tb); that name only exists as a
# leftover from the tech-sector cell, so stale tech-sector rows were being joined
# to sampled-ticker features. Use the metadata table from the same run instead.
# NOTE(review): assumes pnl_metadata_tb carries the ticker1/ticker2/target_date
# key columns like the other two tables — confirm.
merge_keys = ['ticker1', 'ticker2', 'target_date']
combined = (
    pnl_metadata_tb
    .merge(features_tb, how='inner', on=merge_keys)
    .merge(labels_tb, how='inner', on=merge_keys)
)
combined.to_csv('Data/sampled_sector_pairs_200.csv', index=False)

# Examining the sampled pair data

In [None]:
# Reload the sampled-pairs table from disk (the same path the generation section
# writes to), so the examination below can run without regenerating the data.
combined = pd.read_csv('Data/sampled_sector_pairs_200.csv')

In [None]:
combined

In [None]:
# Remove exact duplicate rows and renumber the index from 0.
combined = combined.drop_duplicates().reset_index(drop=True)

In [None]:
combined.shape

In [None]:
combined.head()

In [None]:
combined.shape

In [None]:
# Show the signature and documentation of random.choices. The original bare call
# `random.choices()` omits the required `population` argument and always raises
# TypeError, which stops a Restart-and-Run-All at this cell.
help(random.choices)

In [None]:
# Number of rows per (ticker1, ticker2) pair, smallest first.
pair_keys = ['ticker1', 'ticker2']
combined.groupby(pair_keys).size().sort_values()

In [None]:
# All distinct rows for the COR / META pair.
is_cor_meta = (combined['ticker1'] == 'COR') & (combined['ticker2'] == 'META')
combined.loc[is_cor_meta].drop_duplicates()

In [None]:
# Summary statistics of the total_pnl column.
combined['total_pnl'].describe()

In [None]:
# Rows whose total_pnl exceeds 0.7.
combined.loc[combined['total_pnl'] > 0.7]

In [None]:
# Pair to inspect.
ticker1, ticker2 = 'AOS', 'AMD'
# Unique dates in the filtered data, kept for locating a date's position later.
all_dates = data['Date'].unique()
# Full Close-price history of each ticker as a NumPy array.
vec1_full = data.loc[data['Ticker'] == ticker1, 'Close'].to_numpy()
vec2_full = data.loc[data['Ticker'] == ticker2, 'Close'].to_numpy()

In [None]:
# Position of 2021-11-23 within all_dates (first match; raises IndexError if the date is absent).
idx = np.flatnonzero(all_dates == '2021-11-23')[0]

In [None]:
# Plot both full price histories and mark idx together with the offsets used
# elsewhere in this notebook (500 = training_len, 120 = test_len).
# The original passed labels but never drew a legend, and gave all three vertical
# lines the same 'Vertical Line' label; each line now has its own label and the
# figure has a title and axis labels so it can be read on its own.
fig, ax = plt.subplots()
ax.plot(vec1_full, label=ticker1, color='green')
ax.plot(vec2_full, label=ticker2, color='blue')
# ax.plot(abs(vec1_full - vec2_full), label='Absolute Spread', color='grey')
ax.axvline(x=idx, color='red', linestyle='--', label='idx')
ax.axvline(x=idx-500, color='red', linestyle=':', label='idx - 500')
ax.axvline(x=idx+120, color='red', linestyle='-.', label='idx + 120')
ax.set(title=f'{ticker1} vs {ticker2}: Close', xlabel='Observation index', ylabel='Close')
ax.legend();

In [None]:
# Run the pair-trading execution on the 120 observations starting at idx.
# NOTE(review): the spread mean/std are hard-coded; presumably copied from a row
# of `combined` for this pair and date — confirm.
test_window = slice(idx, idx + 120)
executor = ExecutePairTrading(
    abs_spread_mean=21.923220,
    abs_spread_std=15.530315,
)
trade = executor.execute(
    vec1=vec1_full[test_window],
    vec2=vec2_full[test_window],
)

In [None]:
trade.trade_execution_table

In [None]:
# Zoom in on the 120 observations starting at idx (the same slices passed to
# ExecutePairTrading). The original set labels but never drew a legend and gave
# both vertical lines the same 'Vertical Line' label.
# NOTE(review): x=1 and x=106 are presumably the entry/exit positions taken from
# trade.trade_execution_table — confirm.
fig, ax = plt.subplots()
ax.plot(vec1_full[idx:(idx+120)], label=ticker1, color='green')
ax.plot(vec2_full[idx:(idx+120)], label=ticker2, color='blue')
ax.axvline(x=1, color='red', linestyle='--', label='x = 1')
ax.axvline(x=106, color='red', linestyle=':', label='x = 106')
ax.set(
    title=f'{ticker1} vs {ticker2}: 120 observations from idx',
    xlabel='Observations since idx',
    ylabel='Close',
)
ax.legend();

In [None]:
trade.final_pl_pct

In [None]:
trade.final_pl