In [1]:
from utils.returns_data_class import ReturnsData
from utils.pointwise_context import get_contextual_indices

# Experiment configuration (values a reader may want to tune).
PERIODS = [1, 5, 10]  # each value is handed to ReturnsData.change_returns_period
TRAIN_PCT = 1  # handed to ReturnsData.train_test_split; presumably a fraction -- TODO confirm
CONTEXT_SIZE = 32  # forwarded to get_contextual_indices as `context_size`

# Accumulates the (target, context) sets produced for every period in PERIODS.
tgt_context_sets = []
for period in PERIODS:
    # The returns data is re-read from disk on every iteration, so each period
    # starts from the raw CSVs. NOTE: `data` from the final iteration stays
    # bound after the loop and is what later cells use.
    data = ReturnsData(
        daily_returns_path="Data/returns_df_611.csv",
        extras_path="Data/historical_stocks.csv",
    )
    data.change_returns_period(period)
    data.train_test_split(TRAIN_PCT)

    period_context_sets = get_contextual_indices(
        data.train_returns_df,
        context_size=CONTEXT_SIZE,
        verbose=True,
        iqr_noise_reduction=True,
    )
    tgt_context_sets.extend(period_context_sets)

No change made because period entered is 1


100%|██████████| 611/611 [00:05<00:00, 109.26it/s]
100%|██████████| 611/611 [00:01<00:00, 510.76it/s] 
100%|██████████| 611/611 [00:00<00:00, 1768.42it/s]


In [2]:
import numpy as np
import pandas as pd

# Sanity check: which tickers show up most often in the context sets of JPM?
# `data` is the ReturnsData instance left over from the last loop iteration.
i = data.ticker2idx["JPM"]

# Each entry of tgt_context_sets is indexed as (xi[0], xi[1]); keep xi[1] for
# the entries whose xi[0] equals the JPM index and flatten them into one 1-D
# array of indices.
temp = np.array([xi[1] for xi in tgt_context_sets if xi[0] == i]).flatten()

print("Top stocks most commonly cooccurring:")
# value_counts() orders by descending frequency, so the first 10 index entries
# are the most frequent ones; slice before the ticker lookup so only those 10
# are translated.
[data.idx2ticker[xi] for xi in pd.Series(temp).value_counts().index[:10]]

Top stocks most commonly cooccurring:


['C', 'BAC', 'STI', 'WFC', 'PNC', 'HBAN', 'ZION', 'STT', 'KEY', 'CMA']

In [3]:
from models.embedding_models import ClassificationEmbeddings
EMBEDDING_DIM = 20  # passed to the model as `embedding_dim`

# One embedding per ticker known to `data` (the ReturnsData from the last period).
n_tickers = len(data.tickers)
model = ClassificationEmbeddings(n_time_series=n_tickers, embedding_dim=EMBEDDING_DIM)

In [4]:
from utils.training_helpers import train_embeddings_from_tgt_context_sets
EPOCHS = 20  # epoch budget handed to the trainer; the recorded run below stopped early

# Arguments for the training helper. `embedding_dim` is deliberately not
# forwarded (it was left disabled in this call); the already-constructed
# `model` is passed in instead.
training_kwargs = dict(
    n_time_series=len(data.tickers),
    tgt_context_sets=tgt_context_sets,
    model=model,
    epochs=EPOCHS,
    verbose=True,
)
# The helper hands back the model together with `losses`; `model` is rebound
# to the returned object.
model, losses = train_embeddings_from_tgt_context_sets(**training_kwargs)

Training embeddings...


  5%|▌         | 1/20 [00:47<14:58, 47.29s/it]

Epoch 0: Loss = 0.10380644054553727


 10%|█         | 2/20 [01:34<14:13, 47.42s/it]

Epoch 1: Loss = 0.10123775190470778


 15%|█▌        | 3/20 [02:21<13:18, 46.99s/it]

Epoch 2: Loss = 0.09955468019632388


 20%|██        | 4/20 [03:09<12:39, 47.45s/it]

Epoch 3: Loss = 0.09863335297636991


 25%|██▌       | 5/20 [03:57<11:53, 47.59s/it]

Epoch 4: Loss = 0.09813912793488956


 30%|███       | 6/20 [04:44<11:03, 47.42s/it]

Epoch 5: Loss = 0.09785007430798376


 30%|███       | 6/20 [05:31<12:52, 55.19s/it]

Epoch 6: Loss = 0.09766151166512842
Early stopping at epoch 6 due to minimal loss reduction.





In [7]:
from utils.sector_classification import get_sector_score

# Detach the embedding weights from the model and convert them to a NumPy
# array, then score them against the sector labels held by `data`.
embedding_matrix = model.embeddings.weight.detach().numpy()
get_sector_score(embedding_matrix, sectors=data.sectors)

Precision Score: 0.6
Recall Score: 0.58
F1 Score: 0.58
Accuracy Score: 0.58


In [None]:
SAVE_MODEL = False  # flag only; nothing in this cell reads it
SAVE_PATH_TEMPLATE = "embeddings/abs_diff_E{epochs}_C{context_size}_D{embedding_dim}_P{periods}_train{train_pct}.pt"

# Values substituted into the template. PERIODS is rendered as a dash-joined
# string, e.g. [1, 5, 10] -> "1-5-10". Note that `epochs` is the configured
# EPOCHS constant, not the number of epochs actually run.
save_path_fields = {
    "epochs": EPOCHS,
    "context_size": CONTEXT_SIZE,
    "embedding_dim": EMBEDDING_DIM,
    "periods": "-".join(str(p) for p in PERIODS),
    "train_pct": TRAIN_PCT,
}
save_path = SAVE_PATH_TEMPLATE.format(**save_path_fields)