In [None]:
!pip install datasets evaluate transformers[sentencepiece]
!pip install accelerate


## BERT Financial Sentiment

In [1]:
from datasets import load_dataset
from datasets import load_from_disk
train_dataset = load_from_disk('./twitter-financial-news-sentiment/train')

validation_dataset = load_from_disk('./twitter-financial-news-sentiment/validation')

print(train_dataset[0])
print(validation_dataset[0])


{'text': '$BYND - JPMorgan reels in expectations on Beyond Meat https://t.co/bd0xbFGjkT', 'label': 0}
{'text': '$ALLY - Ally Financial pulls outlook https://t.co/G9Zdi1boy5', 'label': 0}


In [2]:
from transformers import BertTokenizer
from transformers import BertForSequenceClassification
model_path = './saved_model'
model = BertForSequenceClassification.from_pretrained(model_path, num_labels=3)

tokenizer = BertTokenizer.from_pretrained(model_path)

def tokenize_function(examples):
    return tokenizer(examples['text'], padding="max_length", truncation=True)

train_dataset = train_dataset.map(tokenize_function, batched=True)
validation_dataset = validation_dataset.map(tokenize_function, batched=True)

train_dataset = train_dataset.remove_columns(["text"])
validation_dataset = validation_dataset.remove_columns(["text"])

train_dataset.set_format("torch")
validation_dataset.set_format("torch")


In [None]:
import numpy as np

def compute_accuracy(eval_pred):
    logits, labels = eval_pred
    # Get the predicted class (0 or 1) by taking the argmax of the logits
    predictions = np.argmax(logits, axis=-1)
    labels = np.squeeze(labels)  # Squeeze to remove unnecessary dimensions
    # Compute the number of correct predictions
    correct_predictions = np.sum(predictions == labels)
    # Compute the accuracy as the proportion of correct predictions
    accuracy = correct_predictions / len(labels)
    return {"accuracy": accuracy}


In [13]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,  # if available
)

trainer.train()
# sinteractive -p gpu2 --gres=gpu:3 --mem=50G --time=12:00:00 --account=mpcs53113

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss
1,No log,0.496802
2,0.585000,0.413007




TrainOutput(global_step=796, training_loss=0.48482224929272827, metrics={'train_runtime': 1592.8937, 'train_samples_per_second': 11.982, 'train_steps_per_second': 0.5, 'total_flos': 5021782690756608.0, 'train_loss': 0.48482224929272827, 'epoch': 2.0})

In [14]:

results = trainer.evaluate()

print(results)

{'eval_loss': 0.41300681233406067, 'eval_runtime': 78.5922, 'eval_samples_per_second': 30.385, 'eval_steps_per_second': 1.272, 'epoch': 2.0}


In [22]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.to(device)


text = "The FOMC meeting showed concerns about inflation."

inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)

outputs = model(**inputs)

predictions = torch.argmax(outputs.logits, dim=-1)

sentiments = {0: "Bearish", 1: "Bullish", 2: "Neutral"}
print("Predicted sentiment:", sentiments[predictions.item()])


Predicted sentiment: Neutral


In [3]:
from transformers import BertTokenizer

from transformers import BertForSequenceClassification

model_path = './saved_model'

orig_model = BertForSequenceClassification.from_pretrained(model_path, num_labels=3)

orig_tokenizer = BertTokenizer.from_pretrained(model_path)


In [20]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

orig_model.to(device)


text = "The FOMC meeting showed concerns about inflation."

inputs = orig_tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)

outputs = orig_model(**inputs)

predictions = torch.argmax(outputs.logits, dim=-1)

sentiments = {0: "Bearish", 1: "Bullish", 2: "Neutral"}
print("Original Predicted sentiment:", sentiments[predictions.item()])


Original Predicted sentiment: Bearish


In [25]:
trainer = Trainer(
    model=orig_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
)
results = trainer.evaluate()

print(results)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


{'eval_loss': 1.0871833562850952, 'eval_runtime': 78.4621, 'eval_samples_per_second': 30.435, 'eval_steps_per_second': 1.275}


## Bert on FOMC to predict SPY

In [27]:
from datasets import load_dataset
from datasets import load_from_disk
dataset = load_dataset('csv', data_files='fomc_spy_aligned_data.csv')

print(dataset)


DatasetDict({
    train: Dataset({
        features: ['Date', 'FOMC_Minutes_Text', 'SPY_Price_Difference'],
        num_rows: 42
    })
})


In [28]:

def tokenize_function(examples):
    return orig_tokenizer(examples['FOMC_Minutes_Text'], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

print(tokenized_dataset)

DatasetDict({
    train: Dataset({
        features: ['Date', 'FOMC_Minutes_Text', 'SPY_Price_Difference', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 42
    })
})


In [29]:
# Doing regression, keep SPY_Price_Difference as the label

tokenized_dataset = tokenized_dataset.rename_column("SPY_Price_Difference", "labels")

tokenized_dataset = tokenized_dataset.remove_columns(["Date", "FOMC_Minutes_Text"])

tokenized_dataset.set_format("torch")

# Split the dataset into train and test sets
train_test_split = tokenized_dataset['train'].train_test_split(test_size=0.2)
train_dataset = train_test_split['train']
test_dataset = train_test_split['test']

print(train_dataset)
print(test_dataset)


Dataset({
    features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 33
})
Dataset({
    features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 9
})


In [30]:

SPY_model = BertForSequenceClassification.from_pretrained(model_path, num_labels=1, ignore_mismatched_sizes=True)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ./saved_model and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([1]) in the model instantiated
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([1, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [31]:
from transformers import Trainer, TrainingArguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

trainer = Trainer(
    model=SPY_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [32]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,No log,33.75436
2,No log,33.554718
3,No log,33.520226


TrainOutput(global_step=15, training_loss=16.580052693684895, metrics={'train_runtime': 182.0036, 'train_samples_per_second': 0.544, 'train_steps_per_second': 0.082, 'total_flos': 26047760606208.0, 'train_loss': 16.580052693684895, 'epoch': 3.0})

In [33]:

results = trainer.evaluate()

print("MSE")
print(results)

MSE
{'eval_loss': 33.520225524902344, 'eval_runtime': 4.6859, 'eval_samples_per_second': 1.921, 'eval_steps_per_second': 0.427, 'epoch': 3.0}


In [34]:
orig_SPY_model = BertForSequenceClassification.from_pretrained(model_path, num_labels=1, ignore_mismatched_sizes=True)
orig_trainer = Trainer(
    model=orig_SPY_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)
results = orig_trainer.evaluate()

print("Without Training")
print(results)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ./saved_model and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([1]) in the model instantiated
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([1, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Without Training
{'eval_loss': 32.53522872924805, 'eval_runtime': 4.1604, 'eval_samples_per_second': 2.163, 'eval_steps_per_second': 0.481}


## Comparison vs Buy and Hold

In [35]:
import numpy as np
def compute_cumulative_gaps(eval_pred):
    predictions, labels = eval_pred
    predictions = np.squeeze(predictions)
    labels = np.squeeze(labels)

    cumulative_true_price_gap = np.sum(labels)  # Sum of actual price gaps

    cumulative_predicted_price_gap = 0.0  # Initialize the cumulative predicted price gap

    # Loop through each prediction and label
    for pred, true_gap in zip(predictions, labels):
        if pred > 0:
            # Add the true price gap if the prediction is positive
            cumulative_predicted_price_gap += true_gap
        else:
            # Subtract the true price gap if the prediction is negative
            cumulative_predicted_price_gap -= true_gap

    return {
        "cumulative_true_price_gap": cumulative_true_price_gap,
        "cumulative_predicted_price_gap": cumulative_predicted_price_gap
    }
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

trainer = Trainer(
    model=SPY_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_cumulative_gaps  # Use the custom function to calculate cumulative price gaps
)

results = trainer.evaluate()

print(f"Our strategy: {results['eval_cumulative_predicted_price_gap']:.4f}")
print(f"Buy and Hold: {results['eval_cumulative_true_price_gap']:.4f}")


Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Our strategy: -4.2499
Buy and Hold: -4.2499


## Longer Holding periods

In [18]:
from datasets import load_dataset
from datasets import load_from_disk

dataset = load_dataset('csv', data_files='longer_fomc_spy_price_gap_data.csv')

print(dataset)

DatasetDict({
    train: Dataset({
        features: ['Date', 'FOMC_Minutes_Text', 'SPY_Price_Gap', 'Opening_Price_Next_Day', 'Closing_Price_One_Month_Later'],
        num_rows: 44
    })
})


In [19]:

tokenized_dataset = dataset.map(tokenize_function, batched=True)

print(tokenized_dataset)

DatasetDict({
    train: Dataset({
        features: ['Date', 'FOMC_Minutes_Text', 'SPY_Price_Gap', 'Opening_Price_Next_Day', 'Closing_Price_One_Month_Later', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 44
    })
})


In [20]:


tokenized_dataset = tokenized_dataset.rename_column("SPY_Price_Gap", "labels")

tokenized_dataset = tokenized_dataset.remove_columns(["Date", "FOMC_Minutes_Text"])

tokenized_dataset.set_format("torch")

# Split the dataset into train and test sets
train_test_split = tokenized_dataset['train'].train_test_split(test_size=0.2)
train_dataset = train_test_split['train']
test_dataset = train_test_split['test']

print(train_dataset)
print(test_dataset)


Dataset({
    features: ['labels', 'Opening_Price_Next_Day', 'Closing_Price_One_Month_Later', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 35
})
Dataset({
    features: ['labels', 'Opening_Price_Next_Day', 'Closing_Price_One_Month_Later', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 9
})


In [21]:
long_SPY_model = BertForSequenceClassification.from_pretrained(model_path, num_labels=1, ignore_mismatched_sizes=True)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ./saved_model and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([1]) in the model instantiated
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([1, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


from transformers import Trainer, TrainingArguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

trainer = Trainer(
    model=long_SPY_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

results = trainer.evaluate()

print("MSE Before")
print(results)

In [23]:
trainer.train()
results = trainer.evaluate()

print("MSE After")
print(results)

Epoch,Training Loss,Validation Loss
1,No log,241.00946
2,No log,240.551193
3,No log,240.429993


MSE After
{'eval_loss': 240.42999267578125, 'eval_runtime': 4.8992, 'eval_samples_per_second': 1.837, 'eval_steps_per_second': 0.408, 'epoch': 3.0}


## Compare vs B&H

In [24]:
trainer = Trainer(
    model=long_SPY_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_cumulative_gaps  # Use the custom function to calculate cumulative price gaps
)

results = trainer.evaluate()

print(f"Long holding time/Our strategy: {results['eval_cumulative_predicted_price_gap']:.4f}")
print(f"Long holding time/Buy and Hold: {results['eval_cumulative_true_price_gap']:.4f}")

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Long holding time/Our strategy: 33.1599
Long holding time/Buy and Hold: 10.7739


## Using RobertA Model

In [1]:
from datasets import load_dataset
from datasets import load_from_disk
dataset = load_dataset('csv', data_files='fomc_spy_aligned_data.csv')

print(dataset)

DatasetDict({
    train: Dataset({
        features: ['Date', 'FOMC_Minutes_Text', 'SPY_Price_Difference'],
        num_rows: 42
    })
})


In [8]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification

load_directory = './saved_roberta_model'
tokenizer = RobertaTokenizer.from_pretrained(load_directory)
model = RobertaForSequenceClassification.from_pretrained(load_directory, num_labels=1,ignore_mismatched_sizes=True)

print("Model and tokenizer loaded from disk.")


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./saved_roberta_model and are newly initialized because the shapes did not match:
- classifier.out_proj.bias: found shape torch.Size([2]) in the checkpoint and torch.Size([1]) in the model instantiated
- classifier.out_proj.weight: found shape torch.Size([2, 768]) in the checkpoint and torch.Size([1, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model and tokenizer loaded from disk.


In [3]:

def tokenize_function(examples):
    return tokenizer(examples['FOMC_Minutes_Text'], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

print(tokenized_dataset)

DatasetDict({
    train: Dataset({
        features: ['Date', 'FOMC_Minutes_Text', 'SPY_Price_Difference', 'input_ids', 'attention_mask'],
        num_rows: 42
    })
})


In [4]:
tokenized_dataset = tokenized_dataset.rename_column("SPY_Price_Difference", "labels")

tokenized_dataset = tokenized_dataset.remove_columns(["Date", "FOMC_Minutes_Text"])

tokenized_dataset.set_format("torch")

# Split the dataset into train and test sets
train_test_split = tokenized_dataset['train'].train_test_split(test_size=0.2)
train_dataset = train_test_split['train']
test_dataset = train_test_split['test']

print(train_dataset)
print(test_dataset)

Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 33
})
Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 9
})


In [9]:
import numpy as np

# Define a custom MSE function
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # Since it's regression, we need to squeeze the logits and labels
    predictions = np.squeeze(logits)
    labels = np.squeeze(labels)
    # Compute Mean Squared Error (MSE) manually
    mse = np.mean((predictions - labels) ** 2)
    return {"mse": mse}


In [10]:
from transformers import Trainer, TrainingArguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

results = trainer.evaluate()

print("MSE Before")
print(results)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


MSE Before
{'eval_loss': 27.02898406982422, 'eval_mse': 27.028982162475586, 'eval_runtime': 0.4434, 'eval_samples_per_second': 20.297, 'eval_steps_per_second': 2.255}


In [11]:
trainer.train()
results = trainer.evaluate()

print("MSE After")
print(results)

Epoch,Training Loss,Validation Loss,Mse
1,No log,27.221447,27.221443
2,No log,27.251722,27.25172
3,No log,27.286657,27.286655


MSE After
{'eval_loss': 27.286657333374023, 'eval_mse': 27.28665542602539, 'eval_runtime': 0.3567, 'eval_samples_per_second': 25.234, 'eval_steps_per_second': 2.804, 'epoch': 3.0}
