In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.4.1-py3-none-any.whl (487 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m487.4/487.4 kB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.

In [2]:
from google.colab import drive
import json
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
MODEL_FILE = "/content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model"
METRIC_FILE = "/content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model/training_metrics.json"

In [4]:
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, TrainingArguments, Trainer
import torch
from torch.utils.data import Dataset
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import os
import re
from transformers import TrainerCallback
from sklearn.metrics import accuracy_score, f1_score, precision_score
torch.manual_seed(24)
os.environ["WANDB_DISABLED"] = "true"

#!pip install datasets

def remove_wallets(text):
    # This is a basic implementation - modify if your original remove_wallets was different
    # Common crypto wallet patterns (like Bitcoin/Ethereum addresses)
    wallet_pattern = r'0x[a-fA-F0-9]{40}|[13][a-km-zA-HJ-NP-Z1-9]{25,34}'
    return re.sub(wallet_pattern, '', text)

# Apply cleaning operations specified to the paper, https://github.com/mikik1234/CryptoBERT-LUKE/blob/main/CODE_Data_Collection.ipynb
def clean_text(text):
    # Remove Asian characters
    text = re.sub(r'[\u4e00-\u9fff]+', '', text)
    # Remove URLs
    text = re.sub(r'http\S+|www\S+', '', text)
    # Remove mentions, hashtags, stock symbols, and forward slashes with content
    text = re.sub(r'[@][A-Za-z0-9_]+|#[A-Za-z0-9_]+|$[A-Za-z0-9_ ]+|/[A-Za-z0-9_ ]+', '', text)
    # Remove RT prefix
    text = re.sub(r'RT : ', '', text)
    # Replace & with 'and'
    text = re.sub(r'&', 'and', text)
    # Handle special characters and quotes
    text = re.sub(r'â€™', '\'', text)
    text = re.sub(r'["&;]', '', text)
    text = re.sub(r'', '', text)  # Zero-width space
    # Remove .X or .x
    text = re.sub(r'\.[Xx]', '', text)
    # Normalize multiple dots to ellipsis
    text = re.sub(r'\.\.+', '...', text)
    # Remove standalone @ and pipe symbols
    text = re.sub(r'@|\|', '', text)
    # Normalize spaces
    text = re.sub(r'\s+', ' ', text).strip()
    # Convert to lowercase
    text = text.lower()
    # Remove wallet addresses
    text = remove_wallets(text)
    text = re.sub(r'^\s*\S+(?:\s+\S+){0,2}\s*$', '', text) # Remove short texts (fewer than 4 words)
    return text

def sentiment_map(text):
  if 'Bullish' in text:
    return 0
  elif 'Neutral' in text:
    return 1
  else:
    return 2

In [5]:
import torch
import numpy as np
import random

# Set random seeds for reproducibility
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Create a generator for the split
generator = torch.Generator().manual_seed(seed)


In [6]:
data = load_dataset("StephanAkkerman/financial-tweets-crypto")
train_dataset_ori = data['train']

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

crypto.csv:   0%|          | 0.00/54.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/57935 [00:00<?, ? examples/s]

In [7]:

print(f'No. of data: {len(train_dataset_ori)}')
train_dataset_ori = train_dataset_ori.filter(lambda data: data['sentiment'] is not None)
print(f'No. of data after remove sentiment equals to none: {len(train_dataset_ori)}')
train_dataset_ori = train_dataset_ori.filter(lambda data: data['tweet_type']!='quote tweet')
print(f'No. of data after remove quote tweet: {len(train_dataset_ori)}')
train_dataset_ori = train_dataset_ori.filter(lambda data: len(data['description'].split(' '))>1)
print(f'No. of data after remove short text: {len(train_dataset_ori)}')
train_dataset_ori = train_dataset_ori.to_pandas()
train_dataset_ori['description'] = train_dataset_ori['description'].apply(clean_text)
train_dataset_ori.drop_duplicates(inplace=True, ignore_index=True)
print(f'No. of data after remove duplicates: {len(train_dataset_ori)}') # Make sure the records here remains the same after remove duplicates, else the following train test split might be different
train_dataset_ori['sentiment_label'] = train_dataset_ori['sentiment'].apply(sentiment_map)



No. of data: 57935


Filter:   0%|          | 0/57935 [00:00<?, ? examples/s]

No. of data after remove sentiment equals to none: 48692


Filter:   0%|          | 0/48692 [00:00<?, ? examples/s]

No. of data after remove quote tweet: 46866


Filter:   0%|          | 0/46866 [00:00<?, ? examples/s]

No. of data after remove short text: 45567
No. of data after remove duplicates: 45567


In [None]:
# train_dataset_ori = train_dataset_ori.iloc[:1000]

In [9]:
num_samples = len(train_dataset_ori)
# Create an array of indices
indices = np.arange(num_samples)

# Shuffle the indices randomly
np.random.seed(42)  # Set a seed for reproducibility
np.random.shuffle(indices)

# Split the indices into train, validation, and test sets
train_size = int(num_samples * 0.8)  # 80% for training
val_size = int(num_samples * 0.1)  # 10% for validation
test_size = num_samples - train_size - val_size  # 10% for testing

# Split the shuffled indices
train_idx = indices[:train_size]
val_idx = indices[train_size:train_size + val_size]
test_idx = indices[train_size + val_size:]

# Print the sizes of each split
print(f"Train size: {len(train_idx)}")
print(f"Validation size: {len(val_idx)}")
print(f"Test size: {len(test_idx)}")

Train size: 36453
Validation size: 4556
Test size: 4558


In [10]:
train_dataset = train_dataset_ori.loc[train_idx]
valid_dataset = train_dataset_ori.loc[val_idx]
test_dataset = train_dataset_ori.loc[test_idx]

In [11]:
# 2. Prepare the data
#sentiment_map = {'positive': 2, 'neutral': 1, 'negative': 0}  # Adjust based on your actual sentiment values

class TweetDataset(Dataset):
    def __init__(self, texts, encodings, labels):
        self.encodings = encodings
        self.labels = labels
        self.texts = texts

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        # print(self.texts[idx])
        item['text'] = self.texts[idx]
        # item['text'] = torch.tensor(self.texts[idx])
        return item

    def __len__(self):
        return len(self.labels)

model_name = "ElKulako/cryptobert"

# 3. Initialize tokenizer
#tokenizer = BertTokenizer.from_pretrained('ElKulako/cryptobert')
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# 6. Create dataset
encodings = tokenizer(train_dataset['description'].to_list(),
                      truncation=True, padding="max_length", max_length=128)
train_dataset = TweetDataset(train_dataset['description'].to_list(), encodings, train_dataset['sentiment_label'].to_list())
encodings = tokenizer(valid_dataset['description'].to_list(),
                      truncation=True, padding="max_length", max_length=128)
val_dataset = TweetDataset(valid_dataset['description'].to_list(), encodings, valid_dataset['sentiment_label'].to_list())
encodings = tokenizer(test_dataset['description'].to_list(),
                      truncation=True, padding="max_length", max_length=128)
test_dataset = TweetDataset(test_dataset['description'].to_list(), encodings, test_dataset['sentiment_label'].to_list())


tokenizer_config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/957 [00:00<?, ?B/s]

In [12]:

# # 11. Define a custom callback to print validation stats
# class PrintValidationStatsCallback(TrainerCallback):
#     def __init__(self, trainer=None):
#         super().__init__()
#         self.trainer = trainer  # Store the trainer instance

#     def on_log(self, args, state, control, logs=None, **kwargs):
#         # Your custom logic
#         super().on_log(args, state, control, **kwargs)

#     # def on_epoch_end(self, args, state, control, **kwargs):
#     # def on_epoch_end(self, args, state, control, **kwargs):
#     def on_evaluate(self, args, state, control, **kwargs):
#         # super().on_log(args, state, control, **kwargs)
#         # super().on_epoch_end(self, args, state, control, **kwargs)  # Propagate logs

#         epoch = state.epoch
#         # Evaluate on the training dataset (using the same compute_metrics)
#         train_metrics = self.trainer.evaluate(
#             eval_dataset=self.trainer.train_dataset,
#             metric_key_prefix="train"  # Prefixes keys with "train_"
#         )
#         # if metrics.get('train_loss'):

#         # Print training stats
#         print(f"\nEpoch {epoch} Training Stats:")
#         print(f"  Training Loss: {train_metrics['train_loss']:.4f}")
#         print(f"  Training Accuracy: {train_metrics['train_accuracy']:.4f}")
#         print(f"  Training F1 Score: {train_metrics['train_f1']:.4f}")
#         print(f"  Training Precision (class 0): {train_metrics['train_precision0']:.4f}")
#         print(f"  Training Precision (class 1): {train_metrics['train_precision1']:.4f}")
#         print(f"  Training Precision (class 2): {train_metrics['train_precision2']:.4f}")

#         # This method is called after each evaluation
#         # trainer = self.trainer#kwargs.get("trainer")
#         eval_metrics = self.trainer.evaluate(eval_dataset=self.trainer.eval_dataset)
#         # if metrics.get('eval_loss'):
#         print(f"\nEpoch {epoch} Validation Stats:")
#         print(f"  Validation Loss: {eval_metrics['eval_loss']:.4f}")
#         print(f"  Validation Accuracy: {eval_metrics['eval_accuracy']:.4f}")
#         print(f"  Validation F1 Score: {eval_metrics['eval_f1']:.4f}")
#         print(f"  Validation Precision (class 0): {eval_metrics['eval_precision0']}")
#         print(f"  Validation Precision (class 1): {eval_metrics['eval_precision1']}")
#         print(f"  Validation Precision (class 2): {eval_metrics['eval_precision2']}")
#         # print(f"  Validation Precision (per class): {[round(p, 4) for p in metrics['eval_precision']]}")

#         # # # Extract the logs from kwargs
#         # # Inject training loss into log history
#         # state.log_history.append({
#         #     'loss': train_metrics['train_loss'],  # Key for training loss
#         #     'epoch': state.epoch,
#         # })

#         # # Propagate logs (critical for table updates)
#         # super().on_epoch_end(args, state, control, **kwargs)



In [14]:
class PrintValidationStatsCallback(TrainerCallback):
    def __init__(self, trainer):
        super().__init__()
        self.trainer = trainer

    def on_evaluate(self, args, state, control, **kwargs):
        # Get validation metrics from the last evaluation (already computed)
        if state.log_history:
          # if len(state.log_history) >= 2:
          #   train_metrics = state.log_history[-2]
          #   train_loss = train_metrics.get("loss", "N/A")
          eval_metrics = state.log_history[-1]  # Assumes evaluation happens at epoch end
          print(eval_metrics)

          # Compute training metrics via prediction (no evaluation loop)
          train_pred = self.trainer.predict(self.trainer.train_dataset)
          train_metrics = self.trainer.compute_metrics(train_pred)
          print(train_metrics)

          print(f"\nEpoch {state.epoch} Training Stats:")
          # if train_loss !='N/A':
          #   print(f"  Training Loss: {train_metrics.get('loss', 'N/A'):.4f}")
          print(f"  Training Accuracy: {train_metrics['accuracy']:.4f}")
          print(f"  Training F1 Score: {train_metrics['f1']:.4f}")
          print(f"  Training Precision (class 0): {train_metrics['precision0']:.4f}")
          print(f"  Training Precision (class 1): {train_metrics['precision1']:.4f}")
          print(f"  Training Precision (class 2): {train_metrics['precision2']:.4f}")

          print(f"\nEpoch {state.epoch} Validation Stats:")
          # print(f"  Validation Loss: {eval_metrics['eval_loss']:.4f}")
          print(f"  Validation Accuracy: {eval_metrics['eval_accuracy']:.4f}")
          print(f"  Validation F1 Score: {eval_metrics['eval_f1']:.4f}")
          print(f"  Validation Precision (class 0): {eval_metrics['eval_precision0']:.4f}")
          print(f"  Validation Precision (class 1): {eval_metrics['eval_precision1']:.4f}")
          print(f"  Validation Precision (class 2): {eval_metrics['eval_precision2']:.4f}")

In [15]:
from copy import deepcopy
import logging
logging.basicConfig(level=logging.INFO)
# 8. Initialize model
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

# 9. Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    # eval_strategy="steps",
    save_strategy="epoch",
    logging_strategy="epoch",  # Log every steps
    evaluation_strategy="epoch", # Evaluate every steps
    # load_best_model_at_end=True,
    # eval_accumulation_steps=1,
    report_to=None,          # Disable external logging (e.g., WANDB),
    # logging_steps=1,#0.2,
    # eval_steps=1,#0.2,
    # save_steps=1,#0.2,
    log_level='debug',
    load_best_model_at_end=True,     # Load the best model (based on metric) at the end
    metric_for_best_model="eval_loss",      # Use F1-score to determine the best model
    greater_is_better=False,          # Higher F1-score is better
    save_total_limit=1,
    # prediction_loss_only=True
)

# # 10. Define compute metrics function
# def compute_metrics(pred):
#     labels = pred.label_ids
#     preds = np.argmax(pred.predictions, axis=1)

#     # Calculate accuracy
#     accuracy = accuracy_score(labels, preds)

#     # Calculate F1-score (macro average for multi-class)
#     f1 = f1_score(labels, preds, average='macro')
#     prec = precision_score(labels, preds, average=None)
#     return {
#         'accuracy': accuracy,
#         'f1': f1,
#         # 'precision': list(prec)
#     }

def compute_metrics(pred):
      labels = pred.label_ids
      preds = np.argmax(pred.predictions, axis=1)

      # Calculate accuracy
      accuracy = accuracy_score(labels, preds)

      # Calculate F1-score (macro average for multi-class)
      f1 = f1_score(labels, preds, average='macro')
      prec = precision_score(labels, preds, average=None)
      prec = [float(x) for x in prec]
      return {
          # 'loss':
          'accuracy': accuracy,
          'f1': f1,
          'precision0': prec[0],
          'precision1': prec[1],
          'precision2': prec[2]
      }
# 11. Create Trainer
# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_dataset,
#     eval_dataset=val_dataset,
#     compute_metrics=compute_metrics
# )
# trainer = CustomTrainer(
#  model=model,
#  args=training_args,
#  train_dataset=train_dataset,
#  eval_dataset=val_dataset,
#  compute_metrics=compute_metrics,
# #  callbacks=[CustomMetricsCallback]  # Add this line
# )
# 12. Create Trainer with the callback
trainer = Trainer(  # Assuming CustomTrainer was a typo; replace with your actual CustomTrainer if needed
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    # callbacks=[PrintValidationStatsCallback(trainer=trainer)]  # Add the custom callback
)
# Create the callback and pass the trainer instance
callback = PrintValidationStatsCallback(trainer=trainer)

# Add the callback to the trainer
trainer.add_callback(callback)

# 12. Train the model
trainer.train()


# 13. Evaluate on test set
test_results = trainer.evaluate(val_dataset)
print("\nTest set evaluation results:")
print(f"Test accuracy: {test_results['eval_accuracy']:.4f}")
print(f"Test loss: {test_results['eval_loss']:.4f}")

config.json:   0%|          | 0.00/932 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Safetensors PR exists


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Currently training with a batch size of: 128
***** Running training *****
  Num examples = 36,453
  Num Epochs = 5
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 1,425
  Number of trainable parameters = 124,647,939
The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision0,Precision1,Precision2
1,0.7227,0.638159,0.720808,0.662407,0.787308,0.638166,0.580023
2,0.5423,0.606064,0.741001,0.683889,0.800138,0.63326,0.64418
3,0.401,0.69076,0.734855,0.681435,0.810295,0.618462,0.615193
4,0.2777,0.827551,0.736392,0.673455,0.787441,0.648681,0.630376
5,0.2045,0.905804,0.729368,0.675499,0.805616,0.606122,0.615288



***** Running Evaluation *****
  Num examples = 4556
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Prediction *****
  Num examples = 36453
  Batch size = 128
The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


{'eval_loss': 0.6381592750549316, 'eval_accuracy': 0.7208077260755048, 'eval_f1': 0.6624073799672007, 'eval_precision0': 0.7873082287308228, 'eval_precision1': 0.6381660470879802, 'eval_precision2': 0.5800227014755959, 'eval_runtime': 29.6449, 'eval_samples_per_second': 153.686, 'eval_steps_per_second': 1.214, 'epoch': 1.0, 'step': 285}


Saving model checkpoint to ./results/checkpoint-285
Configuration saved in ./results/checkpoint-285/config.json


{'accuracy': 0.7653416728389981, 'f1': 0.7180524735596759, 'precision0': 0.8221772953017185, 'precision1': 0.6821955719557196, 'precision2': 0.6607837687604224}

Epoch 1.0 Training Stats:
  Training Accuracy: 0.7653
  Training F1 Score: 0.7181
  Training Precision (class 0): 0.8222
  Training Precision (class 1): 0.6822
  Training Precision (class 2): 0.6608

Epoch 1.0 Validation Stats:
  Validation Accuracy: 0.7208
  Validation F1 Score: 0.6624
  Validation Precision (class 0): 0.7873
  Validation Precision (class 1): 0.6382
  Validation Precision (class 2): 0.5800


Model weights saved in ./results/checkpoint-285/model.safetensors

***** Running Evaluation *****
  Num examples = 4556
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Prediction *****
  Num examples = 36453
  Batch size = 128
The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


{'eval_loss': 0.6060636043548584, 'eval_accuracy': 0.7410008779631255, 'eval_f1': 0.6838892106719322, 'eval_precision0': 0.8001383125864454, 'eval_precision1': 0.6332599118942731, 'eval_precision2': 0.6441798941798942, 'eval_runtime': 29.7613, 'eval_samples_per_second': 153.085, 'eval_steps_per_second': 1.21, 'epoch': 2.0, 'step': 570}


Saving model checkpoint to ./results/checkpoint-570
Configuration saved in ./results/checkpoint-570/config.json


{'accuracy': 0.8609716621402902, 'f1': 0.8334906070320519, 'precision0': 0.8984590718802886, 'precision1': 0.7697026270169356, 'precision2': 0.8367692307692308}

Epoch 2.0 Training Stats:
  Training Accuracy: 0.8610
  Training F1 Score: 0.8335
  Training Precision (class 0): 0.8985
  Training Precision (class 1): 0.7697
  Training Precision (class 2): 0.8368

Epoch 2.0 Validation Stats:
  Validation Accuracy: 0.7410
  Validation F1 Score: 0.6839
  Validation Precision (class 0): 0.8001
  Validation Precision (class 1): 0.6333
  Validation Precision (class 2): 0.6442


Model weights saved in ./results/checkpoint-570/model.safetensors

***** Running Evaluation *****
  Num examples = 4556
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Prediction *****
  Num examples = 36453
  Batch size = 128
The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


{'eval_loss': 0.69076007604599, 'eval_accuracy': 0.7348551360842844, 'eval_f1': 0.6814351015412937, 'eval_precision0': 0.8102951763858891, 'eval_precision1': 0.6184615384615385, 'eval_precision2': 0.6151930261519303, 'eval_runtime': 29.6462, 'eval_samples_per_second': 153.679, 'eval_steps_per_second': 1.214, 'epoch': 3.0, 'step': 855}


Saving model checkpoint to ./results/checkpoint-855
Configuration saved in ./results/checkpoint-855/config.json


{'accuracy': 0.9251090445230845, 'f1': 0.9116205753801493, 'precision0': 0.9642576045187278, 'precision1': 0.822934863988021, 'precision2': 0.9211988304093567}

Epoch 3.0 Training Stats:
  Training Accuracy: 0.9251
  Training F1 Score: 0.9116
  Training Precision (class 0): 0.9643
  Training Precision (class 1): 0.8229
  Training Precision (class 2): 0.9212

Epoch 3.0 Validation Stats:
  Validation Accuracy: 0.7349
  Validation F1 Score: 0.6814
  Validation Precision (class 0): 0.8103
  Validation Precision (class 1): 0.6185
  Validation Precision (class 2): 0.6152


Model weights saved in ./results/checkpoint-855/model.safetensors
Deleting older checkpoint [results/checkpoint-285] due to args.save_total_limit

***** Running Evaluation *****
  Num examples = 4556
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Prediction *****
  Num examples = 36453
  Batch size = 128
The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


{'eval_loss': 0.8275513052940369, 'eval_accuracy': 0.7363915715539947, 'eval_f1': 0.6734548959128529, 'eval_precision0': 0.787441235728677, 'eval_precision1': 0.6486810551558753, 'eval_precision2': 0.6303763440860215, 'eval_runtime': 29.7519, 'eval_samples_per_second': 153.133, 'eval_steps_per_second': 1.21, 'epoch': 4.0, 'step': 1140}


Saving model checkpoint to ./results/checkpoint-1140
Configuration saved in ./results/checkpoint-1140/config.json


{'accuracy': 0.950072696348723, 'f1': 0.9401996970382881, 'precision0': 0.9745046355208143, 'precision1': 0.8680150395436277, 'precision2': 0.9642220902612827}

Epoch 4.0 Training Stats:
  Training Accuracy: 0.9501
  Training F1 Score: 0.9402
  Training Precision (class 0): 0.9745
  Training Precision (class 1): 0.8680
  Training Precision (class 2): 0.9642

Epoch 4.0 Validation Stats:
  Validation Accuracy: 0.7364
  Validation F1 Score: 0.6735
  Validation Precision (class 0): 0.7874
  Validation Precision (class 1): 0.6487
  Validation Precision (class 2): 0.6304


Model weights saved in ./results/checkpoint-1140/model.safetensors
Deleting older checkpoint [results/checkpoint-855] due to args.save_total_limit
Saving model checkpoint to ./results/checkpoint-1425
Configuration saved in ./results/checkpoint-1425/config.json
Model weights saved in ./results/checkpoint-1425/model.safetensors
Deleting older checkpoint [results/checkpoint-1140] due to args.save_total_limit

***** Running Evaluation *****
  Num examples = 4556
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Prediction *****
  Num examples = 36453
  Batch size = 128
The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `Ro

{'eval_loss': 0.9058038592338562, 'eval_accuracy': 0.729367866549605, 'eval_f1': 0.6754994896243419, 'eval_precision0': 0.8056155507559395, 'eval_precision1': 0.6061224489795919, 'eval_precision2': 0.6152882205513784, 'eval_runtime': 30.164, 'eval_samples_per_second': 151.041, 'eval_steps_per_second': 1.193, 'epoch': 5.0, 'step': 1425}


Saving model checkpoint to ./results/checkpoint-1425
Configuration saved in ./results/checkpoint-1425/config.json


{'accuracy': 0.9601404548322497, 'f1': 0.9525864910843019, 'precision0': 0.9897059502377326, 'precision1': 0.8672178379055493, 'precision2': 0.9750257088291465}

Epoch 5.0 Training Stats:
  Training Accuracy: 0.9601
  Training F1 Score: 0.9526
  Training Precision (class 0): 0.9897
  Training Precision (class 1): 0.8672
  Training Precision (class 2): 0.9750

Epoch 5.0 Validation Stats:
  Validation Accuracy: 0.7294
  Validation F1 Score: 0.6755
  Validation Precision (class 0): 0.8056
  Validation Precision (class 1): 0.6061
  Validation Precision (class 2): 0.6153


Model weights saved in ./results/checkpoint-1425/model.safetensors


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results/checkpoint-570 (score: 0.6060636043548584).
Deleting older checkpoint [results/checkpoint-1425] due to args.save_total_limit

***** Running Evaluation *****
  Num examples = 4556
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.



***** Running Prediction *****
  Num examples = 36453
  Batch size = 128
The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


{'eval_loss': 0.6060636043548584, 'eval_accuracy': 0.7410008779631255, 'eval_f1': 0.6838892106719322, 'eval_precision0': 0.8001383125864454, 'eval_precision1': 0.6332599118942731, 'eval_precision2': 0.6441798941798942, 'eval_runtime': 30.0079, 'eval_samples_per_second': 151.826, 'eval_steps_per_second': 1.2, 'epoch': 5.0, 'step': 1425}
{'accuracy': 0.8609716621402902, 'f1': 0.8334906070320519, 'precision0': 0.8984590718802886, 'precision1': 0.7697026270169356, 'precision2': 0.8367692307692308}

Epoch 5.0 Training Stats:
  Training Accuracy: 0.8610
  Training F1 Score: 0.8335
  Training Precision (class 0): 0.8985
  Training Precision (class 1): 0.7697
  Training Precision (class 2): 0.8368

Epoch 5.0 Validation Stats:
  Validation Accuracy: 0.7410
  Validation F1 Score: 0.6839
  Validation Precision (class 0): 0.8001
  Validation Precision (class 1): 0.6333
  Validation Precision (class 2): 0.6442

Test set evaluation results:
Test accuracy: 0.7410
Test loss: 0.6061


In [16]:

# # 13. Save the model
model.save_pretrained(MODEL_FILE)
tokenizer.save_pretrained(MODEL_FILE)


Configuration saved in /content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model/config.json
Model weights saved in /content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model/model.safetensors
tokenizer config file saved in /content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model/tokenizer_config.json
Special tokens file saved in /content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model/special_tokens_map.json


('/content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model/tokenizer_config.json',
 '/content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model/special_tokens_map.json',
 '/content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model/vocab.json',
 '/content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model/merges.txt',
 '/content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model/added_tokens.json',
 '/content/gdrive/MyDrive/CS7641-models/trained_cryptobert_model/tokenizer.json')

In [17]:
confirm_results = trainer.evaluate(val_dataset)
print("\nTest set evaluation results:")
print(f"Test accuracy: {confirm_results['eval_accuracy']:.4f}")
print(f"Test loss: {confirm_results['eval_loss']:.4f}")


***** Running Evaluation *****
  Num examples = 4556
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.



***** Running Prediction *****
  Num examples = 36453
  Batch size = 128
The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


{'eval_loss': 0.6060636043548584, 'eval_accuracy': 0.7410008779631255, 'eval_f1': 0.6838892106719322, 'eval_precision0': 0.8001383125864454, 'eval_precision1': 0.6332599118942731, 'eval_precision2': 0.6441798941798942, 'eval_runtime': 30.0617, 'eval_samples_per_second': 151.555, 'eval_steps_per_second': 1.198, 'epoch': 5.0, 'step': 1425}
{'accuracy': 0.8609716621402902, 'f1': 0.8334906070320519, 'precision0': 0.8984590718802886, 'precision1': 0.7697026270169356, 'precision2': 0.8367692307692308}

Epoch 5.0 Training Stats:
  Training Accuracy: 0.8610
  Training F1 Score: 0.8335
  Training Precision (class 0): 0.8985
  Training Precision (class 1): 0.7697
  Training Precision (class 2): 0.8368

Epoch 5.0 Validation Stats:
  Validation Accuracy: 0.7410
  Validation F1 Score: 0.6839
  Validation Precision (class 0): 0.8001
  Validation Precision (class 1): 0.6333
  Validation Precision (class 2): 0.6442

Test set evaluation results:
Test accuracy: 0.7410
Test loss: 0.6061


In [18]:
confirm_results = trainer.evaluate(test_dataset)
print("\nTest set evaluation results:")
print(f"Test accuracy: {confirm_results['eval_accuracy']:.4f}")
print(f"Test loss: {confirm_results['eval_loss']:.4f}")


***** Running Evaluation *****
  Num examples = 4558
  Batch size = 128
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.



***** Running Prediction *****
  Num examples = 36453
  Batch size = 128
The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.


{'eval_loss': 0.603899359703064, 'eval_accuracy': 0.7356296621325142, 'eval_f1': 0.6779236237296669, 'eval_precision0': 0.8083157152924595, 'eval_precision1': 0.6138509968520461, 'eval_precision2': 0.6179921773142112, 'eval_runtime': 29.6101, 'eval_samples_per_second': 153.934, 'eval_steps_per_second': 1.216, 'epoch': 5.0, 'step': 1425}
{'accuracy': 0.8609716621402902, 'f1': 0.8334906070320519, 'precision0': 0.8984590718802886, 'precision1': 0.7697026270169356, 'precision2': 0.8367692307692308}

Epoch 5.0 Training Stats:
  Training Accuracy: 0.8610
  Training F1 Score: 0.8335
  Training Precision (class 0): 0.8985
  Training Precision (class 1): 0.7697
  Training Precision (class 2): 0.8368

Epoch 5.0 Validation Stats:
  Validation Accuracy: 0.7356
  Validation F1 Score: 0.6779
  Validation Precision (class 0): 0.8083
  Validation Precision (class 1): 0.6139
  Validation Precision (class 2): 0.6180

Test set evaluation results:
Test accuracy: 0.7356
Test loss: 0.6039


In [None]:
# Extract predictions (logits), probabilities, and true labels
predictions_output = trainer.predict(val_dataset)
logits = predictions_output.predictions  # Raw logits from the model
probs = torch.softmax(torch.tensor(logits), dim=-1).numpy()  # Convert logits to probabilities
preds = np.argmax(logits, axis=1)  # Predicted labels
true_labels = predictions_output.label_ids  # True labels

# 14. Evaluate on test set (already computed metrics are available in predictions_output)
test_results = trainer.evaluate(val_dataset)
print("\nTest set evaluation results:")
print(f"Test accuracy: {test_results['eval_accuracy']:.4f}")
print(f"Test loss: {test_results['eval_loss']:.4f}")


***** Running Prediction *****
  Num examples = 9114
  Batch size = 128



***** Running Evaluation *****
  Num examples = 9114
  Batch size = 128



Test set evaluation results:
Test accuracy: 0.7411
Test loss: 0.8707


In [None]:
# 15. Identify incorrect predictions
incorrect_indices = np.where(preds != true_labels)[0]  # Indices where predictions are wrong
num_incorrect = len(incorrect_indices)

# def sentiment_map(text):
#   if 'Bullish' in text:
#     return 0
#   elif 'Neutral' in text:
#     return 1
#   else:
#     return 2

print(f"\nNumber of incorrect predictions: {num_incorrect}")
print("\nIncorrect predictions:")
for idx in incorrect_indices[:10]:  # Limit to first 10 for brevity
    print('Text: ', texts[idx])#, labels[idx])
    print(f"Sample {idx}:")
    print(f"True label: {true_labels[idx]}")
    print(f"Predicted label: {preds[idx]}")
    print(f"Probabilities: {probs[idx]}")




Number of incorrect predictions: 2360

Incorrect predictions:
Text:  Crazy that $PRIME is going to >$1B market cap at the next cycle top as the “ $AXS of this cycle” and you still don’t own any anon. $75-100 exit or bust bitch.
Sample 0:
True label: 0
Predicted label: 1
Probabilities: [0.00534369 0.51875556 0.4759007 ]
Text:  $AVAX Stacking on support..
Sample 2:
True label: 1
Predicted label: 0
Probabilities: [9.9729925e-01 2.3489676e-03 3.5175710e-04]
Text:  $INJ - Twitter been super bullish on this one and this is the first real pullback since the bottom. Buying the daily re-touch of the supertrend support. Close below and I'll sell.
Sample 3:
True label: 0
Predicted label: 2
Probabilities: [0.02006538 0.00458542 0.97534925]
Text:  $BONK looks ready to get sent
Sample 4:
True label: 2
Predicted label: 1
Probabilities: [0.00489898 0.7302936  0.26480737]
Text:  $BONK negative funding. Shorts go to horny jail.
Sample 12:
True label: 1
Predicted label: 0
Probabilities: [0.6511122  0.06

In [None]:
from collections import Counter

In [None]:
Counter(true_labels)

Counter({0: 5600, 1: 1787, 2: 1727})

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load the saved model and tokenizer
model_dir = "./trained_bert_model"
tokenizer = BertTokenizer.from_pretrained(model_dir)
model = BertForSequenceClassification.from_pretrained(model_dir)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # Set to evaluation mode

# Function to perform inference
def predict_text(text):
    # Tokenize the input
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Get model predictions
    with torch.no_grad():  # Disable gradient calculation for inference
        outputs = model(**inputs)
        logits = outputs.logits
        predictions = torch.softmax(logits, dim=1)
        predicted_class = torch.argmax(predictions, dim=1).item()

    return predicted_class, predictions

# Example usage
test_text = "This is a great day!"
predicted_class, probabilities = predict_text(test_text)
print(f"Predicted class: {predicted_class}")  # e.g., 1 for positive, 0 for negative
print(f"Probabilities: {probabilities.cpu().numpy()}")

# Test with multiple sentences
test_texts = ["I love this!", "This is awful."]
for text in test_texts:
    pred_class, probs = predict_text(text)
    print(f"Text: {text} | Predicted class: {pred_class} | Probabilities: {probs.cpu().numpy()}")

OSError: Incorrect path_or_model_id: './trained_bert_model'. Please provide either the path to a local folder or the repo_id of a model on the Hub.