In [None]:
# !pip install --upgrade transformers
# !pip -q install -U spacy
# !python -m spacy download en_core_web_lg

In [2]:
# Standard library imports
import sys
import os
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import spacy
from sklearn.metrics import accuracy_score, f1_score, log_loss
from sklearn.metrics import precision_score, recall_score, confusion_matrix, f1_score, log_loss

import torch
from datasets import Dataset
from transformers import TrainingArguments, Trainer, AutoModelForSequenceClassification, AutoTokenizer, AutoModelForQuestionAnswering, AutoConfig
from transformers import pipeline, TrainerCallback, TrainingArguments, TrainerControl, TrainerState, Trainer
# from peft import get_peft_model, LoraConfig, TaskType



# Environment switches for Weights & Biases and Hugging Face Hub telemetry,
# set in one place before any training code runs.
os.environ.update({
    "WANDB_DISABLED": "true",        # hard-disable
    "WANDB_MODE": "disabled",        # belt & suspenders
    "HF_HUB_DISABLE_TELEMETRY": "1",
})

# Report whether a GPU is visible to PyTorch.
print("CUDA available:", torch.cuda.is_available())

# spaCy pipeline; the model has to be installed beforehand
# (see the commented-out download command in the first cell).
nlp = spacy.load("en_core_web_lg")

CUDA available: False


# Read transcripts
Split into train / validation / test sets (stratified on the pseudo-label actions).

In [176]:
def seed_everything(seed=42):
    """Seed every random number generator the notebook relies on.

    Sets the seed of Python's ``random`` module, NumPy's legacy global RNG and
    PyTorch (CPU and all CUDA devices), exports ``PYTHONHASHSEED`` and switches
    cuDNN to deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed applied to all generators (default 42).
    """
    # Imported locally so the helper works even if the import cell was not run.
    import os
    import random

    import numpy as np
    import torch

    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

    # NumPy first, then torch CPU, current CUDA device and all CUDA devices.
    for apply_seed in (
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

def assign_group(g, seed=3, probs=(0.55, 0.15, 0.3)):
    """Assign a 'train' / 'val' / 'test' label to every row of one group.

    The first rows of a group receive fixed labels so that small groups are
    still represented in the evaluation splits:

    * 1 row   -> ['test']
    * 2 rows  -> ['test', 'train']
    * 3+ rows -> ['test', 'val', 'train'] for the first three rows; every
      further row is drawn at random from ('train', 'val', 'test').

    Args:
        g: The group (e.g. the frame passed by ``groupby(...).apply``); only
            its length and index are used.
        seed: Seed of the generator used for rows beyond the third. A fresh
            generator is created on every call, so every group sees the same
            random sequence.
        probs: Sampling probabilities for ('train', 'val', 'test'), in that
            order.

    Returns:
        pd.Series of labels (object dtype) indexed like ``g``.
    """
    n = len(g)

    # Deterministic head of the assignment; slicing also covers n == 0 and n == 1.
    if n == 2:
        head = ['test', 'train']
    else:
        head = ['test', 'val', 'train'][:n]

    lab = np.empty(n, dtype=object)
    lab[:len(head)] = head

    if n > 3:
        rng = np.random.default_rng(seed)
        lab[3:] = rng.choice(['train', 'val', 'test'], size=n - 3, p=list(probs))

    return pd.Series(lab, index=g.index)


In [177]:
# Transcripts: exact duplicate rows are removed and the index is renumbered from 0.
transcripts_df = pd.read_csv('data/transcrpits_processed.csv')
transcripts_df = transcripts_df.drop_duplicates(ignore_index=True)

# Pseudo action labels (joined to the transcripts on sample_id further down).
pseudo_labels = pd.read_csv('data/pseudo_actions_labels_with_id.csv')

# Augmented texts; only these four columns are kept.
augmented_columns = ['sample_id', 'augmented_text', 'tokenized_augmented_text', 'action']
augmented_texts_df = pd.read_csv('data/augmented_texts_processed.csv')[augmented_columns]

In [178]:
seed_everything(42)

# Turn the stringified list in 'action_detected' into one " , "-joined key
# that is used as the stratification group.
# NOTE(review): eval executes whatever the CSV cell contains; if the cells are
# plain list literals, ast.literal_eval is the safe replacement - confirm and switch.
pseudo_labels['actions'] = pseudo_labels['action_detected'].apply(
    lambda x: " , ".join(eval(x)) if isinstance(x, str) and x != 'None' else '')

# Attach the action key to every transcript WITHOUT changing row count or order:
# a left merge against de-duplicated labels yields exactly one row per transcript,
# so the result can safely take over transcripts_df's index. (An inner merge gets
# a fresh 0..m-1 index and silently misaligns the folds as soon as a transcript
# has no label or a label is duplicated.)
labels_per_sample = pseudo_labels[['sample_id', 'actions']].drop_duplicates('sample_id')
labelled = transcripts_df[['sample_id']].merge(labels_per_sample, on='sample_id', how='left')
labelled.index = transcripts_df.index

# Rows without a pseudo-label have a NaN key, are skipped by groupby and therefore
# end up with a NaN fold after the reindex.
transcripts_df['fold1'] = (
    labelled
    .groupby('actions', group_keys=False)
    .apply(assign_group)
    .reindex(transcripts_df.index)
)

# sample_ids belonging to each split.
train_ids = transcripts_df.loc[transcripts_df['fold1'] == 'train', ['sample_id']]
val_ids = transcripts_df.loc[transcripts_df['fold1'] == 'val', ['sample_id']]
test_ids = transcripts_df.loc[transcripts_df['fold1'] == 'test', ['sample_id']]

print(f'train_ids samples: {len(train_ids)}')
print(f'val_ids samples: {len(val_ids)}')
print(f'test_ids samples: {len(test_ids)}')

train_ids samples: 568
val_ids samples: 201
test_ids samples: 336


# Combined Augmented data
Balance the classes by oversampling the negative class and undersampling the augmented positive examples.

In [187]:

# Columns kept for modelling.
feature_cols = ['Text', 'events', 'Label']

# Rows of each split, selected through the sample_ids assigned to it.
transcripts_train, transcripts_val, transcripts_test = (
    transcripts_df.merge(ids, on='sample_id') for ids in (train_ids, val_ids, test_ids)
)

# One row per augmented text whose sample_id is in the training split.
# NOTE(review): 'augmented_text' is not selected here - the resulting rows carry the
# original 'Text', i.e. they are repeats of the source transcript. Confirm this is intended.
transcripts_train_aug = (
    augmented_texts_df
    .merge(transcripts_train[['sample_id']], on='sample_id', how='inner')
    .merge(transcripts_df, on='sample_id', how='inner')
)[feature_cols]

transcripts_train = transcripts_train[feature_cols]
transcripts_test = transcripts_test[feature_cols]
transcripts_val = transcripts_val[feature_cols]

# Negative class (Label == 0): original rows three times plus the augmented rows twice.
train_neg_aug = transcripts_train_aug.loc[transcripts_train_aug['Label'].eq(0)]
train_neg = transcripts_train.loc[transcripts_train['Label'].eq(0)]
train_neg = pd.concat([train_neg] * 3 + [train_neg_aug] * 2, ignore_index=True)

# Positive class (Label == 1): original rows once plus a 45% sample of the augmented rows.
train_pos_aug = (
    transcripts_train_aug
    .loc[transcripts_train_aug['Label'].eq(1)]
    .sample(frac=0.45, random_state=42)
)
train_pos = transcripts_train.loc[transcripts_train['Label'].eq(1)]
train_pos = pd.concat([train_pos, train_pos_aug], ignore_index=True)

# Alternative tried without augmentation: negatives repeated four times, positives as-is.

print(f'train_neg samples: {len(train_neg)}')
print(f'train_pos samples: {len(train_pos)}')

# Combine both classes and shuffle reproducibly.
train_df_balanced = pd.concat([train_pos, train_neg], ignore_index=True)
train_df_balanced = train_df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)



train_neg samples: 681
train_pos samples: 664


# Normalize names
Replace every player's name with the generic name "David" to reduce name bias.

In [197]:
# Token-classification (NER) pipeline used by replace_person_with_david_hf below,
# which reads "entity_group", "start" and "end" from every returned entity.
ner = pipeline("token-classification", model="dslim/bert-base-NER", aggregation_strategy="simple")

def replace_person_with_david_hf(t, replacement="David", ner_pipeline=None):
    """Replace every person name detected in ``t`` with a generic name.

    Person entities (entity group "PER" or "PERSON") are located with an NER
    callable, overlapping or directly adjacent character spans are merged, and
    each merged span is substituted by ``replacement``.

    Args:
        t: Input text. Non-string values (e.g. NaN/None) and empty strings are
            returned unchanged without calling the tagger.
        replacement: Text inserted in place of every detected name.
        ner_pipeline: Callable mapping a string to a list of dicts with the keys
            "entity_group", "start" and "end". Defaults to the module-level
            ``ner`` pipeline.

    Returns:
        The text with person names replaced, or ``t`` itself when nothing was
        detected.
    """
    if not isinstance(t, str) or not t:
        return t

    tagger = ner if ner_pipeline is None else ner_pipeline
    spans = sorted(
        (entity["start"], entity["end"])
        for entity in tagger(t)
        if entity.get("entity_group") in {"PER", "PERSON"}
    )
    if not spans:
        return t

    # Merge spans that overlap or touch so that one name is replaced only once.
    merged = []
    for start, end in spans:
        if merged and start <= merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])

    # Stitch the untouched text between the spans together with the replacement.
    pieces, cursor = [], 0
    for start, end in merged:
        pieces.append(t[cursor:start])
        pieces.append(replacement)
        cursor = end
    pieces.append(t[cursor:])
    return "".join(pieces)

# Add the name-normalized text as a new column to the training, validation and test frames.
for frame in (train_df_balanced, transcripts_val, transcripts_test):
    frame['Text_player_cleaned'] = frame['Text'].apply(replace_person_with_david_hf)
train_df_balanced.head(5)

Unnamed: 0,Text,events,Label,Text_player_cleaned,events_text,combined_text
0,The veteran Al Horford with a step back.,['2-pointer'],1,The veteran David with a step back.,2-pointer,2-pointer [SEP] The veteran David with a step ...
1,Paint gets in the paint steps back won't go.,['Missed Shot'],1,Paint gets in the paint steps back won't go.,Missed Shot,Missed Shot [SEP] Paint gets in the paint step...
2,Coast to coast now kicks it back out.,['Missed Shot'],0,Coast to coast now kicks it back out.,Missed Shot,Missed Shot [SEP] Coast to coast now kicks it ...
3,"Fraser had a screen from drama, gives it up to...","['2-pointer', 'Assist']",1,"David had a screen from drama, gives it up to ...","2-pointer, Assist","2-pointer, Assist [SEP] David had a screen fro..."
4,"That'll be critical, and Vince Carter hits not...","['3-pointer', 'Assist']",1,"That'll be critical, and David hits nothing bu...","3-pointer, Assist","3-pointer, Assist [SEP] That'll be critical, a..."


# Add events classification

In [201]:
def add_events_and_combined_text(df):
    """Return a copy of ``df`` with the event list rendered as text and prepended to the cleaned text.

    Two columns are added to the copy:

    * ``events_text`` - the items of the stringified list in ``events`` joined
      with ", " (empty string for non-string values and for the literal 'None').
    * ``Text_player_cleaned_and_events`` - ``events_text``, the separator
      " [SEP] " and ``Text_player_cleaned`` concatenated.

    Args:
        df: Frame with the columns ``events`` and ``Text_player_cleaned``.

    Returns:
        A new DataFrame; the input frame is left untouched.
    """
    def _events_as_text(raw):
        # Anything that is not a real string, or is the literal 'None', has no events.
        if not isinstance(raw, str) or raw == 'None':
            return ''
        # NOTE(review): eval executes whatever the cell contains; consider
        # ast.literal_eval if the cells are plain list literals.
        return ', '.join(str(item) for item in eval(raw))

    enriched = df.copy()
    enriched['events_text'] = enriched['events'].apply(_events_as_text)
    enriched['Text_player_cleaned_and_events'] = (
        enriched['events_text'] + " [SEP] " + enriched['Text_player_cleaned']
    )
    return enriched

# Add the event columns to all three splits (each call returns a new frame).
train_df_balanced, transcripts_val, transcripts_test = (
    add_events_and_combined_text(frame)
    for frame in (train_df_balanced, transcripts_val, transcripts_test)
)
train_df_balanced.head(5)

Unnamed: 0,Text,events,Label,Text_player_cleaned,events_text,Text_player_cleaned_and_events
0,The veteran Al Horford with a step back.,['2-pointer'],1,The veteran David with a step back.,2-pointer,2-pointer [SEP] The veteran David with a step ...
1,Paint gets in the paint steps back won't go.,['Missed Shot'],1,Paint gets in the paint steps back won't go.,Missed Shot,Missed Shot [SEP] Paint gets in the paint step...
2,Coast to coast now kicks it back out.,['Missed Shot'],0,Coast to coast now kicks it back out.,Missed Shot,Missed Shot [SEP] Coast to coast now kicks it ...
3,"Fraser had a screen from drama, gives it up to...","['2-pointer', 'Assist']",1,"David had a screen from drama, gives it up to ...","2-pointer, Assist","2-pointer, Assist [SEP] David had a screen fro..."
4,"That'll be critical, and Vince Carter hits not...","['3-pointer', 'Assist']",1,"That'll be critical, and David hits nothing bu...","3-pointer, Assist","3-pointer, Assist [SEP] That'll be critical, a..."


# Load pre-trained model (SportsBERT)

In [194]:
# Checkpoints tried previously, with the parameter names that were left trainable:
#   'emeraldgoose/bert-base-v1-sports'   -> bert.pooler.dense.{bias,weight}, classifier.{bias,weight}
#   'leomaurodesenv/bert-basketball-qa'  -> bert.pooler.dense.{bias,weight}, classifier.{bias,weight}
#   'SushantGautam/SportsSum'            -> classification_head.dense.{bias,weight},
#                                           classification_head.out_proj.{bias,weight}

# Checkpoint used for all experiments below.
model_name = "microsoft/SportsBERT"

# Parameters (by exact name) that stay trainable during fine-tuning.
grads = [
    'bert.pooler.dense.bias',
    'bert.pooler.dense.weight',
    'classifier.bias',
    'classifier.weight',
]

# Use the GPU when one is available; assign "cpu" here to force CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/SportsBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Run Expr
Fine-tune for classification, select the best checkpoint by macro-F1 on the validation set, then predict on the test set.
Fine-tune for 2 epochs, training only the classification layer, the pooler and the last encoder layer (about 8% of the parameters in total).


In [205]:
def _binary_classification_metrics(y_true, y_pred):
    """Compute the metrics shared by ``compute_metrics`` and ``print_metrics``.

    Args:
        y_true: Ground-truth labels (0 or 1).
        y_pred: Predicted labels (0 or 1).

    Returns:
        dict with per-class precision and recall, accuracy, micro / macro /
        weighted F1 (all rounded to 3 decimals) and the four confusion-matrix
        counts as ints, in that key order.
    """
    metrics = {}
    for score_name, scorer in (("precision", precision_score), ("recall", recall_score)):
        for cls in (0, 1):
            # labels=[cls] restricts the macro average to that single class,
            # which makes it the per-class score.
            metrics[f"{score_name}_label_{cls}"] = round(
                scorer(y_true, y_pred, labels=[cls], average='macro', zero_division=0), 3
            )

    metrics["accuracy"] = round(accuracy_score(y_true, y_pred), 3)
    for averaging in ("micro", "macro", "weighted"):
        metrics[f"{averaging}_f1"] = round(f1_score(y_true, y_pred, average=averaging), 3)

    # labels=[0, 1] pins the matrix to 2x2 even if one class is absent.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    metrics["true_negative"] = int(tn)
    metrics["false_positive"] = int(fp)
    metrics["false_negative"] = int(fn)
    metrics["true_positive"] = int(tp)
    return metrics


def compute_metrics(eval_pred):
    """Metric callback for the Hugging Face ``Trainer``.

    Args:
        eval_pred: Pair ``(logits, labels)``; predictions are the argmax of the
            logits over the last axis.

    Returns:
        dict of metric name -> value, see ``_binary_classification_metrics``.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return _binary_classification_metrics(labels, predictions)


def print_metrics(y_true, y_pred):
    """Return a one-row DataFrame summarising predictions against ground truth.

    Despite the name nothing is printed; the frame is returned so the notebook
    can display it. Accuracy and weighted F1 are not part of this report.

    Args:
        y_true: Ground-truth labels (0 or 1).
        y_pred: Predicted labels (0 or 1).

    Returns:
        pd.DataFrame with a single row and one column per reported metric.
    """
    report_columns = (
        "precision_label_0",
        "precision_label_1",
        "recall_label_0",
        "recall_label_1",
        "micro_f1",
        "macro_f1",
        "true_negative",
        "false_positive",
        "false_negative",
        "true_positive",
    )
    metrics = _binary_classification_metrics(y_true, y_pred)
    return pd.DataFrame({column: [metrics[column]] for column in report_columns})

def preprocess_function(examples, tokenizer=None, max_length=128):
    """Tokenize a batch of examples to a fixed length.

    Args:
        examples: Mapping with a 'text' entry holding the texts to tokenize.
        tokenizer: Callable tokenizer to use. When omitted, a module-level
            variable named ``tokenizer`` is used if one exists; relying on that
            global is fragile because nothing in this notebook defines it.
        max_length: Length every sequence is truncated / padded to.

    Returns:
        Whatever the tokenizer returns for the batch.

    Raises:
        NameError: If no tokenizer was passed and no global ``tokenizer`` exists.
    """
    if tokenizer is None:
        # Backward-compatible fallback for callers that pass only `examples`.
        tokenizer = globals().get('tokenizer')
        if tokenizer is None:
            raise NameError(
                "name 'tokenizer' is not defined: pass tokenizer=... or define a global tokenizer"
            )
    return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=max_length)

def _frame_to_dataset(frame, text_col):
    """Build a ``Dataset`` with the columns 'text' (from ``text_col``) and 'label' (from 'Label')."""
    return Dataset.from_pandas(
        frame[[text_col, 'Label']].rename(columns={text_col: 'text', 'Label': 'label'})
    )


def start_expr(text_col, output_dir, max_length=128):
    """Fine-tune the classifier on one text column and evaluate it on the test split.

    Reads the module-level ``model_name``, ``grads``, ``DEVICE``,
    ``train_df_balanced``, ``transcripts_val`` and ``transcripts_test``.
    Only the parameters named in ``grads`` and those of encoder layer 11 are
    trained. The checkpoint with the best validation macro-F1 is reloaded at
    the end of training and used for the test predictions.

    Args:
        text_col: Name of the column holding the input text.
        output_dir: Sub-directory of ``data/`` that receives the checkpoints.
        max_length: Token length used for truncation/padding during training
            and for truncation at inference time.

    Returns:
        One-row DataFrame of test metrics (see ``print_metrics``).
    """
    seed_everything(42)

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    model.to(DEVICE)

    # Freeze everything except the listed head parameters and the last encoder layer.
    for name, p in model.named_parameters():
        p.requires_grad = name in grads or name.startswith("bert.encoder.layer.11")

    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    print(f"Trainable params: {trainable}/{total} ({100*trainable/total:.2f}%)")

    def tokenize(batch):
        # Uses the tokenizer loaded above instead of depending on a global variable.
        return tokenizer(batch['text'], truncation=True, padding='max_length', max_length=max_length)

    tokenized_train = _frame_to_dataset(train_df_balanced, text_col).map(tokenize, batched=True)
    tokenized_val = _frame_to_dataset(transcripts_val, text_col).map(tokenize, batched=True)

    # Define training arguments
    training_args = TrainingArguments(
        output_dir=f'data/{output_dir}',
        report_to="none",
        num_train_epochs=2,
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        eval_strategy="steps",
        eval_steps=2,
        save_strategy="steps",
        save_steps=2,
        save_total_limit=2,
        logging_dir="./logs",
        logging_steps=2,
        learning_rate=1e-4,
        weight_decay=0.2,  # Further increased regularization
        gradient_accumulation_steps=1,
        lr_scheduler_type='polynomial',
        load_best_model_at_end=True,
        metric_for_best_model="macro_f1",
        greater_is_better=True,
    )

    class PrintLossCallback(TrainerCallback):
        """Print the most recently logged train and eval loss at the end of every epoch."""

        @staticmethod
        def _latest(history, key):
            # Search backwards so the result does not depend on the exact order
            # in which train and eval entries were appended to the log.
            return next((entry[key] for entry in reversed(history) if key in entry), None)

        def on_epoch_end(self, args, state, control, **kwargs):
            train_loss = self._latest(state.log_history, 'loss')
            eval_loss = self._latest(state.log_history, 'eval_loss')
            epoch = int(state.epoch) if state.epoch is not None else '?'
            print(f"Epoch {epoch}: train_loss = {train_loss}, eval_loss = {eval_loss}")

    # Trainer (the validation split drives checkpoint selection)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_val,
        compute_metrics=compute_metrics,
        callbacks=[PrintLossCallback()],
    )

    trainer.train()

    # Print the chosen/best checkpoint path
    best_checkpoint = getattr(trainer.state, 'best_model_checkpoint', None)
    if best_checkpoint is not None:
        print(f"Best model checkpoint: {best_checkpoint}")
    else:
        print("No best model checkpoint found.")

    def label_to_int(label):
        # Pipeline labels are either ints or strings such as 'LABEL_1'.
        return int(label) if isinstance(label, int) else int(label.split('_')[-1])

    # Predict on test, truncating to the same length the model was trained with
    classifier = pipeline(
        "text-classification",
        model=trainer.model,
        tokenizer=tokenizer,
        device=0 if DEVICE == "cuda" else -1,
    )
    test_dataset = _frame_to_dataset(transcripts_test, text_col)
    y_true = list(test_dataset['label'])
    predictions = classifier(
        list(test_dataset['text']), batch_size=32, truncation=True, max_length=max_length
    )
    y_pred = [label_to_int(d['label']) for d in predictions]
    return print_metrics(y_true, y_pred)

# Run experiment on the original Text column

In [200]:
# Baseline: train and evaluate on the unmodified 'Text' column;
# checkpoints are written under data/validator_raw_checkpoint.
start_expr('Text', 'validator_raw_checkpoint')

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/SportsBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable params: 7680002/93722114 (8.19%)


Map:   0%|          | 0/1345 [00:00<?, ? examples/s]

Map:   0%|          | 0/201 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Precision Label 0,Precision Label 1,Recall Label 0,Recall Label 1,Accuracy,Micro F1,Macro F1,Weighted F1,True Negative,False Positive,False Negative,True Positive
2,0.6951,0.505779,0.0,0.836,0.0,1.0,0.836,0.836,0.455,0.761,0,33,0,168
4,0.7723,0.812309,0.168,1.0,1.0,0.024,0.184,0.184,0.167,0.086,33,0,164,4
6,0.6875,1.210286,0.164,0.0,1.0,0.0,0.164,0.164,0.141,0.046,33,0,168,0
8,0.8268,1.028179,0.164,0.0,1.0,0.0,0.164,0.164,0.141,0.046,33,0,168,0
10,0.6659,0.694702,0.211,0.897,0.727,0.464,0.507,0.507,0.469,0.565,24,9,90,78
12,0.6311,0.546289,0.421,0.863,0.242,0.935,0.821,0.821,0.602,0.8,8,25,11,157
14,0.6211,0.550948,0.367,0.871,0.333,0.887,0.796,0.796,0.614,0.792,11,22,19,149
16,0.567,0.631779,0.273,0.903,0.636,0.667,0.662,0.662,0.574,0.704,21,12,56,112
18,0.5784,0.663313,0.25,0.908,0.697,0.589,0.607,0.607,0.541,0.658,23,10,69,99
20,0.6168,0.569966,0.311,0.9,0.576,0.75,0.721,0.721,0.611,0.75,19,14,42,126


Epoch 1: train_loss = 0.5292, eval_loss = 0.47535067796707153
Epoch 2: train_loss = 0.2433, eval_loss = 0.543195903301239


Device set to use cuda:0


Best model checkpoint: data/validator_raw_checkpoint/checkpoint-62


Device set to use cuda:0


Unnamed: 0,precision_label_0,precision_label_1,recall_label_0,recall_label_1,micro_f1,macro_f1,true_negative,false_positive,false_negative,true_positive
0,0.484,0.902,0.647,0.825,0.789,0.708,44,24,47,221


# Run expr on Text_player_cleaned
(all player names normalized)

In [195]:
# Same experiment on the name-normalized text column;
# checkpoints are written under data/validator_again_checkpoint.
start_expr('Text_player_cleaned', 'validator_again_checkpoint')

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/SportsBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable params: 7680002/93722114 (8.19%)


Map:   0%|          | 0/1345 [00:00<?, ? examples/s]

Map:   0%|          | 0/201 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Precision Label 0,Precision Label 1,Recall Label 0,Recall Label 1,Accuracy,Micro F1,Macro F1,Weighted F1,True Negative,False Positive,False Negative,True Positive
2,0.6908,0.501373,0.0,0.836,0.0,1.0,0.836,0.836,0.455,0.761,0,33,0,168
4,0.7597,0.809334,0.17,0.895,0.939,0.101,0.239,0.239,0.235,0.199,31,2,151,17
6,0.6739,1.223629,0.164,0.0,1.0,0.0,0.164,0.164,0.141,0.046,33,0,168,0
8,0.8149,1.028835,0.162,0.667,0.97,0.012,0.169,0.169,0.15,0.065,32,1,166,2
10,0.6386,0.68026,0.248,0.935,0.818,0.512,0.562,0.562,0.521,0.615,27,6,82,86
12,0.6004,0.525664,0.333,0.865,0.303,0.881,0.786,0.786,0.595,0.782,10,23,20,148
14,0.5975,0.523877,0.333,0.877,0.394,0.845,0.771,0.771,0.611,0.779,13,20,26,142
16,0.5194,0.613526,0.28,0.916,0.697,0.649,0.657,0.657,0.58,0.701,23,10,59,109
18,0.5401,0.638652,0.267,0.913,0.697,0.625,0.637,0.637,0.564,0.684,23,10,63,105
20,0.5759,0.549952,0.338,0.919,0.667,0.744,0.731,0.731,0.636,0.761,22,11,43,125


Epoch 1: train_loss = 0.602, eval_loss = 0.489679753780365
Epoch 2: train_loss = 0.2295, eval_loss = 0.5743603110313416


Device set to use cuda:0


Best model checkpoint: data/validator_again_checkpoint/checkpoint-64


Unnamed: 0,precision_label_0,precision_label_1,recall_label_0,recall_label_1,micro_f1,macro_f1,true_negative,false_positive,false_negative,true_positive
0,0.609,0.893,0.574,0.907,0.839,0.745,39,29,25,243


# Run expr on Text_player_cleaned + Events
using [SEP] token

In [206]:
# Same experiment on "<events> [SEP] <name-normalized text>";
# checkpoints are written under data/validator_Plyaer_cleaned_events_checkpoint.
start_expr('Text_player_cleaned_and_events', 'validator_Plyaer_cleaned_events_checkpoint')

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/SportsBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable params: 7680002/93722114 (8.19%)


Map:   0%|          | 0/1345 [00:00<?, ? examples/s]

Map:   0%|          | 0/201 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Precision Label 0,Precision Label 1,Recall Label 0,Recall Label 1,Accuracy,Micro F1,Macro F1,Weighted F1,True Negative,False Positive,False Negative,True Positive
2,0.6869,0.501825,0.0,0.836,0.0,1.0,0.836,0.836,0.455,0.761,0,33,0,168
4,0.7807,0.825445,0.157,0.5,0.939,0.012,0.164,0.164,0.146,0.064,31,2,166,2
6,0.697,1.219761,0.164,0.0,1.0,0.0,0.164,0.164,0.141,0.046,33,0,168,0
8,0.8103,1.032033,0.164,0.0,1.0,0.0,0.164,0.164,0.141,0.046,33,0,168,0
10,0.6465,0.686723,0.222,0.903,0.727,0.5,0.537,0.537,0.492,0.594,24,9,84,84
12,0.6105,0.523706,0.35,0.856,0.212,0.923,0.806,0.806,0.576,0.786,7,26,13,155
14,0.6137,0.524169,0.281,0.858,0.273,0.863,0.766,0.766,0.569,0.765,9,24,23,145
16,0.5565,0.636727,0.268,0.908,0.667,0.643,0.647,0.647,0.568,0.692,22,11,60,108
18,0.545,0.713127,0.241,0.925,0.788,0.512,0.557,0.557,0.514,0.611,26,7,82,86
20,0.5807,0.630016,0.278,0.91,0.667,0.661,0.662,0.662,0.579,0.704,22,11,57,111


Epoch 1: train_loss = 0.5555, eval_loss = 0.5102724432945251
Epoch 2: train_loss = 0.1935, eval_loss = 0.5745528936386108


Device set to use cuda:0


Best model checkpoint: data/validator_Plyaer_cleaned_events_checkpoint/checkpoint-62


Unnamed: 0,precision_label_0,precision_label_1,recall_label_0,recall_label_1,micro_f1,macro_f1,true_negative,false_positive,false_negative,true_positive
0,0.5,0.888,0.574,0.854,0.798,0.702,39,29,39,229
