<a href="https://colab.research.google.com/github/jsl5710/greenland/blob/main/Notebook/GREENLAND_Code_base.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Requirements

## Dependencies

In [1]:
# Dependencies installation
!pip install transformers datasets # Transformers installation
!pip install wandb # Weights and bias installation
!pip install colorama
!pip install evaluate

Collecting datasets
  Downloading datasets-2.16.1-py3-none-any.whl (507 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dill, multiprocess, datasets
Successfully installed datasets-2.16.1 dill-0.3.7 multiprocess-0.70.15
Collecting wandb
  Downloading wandb-0.16.2-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0

## Libraries

In [2]:
# Google Drive Libraries imports
import os
from google.colab import data_table
data_table.enable_dataframe_formatter()
import pandas as pd
from google.colab import drive

# Wieght and Biases imports
import wandb

#import colorama
from colorama import Fore

#Data
from datasets import Dataset, DatasetDict, load_dataset, ClassLabel, Value
from sklearn.model_selection import train_test_split
from transformers import DataCollatorWithPadding
import torch
from torch.utils.data import DataLoader #import DataLoader
from collections import Counter


#Modeling
import torch
from tqdm.auto import tqdm
from transformers import AdamW, AutoModelForSequenceClassification, get_scheduler, AutoTokenizer
import torch.nn as nn

#Evaluation
import evaluate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Parameters

In [3]:
#weights and biases
experiment = 'RQ1'
#model param
pretrained_model ='xlm-roberta-large'# pretrained model
num_labels = 2 # number of labels

#dataset
file_path = '/content/drive/MyDrive/GREENLAND/Datasets/MultiClaim/experiment_data.csv' #Path to your data source
format= 'csv' #dataloader file format

test_size=0.3 # train size is equal to 0.7 when test size equals to 0.3
stratify_by= ['language', 'labels'] #stratify dataset by the following
random_state=42 # random seed

#modeling
batch_size = 8 # batch size
num_epochs = 8 # number of epochs
learning_rate = 5e-5 # learning rate
warmup_steps = 0 # number of warmup steps
weight_decay = 0.01 # weight decay

# Functions

### **Setup**

In [4]:
#function to mount drive
def mount_drive():
  """
  Attempts to mount Google Drive at a specified path in a cloud environment (like Google Colab).

  Parameters:
  - None

  Behavior:
  - Uses the drive.mount method to mount Google Drive at the path '/content/drive/'.
  - If successful, prints a success message in blue.
  - If an exception occurs (e.g., authentication failure), it prints a failure message in red.

  Returns:
  - None: This function only prints messages indicating the outcome.
    """
  try:
    drive.mount('/content/drive/')
    print(Fore.BLUE +'Successfully mount Google drive')
  except:
    print(Fore.RED +'Failed to mount Google drive')
    pass


#function to attempt to forcibly remount Google drive
def remount_drive():
  """
  Attempts to forcibly remount Google Drive, typically used to refresh the connection.

  Parameters:
  - None

  Behavior:
  - Uses the drive.mount method with the force_remount=True argument.
  - Prints a success message in blue if remount is successful, or a failure message in red if it fails.

  Returns:
  - None: The function outputs messages indicating the success or failure of the remount operation.
  """
  try:
    drive.mount('/content/drive/', force_remount=True)
    print(Fore.BLUE + 'Successfully remount drive')
    print(Fore.RESET)
  except:
    print(Fore.RED +'Failed to remount drive')
    print(Fore.RESET)
    pass

#function to login to Weight and Biases and set project
def login_wandb():
  """
  Logs into Weights & Biases (a machine learning experiment tracking tool) and initializes a project.

  Parameters:
  - None

  Behavior:
  - Attempts to log in to Weights & Biases using wandb.login().
  - Initializes a new project named 'greenland' with wandb.init().
  - Prints a success message in blue upon successful login and initialization, or a failure message in red if it fails.

  Returns:
  - None: The function provides console output regarding the success or failure of the login and initialization process.
  """
  try:
    wandb.login()
    wandb.init(project='greenland', name=f"{experiment}_{pretrained_model}",) # change project name here
    print(Fore.BLUE + 'Successfully login to Weight and Biases')
    print(Fore.RESET)
  except:
    print(Fore.RED +'Failed to login to Weight and Biases')
    print(Fore.RESET)
    pass





### **Data Processing**

In [5]:
# function to load data as a Dataframe
def import_data(path, format):

  """
  Loads data from a specified file path into a pandas DataFrame.

  Parameters:
  - path (str): The file path of the data to be imported.
  - encoding (str): The encoding format to be used for reading the file.

  Behavior:
  - Attempts to read a CSV file from the given path using the specified encoding with pd.read_csv().
  - Prints a success message in blue if the data is imported successfully, or a failure message in red if it fails.

  Returns:
  - df (DataFrame): The DataFrame containing the imported data if successful; otherwise, None.
  """
  try:
    df = load_dataset(format, data_files=path, split='train')
    print(Fore.BLUE +'Successfully import data')
    print(Fore.RESET)
    return df
  except:
    print(Fore.RED +'Failed to import data')
    print(Fore.RESET)
    pass


# function to split data into train and test sets
def stratified_train_test_split(dataset, test_size, stratify_by, random_state):
    """
    Splits the dataset into train and test sets, stratified by specified columns.
    """
    try:
        # Convert to pandas DataFrame for stratification
        df = dataset.to_pandas()

        # Check if stratify columns exist
        for col in stratify_by:
            if col not in df.columns:
                raise ValueError(f"Stratify column '{col}' not found in the dataset.")

        # Create a combined column for stratification
        df['combined_stratify'] = df[stratify_by].apply(lambda row: '_'.join(map(str, row)), axis=1)

        # Filter out categories with less than 2 instances
        value_counts = df['combined_stratify'].value_counts()
        valid_stratify_values = value_counts[value_counts >= 2].index
        df = df[df['combined_stratify'].isin(valid_stratify_values)]

        # Perform stratified split
        train_idx, testvalid_idx = train_test_split(
            df.index,
            test_size=test_size,
            stratify=df['combined_stratify'],
            random_state=random_state
        )

        # Use indices to create train and test/validation datasets
        train_dataset = dataset.select(train_idx)
        testvalid_dataset = dataset.select(testvalid_idx)

        # Wrap the splits in a DatasetDict
        train_testvalid = DatasetDict({
            'train': train_dataset,
            'test': testvalid_dataset
        })
        train_testvalid = train_testvalid.cast_column('labels', ClassLabel(names=['True', 'False'])) # Cast column to class labels

        print(Fore.BLUE + 'Successfully split and stratified the dataset.')
        print(Fore.RESET)

        return train_testvalid

    except Exception as e:
        print(Fore.RED + f'Failed to split and stratify the dataset: {e}')
        print(Fore.RESET)



def tokenize_and_prepare_data(dataset_dict, tokenizer_checkpoint):
    """
    Tokenizes each subset of a DatasetDict using the specified tokenizer checkpoint, retains only 'text' and 'label' columns,
    and prepares a data collator for dynamic padding.

    Parameters:
    - dataset_dict (DatasetDict): The dataset dictionary to be tokenized. Should contain 'train' and 'test' datasets.
    - tokenizer_checkpoint (str): The checkpoint of the tokenizer to use.

    Returns:
    - tokenized_datasets (DatasetDict): The tokenized version of the input dataset with only 'text' and 'label' columns.
    - data_collator (DataCollatorWithPadding): A data collator that dynamically pads the batched data.
    """
    try:
      # Load tokenizer
      tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)

      # Define the tokenize function
      def tokenize_function(example):
          # Tokenize and retain only 'text' and 'label' columns
          tokenized_output = tokenizer(example['text'], truncation=True)
          tokenized_output['labels'] = example['labels']
          return tokenized_output

      # Apply tokenization to each subset of the dataset and filter columns
      tokenized_datasets = DatasetDict({
          split: ds.map(tokenize_function, batched=True, remove_columns=[col for col in ds.column_names if col not in ['text', 'label']])
          for split, ds in dataset_dict.items()
      })

      # Initialize the data collator
      data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

      print(Fore.BLUE + 'Successfully tokenized and prepared the dataset.')
      print(Fore.RESET)
      return tokenized_datasets, data_collator
    except Exception as e:
      print(Fore.RED + f'Failed to tokenize and prepare the dataset: {e}')
      print(Fore.RESET)


def prepare_dataloaders(dataset_dict, data_collator, batch_size=8):
    """
    Removes unwanted columns, sets dataset format to PyTorch tensors, and creates DataLoader for train and test sets.

    Parameters:
    - dataset_dict (DatasetDict): The tokenized dataset dictionary.
    - data_collator (DataCollatorWithPadding): A data collator that dynamically pads the batched data.
    - batch_size (int, optional): Batch size for the DataLoader. Default is 8.

    Returns:
    - train_dataloader (DataLoader): DataLoader for the training dataset.
    - eval_dataloader (DataLoader): DataLoader for the evaluation (test) dataset.
    - first_batch (dict): The first batch from the training DataLoader.
    """
    try:
      # Remove unwanted columns and set dataset format to torch tensors
      dataset_dict = dataset_dict.remove_columns(['text'])
      dataset_dict.set_format("torch")

      # Define the DataLoaders
      train_dataloader = DataLoader(
          dataset_dict["train"], shuffle=True, batch_size=batch_size, collate_fn=data_collator
      )
      eval_dataloader = DataLoader(
          dataset_dict["test"], batch_size=batch_size, collate_fn=data_collator
      )

      # Inspect the first batch
      first_batch = next(iter(train_dataloader))
      first_batch_inspection = {k: v.shape for k, v in first_batch.items()}

      print(Fore.BLUE + 'Successfully prepared the dataloaders.')
      print(Fore.RESET)

      return train_dataloader, eval_dataloader, first_batch_inspection
    except Exception as e:
      print(Fore.RED + f'Failed to prepare the dataloaders: {e}')
      print(Fore.RESET)


# Initialize a counter
label_counter = Counter()

def count_values(example):
    label_counter.update([example['labels']])
    return example


## Modeling

In [6]:
def create_sequence_classification_model(model_checkpoint, num_labels):
    """
    Initializes a model for sequence classification using a specified pre-trained model.

    Parameters:
    - model_checkpoint (str): The checkpoint identifier of the pre-trained model.
    - num_labels (int): The number of labels for the classification task.

    Returns:
    - model: The initialized sequence classification model.
    """
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)
    return model

def setup_training_tools(num_epochs, train_dataloader, optimizer, num_training_steps):
    """
    Sets up a learning rate scheduler and a progress bar for the training process.

    Parameters:
    - num_epochs (int): The number of epochs for training.
    - train_dataloader (DataLoader): The DataLoader used for training.
    - optimizer: The optimizer used for training.

    Returns:
    - lr_scheduler: A learning rate scheduler.
    - progress_bar: A tqdm progress bar for tracking training steps.
    """
    lr_scheduler = get_scheduler(
        "linear",
        optimizer=optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=num_training_steps,
    )

    return lr_scheduler


def train_model(model, train_dataloader, optimizer, lr_scheduler, num_epochs, num_training_steps):
    """
    Trains the model for a specified number of epochs.

    Parameters:
    - model: The model to be trained.
    - train_dataloader (DataLoader): The DataLoader for the training data.
    - optimizer: The optimizer used for training.
    - lr_scheduler: The learning rate scheduler.
    - progress_bar: The tqdm progress bar for tracking training steps.
    - num_epochs (int): The number of epochs for training.
    - device: The device (CPU or GPU) to train the model on.

    Returns:
    - None: The function performs training and updates the model in place.
    """
    try:
      # Set up the progress bar
      progress_bar = tqdm(range(num_training_steps))
      model.train()
      for epoch in range(num_epochs):
          for batch in train_dataloader:
              # Move batch to the specified device
              batch = {k: v.to(device) for k, v in batch.items()}

              # Forward pass
              outputs = model(**batch)
              loss = outputs.loss

              # Backward pass
              loss.backward()

              # Update parameters and learning rate
              optimizer.step()
              lr_scheduler.step()
              optimizer.zero_grad()

              # Update the progress bar
              progress_bar.update(1)

      print(Fore.BLUE + 'Successfully trained the model.')
      print(Fore.RESET)
    except Exception as e:
      print(Fore.RED + f'Failed to train the model: {e}')
      print(Fore.RESET)



def train_model_with_wandb(model, train_dataloader, eval_dataloader, optimizer, lr_scheduler, num_epochs, num_training_steps, device):
    best_val_metric = float('inf')  # Initialize with a high value if monitoring loss
    best_model_state = None

    try:
        # Assume login_wandb() is a function that logs in to wandb
        login_wandb()
        wandb.watch(model, log='all')

        for epoch in range(num_epochs):
            model.train()
            total_train_loss = 0
            for batch in train_dataloader:
                batch = {k: v.to(device) for k, v in batch.items()}
                outputs = model(**batch)
                loss = outputs.loss
                total_train_loss += loss.item()

                loss.backward()
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()

            avg_train_loss = total_train_loss / len(train_dataloader)

            # Evaluation
            model.eval()
            total_val_loss = 0
            all_predictions = []
            all_labels = []
            with torch.no_grad():
                for batch in eval_dataloader:
                    batch = {k: v.to(device) for k, v in batch.items()}
                    outputs = model(**batch)

                    val_loss = outputs.loss
                    total_val_loss += val_loss.item()

                    logits = outputs.logits
                    predictions = torch.argmax(logits, dim=-1).cpu().numpy()
                    labels = batch['labels'].cpu().numpy()

                    all_predictions.extend(predictions)
                    all_labels.extend(labels)

            avg_val_loss = total_val_loss / len(eval_dataloader)

            # Calculate metrics
            accuracy = accuracy_score(all_labels, all_predictions)
            precision = precision_score(all_labels, all_predictions, average='macro')
            recall = recall_score(all_labels, all_predictions, average='macro')
            f1 = f1_score(all_labels, all_predictions, average='macro')

            wandb.log({"epoch": epoch, "train_loss": avg_train_loss, "val_loss": avg_val_loss, "accuracy": accuracy, "precision": precision, "recall": recall, "f1_score": f1})

            # Print epoch results
            print(f"Epoch {epoch+1}/{num_epochs}:")
            print(f"Train Loss: {avg_train_loss}, Validation Loss: {avg_val_loss}")
            print(f"Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1: {f1}")

            # Check if the current model is the best
            if avg_val_loss < best_val_metric:
                best_val_metric = avg_val_loss
                best_model_state = model.state_dict().copy()

        # Save the best model
        if best_model_state is not None:
            torch.save(best_model_state, "best_model.pth")

        print('Successfully trained the model.')

    except Exception as e:
        print(f'Failed to train the model: {e}')

    finally:
        wandb.finish()



def evaluate_model(model, eval_dataloader, device):
    """
    Evaluates the model on the evaluation dataset using multiple metrics and computes average validation loss.

    Parameters:
    - model: The model to be evaluated.
    - eval_dataloader (DataLoader): The DataLoader for the evaluation data.
    - device: The device (CPU or GPU) for evaluation.

    Returns:
    - avg_loss: Average loss over the evaluation dataset.
    - results: A dictionary of aggregated metric scores.
    """
    metric = evaluate.combine(["accuracy", "f1", "precision", "recall"])

    total_loss = 0
    num_batches = 0

    model.eval()
    for batch in eval_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}

        with torch.no_grad():
            outputs = model(**batch)
            loss = outputs.loss

        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        total_loss += loss.item()
        num_batches += 1

        metric.add_batch(predictions=predictions, references=batch["labels"])

    avg_loss = total_loss / num_batches if num_batches > 0 else 0
    metrics_result = metric.compute()
    return avg_loss, metrics_result


# def setup_device_and_model(model):
#     """
#     Sets up the device (GPU or CPU) for training and moves the model to the selected device.

#     Parameters:
#     - model: The model to be moved to the device.

#     Returns:
#     - device: The selected device (CUDA or CPU).
#     - model: The model moved to the device.
#     """
#     try:
#     # Set up the device
#       device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

#       # Move the model to the device
#       model.to(device)
#       print(Fore.BLUE + 'Device: '+ device)
#       print(Fore.BLUE + 'Successfully set up the device and model.')
#       print(Fore.RESET)
#       return model
#     except Exception as e:
#       print(Fore.RED + f'Failed to set up the device and model: {e}')
#       print(Fore.RESET)

#     return model

# Setup

In [7]:
mount_drive() #mount drive
login_wandb() # login and access weights and bias project

Mounted at /content/drive/
[34mSuccessfully mount Google drive


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mjasonsamlucas[0m ([33mpike[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34mSuccessfully login to Weight and Biases
[39m


# Data Preparation

In [8]:
#Prepare data
raw_datasets = import_data(file_path,format) # import dataset
train_testvalid = stratified_train_test_split(raw_datasets, test_size, stratify_by, random_state) # Split data into train, test, and validation sets
tokenized_datasets, data_collator = tokenize_and_prepare_data(train_testvalid, pretrained_model) # Tokenize and prepare data
train_dataloader, eval_dataloader, first_batch = prepare_dataloaders(tokenized_datasets, data_collator, batch_size) # Prepare dataloaders

Generating train split: 0 examples [00:00, ? examples/s]

[34mSuccessfully import data
[39m


Casting the dataset:   0%|          | 0/17224 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/7383 [00:00<?, ? examples/s]

[34mSuccessfully split and stratified the dataset.
[39m


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

Map:   0%|          | 0/17224 [00:00<?, ? examples/s]

Map:   0%|          | 0/7383 [00:00<?, ? examples/s]

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


[34mSuccessfully tokenized and prepared the dataset.
[39m
[34mSuccessfully prepared the dataloaders.
[39m


In [9]:
print(first_batch) # Inspect the first batch to confirm if the shapes of 'input_ids' and 'attention_mask' are the same.

{'labels': torch.Size([8]), 'input_ids': torch.Size([8, 38]), 'attention_mask': torch.Size([8, 38])}


In [10]:
# Apply the map function
train_testvalid["test"].map(count_values)

# The label_counter now contains the counts
print(label_counter)


Map:   0%|          | 0/7383 [00:00<?, ? examples/s]

Counter({0: 6688, 1: 695})


In [11]:
print(train_testvalid["train"].features)

{'Unnamed: 0.1': Value(dtype='int64', id=None), 'Unnamed: 0': Value(dtype='int64', id=None), 'text': Value(dtype='string', id=None), 'labels': ClassLabel(names=['True', 'False'], id=None), 'language': Value(dtype='string', id=None), 'language_family': Value(dtype='string', id=None)}


# Model

In [12]:
model = create_sequence_classification_model(pretrained_model, num_labels) #load model

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
optimizer = AdamW(model.parameters(), lr=learning_rate) #define optimizer: create the default learning rate scheduler from Trainer



In [14]:
num_training_steps = num_epochs * len(train_dataloader)

In [15]:
lr_scheduler = setup_training_tools(num_epochs, train_dataloader, optimizer, num_training_steps) #setup training tools

In [16]:
# setup_device_and_model(model)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [17]:
model.to(device)

XLMRobertaForSequenceClassification(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=1024, out_fe

In [18]:
# Example usage
# Make sure that model, train_dataloader, eval_dataloader, optimizer, lr_scheduler, num_epochs, num_training_steps, and device are defined
train_model_with_wandb(model, train_dataloader, eval_dataloader, optimizer, lr_scheduler, num_epochs, num_training_steps, device)




VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

[34mSuccessfully login to Weight and Biases
[39m


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/8:
Train Loss: 0.3236098265082, Validation Loss: 0.317957262798159
Accuracy: 0.9058648245970473, Precision: 0.45293241229852366, Recall: 0.5, F1: 0.47530381635988916


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/8:
Train Loss: 0.3184431684530036, Validation Loss: 0.31662376615685994
Accuracy: 0.9058648245970473, Precision: 0.45293241229852366, Recall: 0.5, F1: 0.47530381635988916


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3/8:
Train Loss: 0.31690071730823005, Validation Loss: 0.3144638127408203
Accuracy: 0.9058648245970473, Precision: 0.45293241229852366, Recall: 0.5, F1: 0.47530381635988916


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 4/8:
Train Loss: 0.31599890773954137, Validation Loss: 0.31226022161325423
Accuracy: 0.9058648245970473, Precision: 0.45293241229852366, Recall: 0.5, F1: 0.47530381635988916


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5/8:
Train Loss: 0.31526261265616723, Validation Loss: 0.3137472944731702
Accuracy: 0.9058648245970473, Precision: 0.45293241229852366, Recall: 0.5, F1: 0.47530381635988916


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6/8:
Train Loss: 0.31462199433849014, Validation Loss: 0.3120280461444369
Accuracy: 0.9058648245970473, Precision: 0.45293241229852366, Recall: 0.5, F1: 0.47530381635988916


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 7/8:
Train Loss: 0.3134375043590568, Validation Loss: 0.31386386975242414
Accuracy: 0.9058648245970473, Precision: 0.45293241229852366, Recall: 0.5, F1: 0.47530381635988916


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 8/8:
Train Loss: 0.3133166731753518, Validation Loss: 0.31246886420934455
Accuracy: 0.9058648245970473, Precision: 0.45293241229852366, Recall: 0.5, F1: 0.47530381635988916
Successfully trained the model.


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁▁▁▁▁▁▁▁
epoch,▁▂▃▄▅▆▇█
f1_score,▁▁▁▁▁▁▁▁
precision,▁▁▁▁▁▁▁▁
recall,▁▁▁▁▁▁▁▁
train_loss,█▄▃▃▂▂▁▁
val_loss,█▆▄▁▃▁▃▂

0,1
accuracy,0.90586
epoch,7.0
f1_score,0.4753
precision,0.45293
recall,0.5
train_loss,0.31332
val_loss,0.31247


In [22]:
# train model without weights and bias
# train_model(model, train_dataloader, optimizer, lr_scheduler, num_epochs, num_training_steps)


  0%|          | 0/6459 [00:00<?, ?it/s]

[34mSuccessfully trained the model.
[39m


In [33]:
with wandb.init(project="greenland"):
    # Your evaluation code
    results = evaluate_model(model, eval_dataloader, device)
    print(results)
    # Any additional wandb logging


  _warn_prf(average, modifier, msg_start, len(result))


(0.31271060372128645, {'accuracy': 0.9058648245970473, 'f1': 0.0, 'precision': 0.0, 'recall': 0.0})


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [20]:
with wandb.init(project="greenland"):
  model.eval()  # Set the model to evaluation mode
  all_predictions = []
  all_labels = []

  for batch in eval_dataloader:
      # Move batch to the same device as the model
      batch = {k: v.to(device) for k, v in batch.items()}

      with torch.no_grad():
          outputs = model(**batch)

      logits = outputs.logits
      predictions = torch.argmax(logits, dim=-1).cpu().numpy()
      labels = batch['labels'].cpu().numpy()

      all_predictions.extend(predictions)
      all_labels.extend(labels)

# Calculate precision, recall, and F1 score
precision = precision_score(all_labels, all_predictions, average='macro')
recall = recall_score(all_labels, all_predictions, average='macro')
f1 = f1_score(all_labels, all_predictions, average='macro')

print(f'Precision: {precision}, Recall: {recall}, F1 Score: {f1}')


VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded\r'), FloatProgress(value=0.2056437071763433, max=1.0…

Precision: 0.45293241229852366, Recall: 0.5, F1 Score: 0.47530381635988916


  _warn_prf(average, modifier, msg_start, len(result))
