In [None]:
!pip install --upgrade --no-cache-dir torch==2.0.1 torchvision==0.15.2 transformers==4.30.2

Collecting torch==2.0.1
  Downloading torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl.metadata (24 kB)
Collecting torchvision==0.15.2
  Downloading torchvision-0.15.2-cp310-cp310-manylinux1_x86_64.whl.metadata (11 kB)
Collecting transformers==4.30.2
  Downloading transformers-4.30.2-py3-none-any.whl.metadata (113 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m113.6/113.6 kB[0m [31m265.8 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cudnn-cu11==8.5.0.96 (from torch==2.0.1)
  Downloading nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu11==11.10.3.66 (from torch==2.0.1)
  Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusolver-cu11==11.4.0.1 (from torch==2.0.1)
  Downloading nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting triton==2.0.0 (from torch==2.0.1)
  Downloading triton-2.0.0-1-cp310-cp310-manylinux2

In [None]:
import os
import logging
import numpy as np
import torch
import torch.nn as nn
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoConfig,
    default_data_collator,
)
from sklearn.metrics import roc_auc_score
import wandb
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup
from tqdm.auto import tqdm
import psutil
import warnings
from google.colab import userdata
import os


# Hide UserWarning / FutureWarning noise for the rest of the session.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)


# Never store the W&B API key in the notebook itself. A key that is already
# exported in the environment wins; otherwise it is read from the Colab
# secrets store (secret name: WANDB_API_KEY). A missing secret fails loudly
# here instead of producing a confusing login error later.
if not os.environ.get("WANDB_API_KEY"):
    os.environ["WANDB_API_KEY"] = userdata.get("WANDB_API_KEY")

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler()
    ],
    # basicConfig() does nothing when the root logger already has handlers
    # (which a hosted notebook runtime may have installed before this cell
    # runs); force=True replaces them so the INFO messages below are emitted.
    force=True,
)

# Static configuration for the experiment. The whole dict is handed to
# wandb.init(config=...) inside trainer(), so every entry is recorded with the
# run even when no code in this cell reads it back.
global_config = {
    # Project name used by both wandb.init() and wandb.sweep().
    'wandb_project': 'NLP_GoEmotions_Classifcation',
    # Not read by the code in this cell: trainer() names the run via
    # generate_run_name(). NOTE(review): the value mentions "roberta" while
    # model.name below is a BERT checkpoint - confirm which is intended.
    'wandb_run_name': 'roberta-multi-label-classification',

    'model': {
        # name / use_fast_tokenizer / ignore_mismatched_sizes are read by
        # build_dataset() and get_model().
        'name': 'bert-large-uncased',
        'use_fast_tokenizer': True,
        'ignore_mismatched_sizes': True,
        # get_model() sets the problem type itself and receives dropout from
        # the sweep key "model.dropout"; these two entries are not read there.
        'problem_type': 'multi_label_classification',
        'dropout': 0.3
    },

    'tokenizer': {
        # truncation and padding are read by build_dataset().
        'truncation': True,
        'padding': 'max_length',
        # build_dataset() takes its max length as an argument; trainer()
        # passes the sweep key "tokenizer.max_length", not this entry.
        'max_length': 64
    },

    # Hugging Face Trainer-style arguments. No code in this cell reads this
    # nested dict; trainer() uses the flat dotted keys supplied by
    # sweep_config (e.g. "training_args.num_train_epochs").
    'training_args': {
        'output_dir': './roberta-go-emotions',
        'per_device_train_batch_size': 16,
        'per_device_eval_batch_size': 16,
        'num_train_epochs': 10,
        'learning_rate': 2e-5,
        'weight_decay': 0.01,
        'logging_steps': 50,
        'report_to': 'wandb',
        'remove_unused_columns': False,
        'load_best_model_at_end': True,
        'metric_for_best_model': 'auc_score',
        'greater_is_better': True
    },

    # trainer() reads the batch size from the sweep key
    # "dataloader.batch_size", not from this nested entry.
    'dataloader': {
        'batch_size': 16
    },

    # Read by trainer() through run_config["optimizer"] as the source of the
    # AdamW settings given to get_optimizer() ('lr' and 'weight_decay').
    'optimizer': {
        'type': 'AdamW',
        'lr': 2e-5,
        'weight_decay': 0.01
    },

    # Read by trainer() through run_config["scheduler"]; get_scheduler() only
    # accepts type 'linear' and converts warmup_ratio into warmup steps.
    'scheduler': {
        'type': 'linear',
        'warmup_ratio': 0.1
    }
}


# W&B sweep definition passed to wandb.sweep() in the __main__ block.
# Parameter names are flat strings containing dots; trainer() and
# generate_run_name() look them up verbatim (e.g.
# run_config["dataloader.batch_size"]), not as nested global_config entries.
sweep_config = {
    # Random search over the listed choices.
    'method': 'random',
    # Objective reported to the sweep; trainer() logs "auc_score" every epoch.
    'metric': {
        'name': 'auc_score',
        'goal': 'maximize'
    },
    'parameters': {
        # 'value' pins a parameter to a single setting for every run.
        'training_args.num_train_epochs': {'value': 10},

        # 'values' lists the candidates a run may be assigned.
        'training_args.learning_rate': {
            'values': [5e-5, 3e-5]
        },
        'dataloader.batch_size': {
            'values': [32, 64]
        },
        # get_model() applies this rate to the hidden, attention and
        # classifier dropout of the model config.
        'model.dropout': {
            'values': [0.3, 0.4, 0.5]
        },
        # Token length build_dataset() pads/truncates to.
        'tokenizer.max_length': {'value': 40},
    }
}


def wandb_login_with_secrets():
    """Log in to Weights & Biases with the key stored in ``WANDB_API_KEY``.

    Raises:
        RuntimeError: if the environment variable is missing or the login
            call raises; the original exception is attached as the cause.
    """
    try:
        api_key = os.environ["WANDB_API_KEY"]
        wandb.login(key=api_key)
        logging.info("W&B login successful.")
    except Exception as login_error:
        logging.error("W&B login failed.")
        raise RuntimeError("W&B login failed.") from login_error


def generate_run_name(config):
    """Build a readable run name from the swept hyperparameters.

    Args:
        config: mapping indexed by the flat dotted sweep keys
            (``"training_args.learning_rate"``, ``"dataloader.batch_size"``,
            ``"training_args.num_train_epochs"``, ``"model.dropout"``).

    Returns:
        A string of the form ``lr-<lr>_batch-<bs>_epochs-<n>_dropout-<p>``.

    Raises:
        KeyError: if any of the four keys is missing from ``config``.
    """
    # (label used in the name, key looked up in the config), in output order.
    name_fields = (
        ("lr", "training_args.learning_rate"),
        ("batch", "dataloader.batch_size"),
        ("epochs", "training_args.num_train_epochs"),
        ("dropout", "model.dropout"),
    )

    segments = []
    for label, key in name_fields:
        try:
            value = config[key]
        except KeyError as e:
            raise KeyError(f"Missing configuration key: {e}")
        segments.append(f"{label}-{value}")

    return "_".join(segments)


def build_dataset(tokenize_max_len):
    """Load GoEmotions, multi-hot encode the labels and tokenize the text.

    Args:
        tokenize_max_len: value passed to the tokenizer as ``max_length``.

    Returns:
        ``(encoded_dataset, tokenizer)`` where ``encoded_dataset`` is the
        processed dataset with torch formatting on ``input_ids``,
        ``attention_mask`` and ``labels``, and ``tokenizer`` is the tokenizer
        loaded for ``global_config['model']['name']``.

    Raises:
        AssertionError: if the train, validation or test split is empty.
    """
    raw_splits = load_dataset("go_emotions")

    # The label vocabulary comes from the dataset's own feature metadata.
    label_names = raw_splits["train"].features["labels"].feature.names
    num_labels = len(label_names)
    logging.info(f"Number of labels: {num_labels}")

    def to_multi_hot(examples):
        # One row per example, one column per label; listed label ids get 1.0.
        label_lists = examples["labels"]
        matrix = np.zeros((len(label_lists), num_labels), dtype=np.float32)
        for row, active_ids in enumerate(label_lists):
            for label_id in active_ids:
                matrix[row, label_id] = 1.0
        examples["labels"] = matrix
        return examples

    multi_hot_splits = raw_splits.map(to_multi_hot, batched=True)

    tokenizer = AutoTokenizer.from_pretrained(
        global_config['model']['name'],
        use_fast=global_config['model']['use_fast_tokenizer']
    )

    def tokenize_batch(examples):
        # Truncation / padding policy comes from global_config; the length
        # limit is the caller-supplied tokenize_max_len.
        tokenized = tokenizer(
            examples["text"],
            truncation=global_config['tokenizer']['truncation'],
            padding=global_config['tokenizer']['padding'],
            max_length=tokenize_max_len
        )
        tokenized["labels"] = examples["labels"]
        return tokenized

    encoded_dataset = multi_hot_splits.map(tokenize_batch, batched=True)

    def labels_as_float32(batch):
        batch["labels"] = np.array(batch["labels"], dtype=np.float32)
        return batch

    encoded_dataset = encoded_dataset.map(labels_as_float32, batched=True)

    # Sanity check: every split must contain at least one example.
    for split in ("train", "validation", "test"):
        assert len(encoded_dataset[split]) > 0, f"{split} split is empty!"
        logging.info(f"{split} split has {len(encoded_dataset[split])} examples.")

    # Expose only the columns the training loop reads, as torch tensors.
    encoded_dataset.set_format(
        type="torch",
        columns=["input_ids", "attention_mask", "labels"]
    )

    return encoded_dataset, tokenizer

def float_labels_data_collator(features):
    """Collate with ``default_data_collator`` and cast ``labels`` to float.

    Args:
        features: the list of examples handed over by the ``DataLoader``.

    Returns:
        The collated batch; when it has a ``"labels"`` entry, that entry is
        replaced by its ``.float()`` version.
    """
    collated = default_data_collator(features)
    if "labels" not in collated:
        return collated
    collated["labels"] = collated["labels"].float()
    return collated

def build_dataloader(train_dataset, validate_dataset, batch_size):
    """Create the training and validation ``DataLoader`` objects.

    Args:
        train_dataset: dataset iterated in shuffled order for training.
        validate_dataset: dataset iterated in its stored order for evaluation.
        batch_size: batch size used by both loaders.

    Returns:
        ``(train_data_loader, validation_data_loader)``.
    """
    # Both loaders share the batch size and the float-label collator; only
    # the shuffling differs.
    shared_kwargs = dict(
        batch_size=batch_size,
        collate_fn=float_labels_data_collator,
    )

    train_data_loader = DataLoader(train_dataset, shuffle=True, **shared_kwargs)
    validation_data_loader = DataLoader(validate_dataset, shuffle=False, **shared_kwargs)

    logging.info("DataLoaders created successfully.")
    return train_data_loader, validation_data_loader


def get_model(n_train_steps, dropout, num_labels):
    """Load the pretrained checkpoint with a multi-label classification head.

    Args:
        n_train_steps: accepted for interface compatibility; not used here.
        dropout: rate written to the hidden, attention and classifier
            dropout fields of the model config.
        num_labels: number of output labels of the classification head.

    Returns:
        The model built by ``AutoModelForSequenceClassification`` for
        ``global_config['model']['name']``.
    """
    checkpoint = global_config['model']['name']
    config_model = AutoConfig.from_pretrained(checkpoint)

    # The same rate is applied to all three dropout settings.
    for dropout_field in (
        "hidden_dropout_prob",
        "attention_probs_dropout_prob",
        "classifier_dropout",
    ):
        setattr(config_model, dropout_field, dropout)

    config_model.num_labels = num_labels
    # Set explicitly rather than relying on the library to infer it.
    config_model.problem_type = 'multi_label_classification'

    model = AutoModelForSequenceClassification.from_pretrained(
        checkpoint,
        config=config_model,
        ignore_mismatched_sizes=global_config['model']['ignore_mismatched_sizes']
    )

    logging.info("Model initialized successfully.")
    return model

def get_optimizer(model, config_optimizer):
    """Create an AdamW optimizer over all parameters of ``model``.

    Args:
        model: module whose ``parameters()`` are optimized.
        config_optimizer: mapping providing ``'lr'`` and ``'weight_decay'``.

    Returns:
        The configured ``AdamW`` instance.
    """
    trainable = model.parameters()
    hyperparams = {
        "lr": config_optimizer['lr'],
        "weight_decay": config_optimizer['weight_decay'],
    }
    optimizer = AdamW(trainable, **hyperparams)
    logging.info("Optimizer initialized successfully.")
    return optimizer

def get_scheduler(optimizer, n_train_steps, config_scheduler):
    """Create the learning-rate scheduler described by ``config_scheduler``.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        n_train_steps: total number of training steps for the schedule.
        config_scheduler: mapping with ``'type'`` and, for the linear
            schedule, ``'warmup_ratio'`` (fraction of ``n_train_steps`` spent
            warming up).

    Returns:
        The scheduler returned by ``get_linear_schedule_with_warmup``.

    Raises:
        ValueError: if ``config_scheduler['type']`` is not ``'linear'``.
    """
    # Guard clause: only the linear warmup schedule is supported.
    if config_scheduler['type'] != 'linear':
        raise ValueError(f"Unsupported scheduler type: {config_scheduler['type']}")

    warmup_steps = int(config_scheduler['warmup_ratio'] * n_train_steps)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=n_train_steps
    )
    logging.info("Scheduler initialized successfully.")
    return scheduler


def compute_metrics(predictions, labels, label_names):
    """Compute micro-averaged and per-class ROC AUC from raw logits.

    Args:
        predictions: array of logits with one column per label.
        labels: array of 0/1 targets with the same layout as ``predictions``.
        label_names: class names, in column order.

    Returns:
        Dict with ``"auc_score"`` (micro-averaged AUC) and one
        ``"AUC per Class/<name>"`` entry per class. A score that cannot be
        computed is reported as ``0.0`` and a warning is logged.
    """
    # Sigmoid turns the logits into per-label probabilities.
    probs = 1 / (1 + np.exp(-predictions))
    labels = labels.astype(int)

    try:
        auc = roc_auc_score(labels, probs, average="micro")
    except ValueError as err:
        logging.warning(f"Micro AUC is undefined, reporting 0.0: {err}")
        auc = 0.0

    metrics = {
        "auc_score": auc,
    }

    # Score each class on its own column. Asking for all classes in one call
    # raises as soon as a single class is undefined, which would zero out the
    # scores of every other class as well.
    for i, class_name in enumerate(label_names):
        try:
            class_auc = roc_auc_score(labels[:, i], probs[:, i])
        except ValueError as err:
            logging.warning(f"AUC for class '{class_name}' is undefined, reporting 0.0: {err}")
            class_auc = 0.0
        metrics[f"AUC per Class/{class_name}"] = class_auc

    return metrics


def train_fn(train_data_loader, model, optimizer, device, scheduler):
    """Train ``model`` for one pass over ``train_data_loader``.

    Args:
        train_data_loader: iterable of batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors.
        model: model called with those three inputs; its output must expose
            ``.loss``.
        optimizer: stepped once per batch.
        device: device the batch tensors are moved to.
        scheduler: learning-rate scheduler, stepped once per batch.

    Returns:
        The mean of the per-batch losses.
    """
    model.train()
    running_loss = 0

    progress = tqdm(train_data_loader, leave=False, desc="Training Batches")

    for batch in progress:
        model_inputs = {
            "input_ids": batch['input_ids'].to(device),
            "attention_mask": batch['attention_mask'].to(device),
            "labels": batch['labels'].to(device).float(),
        }

        optimizer.zero_grad()

        loss = model(**model_inputs).loss
        # A non-scalar loss (presumably one value per replica when the model
        # is wrapped in nn.DataParallel) is reduced to its mean.
        if loss.dim() > 0:
            loss = loss.mean()

        loss.backward()

        # Clip the global gradient norm before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        scheduler.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        progress.set_postfix(loss=batch_loss)

    avg_train_loss = running_loss / len(train_data_loader)
    logging.info(f"Average Training Loss: {avg_train_loss:.4f}")
    return avg_train_loss

def eval_fn(validation_data_loader, model, device):
    """Evaluate ``model`` on ``validation_data_loader`` without gradients.

    Args:
        validation_data_loader: iterable of batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors.
        model: model called with those three inputs; its output must expose
            ``.loss`` and ``.logits``.
        device: device the batch tensors are moved to.

    Returns:
        ``(avg_val_loss, all_predictions, all_labels)``: the mean per-batch
        loss, and the logits and labels of all batches concatenated along
        axis 0 as NumPy arrays.
    """
    model.eval()
    running_loss = 0
    logit_chunks = []
    label_chunks = []

    progress = tqdm(validation_data_loader, leave=False, desc="Evaluating Batches")

    with torch.no_grad():
        for batch in progress:
            labels = None
            model_inputs = {
                "input_ids": batch['input_ids'].to(device),
                "attention_mask": batch['attention_mask'].to(device),
            }
            labels = batch['labels'].to(device).float()

            outputs = model(labels=labels, **model_inputs)
            loss = outputs.loss
            logits = outputs.logits

            # Reduce a non-scalar loss to a single value.
            if loss.dim() > 0:
                loss = loss.mean()

            batch_loss = loss.item()
            running_loss += batch_loss

            logit_chunks.append(logits.cpu().numpy())
            label_chunks.append(labels.cpu().numpy())

            progress.set_postfix(loss=batch_loss)

    avg_val_loss = running_loss / len(validation_data_loader)
    all_predictions = np.concatenate(logit_chunks, axis=0)
    all_labels = np.concatenate(label_chunks, axis=0)

    logging.info(f"Average Validation Loss: {avg_val_loss:.4f}")
    return avg_val_loss, all_predictions, all_labels

def get_system_metrics():
    """Snapshot host memory usage and network counters via ``psutil``.

    Returns:
        Dict with ``system/memory_utilization_percent``,
        ``system/network_sent_bytes`` and ``system/network_recv_bytes``.
    """
    memory = psutil.virtual_memory()
    network = psutil.net_io_counters()

    return {
        "system/memory_utilization_percent": memory.percent,
        "system/network_sent_bytes": network.bytes_sent,
        "system/network_recv_bytes": network.bytes_recv,
    }

def trainer():
    """Run one sweep trial: prepare data, fine-tune, evaluate and log to W&B.

    Takes no arguments because it is the callback given to ``wandb.agent``;
    the swept hyperparameters are read from ``wandb.config`` under their flat
    dotted names (e.g. ``"dataloader.batch_size"``).

    After every epoch the validation loss, micro AUC, per-class AUC and host
    metrics are logged. The model and tokenizer are saved to
    ``<save_dir>/best_model`` whenever the validation loss improves. If an
    epoch raises, the error is logged with its traceback, training stops, and
    the W&B run is finished with a non-zero exit code so it is not reported
    as successful.
    """
    wandb_login_with_secrets()

    wandb.init(
        config=global_config,
        project=global_config['wandb_project'],
        reinit=True
    )

    run_config = wandb.config

    run_name = generate_run_name(run_config)
    wandb.run.name = run_name
    logging.info(f"Run Name: {run_name}")

    n_epochs = run_config["training_args.num_train_epochs"]
    batch_size = run_config["dataloader.batch_size"]

    encoded_dataset, tokenizer = build_dataset(tokenize_max_len=run_config["tokenizer.max_length"])

    label_names = encoded_dataset["train"].features["labels"].feature.names
    num_labels = len(label_names)
    logging.info(f"Number of labels for this run: {num_labels}")
    logging.debug(f"Label names: {label_names}")

    train_data_loader, validation_data_loader = build_dataloader(
        train_dataset=encoded_dataset["train"],
        validate_dataset=encoded_dataset["validation"],
        batch_size=batch_size
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logging.info(f"Using device: {device}")

    if torch.cuda.is_available():
        n_gpus = torch.cuda.device_count()
        logging.info(f"Using {n_gpus} GPU(s).")
    else:
        logging.info("Using CPU.")

    # The scheduler is stepped once per batch, and the final partial batch
    # counts as a step too. The schedule must therefore span
    # batches-per-epoch * epochs; deriving it from len(dataset) / batch_size
    # undercounts, and the learning rate reaches zero before training ends.
    n_train_steps = len(train_data_loader) * n_epochs
    logging.info(f"Number of training steps: {n_train_steps}")

    model = get_model(n_train_steps=n_train_steps, dropout=run_config["model.dropout"], num_labels=num_labels)
    model.to(device)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        logging.info(f"Wrapped model with DataParallel for {torch.cuda.device_count()} GPUs.")

    wandb.watch(model, log="all")
    logging.info("W&B is watching the model.")

    # Start from the static optimizer settings but apply the learning rate
    # chosen by the sweep; otherwise every trial trains with the same static
    # value, whatever the sweep (and the run name) says.
    optimizer_config = dict(run_config["optimizer"])
    optimizer_config["lr"] = run_config["training_args.learning_rate"]
    logging.info(f"Optimizer learning rate: {optimizer_config['lr']}")

    optimizer = get_optimizer(model, optimizer_config)
    scheduler = get_scheduler(optimizer, n_train_steps, run_config["scheduler"])

    best_val_loss = float('inf')

    base_save_dir = "/content/drive/MyDrive/ECE-467-NLP/Final Project/Models"

    save_dir = os.path.join(base_save_dir, run_name)
    os.makedirs(save_dir, exist_ok=True)
    logging.info(f"Models will be saved to: {save_dir}")

    run_failed = False

    for epoch in range(n_epochs):
        try:
            logging.info(f"Epoch {epoch + 1}/{n_epochs}")
            train_loss = train_fn(train_data_loader, model, optimizer, device, scheduler)
            val_loss, predictions, labels = eval_fn(validation_data_loader, model, device)
            metrics = compute_metrics(predictions, labels, label_names)

            # One log call per epoch: losses, micro AUC, every per-class AUC
            # and the host metrics all land on the same step.
            log_metrics = {
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "val_loss": val_loss,
                **metrics,
            }
            log_metrics.update(get_system_metrics())

            wandb.log(log_metrics, step=epoch + 1)
            logging.info(f"Logged metrics for epoch {epoch + 1}.")

            for class_name in label_names:
                auc_key = f"AUC per Class/{class_name}"
                if auc_key in metrics:
                    logging.debug(f"Logged {auc_key}: {metrics[auc_key]:.4f}")

            # Checkpoint selection is driven by validation loss.
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                # Unwrap DataParallel so save_pretrained is called on the
                # underlying model.
                if isinstance(model, nn.DataParallel):
                    model_to_save = model.module
                else:
                    model_to_save = model

                best_model_path = os.path.join(save_dir, "best_model")
                model_to_save.save_pretrained(best_model_path)
                tokenizer.save_pretrained(best_model_path)
                logging.info(f"Best model saved at epoch {epoch + 1} with val_loss: {val_loss:.4f}")

        except Exception as e:
            # Keep the sweep alive, but record the failure with its traceback
            # and remember it so the run is not closed as a success.
            wandb.log({"error": str(e)})
            logging.exception(f"An error occurred: {e}")
            run_failed = True
            break

    wandb.finish(exit_code=1 if run_failed else 0)
    logging.info("W&B run finished.")


# Entry point: register the sweep and run its trials in this process.
if __name__ == "__main__":
    wandb_login_with_secrets()

    sweep_id = wandb.sweep(sweep_config, project=global_config['wandb_project'])
    logging.info(f"Sweep initialized with ID: {sweep_id}")

    # The agent call executes the trials (the cell output shows them running
    # one after another), so "starting" must be logged before it and the
    # line after it reports completion.
    logging.info("Starting sweep agent for up to 10 runs.")
    wandb.agent(sweep_id, function=trainer, count=10)
    logging.info("Sweep agent finished.")


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: lnmex713
Sweep URL: https://wandb.ai/Cooper-Union/NLP_GoEmotions_Classifcation/sweeps/lnmex713


[34m[1mwandb[0m: Agent Starting Run: no3aipk4 with config:
[34m[1mwandb[0m: 	dataloader.batch_size: 64
[34m[1mwandb[0m: 	model.dropout: 0.4
[34m[1mwandb[0m: 	tokenizer.max_length: 40
[34m[1mwandb[0m: 	training_args.learning_rate: 3e-05
[34m[1mwandb[0m: 	training_args.num_train_epochs: 10
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/43410 [00:00<?, ? examples/s]

Map:   0%|          | 0/5426 [00:00<?, ? examples/s]

Map:   0%|          | 0/5427 [00:00<?, ? examples/s]

Map:   0%|          | 0/43410 [00:00<?, ? examples/s]

Map:   0%|          | 0/5426 [00:00<?, ? examples/s]

Map:   0%|          | 0/5427 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

VBox(children=(Label(value='1.156 MB of 1.272 MB uploaded\r'), FloatProgress(value=0.9084455034365175, max=1.0…

0,1
AUC per Class/admiration,▁▇████████
AUC per Class/amusement,▁▅████████
AUC per Class/anger,▁▇████████
AUC per Class/annoyance,▁▇▇███████
AUC per Class/approval,▁▅▇███████
AUC per Class/caring,▁▄▇▇██████
AUC per Class/confusion,▁▇████████
AUC per Class/curiosity,▁▇████████
AUC per Class/desire,▁▆▇███████
AUC per Class/disappointment,▁▆▇▇██████

0,1
AUC per Class/admiration,0.96429
AUC per Class/amusement,0.9772
AUC per Class/anger,0.93134
AUC per Class/annoyance,0.87833
AUC per Class/approval,0.84338
AUC per Class/caring,0.94508
AUC per Class/confusion,0.93091
AUC per Class/curiosity,0.96016
AUC per Class/desire,0.96166
AUC per Class/disappointment,0.87083


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3hg54p24 with config:
[34m[1mwandb[0m: 	dataloader.batch_size: 32
[34m[1mwandb[0m: 	model.dropout: 0.5
[34m[1mwandb[0m: 	tokenizer.max_length: 40
[34m[1mwandb[0m: 	training_args.learning_rate: 3e-05
[34m[1mwandb[0m: 	training_args.num_train_epochs: 10
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Map:   0%|          | 0/5426 [00:00<?, ? examples/s]

Map:   0%|          | 0/5426 [00:00<?, ? examples/s]

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

VBox(children=(Label(value='1.156 MB of 1.278 MB uploaded\r'), FloatProgress(value=0.9041766786561796, max=1.0…

0,1
AUC per Class/admiration,█▃▁▄▅▆▄▂▂▄
AUC per Class/amusement,▅█▁▆▃▂▄▅▄▂
AUC per Class/anger,▆█▄▆▇▅▅▃▂▁
AUC per Class/annoyance,▁▅█▅▅▆▄▆▃▄
AUC per Class/approval,▅▁▄▁▂▃▁▇█▇
AUC per Class/caring,▆▄█▂▃▄▁▃▂▅
AUC per Class/confusion,▇▁█▁▃▄▅▆██
AUC per Class/curiosity,▃▁█▄▆▄▄▁▄▂
AUC per Class/desire,▁▆▇▅▆▄▅▇█▅
AUC per Class/disappointment,▄▄▇▁▂▄▁█▆▆

0,1
AUC per Class/admiration,0.50679
AUC per Class/amusement,0.46568
AUC per Class/anger,0.42206
AUC per Class/annoyance,0.48278
AUC per Class/approval,0.53759
AUC per Class/caring,0.55345
AUC per Class/confusion,0.55525
AUC per Class/curiosity,0.47807
AUC per Class/desire,0.49709
AUC per Class/disappointment,0.53419


[34m[1mwandb[0m: Agent Starting Run: b6xs8m60 with config:
[34m[1mwandb[0m: 	dataloader.batch_size: 32
[34m[1mwandb[0m: 	model.dropout: 0.3
[34m[1mwandb[0m: 	tokenizer.max_length: 40
[34m[1mwandb[0m: 	training_args.learning_rate: 3e-05
[34m[1mwandb[0m: 	training_args.num_train_epochs: 10
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Map:   0%|          | 0/5427 [00:00<?, ? examples/s]

Map:   0%|          | 0/5427 [00:00<?, ? examples/s]

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

VBox(children=(Label(value='1.156 MB of 1.273 MB uploaded\r'), FloatProgress(value=0.907981220657277, max=1.0)…

0,1
AUC per Class/admiration,▁▆████▇█▇▇
AUC per Class/amusement,▁█████████
AUC per Class/anger,▁▆▇███▇███
AUC per Class/annoyance,▁▅▇▇██▇█▇▇
AUC per Class/approval,▁▆████████
AUC per Class/caring,▁▇████████
AUC per Class/confusion,▁▆▆▇███▇▇▇
AUC per Class/curiosity,▁▅▆▇█▇▇▇▇▇
AUC per Class/desire,▁▇████████
AUC per Class/disappointment,▁▆▇▇████▇▇

0,1
AUC per Class/admiration,0.96301
AUC per Class/amusement,0.9766
AUC per Class/anger,0.9337
AUC per Class/annoyance,0.88099
AUC per Class/approval,0.84308
AUC per Class/caring,0.95495
AUC per Class/confusion,0.93021
AUC per Class/curiosity,0.95789
AUC per Class/desire,0.97416
AUC per Class/disappointment,0.87691


[34m[1mwandb[0m: Agent Starting Run: v915q3gf with config:
[34m[1mwandb[0m: 	dataloader.batch_size: 32
[34m[1mwandb[0m: 	model.dropout: 0.4
[34m[1mwandb[0m: 	tokenizer.max_length: 40
[34m[1mwandb[0m: 	training_args.learning_rate: 5e-05
[34m[1mwandb[0m: 	training_args.num_train_epochs: 10
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

VBox(children=(Label(value='1.156 MB of 1.270 MB uploaded\r'), FloatProgress(value=0.9095652722237695, max=1.0…

0,1
AUC per Class/admiration,▁▇████████
AUC per Class/amusement,▁▂████████
AUC per Class/anger,▁▇████████
AUC per Class/annoyance,▁▇████████
AUC per Class/approval,▂▁▇▇▇█████
AUC per Class/caring,▂▁▇▇██████
AUC per Class/confusion,▁▄▇███████
AUC per Class/curiosity,▁▂▇███████
AUC per Class/desire,▁▃▇▇██████
AUC per Class/disappointment,▁▅▇▇▇█████

0,1
AUC per Class/admiration,0.96352
AUC per Class/amusement,0.97443
AUC per Class/anger,0.9295
AUC per Class/annoyance,0.87741
AUC per Class/approval,0.84551
AUC per Class/caring,0.9512
AUC per Class/confusion,0.93154
AUC per Class/curiosity,0.96162
AUC per Class/desire,0.9593
AUC per Class/disappointment,0.8833


[34m[1mwandb[0m: Agent Starting Run: 2c5h0ivj with config:
[34m[1mwandb[0m: 	dataloader.batch_size: 32
[34m[1mwandb[0m: 	model.dropout: 0.5
[34m[1mwandb[0m: 	tokenizer.max_length: 40
[34m[1mwandb[0m: 	training_args.learning_rate: 5e-05
[34m[1mwandb[0m: 	training_args.num_train_epochs: 10
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

VBox(children=(Label(value='1.156 MB of 1.278 MB uploaded\r'), FloatProgress(value=0.9044338233263342, max=1.0…

0,1
AUC per Class/admiration,▄▅▄▅█▄▅▁▃▃
AUC per Class/amusement,▁▆▅▄▂▇▇█▇▆
AUC per Class/anger,▁▄▆█▆▄▁▅▃▃
AUC per Class/annoyance,▁▄▇█▆▇▇█▄▆
AUC per Class/approval,▅▇▂▁▁▅▅█▇█
AUC per Class/caring,▆▅▄▁▂▃▄▇██
AUC per Class/confusion,▄▄▂▁▁▆█▆▅▇
AUC per Class/curiosity,▆▇▆▇▇▅█▁▄▄
AUC per Class/desire,▆█▃▄▃▄▂▃▁▂
AUC per Class/disappointment,▄▄▁▂▂▅█▇▁▄

0,1
AUC per Class/admiration,0.44139
AUC per Class/amusement,0.56012
AUC per Class/anger,0.45291
AUC per Class/annoyance,0.47031
AUC per Class/approval,0.53124
AUC per Class/caring,0.58226
AUC per Class/confusion,0.55019
AUC per Class/curiosity,0.48147
AUC per Class/desire,0.45203
AUC per Class/disappointment,0.49468


[34m[1mwandb[0m: Agent Starting Run: sauixz4f with config:
[34m[1mwandb[0m: 	dataloader.batch_size: 32
[34m[1mwandb[0m: 	model.dropout: 0.5
[34m[1mwandb[0m: 	tokenizer.max_length: 40
[34m[1mwandb[0m: 	training_args.learning_rate: 5e-05
[34m[1mwandb[0m: 	training_args.num_train_epochs: 10
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

VBox(children=(Label(value='1.156 MB of 1.278 MB uploaded\r'), FloatProgress(value=0.9044959379271372, max=1.0…

0,1
AUC per Class/admiration,▄█▁▁▅▅▅▂▇▄
AUC per Class/amusement,▃▇█▃▅▂▁▂▄▅
AUC per Class/anger,▄██▁▂▇▂▅▃▂
AUC per Class/annoyance,▁▂▃▅▆▂▅▂█▅
AUC per Class/approval,█▂▁▅▅▄▆▅▆▂
AUC per Class/caring,▅▁▁▄▄▃▃▄█▅
AUC per Class/confusion,▂▂▁██▃▄▂▇▁
AUC per Class/curiosity,▁▄▄▃▅▂█▆▇▅
AUC per Class/desire,█▇█▇█▅▅▂▂▁
AUC per Class/disappointment,▃▂▃█▇▁▃▂█▅

0,1
AUC per Class/admiration,0.50285
AUC per Class/amusement,0.52183
AUC per Class/anger,0.45287
AUC per Class/annoyance,0.51245
AUC per Class/approval,0.47269
AUC per Class/caring,0.55352
AUC per Class/confusion,0.44035
AUC per Class/curiosity,0.51382
AUC per Class/desire,0.4044
AUC per Class/disappointment,0.497


[34m[1mwandb[0m: Agent Starting Run: jsdzbzxi with config:
[34m[1mwandb[0m: 	dataloader.batch_size: 64
[34m[1mwandb[0m: 	model.dropout: 0.3
[34m[1mwandb[0m: 	tokenizer.max_length: 40
[34m[1mwandb[0m: 	training_args.learning_rate: 3e-05
[34m[1mwandb[0m: 	training_args.num_train_epochs: 10
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/679 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/85 [00:00<?, ?it/s]

VBox(children=(Label(value='1.156 MB of 1.274 MB uploaded\r'), FloatProgress(value=0.906688906399311, max=1.0)…

0,1
AUC per Class/admiration,▁█████████
AUC per Class/amusement,▁█████████
AUC per Class/anger,▁▇████████
AUC per Class/annoyance,▁▇████████
AUC per Class/approval,▁▇▇███████
AUC per Class/caring,▁▇████████
AUC per Class/confusion,▁█████████
AUC per Class/curiosity,▁█████████
AUC per Class/desire,▁▇████████
AUC per Class/disappointment,▁▇████████

0,1
AUC per Class/admiration,0.96211
AUC per Class/amusement,0.97668
AUC per Class/anger,0.93148
AUC per Class/annoyance,0.88564
AUC per Class/approval,0.84623
AUC per Class/caring,0.9529
AUC per Class/confusion,0.92797
AUC per Class/curiosity,0.96138
AUC per Class/desire,0.96797
AUC per Class/disappointment,0.87582


[34m[1mwandb[0m: Agent Starting Run: 69ogzjk0 with config:
[34m[1mwandb[0m: 	dataloader.batch_size: 32
[34m[1mwandb[0m: 	model.dropout: 0.4
[34m[1mwandb[0m: 	tokenizer.max_length: 40
[34m[1mwandb[0m: 	training_args.learning_rate: 5e-05
[34m[1mwandb[0m: 	training_args.num_train_epochs: 10
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-large-uncased and are newly

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

Evaluating Batches:   0%|          | 0/170 [00:00<?, ?it/s]

Training Batches:   0%|          | 0/1357 [00:00<?, ?it/s]

In [None]:
# List the contents of the project's "Models" directory. The path lives under
# /content/drive (presumably the mounted Google Drive — confirm the mount cell
# has run first). 'Final Project' is quoted because the folder name has a space.
!ls /content/drive/MyDrive/ECE-467-NLP/'Final Project'/Models

lr-3e-05_batch-32_epochs-10_dropout-0.5  lr-5e-05_batch-64_epochs-10_dropout-0.5


In [None]:
import torch

# GPU sanity check: report whether CUDA is usable and, when it is, list every
# visible device along with the one PyTorch currently targets.
cuda_available = torch.cuda.is_available()
print(f"CUDA Available: {cuda_available}")

if not cuda_available:
    # Without a CUDA runtime there is nothing to enumerate.
    print("No GPU detected. Please enable GPU in your Colab environment.")
else:
    gpu_count = torch.cuda.device_count()
    print(f"Number of GPUs: {gpu_count}")

    # Names are resolved lazily, one per iteration, in device-index order.
    for i, gpu_name in enumerate(map(torch.cuda.get_device_name, range(gpu_count))):
        print(f"GPU {i}: {gpu_name}")

    # The "current" device is the one PyTorch reports as selected right now.
    current_gpu = torch.cuda.current_device()
    print(f"Current GPU Device ID: {current_gpu}")
    print(f"Current GPU Name: {torch.cuda.get_device_name(current_gpu)}")


CUDA Available: True
Number of GPUs: 1
GPU 0: NVIDIA A100-SXM4-40GB
Current GPU Device ID: 0
Current GPU Name: NVIDIA A100-SXM4-40GB
