In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorWithPadding, AutoModelForSequenceClassification
from huggingface_hub import login
from peft import PeftModel
import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, matthews_corrcoef
from tqdm import tqdm

In [2]:
print(torch.cuda.is_available())  # Should return True if CUDA is available
print(torch.cuda.device_count())  # Number of GPUs detected
print(torch.cuda.get_device_name(0))

True
1
NVIDIA GeForce GTX 1650


In [3]:
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

## Configurations

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    compute_dtype = torch.bfloat16 # Or torch.float16 depending on your GPU
else:
    compute_dtype = torch.float32

In [5]:
model_id = "mistralai/Mistral-7B-v0.1"

base_model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    torch_dtype=compute_dtype,
    device_map={"":"cuda"}, # Automatically distributes across GPUs if available/needed
    # offload_folder='offload/'
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of MistralForSequenceClassification were not initialized from the model checkpoint at mistralai/Mistral-7B-v0.1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
# if tokenizer.pad_token is None:
#     tokenizer.pad_token = tokenizer.eos_token  # or you can use '[PAD]'

# # Load dataset (CoLA example)
# dataset = load_dataset("glue", "cola")
dataset = load_dataset("Rowan/hellaswag")
# # val_data = cuad["validation"]
# # dataset = load_dataset("nyu-mll/cola")
test_dataset = dataset["validation"]  # or "test" if it includes labels

README.md:   0%|          | 0.00/6.84k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


hellaswag.py:   0%|          | 0.00/4.36k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/24.4M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/6.11M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/6.32M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/39905 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10003 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10042 [00:00<?, ? examples/s]

In [7]:
# Tokenize and preprocess
def preprocess(example):
    return tokenizer(example["sentence"], truncation=True)

test_dataset = test_dataset.map(preprocess, batched=True)
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

In [8]:
# add adapter, if not then base model
model = base_model

model.to(device)

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNorm((4096,), eps=1e-05)
     

In [None]:
data_collator = DataCollatorWithPadding(tokenizer)
def collate_with_labels(batch):
    # Separate labels from the rest
    labels = [example["label"] for example in batch]
    batch = data_collator(batch)
    batch["label"] = torch.tensor(labels)
    return batch
test_loader = DataLoader(test_dataset, batch_size=16, collate_fn=collate_with_labels)

all_preds, all_labels = [], []

# Wrap test_loader with tqdm for progress bar
for batch in tqdm(test_loader, desc="Evaluating"):
    with torch.no_grad():
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        preds = torch.argmax(outputs.logits, dim=-1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Evaluate the results (e.g., MCC for CoLA)


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
Evaluating: 100%|██████████| 66/66 [50:35<00:00, 45.99s/it] 


ValueError: Classification metrics can't handle a mix of binary and unknown targets

In [17]:
all_preds_flat = np.concatenate(all_preds).tolist()
# all_labels_flat = np.concatenate(all_labels).tolist()

# Now you can safely compute accuracy
from sklearn.metrics import accuracy_score

acc = accuracy_score(all_labels, all_preds_flat)
print(f"Accuracy: {acc:.4f}")

ValueError: Found input variables with inconsistent numbers of samples: [1043, 19537]

In [13]:
from sklearn.metrics import accuracy_score

acc = accuracy_score(all_labels, all_preds)
print(f"Accuracy: {acc:.4f}")

ValueError: Classification metrics can't handle a mix of binary and unknown targets

In [12]:
import numpy as np
all_preds = [int(p) if not isinstance(p, np.ndarray) else int(p.item()) for p in all_preds]
all_labels = [int(l) if not isinstance(l, np.ndarray) else int(l.item()) for l in all_labels]

# Check the unique values
print("Preds:", set(all_preds))
print("Labels:", set(all_labels))

ValueError: can only convert an array of size 1 to a Python scalar

In [None]:
mcc = matthews_corrcoef(all_labels, all_preds)
print(f"Matthews Correlation Coefficient (CoLA): {mcc:.4f}")