This notebook was written by following the Hugging Face tutorial [hugging face - fine tune mistral](https://huggingface.co/blog/sirluk/multilabel-llm).

In [1]:
"""
!pip install scikit-multilearn
!pip install datasets
!pip install peft
!pip install bitsandbytes
!pip install accelerate
!pip install wandb
!pip install ipywidgets
"""

'\n!pip install scikit-multilearn\n!pip install datasets\n!pip install peft\n!pip install bitsandbytes\n!pip install accelerate\n!pip install wandb\n!pip install ipywidgets\n'

In [2]:
# exit()

In [1]:
import os
import random
import functools
import csv
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.metrics import f1_score
from skmultilearn.model_selection import iterative_train_test_split
from datasets import Dataset, DatasetDict
from peft import PeftModel, PeftConfig
from peft import (
    LoraConfig,
    prepare_model_for_kbit_training,
    get_peft_model
)
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)

In [2]:
# Authenticate with Weights & Biases before any run is started.
import wandb
wandb.login()

# Optional (presumably uploads trained models to W&B when enabled — confirm):
# %env WANDB_LOG_MODEL=true

# NOTE(review): this name is not read anywhere in the visible cells;
# train_and_evaluate passes project="lp2" to wandb.init instead.
wandb_project_name = "LLP2-test"

[34m[1mwandb[0m: Currently logged in as: [33mrstern[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
from huggingface_hub import login

# Never commit access tokens to a notebook. The token is read from the HF_TOKEN
# environment variable; when it is unset, `token=None` makes `login` prompt for it.
# The token that was previously hardcoded in this cell has been exposed and must be
# revoked in the Hugging Face account settings.
login(token=os.environ.get("HF_TOKEN"))

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/amin/.cache/huggingface/token
Login successful


In [4]:
# Method and Model Configuration
# ---------------------------------------------------------
entity = "rstern"  # W&B entity passed to wandb.init
retrain_from_checkpoint = False  # True: load the LoRA checkpoint named below
checkpoint = "checkpoint-810"  # sub-directory of multilabel_classification/<model_config>/
debugg = False  # True: build the datasets from the first 50 rows only
model_name = 'mistralai/Mistral-7B-v0.1'
# ---------------------------------------------------------
# Naming scheme used below: <model>_<data>_<labels>, e.g. "org_aug_a".
model_config = "org_aug_a" # passed to train_and_evaluate as its model_name (used in output paths)
augmented_data = True # True: aug, False: org
author_label_only = True # False: b and True: a
experiment_name = "org_aug_a" # W&B run name: model_config augmented_data author_label_only
# model name


## Load Dataset

In [5]:
# Detect Google Colab; only a failed import means "not in Colab", so other
# errors are no longer silently swallowed by a bare `except`.
try:
  import google.colab
  IN_COLAB = True
except ImportError:
  IN_COLAB = False

if IN_COLAB:
  from google.colab import drive
  import sys
  drive.mount('/content/drive')
  # sys.path.append('/content/drive/MyDrive/ucph/LP Project') # If working in collab change this path
  path = '/content/drive/MyDrive/ucph/LP Project/'
  # NOTE(review): the test files are read from the working directory, not from
  # `path` — confirm that this is where they live on Colab.
  test_df_0 = pd.read_csv(f'test_df_0.csv')
  test_df_1 = pd.read_csv(f'test_df_1.csv')
  test_df_2 = pd.read_csv(f'test_df_2.csv')
  # Later cells always use the augmented frames, so load them unconditionally
  # (previously they were undefined on Colab and raised NameError).
  train_df_aug = pd.read_csv(f'{path}balanced_train.csv')
  val_df_aug = pd.read_csv(f'{path}balanced_val.csv')
  if augmented_data:
    train_df = train_df_aug.copy()
    val_df = val_df_aug.copy()
  else:
    train_df = pd.read_csv(f'{path}df_train.csv')
    val_df = pd.read_csv(f'{path}df_validation.csv')
else:
  test_df_0 = pd.read_csv(f'multilabel_classification/test_df_0.csv')
  test_df_1 = pd.read_csv(f'multilabel_classification/test_df_1.csv')
  test_df_2 = pd.read_csv(f'multilabel_classification/test_df_2.csv')
  train_df_aug = pd.read_csv(f'balanced_train.csv')
  val_df_aug = pd.read_csv(f'balanced_val.csv')
  train_df = pd.read_csv(f'df_train.csv')
  val_df = pd.read_csv(f'df_validation.csv')

# shuffle dataset (fixed seed so the order is reproducible)
train_df = train_df.sample(frac=1, random_state=42)
val_df = val_df.sample(frac=1, random_state=42)
print(train_df.sample(5))

       Unnamed: 0                                         paragraph1  \
11766       11766  In general, be courteous to others. Debate/dis...   
44975       44975  Canada's at 7% EV sales in 2022, and there's h...   
38921       38921  1000%. I absolutely despise trump, but people ...   
26964       26964  In general, be courteous to others. Debate/dis...   
15799       15799  Exactly this is what people fail to understand...   

                                              paragraph2  label_author  \
11766  For those who have questions regarding any med...             0   
44975  It's not that hard to believe that Canada coul...             0   
38921  I now weigh 130-ish, thanks to the blind luck ...             0   
26964  For those who have questions regarding any med...             0   
15799  Apparently, in your world, China can either te...             1   

       label_dataset  n_authors  fileindex  
11766              1          4        131  
44975              2          3 

In [6]:
# Show the available columns; the helpers below read "paragraph1", "paragraph2",
# "label_author" and "label_dataset".
print(train_df.columns)

def create_sequences(row):
  """Return the model input for one row: both paragraphs joined by the "[LP2]" marker.

  Both fields are converted with ``str`` first, so non-string values are kept
  as their string representation.
  """
  first, second = str(row["paragraph1"]), str(row["paragraph2"])
  return "[LP2]".join((first, second))

# Add the joined "paragraph1[LP2]paragraph2" text as an "input" column to every split.
for frame in (train_df, val_df, train_df_aug, val_df_aug, test_df_0, test_df_1, test_df_2):
  frame["input"] = frame.apply(create_sequences, axis=1)

# Plain numpy arrays of the input texts, one per split.
x_train, x_val = train_df["input"].values, val_df["input"].values
x_train_aug, x_val_aug = train_df_aug["input"].values, val_df_aug["input"].values
x_test_0, x_test_1, x_test_2 = (
  split["input"].values for split in (test_df_0, test_df_1, test_df_2)
)

def create_multilabel(row):
  """Return ``np.array([author_label, dataset_label])`` for one row.

  ``label_dataset`` is clamped into the range [0, 1] before being cast to int;
  ``label_author`` is cast to int unchanged.
  """
  clamped = row["label_dataset"]
  # upper bound first, then lower bound (same order as min/max nesting)
  if not clamped < 1:
    clamped = 1
  if clamped < 0:
    clamped = 0
  return np.array([int(row["label_author"]), int(clamped)])

def create_singlelabel(row):
  """Return the row's author label as a one-element integer array."""
  return np.array([int(row["label_author"])])

# Pick the label builder and the per-label loss weights for this experiment.
if author_label_only:
  make_label = create_singlelabel
  label_weights = [1]
else:
  make_label = create_multilabel
  # weight author label heavier than topic change label
  label_weights = [2,1]

# Label EVERY split with the same builder. The multilabel branch used to skip the
# augmented frames, so they had no "label" column (or kept stale single labels
# from an earlier run of this cell).
for frame in (train_df, val_df, train_df_aug, val_df_aug, test_df_0, test_df_1, test_df_2):
  frame["label"] = frame.apply(make_label, axis=1)

# Stack the per-row arrays into one 2-D array per split.
y_train = np.stack(train_df["label"].values)
y_val = np.stack(val_df["label"].values)
y_train_aug = np.stack(train_df_aug["label"].values)
y_val_aug = np.stack(val_df_aug["label"].values)
y_test_0 = np.stack(test_df_0["label"].values)
y_test_1 = np.stack(test_df_1["label"].values)
y_test_2 = np.stack(test_df_2["label"].values)



Index(['Unnamed: 0', 'paragraph1', 'paragraph2', 'label_author',
       'label_dataset', 'n_authors', 'fileindex'],
      dtype='object')


In [7]:
# Sanity check: array shapes plus the first label and text of the train and validation splits.
print(x_train.shape, y_train.shape, y_train[0])
print(x_train[0])
print(x_val.shape, y_val.shape, y_val[0])
print(x_val[0])

(51993,) (51993, 1) [1]
That’s -80.86 ° F. I live in Montana and so far this winter it’s gotten down to -42°F. I can’t even imagine twice as cold as that and most people can’t even imagine -42°F.[LP2]Always amazes me that people can live in such extreme cold. Like as of this post, it’s -51 F (-41 C) real temp, but “feels like” -67 F (-55 C).
(11198,) (11198, 1) [0]
In general, be courteous to others. Debate/discuss/argue the merits of ideas, don't attack people. Personal insults, shill or troll accusations, hate speech, any suggestion or support of harm, violence, or death, and other rule violations can result in a permanent ban.[LP2]For those who have questions regarding any media outlets being posted on this subreddit, please click to review our details as to our approved domains list and outlet criteria.


In [8]:
# First 50 rows of the train/validation data; used to build the datasets when `debugg` is True.
x_train_sub = x_train[0:50]
y_train_sub = y_train[0:50]
x_val_sub = x_val[0:50]
y_val_sub = y_val[0:50]


In [9]:
# Wrap the text/label arrays of every split in a DatasetDict.
# In debug mode only the first 50 rows of each split are kept, but all split names
# are still present: later cells index 'train_aug', 'val_aug' and 'test_*', which
# used to raise KeyError when `debugg` was True.
if debugg:
  ds = DatasetDict({
    'train': Dataset.from_dict({'text': x_train_sub, 'labels': y_train_sub}),
    'val': Dataset.from_dict({'text': x_val_sub, 'labels': y_val_sub}),
    'train_aug': Dataset.from_dict({'text': x_train_aug[:50], 'labels': y_train_aug[:50]}),
    'val_aug': Dataset.from_dict({'text': x_val_aug[:50], 'labels': y_val_aug[:50]}),
    'test_0': Dataset.from_dict({'text': x_test_0[:50], 'labels': y_test_0[:50]}),
    'test_1': Dataset.from_dict({'text': x_test_1[:50], 'labels': y_test_1[:50]}),
    'test_2': Dataset.from_dict({'text': x_test_2[:50], 'labels': y_test_2[:50]}),
  })
else:
  ds = DatasetDict({
      'train': Dataset.from_dict({'text': x_train, 'labels': y_train}),
      'val': Dataset.from_dict({'text': x_val, 'labels': y_val}),
      'train_aug': Dataset.from_dict({'text': x_train_aug, 'labels': y_train_aug}),
      'val_aug': Dataset.from_dict({'text': x_val_aug, 'labels': y_val_aug}),
      'test_0': Dataset.from_dict({'text': x_test_0, 'labels': y_test_0}),
      'test_1': Dataset.from_dict({'text': x_test_1, 'labels': y_test_1}),
      'test_2': Dataset.from_dict({'text': x_test_2, 'labels': y_test_2}),
  })


## Load the model

In [10]:
# quantization config (passed to from_pretrained when the base model is loaded)
quantization_config = BitsAndBytesConfig(
    load_in_4bit = True, # enable 4-bit quantization
    bnb_4bit_quant_type = 'nf4', # information theoretically optimal dtype for normally distributed weights
    bnb_4bit_use_double_quant = True, # quantize quantized weights //insert xzibit meme
    bnb_4bit_compute_dtype = torch.bfloat16 # optimized fp format for ML
)

# lora config (passed to get_peft_model)
lora_config = LoraConfig(
    r = 16, # the dimension of the low-rank matrices
    lora_alpha = 8, # scaling factor for LoRA activations vs pre-trained weight activations
    target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    lora_dropout = 0.05, # dropout probability of the LoRA layers
    bias = 'none', # whether to train bias weights, set to 'none' for attention layers
    task_type = 'SEQ_CLS'
)

#### FREEZING FUNCTIONS
def freeze_all(model):
  """Turn off gradient tracking for every parameter of ``model``."""
  for weight in model.parameters():
    weight.requires_grad_(False)
  # To keep only the classifier head trainable, re-enable it afterwards:
  #for param in model.classifier.parameters():
   # param.requires_grad = True   # Unfreeze only the last layer (classifier)

def freeze_specific_layers(model, layer_names_to_unfreeze):
  """Leave trainable only the parameters whose name contains one of the given substrings.

  Every other parameter of ``model`` is frozen.
  """
  for param_name, weight in model.named_parameters():
    # True -> unfreeze (name matches), False -> freeze
    weight.requires_grad = any(fragment in param_name for fragment in layer_names_to_unfreeze)

# tokenizer-based preprocessing, intended for use with Dataset.map
def tokenize_examples(examples, tokenizer):
    """Tokenize ``examples['text']`` and attach ``examples['labels']`` to the result.

    Returns whatever ``tokenizer(...)`` returned, with an added 'labels' entry.
    """
    encoded = tokenizer(examples['text'])
    encoded['labels'] = examples['labels']
    return encoded


In [11]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Register the "[LP2]" paragraph separator as a token unless the vocabulary
# already contains it.
new_tokens = ["[LP2]"]
new_tokens = set(new_tokens) - set(tokenizer.vocab.keys())
tokenizer.add_tokens(list(new_tokens))

# Reuse EOS as the padding token so that batches can be padded.
tokenizer.pad_token = tokenizer.eos_token
tokenized_ds = ds.map(functools.partial(tokenize_examples, tokenizer=tokenizer), batched=True)
tokenized_ds = tokenized_ds.with_format('torch')

# Either resume from a local LoRA checkpoint or start from the pretrained base model.
# These two paths are now mutually exclusive: previously the fresh model was built
# unconditionally afterwards and silently replaced the checkpoint model.
if retrain_from_checkpoint:
    # The tokenizer configured above already contains the extra token, so it is reused.
    checkpoint_dir = f'multilabel_classification/{model_config}/{checkpoint}'
    config = PeftConfig.from_pretrained(checkpoint_dir)
    # NOTE(review): the base model is loaded without quantization and with the default
    # number of labels — confirm this matches how the checkpoint was trained.
    org_model = AutoModelForSequenceClassification.from_pretrained(config.base_model_name_or_path)
    # the embedding matrix must match the enlarged vocabulary before the adapter is attached
    org_model.resize_token_embeddings(len(tokenizer))
    model = PeftModel.from_pretrained(org_model, checkpoint_dir)
    # this is not working as we added an additional token
    # model = AutoModelForSequenceClassification.from_pretrained(f'multilabel_classification/{model_config}/{checkpoint}')
else:
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        num_labels=y_train.shape[1]
    )
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)
    # NOTE(review): the embedding row added for "[LP2]" is newly initialised; whether it
    # is trained depends on the PEFT setup (embeddings are not in target_modules) — confirm.
    model.resize_token_embeddings(len(tokenizer))

# The model needs to know the padding id in both cases.
model.config.pad_token_id = tokenizer.pad_token_id


Map:   0%|          | 0/51993 [00:00<?, ? examples/s]

Map:   0%|          | 0/11198 [00:00<?, ? examples/s]

Map:   0%|          | 0/125600 [00:00<?, ? examples/s]

Map:   0%|          | 0/28262 [00:00<?, ? examples/s]

Map:   0%|          | 0/2236 [00:00<?, ? examples/s]

Map:   0%|          | 0/2236 [00:00<?, ? examples/s]

Map:   0%|          | 0/2236 [00:00<?, ? examples/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of MistralForSequenceClassification were not initialized from the model checkpoint at mistralai/Mistral-7B-v0.1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Embedding(32001, 4096)

In [12]:
# define custom batch preprocessor
def collate_fn(batch):
    """Merge a list of tokenized examples into one padded batch.

    'input_ids' are right-padded with the tokenizer's pad token id,
    'attention_mask' with 0, and 'labels' are stacked along a new first dimension.
    """
    pad = torch.nn.utils.rnn.pad_sequence
    return {
        'input_ids': pad(
            [example['input_ids'] for example in batch],
            batch_first=True, padding_value=tokenizer.pad_token_id
        ),
        'attention_mask': pad(
            [example['attention_mask'] for example in batch],
            batch_first=True, padding_value=0
        ),
        'labels': torch.stack([example['labels'] for example in batch]),
    }

# define which metrics to compute for evaluation
def compute_metrics(p):
    """Return micro, macro and weighted F1 for a ``(predictions, labels)`` pair.

    Predictions are binarized with ``predictions > 0`` before scoring.
    """
    scores, references = p
    binarized = scores > 0
    return {
        f'f1_{averaging}': f1_score(references, binarized, average = averaging)
        for averaging in ('micro', 'macro', 'weighted')
    }


# create custom trainer class to be able to pass label weights and calculate multilabel loss
class CustomTrainer(Trainer):
    """Trainer that uses a weighted binary cross-entropy loss on the logits.

    Args:
        label_weights: tensor passed as ``pos_weight`` to
            ``F.binary_cross_entropy_with_logits``.
        **kwargs: forwarded unchanged to ``Trainer.__init__``.
    """

    def __init__(self, label_weights, **kwargs):
        super().__init__(**kwargs)
        self.label_weights = label_weights

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the loss for one batch.

        ``num_items_in_batch`` is accepted (and ignored) because newer versions of
        transformers pass it as a keyword; without it training fails with a TypeError.

        Returns:
            ``(loss, outputs)`` if ``return_outputs`` is True, otherwise ``loss``.
        """
        # labels are removed so the model does not compute its own loss
        labels = inputs.pop("labels")

        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")

        # compute custom loss
        # NOTE(review): `pos_weight` scales only the positive targets of each label;
        # if the intent is to weight whole labels against each other, `weight` may be
        # the right argument — confirm.
        loss = F.binary_cross_entropy_with_logits(logits, labels.to(torch.float32), pos_weight=self.label_weights)
        return (loss, outputs) if return_outputs else loss


def print_trainable_parameters(model):
    """
    Print how many parameters of ``model`` require gradients, the total
    parameter count, and the trainable share in percent.
    """
    sizes = [(weight.numel(), weight.requires_grad) for _, weight in model.named_parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, is_trainable in sizes if is_trainable)
    share = 100 * trainable_params / all_param
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {share}"
    )


# Define training function with freezing strategies
def train_and_evaluate(model, model_name, experiment_name, train, val, frozen=False):
    """Fine-tune ``model`` on one split of ``tokenized_ds`` and return the trained model.

    Args:
        model: model handed to ``CustomTrainer``.
        model_name: label used in the output directory, log directory and local save path.
        experiment_name: W&B run name; also part of the output directory and of the
            string passed to ``push_to_hub``.
        train: key of the training split in ``tokenized_ds``.
        val: key of the evaluation split in ``tokenized_ds``.
        frozen: if True, use a learning rate of 1e-5 instead of 1e-4.

    Returns:
        ``trainer.model`` after training.
    """
    wandb.init(entity=entity, project="lp2", name=experiment_name)

    training_args = TrainingArguments(
        output_dir=f'{experiment_name}/{model_name}',  # separate output directory per run
        learning_rate=1e-5 if frozen else 1e-4,  # smaller learning rate for frozen models
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=5,  # overridden by max_steps below
        weight_decay=0.01,
        evaluation_strategy='steps',
        save_strategy='steps',  # save based on steps
        load_best_model_at_end=True,
        logging_steps=1000,  # report the loss every 1000 steps
        logging_dir=f"./logs/{model_name}",  # separate log directory
        save_steps=1000,  # save a checkpoint every 1000 steps
        report_to=["wandb"],
        max_steps=2001  # hard cap on the number of training steps
    )

    # training_args = training_args.set_save(strategy="steps", steps=10)

    # train
    trainer = CustomTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_ds[train],
        eval_dataset=tokenized_ds[val],
        tokenizer=tokenizer,
        data_collator=collate_fn,
        compute_metrics=compute_metrics,
        label_weights=torch.tensor(label_weights, device=model.device),
      )

    trainer.train()
    # NOTE(review): the first positional argument of Trainer.push_to_hub appears to be
    # the commit message, not a repository name — confirm this is intended.
    trainer.push_to_hub(f"mistral-lp2-{experiment_name}")

    # save model and tokenizer separately
    model_save_path = f"multilabel_mistral_lp2_{model_name}"
    trainer.model.save_pretrained(model_save_path)
    tokenizer.save_pretrained(model_save_path)
    print(f"Model and tokenizer saved to {model_save_path}")

    # Final metrics on the evaluation split. The assignment used to be commented out
    # while the print remained, so the function raised NameError after training.
    test_results = trainer.evaluate(tokenized_ds[val])
    print(test_results)
    trainer.save_model()
    trainer.save_state()

    torch.cuda.empty_cache()
    return trainer.model

In [13]:
# Release cached GPU memory and confirm that a CUDA device is visible before training.
import torch
torch.cuda.empty_cache()
torch.cuda.is_available()


True

In [None]:
# Train the original model (without freezing) on the augmented splits.
print_trainable_parameters(model)
# model_config is used in the output paths, experiment_name as the W&B run name
model = train_and_evaluate(model, model_config, experiment_name, 'train_aug', 'val_aug')


trainable params: 13635584 || all params: 3634642944 || trainable%: 0.3751560802556786


max_steps is given, it will override any value given in num_train_epochs
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss


In [None]:

from torch.utils.data import DataLoader

def evaluate(ds_name, model, batch_size=1):
    """Score ``model`` on one split of ``tokenized_ds``, then log and print F1 metrics.

    Args:
        ds_name: key of the split in ``tokenized_ds``.
        model: model to evaluate; it is switched to eval mode.
        batch_size: number of examples per forward pass (default 1, as before).

    Sigmoid outputs above 0.5 count as positive. With ``author_label_only`` False,
    only the first label column is scored.
    """
    # Create DataLoader
    test_loader = DataLoader(tokenized_ds[ds_name], batch_size=batch_size, collate_fn=collate_fn)

    # Inputs must live on the same device as the model weights.
    device = next(model.parameters()).device

    # Evaluation loop
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels']

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            predictions = torch.sigmoid(logits)
            # .numpy() only works on CPU tensors and does not support bfloat16,
            # so convert to float32 on the CPU first.
            all_predictions.extend(predictions.float().cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Convert predictions and labels to numpy arrays
    all_predictions = np.array(all_predictions)
    all_labels = np.array(all_labels)

    # Compute metrics
    all_predictions_binary = (all_predictions > 0.5).astype(int)
    if author_label_only:
        y_true, y_pred = all_labels, all_predictions_binary
    else:
        # multi-label run: report the author label (column 0) only
        y_true, y_pred = all_labels[:,0], all_predictions_binary[:,0]

    metrics = {
        'f1_micro': f1_score(y_true, y_pred, average='micro'),
        'f1_macro': f1_score(y_true, y_pred, average='macro'),
        'f1_weighted': f1_score(y_true, y_pred, average='weighted')
    }
    wandb.log(metrics)

    print(ds_name, metrics)

In [None]:
# Free cached GPU memory, score the current model on each test split, then close the W&B run.
torch.cuda.empty_cache()
for test_split in ('test_0', 'test_1', 'test_2'):
    evaluate(test_split, model)
wandb.finish()

In [None]:
# Start the next experiment from a freshly loaded quantized base model with new LoRA adapters.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    num_labels=y_train.shape[1]
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.config.pad_token_id = tokenizer.pad_token_id
model.resize_token_embeddings(len(tokenizer))

# Names for this run; the training cell below passes the 'train'/'val' splits explicitly.
model_config = "org_org_a" # passed to train_and_evaluate as its model_name (used in output paths)
augmented_data = False # True: aug, False: org
author_label_only = True # False: b and True: a
experiment_name = "org_org_a" # W&B run name: model_config augmented_data author_label_only

In [None]:
# Train on the original (non-augmented) splits.
torch.cuda.empty_cache()
print_trainable_parameters(model)
# model_config is used in the output paths, experiment_name as the W&B run name
model = train_and_evaluate(model, model_config, experiment_name, 'train', 'val')


In [None]:
# Free cached GPU memory, score the current model on each test split, then close the W&B run.
torch.cuda.empty_cache()
for test_split in ('test_0', 'test_1', 'test_2'):
    evaluate(test_split, model)
wandb.finish()

In [None]:
# Method and Model Configuration
# ---------------------------------------------------------
entity = "rstern"  # W&B entity passed to wandb.init
retrain_from_checkpoint = False
checkpoint = "checkpoint-810"
debugg = False
model_name = 'mistralai/Mistral-7B-v0.1'
# ---------------------------------------------------------
# author_label_only changes here, so the label and dataset cells below must be re-run.
model_config = "org_aug_b" # passed to train_and_evaluate as its model_name (used in output paths)
augmented_data = True # True: aug, False: org
author_label_only = False # False: b and True: a
experiment_name = "org_aug_b" # W&B run name: model_config augmented_data author_label_only
# model name


In [None]:
# Pick the label builder and the per-label loss weights for this experiment.
if author_label_only:
  make_label = create_singlelabel
  label_weights = [1]
else:
  make_label = create_multilabel
  # weight author label heavier than topic change label
  label_weights = [2,1]

# Re-label EVERY split. The multilabel branch used to skip the augmented frames, so
# 'train_aug'/'val_aug' kept the single-column labels of the previous experiment
# while the model was built with two outputs.
for frame in (train_df, val_df, train_df_aug, val_df_aug, test_df_0, test_df_1, test_df_2):
  frame["label"] = frame.apply(make_label, axis=1)

# Stack the per-row arrays into one 2-D array per split.
y_train = np.stack(train_df["label"].values)
y_val = np.stack(val_df["label"].values)
y_train_aug = np.stack(train_df_aug["label"].values)
y_val_aug = np.stack(val_df_aug["label"].values)
y_test_0 = np.stack(test_df_0["label"].values)
y_test_1 = np.stack(test_df_1["label"].values)
y_test_2 = np.stack(test_df_2["label"].values)

In [None]:
# Rebuild the DatasetDict from the freshly created label arrays.
# Debug mode slices the current arrays directly: the *_sub label arrays were cut
# before the labels were rebuilt and would be stale here.
if debugg:
  ds = DatasetDict({
    'train': Dataset.from_dict({'text': x_train[:50], 'labels': y_train[:50]}),
    'val': Dataset.from_dict({'text': x_val[:50], 'labels': y_val[:50]}),
    'train_aug': Dataset.from_dict({'text': x_train_aug[:50], 'labels': y_train_aug[:50]}),
    'val_aug': Dataset.from_dict({'text': x_val_aug[:50], 'labels': y_val_aug[:50]}),
    'test_0': Dataset.from_dict({'text': x_test_0[:50], 'labels': y_test_0[:50]}),
    'test_1': Dataset.from_dict({'text': x_test_1[:50], 'labels': y_test_1[:50]}),
    'test_2': Dataset.from_dict({'text': x_test_2[:50], 'labels': y_test_2[:50]}),
  })
else:
  ds = DatasetDict({
      'train': Dataset.from_dict({'text': x_train, 'labels': y_train}),
      'val': Dataset.from_dict({'text': x_val, 'labels': y_val}),
      'train_aug': Dataset.from_dict({'text': x_train_aug, 'labels': y_train_aug}),
      'val_aug': Dataset.from_dict({'text': x_val_aug, 'labels': y_val_aug}),
      'test_0': Dataset.from_dict({'text': x_test_0, 'labels': y_test_0}),
      'test_1': Dataset.from_dict({'text': x_test_1, 'labels': y_test_1}),
      'test_2': Dataset.from_dict({'text': x_test_2, 'labels': y_test_2}),
  })

# Training and evaluation read `tokenized_ds`, not `ds`, so it must be rebuilt too;
# otherwise the runs below would still see the labels of the previous experiment.
tokenized_ds = ds.map(functools.partial(tokenize_examples, tokenizer=tokenizer), batched=True)
tokenized_ds = tokenized_ds.with_format('torch')

In [None]:
# Fresh quantized base model with new LoRA adapters; the number of outputs follows
# the width of the current label array.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    num_labels=y_train.shape[1]
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.config.pad_token_id = tokenizer.pad_token_id
model.resize_token_embeddings(len(tokenizer))


In [None]:
# Train on the augmented splits.
torch.cuda.empty_cache()
print_trainable_parameters(model)
# model_config is used in the output paths, experiment_name as the W&B run name
model = train_and_evaluate(model, model_config, experiment_name, 'train_aug', 'val_aug')


In [None]:
# Free cached GPU memory, score the current model on each test split, then close the W&B run.
torch.cuda.empty_cache()
for test_split in ('test_0', 'test_1', 'test_2'):
    evaluate(test_split, model)
wandb.finish()

In [None]:
# Method and Model Configuration
# ---------------------------------------------------------
entity = "rstern"  # W&B entity passed to wandb.init
retrain_from_checkpoint = False
checkpoint = "checkpoint-810"
debugg = False
model_name = 'mistralai/Mistral-7B-v0.1'
# ---------------------------------------------------------
# author_label_only keeps its previous value (False), so the existing labels are reused.
model_config = "org_org_b" # passed to train_and_evaluate as its model_name (used in output paths)
augmented_data = False # True: aug, False: org
author_label_only = False # False: b and True: a
experiment_name = "org_org_b" # W&B run name: model_config augmented_data author_label_only
# model name


In [None]:
# Fresh quantized base model with new LoRA adapters; the number of outputs follows
# the width of the current label array.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    num_labels=y_train.shape[1]
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.config.pad_token_id = tokenizer.pad_token_id
model.resize_token_embeddings(len(tokenizer))


In [None]:
# Train on the original (non-augmented) splits.
torch.cuda.empty_cache()
print_trainable_parameters(model)
# model_config is used in the output paths, experiment_name as the W&B run name
model = train_and_evaluate(model, model_config, experiment_name, 'train', 'val')


In [None]:
# Free cached GPU memory, score the current model on each test split, then close the W&B run.
torch.cuda.empty_cache()
for test_split in ('test_0', 'test_1', 'test_2'):
    evaluate(test_split, model)
wandb.finish()