In [None]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification, TrainingArguments, Trainer, default_data_collator
from datasets import load_dataset

# Load tokenizer and model
tokenizer = RobertaTokenizer.from_pretrained("roberta-large")
model = RobertaForSequenceClassification.from_pretrained("roberta-large", num_labels=3)  # 3 for entailment, neutral, contradiction

# Download and load MultiNLI dataset
train_data = load_dataset("glue", name="mnli", split="train")
# Sample 10% of the data for training
train_data = train_data.select(range(len(train_data) // 30))  # Select first 3% of the data

dev_data = load_dataset("glue", name="mnli", split="validation_matched")  # Use matched validation set

# Preprocess data (tokenization, padding)
def preprocess_function(examples):
  return tokenizer(examples["premise"], examples["hypothesis"], padding="max_length", truncation=True)

train_data = train_data.map(preprocess_function, batched=True)
dev_data = dev_data.map(preprocess_function, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,  # Adjust batch size based on your GPU memory
    learning_rate=2e-5,
    num_train_epochs=3,  
    logging_dir='./logs',
    report_to="none",  
    save_strategy="no",  
)


# Define metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    return {"accuracy": (preds == labels).mean()}

# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=dev_data,
    data_collator=default_data_collator,
    compute_metrics=compute_metrics,
)

# Enable quantization
trainer.quantized_training = True

# Train the model
trainer.train()


In [None]:
results = trainer.evaluate()


In [5]:

# Print the evaluation metrics
print("Evaluation Results:")
for key, value in results.items():
  print(f"{key}: {value}")

Evaluation Results:
eval_loss: 0.7565864324569702
eval_accuracy: 0.8645950076413652
eval_runtime: 554.7175
eval_samples_per_second: 17.694
eval_steps_per_second: 1.107
epoch: 3.0


In [8]:
modelnum_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Number of trainable parameters:", modelnum_params)


Number of trainable parameters: 355362819


### 2

In [None]:
!pip install transformers==4.33.1 peft==0.11.1 datasets


In [None]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from datasets import load_dataset
from transformers import TrainingArguments, Trainer, default_data_collator
from peft import LoraConfig, get_peft_model

# Load tokenizer and model
model = RobertaForSequenceClassification.from_pretrained("roberta-large", num_labels=3)  # 3 for entailment, neutral, contradiction


# Configure LoRA
lora_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,  
    r=8,  
    task_type="SEQ_CLS",  
    target_modules=["query", "key", "value", "dense"]  
)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)


# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,  
    learning_rate=2e-5,
    num_train_epochs=3, 
    logging_dir='./logs',
    report_to="none",  
    save_strategy="no",  
)

# Define metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    return {"accuracy": (preds == labels).mean()}

modelnum_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Number of trainable parameters:", modelnum_params)

# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=dev_data,
    data_collator=default_data_collator,
    compute_metrics=compute_metrics,
)



# Train the model
trainer.train()



In [15]:
results = trainer.evaluate()


In [16]:
# Print the evaluation metrics
print("Evaluation Results:")
for key, value in results.items():
  print(f"{key}: {value}")

Evaluation Results:
eval_loss: 0.4594654440879822
eval_accuracy: 0.8445236882322975
eval_runtime: 654.8536
eval_samples_per_second: 14.988
eval_steps_per_second: 0.938
epoch: 3.0


### 3

In [None]:
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q trl xformers wandb datasets einops gradio sentencepiece bitsandbytes

In [None]:
!pip install transformers==4.33.1 peft==0.11.1 datasets


In [None]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification, TrainingArguments, Trainer, default_data_collator
from datasets import load_dataset
from peft import PeftModel, PromptTuningConfig, get_peft_model

# Load tokenizer and model
tokenizer = RobertaTokenizer.from_pretrained("roberta-large")
model = RobertaForSequenceClassification.from_pretrained("roberta-large", num_labels=3)  # 3 for entailment, neutral, contradiction


train_data = load_dataset("glue", name="mnli", split="train")

train_data = train_data.select(range(len(train_data) // 30))  # Select first 3% of the data

dev_data = load_dataset("glue", name="mnli", split="validation_matched")  # Use matched validation set

def preprocess_function(examples):
    return tokenizer(examples["premise"], examples["hypothesis"], padding="max_length", truncation=True,max_length=128)

train_data = train_data.map(preprocess_function, batched=True)
dev_data = dev_data.map(preprocess_function, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,  
    learning_rate=2e-5,
    num_train_epochs=3,  
    logging_dir='./logs',
    report_to="none",  
    save_strategy="no", 
)

# Define metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    return {"accuracy": (preds == labels).mean()}


In [None]:
from peft import PromptEncoderConfig
model = RobertaForSequenceClassification.from_pretrained("roberta-large", num_labels=3)

peft_config = PromptEncoderConfig(
    task_type="SEQ_CLS",
    num_virtual_tokens=20,
    encoder_hidden_size=128
)
model = get_peft_model(model, peft_config)

In [55]:
modelnum_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Number of trainable parameters:", modelnum_params)


Number of trainable parameters: 1352963


In [None]:
from transformers import DataCollatorWithPadding

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,  
    learning_rate=2e-5,
    num_train_epochs=3,  
    logging_dir='./logs',
    report_to="none",  
    save_strategy="no",  
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=dev_data,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)


trainer.train()



In [57]:
results = trainer.evaluate()


In [58]:
# Print the evaluation metrics
print("Evaluation Results:")
for key, value in results.items():
  print(f"{key}: {value}")

Evaluation Results:
eval_loss: 1.0957759618759155
eval_accuracy: 0.3392766174223128
eval_runtime: 219.8003
eval_samples_per_second: 44.654
eval_steps_per_second: 2.793
epoch: 3.0
