### Practice: Parameter Efficient Fine-Tuning
In this notebook, you will fine-tune a large language model within limited GPU memory.

Prompt Tuning

In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

import transformers
from tqdm.auto import tqdm, trange
# Fail fast on a machine without a visible GPU: the assertion stops the cell with the message below.
assert torch.cuda.is_available(), "you need cuda for this part"
# Device handle for the rest of the notebook. Once the assertion above has passed,
# the 'cpu' branch is not taken (it is only reachable if assertions are disabled).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Load the base tokenizer and causal language model from the local Hugging Face cache.
from transformers import AutoTokenizer, AutoModelForCausalLM

# Single source of truth for the checkpoint id: change it here to try another model.
model_name = "Qwen/Qwen2.5-0.5B"

# `local_files_only=True` resolves the checkpoint from the local cache without
# contacting the Hub, so no access token is passed here. If a token is ever needed,
# provide it through the environment / `huggingface-cli login` rather than pasting
# it into the notebook, where it would end up in version control.
tokenizer = AutoTokenizer.from_pretrained(model_name, local_files_only=True)
model = AutoModelForCausalLM.from_pretrained(model_name, local_files_only=True)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import Dataset, DatasetDict
import json
from peft import get_peft_model, LoraConfig, TaskType

# Charger le dataset à partir du fichier JSON

import os

# Location of the slang -> formal JSON corpus.
# The default is the original author's local path; set the CALL_CENTER_SLANG_JSON
# environment variable to point at your own copy instead of editing the notebook.
file_path = os.environ.get(
    "CALL_CENTER_SLANG_JSON",
    r"C:\Users\facto22020\Downloads\OneDrive_2025-02-08\Seminaire_Master_DataScience\J5_IA_Generative_RAG\call_center_slang_fr.json",
)
print("Chemin absolu attendu :", os.path.abspath(file_path))

# Report whether the file is present before trying to read it.
# This check is informational only: a missing file still raises FileNotFoundError
# in the `open` call below.
if os.path.exists(os.path.abspath(file_path)):
    print("✅ Fichier trouvé !")
else:
    print("❌ Fichier introuvable !")

# Parse the whole JSON document into memory; `data` is consumed by create_dataset.
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Convert the raw records into a Hugging Face Dataset
def create_dataset(data):
    """Build a two-column ``Dataset`` from the loaded slang/formal records.

    Args:
        data: sequence of mappings; each one is read through its 'slang'
            and 'formal' keys.

    Returns:
        A ``Dataset`` whose 'input_text' column holds the 'slang' values and
        whose 'target_text' column holds the 'formal' values, in input order.
    """
    slang_texts = []
    formal_texts = []
    for record in data:
        slang_texts.append(record['slang'])    # model input: informal phrasing
        formal_texts.append(record['formal'])  # expected output: formal phrasing
    return Dataset.from_dict({'input_text': slang_texts, 'target_text': formal_texts})

# Build the dataset and hold out 10% of it for evaluation.
# A fixed seed keeps the train/test split identical across kernel restarts,
# so evaluation losses from different runs are comparable.
dataset = create_dataset(data)
dataset = dataset.train_test_split(test_size=0.1, seed=42)


# Tokenize (slang -> formal) pairs for causal-LM fine-tuning
def tokenize_function(examples, max_length=128, separator="\n", tok=None):
    """Turn a batch of slang/formal pairs into causal-LM training features.

    A causal LM learns to continue its own input, so the target has to be part
    of ``input_ids``. Each example is encoded as
    ``slang + separator + formal + EOS``. ``labels`` mirrors ``input_ids``
    except that the slang prompt and the padding are replaced by -100, the
    index ignored by the loss, so only the separator, the formal rewrite and
    the EOS token are learned. Supervising the separator lets the model be
    prompted with the raw slang sentence alone at inference time.

    Args:
        examples: batch mapping with 'input_text' and 'target_text' lists.
        max_length: fixed length every sequence is truncated/padded to.
        separator: text inserted between the slang prompt and the formal target.
        tok: tokenizer to use; defaults to the notebook-level ``tokenizer``.

    Returns:
        dict with 'input_ids', 'attention_mask' and 'labels', each a list
        holding one list of ``max_length`` integers per example.
    """
    tok = tokenizer if tok is None else tok
    eos_id = tok.eos_token_id
    pad_id = tok.pad_token_id
    if pad_id is None:
        # Padded positions are masked out, so any valid id works as filler.
        pad_id = eos_id if eos_id is not None else 0

    batch = {"input_ids": [], "attention_mask": [], "labels": []}
    for source, target in zip(examples["input_text"], examples["target_text"]):
        # The prompt is encoded exactly as it will be at generation time.
        prompt_ids = list(tok(source)["input_ids"])
        answer_ids = list(tok(separator + target, add_special_tokens=False)["input_ids"])
        if eos_id is not None:
            answer_ids.append(eos_id)  # teaches the model where to stop

        input_ids = (prompt_ids + answer_ids)[:max_length]
        labels = ([-100] * len(prompt_ids) + answer_ids)[:max_length]
        attention_mask = [1] * len(input_ids)

        n_pad = max_length - len(input_ids)
        batch["input_ids"].append(input_ids + [pad_id] * n_pad)
        batch["attention_mask"].append(attention_mask + [0] * n_pad)
        batch["labels"].append(labels + [-100] * n_pad)
    return batch

# Run tokenize_function over the split dataset in batched mode; the result is
# indexed by 'train' and 'test' when the Trainer is built.
tokenized_datasets = dataset.map(tokenize_function, batched=True)



In [None]:
# Sanity check: display the first raw (untokenized) training example.
dataset["train"][0]

In [None]:


# Apply LoRA (Low-Rank Adaptation) to the model with rank r=8
# NOTE(review): `model` is rebound to the wrapped model below, so re-running this
# cell would pass the already-wrapped model to get_peft_model again — reload the
# base model first.
lora_config = LoraConfig(
    r=8,  # LoRA rank
    lora_alpha=32,  # scaling factor
    lora_dropout=0.1,  # dropout rate
    task_type=TaskType.CAUSAL_LM  # causal language-modeling task
)

model = get_peft_model(model, lora_config)



# Print trainable parameters
model.print_trainable_parameters()



In [None]:

# Training configuration
# NOTE(review): recent transformers releases rename `evaluation_strategy`
# (-> `eval_strategy`) and Trainer's `tokenizer` argument (-> `processing_class`);
# confirm against the installed version before upgrading.
training_args = TrainingArguments(
    # Output locations
    output_dir="./lora_model",
    logging_dir='./logs',
    push_to_hub=False,
    # Optimisation
    num_train_epochs=30,
    learning_rate=5e-5,
    weight_decay=0.01,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    fp16=False,
    # Evaluation, logging and checkpoint cadence
    evaluation_strategy="epoch",
    logging_steps=20,
    save_steps=10_000,
    save_total_limit=2,
)

# Build the Trainer from the LoRA-wrapped model and the tokenized splits
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
)

# Run fine-tuning
trainer.train()

# Save the fine-tuned model and its tokenizer to the same directory
model.save_pretrained("./lora_model_finetuned")
tokenizer.save_pretrained("./lora_model_finetuned")


In [None]:
# --- TEST THE MODEL WITH A NEW INPUT ---
def generate_statement(user_input, max_new_tokens=50):
    """Generate the model's continuation for a single input sentence.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        user_input: text used as the prompt, passed to the tokenizer unchanged.
        max_new_tokens: upper bound on the number of generated tokens.

    Returns:
        Only the newly generated text, decoded with special tokens skipped and
        stripped of surrounding whitespace. The prompt is not echoed back.
    """
    # Inference must run in eval mode, otherwise dropout (including the LoRA
    # dropout) can stay active and make the output noisy.
    model.eval()

    model_inputs = tokenizer(user_input, return_tensors="pt", padding=True, truncation=True).to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
    )

    # For a decoder-only model the returned sequence starts with the prompt
    # tokens; drop them so the caller gets just the model's answer.
    prompt_length = model_inputs["input_ids"].shape[1]
    new_token_ids = generated_ids[:, prompt_length:]
    generated_text = tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]

    return generated_text.strip()

In [None]:
# Smoke test: send one informal sentence through the model and show both sides.
# (`negated_response` simply holds the model output; the name is kept as-is in
# case other cells refer to it.)
user_input = "T’inquiète, je gère."
negated_response = generate_statement(user_input)
print(f"User: {user_input}", f"Model: {negated_response}", sep="\n")
