<a href="https://colab.research.google.com/github/ludoveltz/test_github_fev25/blob/main/Daily_challenge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# 1. Installations (à exécuter une seule fois)
!pip install peft==0.4.0
!pip install datasets
!mkdir cache

# 2. Importations
import os
import time
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
import transformers
from peft import LoraConfig, get_peft_model, PeftModel
import torch

mkdir: cannot create directory ‘cache’: File exists


In [10]:
# 3. Chargement du modèle et tokenizer
print("Chargement du modèle et tokenizer...")
model_name = "bigscience/bloomz-560m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
foundation_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32  # Forcer float32 pour plus de stabilité
)

# 4. Préparation du dataset
print("Préparation du dataset...")
data = load_dataset("Abirate/english_quotes", split="train")
sample_size = int(len(data) * 0.1)
data = data.select(range(sample_size))

# Amélioration de la préparation des données
def preprocess_function(examples):
    # Ajout de tokens spéciaux pour mieux guider l'apprentissage
    texts = [f"<quote>{quote}</quote>" for quote in examples["quote"]]
    return tokenizer(
        texts,
        truncation=True,
        max_length=128,
        padding="max_length",
        return_tensors="pt"
    )

data = data.map(
    preprocess_function,
    batched=True,
    remove_columns=data.column_names
)

# 5. Configuration de LoRA
print("Configuration de LoRA...")
lora_config = LoraConfig(
    r=16,  # Augmenté pour plus de capacité
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# 6. Application de LoRA
print("Application de LoRA...")
peft_model = get_peft_model(foundation_model, lora_config)
print(peft_model.print_trainable_parameters())




Chargement du modèle et tokenizer...
Préparation du dataset...


Map:   0%|          | 0/250 [00:00<?, ? examples/s]

Configuration de LoRA...
Application de LoRA...
trainable params: 1,572,864 || all params: 560,787,456 || trainable%: 0.2804741766549072
None


In [13]:
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):  # Ajout du paramètre num_items_in_batch
        # Vérification de la présence des labels dans les inputs
        if "labels" not in inputs:
            labels = inputs["input_ids"].clone()
        else:
            labels = inputs.pop("labels")

        outputs = model(**inputs)
        logits = outputs.logits

        # Shift logits and labels for language modeling
        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()

        loss_fct = torch.nn.CrossEntropyLoss()
        loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        if return_outputs:
            return loss, outputs
        return loss

# Configuration des arguments d'entraînement
training_args = TrainingArguments(
    output_dir="./solweig_izar_model",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=1e-4,
    num_train_epochs=3,
    logging_steps=5,
    logging_first_step=True,
    save_strategy="steps",
    save_steps=50,
    weight_decay=0.01,
    warmup_ratio=0.1,
    max_grad_norm=1.0,
    report_to="none"
)

# Callback pour le monitoring
class DetailedCallback(transformers.TrainerCallback):
    def on_step_end(self, args, state, control, logs=None, **kwargs):
        if state.is_local_process_zero and logs:
            print(f"Step {state.global_step}: Loss = {logs.get('loss', 'N/A')}")

# Initialisation et lancement du trainer
trainer = CustomTrainer(
    model=peft_model,
    args=training_args,
    train_dataset=data,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    callbacks=[DetailedCallback()]
)

# Lancement de l'entraînement
train_result = trainer.train()
print(f"Résultats d'entraînement : {train_result}")




Step,Training Loss
1,15.5857
5,14.3467
10,13.7379
15,12.2176
20,11.9637
25,11.036
30,10.339
35,11.4435
40,10.061
45,10.2968


Résultats d'entraînement : TrainOutput(global_step=45, training_loss=11.743329387240939, metrics={'train_runtime': 74.1498, 'train_samples_per_second': 10.115, 'train_steps_per_second': 0.607, 'total_flos': 171771143651328.0, 'train_loss': 11.743329387240939, 'epoch': 2.9523809523809526})


In [15]:
# 9. Sauvegarde du modèle
time_now = int(time.time())
peft_model_path = f"./peft_outputs/model_{time_now}"
trainer.model.save_pretrained(peft_model_path)

# 10. Test du modèle
generation_params = {
    "max_length": 100,
    "num_return_sequences": 1,
    "temperature": 0.85,
    "top_p": 0.92,
    "repetition_penalty": 1.5,
    "no_repeat_ngram_size": 3,
    "do_sample": True
}

# 2. Définir les prompts de test
test_prompts = [
    "Two things are infinite: ",
    "In the world of luxury design, ",
    "The art of creativity lies in ",
    "Digital transformation requires "
]

# 3. Utiliser les paramètres dans la boucle de génération
print("\nTest du modèle :")
for prompt in test_prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to(peft_model.device)
    outputs = peft_model.generate(
        input_ids=inputs["input_ids"],
        **generation_params  # Utilisation des paramètres ici
    )

    generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    print(f"\nPrompt: {prompt}")
    print(f"Génération: {generated_text}")


Test du modèle :

Prompt: Two things are infinite: 
Génération: Two things are infinite:  space and time

Prompt: In the world of luxury design, 
Génération: In the world of luxury design,  one is born with a certain innate sense that all things must be within their range.

Prompt: The art of creativity lies in 
Génération: The art of creativity lies in  the way we think. And when it comes to writing a book, thinking goes beyond imagination.  As authorship requires us not only “to understand” but also “make sense out”; so how can you be creative if your work does n't seem like that?

Prompt: Digital transformation requires 
Génération: Digital transformation requires  to work with a different type of people
