In [1]:
import os
import shutil
import subprocess

# Cloner le repo dans /content/IA_locales (supprime si déjà présent)
repo_dir = "/content/IA_locales"
if os.path.exists(repo_dir):
    shutil.rmtree(repo_dir)
    print(f"[INFO] Répertoire déjà présent supprimé : {repo_dir}")

subprocess.run(["git", "clone", "https://github.com/mtaileb/IA_locales.git", repo_dir], check=True)
print(f"[INFO] Dépôt cloné dans {repo_dir}")

# Copier tous les fichiers et dossiers dans /content (en écrasant ceux éventuellement déjà présents)
src_dir = repo_dir
dst_dir = "/content"

for item in os.listdir(src_dir):
    s = os.path.join(src_dir, item)
    d = os.path.join(dst_dir, item)
    if os.path.isdir(s):
        if os.path.exists(d):
            shutil.rmtree(d)
            print(f"[INFO] Dossier existant écrasé : {d}")
        shutil.copytree(s, d)
    else:
        if os.path.exists(d):
            print(f"[INFO] Fichier existant écrasé : {d}")
        shutil.copy2(s, d)


[INFO] Dépôt cloné dans /content/IA_locales


In [2]:
# Avant d'exécuter le code suivant, qui récupère les fichiers nécessaires à l'inférence, pensez à monter votre drive Google Drive (panneau de gauche -> Files -> Mount Drive)
!cp -r /content/drive/MyDrive/model_backup /content/model

In [3]:
!pip install -r requirements.txt

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->-r requirements.txt (line 1))
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->-r requirements.txt (line 1))
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->-r requirements.txt (line 1))
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->-r requirements.txt (line 1))
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->-r requirements.txt (line 1))
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->-r requirements.txt (line 1))
  Downl

In [4]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
)
import os
import pandas as pd
from datasets import Dataset, DatasetDict
import argparse
import wandb

# Configuration
BASE_MODEL_NAME = "EleutherAI/gpt-neo-1.3B"
DATA_DIR = "data"
OUTPUT_DIR = "./model"

# Désactive wandb par défaut
os.environ["WANDB_DISABLED"] = "true"

def load_parquet_datasets(data_dir=DATA_DIR):
    train_path = os.path.join(data_dir, "train.parquet")
    test_path = os.path.join(data_dir, "test.parquet")

    if not os.path.exists(train_path):
        raise FileNotFoundError(f"Train file not found: {train_path}")
    if not os.path.exists(test_path):
        raise FileNotFoundError(f"Test file not found: {test_path}")

    train_df = pd.read_parquet(train_path)
    test_df = pd.read_parquet(test_path)

    return DatasetDict({
        "train": Dataset.from_pandas(train_df),
        "test": Dataset.from_pandas(test_df)
    })

def tokenize_function(examples):
    texts = [q + "\n" + a for q, a in zip(examples['question'], examples['answer'])]
    tokens = tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=512,
        return_tensors="pt"
    )
    tokens["labels"] = tokens["input_ids"].clone()
    return tokens

def train_model():
    print("Loading datasets...")
    datasets = load_parquet_datasets()
    print(f"Train samples: {len(datasets['train'])}")
    print(f"Test samples: {len(datasets['test'])}")

    print("Tokenizing datasets...")
    tokenized_datasets = datasets.map(
        tokenize_function,
        batched=True,
        remove_columns=['question', 'answer']
    )

    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=3,
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        eval_steps=1000,
        save_steps=5000,
        save_total_limit=2,
        logging_steps=500,
        load_best_model_at_end=True,
        fp16=torch.cuda.is_available(),
        eval_strategy="steps",
        report_to="none"
    )

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
    )

    trainer = Trainer(
        model=finetuned_model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["test"],
        data_collator=data_collator,
    )

    print("Starting training...")
    trainer.train()

    print("Saving model...")
    finetuned_model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)

def generate_text(prompt, model, tokenizer, max_new_tokens=300):
    prompt = prompt + "\nAnswer:"  # Guide le modèle
    inputs = tokenizer(
        prompt,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=512
    ).to(model.device)

    output = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        pad_token_id=tokenizer.eos_token_id,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.4,  # Relativement bas pour moins de créativité sur ces prompts de maths
        top_k=50,
        top_p=0.9,
        no_repeat_ngram_size=2,
        early_stopping=True,
        eos_token_id=tokenizer.eos_token_id
    )
    full_output = tokenizer.decode(output[0], skip_special_tokens=True)
    return full_output[len(prompt):].strip()

def compare_models(prompt):
    print("\n--- Base Model ---")
    base_output = generate_text(prompt, base_model, base_tokenizer)
    print(base_output)

    print("\n--- Finetuned Model ---")
    finetuned_output = generate_text(prompt, finetuned_model, tokenizer)
    print(finetuned_output)

if __name__ == '__main__':
    print("Loading base model...")
    base_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
    base_tokenizer.pad_token = base_tokenizer.eos_token
    base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_NAME)
    if torch.cuda.is_available():
        base_model.cuda()

    print("Loading finetuned model...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
    tokenizer.pad_token = tokenizer.eos_token

    if os.path.exists(OUTPUT_DIR):
        finetuned_model = AutoModelForCausalLM.from_pretrained(OUTPUT_DIR)
    else:
        finetuned_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_NAME)

    if torch.cuda.is_available():
        finetuned_model.cuda()

    # Train (aucun dans ce cas, pour mémoire)
    # train_model()

    # Chargement des données de validation
    val_datasets = load_parquet_datasets()["test"]

    # Comparaison des réponses sur les 3 premiers exemples du jeu de données de validation
    for i in range(3):
        prompt = val_datasets[i]['question']
        correct_answer = val_datasets[i]['answer']

        print(f"\n\n=== Exemple {i+1} ===")
        print(f"\n1) PROMPT: {prompt}")
        print(f"\n2) BONNE REPONSE: {correct_answer}")

        print("\n3) REPONSE DU MODELE DE BASE:")
        base_output = generate_text(prompt, base_model, base_tokenizer)
        print(base_output.split(prompt)[-1].strip())

        print("\n4) REPONSE DU MODELE AVEC FINETUNING:")
        finetuned_output = generate_text(prompt, finetuned_model, tokenizer)
        print(finetuned_output.split(prompt)[-1].strip())


Loading base model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.31G [00:00<?, ?B/s]

Loading finetuned model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.




=== Exemple 1 ===

1) PROMPT: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?

2) BONNE REPONSE: Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.
She makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.
#### 18

3) REPONSE DU MODELE DE BASE:


The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


$0.00

What is the average price of a fresh egg? (In dollars)
A fresh chicken egg costs $1.25.
An egg from a duck costs only $.50. (The cost of the duck is not included.)
How many eggs does Janet sell at a farmers‘ market each day?

4) REPONSE DU MODELE AVEC FINETUNING:


The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


16 x 3 = <<16*3=48>>48
Janets sells 48 eggs for 48/2 = $<<48/ 2=24>>24.
#### 24
She makes 24 - 3 - 4 =$<<24-3-4=11>>11 a day
Every day she makes $11 x 4= $22.   She makes 22 x 7 =  $121.00
Subtract the cost of the eggs from the total cost to find the profit: $21.50 - $12.25 = -$121
Multiply the number of eggs sold to get the earnings from each egg: 48 / 4 eggs = 16 * $0.20 = 4.80
Divide the sales price to determine the price of a duck: 4 egg * 2 = 12.5
Dividing the selling price by the value of each duck's egg shows that there are 7 eggs in a fresh egg, so the egg costs 2.8 = 2 eggs
Then divide the remaining eggs by their price and multiply the answer by 2 to convert the result to dollars: 7 / 2 * 1250 = 1050
Finally, multiply Janets earnings by her number eggs to figure out her total income: 1050 eggs * 3 dollars/egg = 3,350 dollars
The final number is 7 - 2 - 1 =<<7-2-1=6>>6 eggs she sells
Her total earnings are 3352 dollars from eggs or


=== Exemple 2 ===

1) PROMPT: A robe takes 2

The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


It takes two bolts, but it takes half as much as it does if you use the white.

A:
   A robe is a robe. A dress is also a dress.

4) REPONSE DU MODELE AVEC FINETUNING:
2/2=<<2/(2)=1>>1
2 bolts:1(2)/2
Total:<<1*2+1=3>>3 bolts
#### 3
3-1)=<<3+-3=6>>6 bolts needed
So it takes 6/5= <<6/ 5= 2>>2 times more white fibers than blue
That means it uses 2*1 = <<2*=2>> 2 more bolts for white
It therefore takes a total of 2+2 =<<4=4>>4 bolts.
Since the robe uses 3 bolts, it therefore uses 4*3 = 12 bolts."

A: <<12=12>>12 bolts used
B: 12/12 = 1/4 of the bolts are blue. So there are 12*(1/ 4) = 3 blue bolts"
C: 3(12)=36 bolts total. "
D: 36/6= 12 blue bolt
E:12(3)=72 bolts "#### 72" is the number of bolts the robes takes. Therefore, the total number is 12+72+36+12+6 = 96 bolts that the bolt uses. How much did it use? (12/96)*100=1,000
The robe used 1,100 bolts because 3+4+ 12 =1120
1.1K=100, so the whole bolt set uses 100, which is 1120 bolts or 100*


The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.




=== Exemple 3 ===

1) PROMPT: Josh decides to try flipping a house.  He buys a house for $80,000 and then puts in $50,000 in repairs.  This increased the value of the house by 150%.  How much profit did he make?

2) BONNE REPONSE: The cost of the house and repairs came out to 80,000+50,000=$<<80000+50000=130000>>130,000
He increased the value of the house by 80,000*1.5=<<80000*1.5=120000>>120,000
So the new value of the house is 120,000+80,000=$<<120000+80000=200000>>200,000
So he made a profit of 200,000-130,000=$<<200000-130000=70000>>70,000
#### 70000

3) REPONSE DU MODELE DE BASE:


The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


$100,00

A:
  
How much did you make on your house?

4) REPONSE DU MODELE AVEC FINETUNING:
$800000-$50000=$<<80000-50000=20000>>20,200
So he made a profit of 20,201*.15=$15,500
#### 15, 500
That means he paid 20200-15500=$18,250
Since he bought the home for 80, 000 and spent $18250 on repairs, he had a net profit
$1825-80=$<<182500-82500=12500>>12,525
He had to pay 20700+12.525=$27,850
Thus he spent a total of $27850 on the repairs and had an overall profit or $28, 250
If you take out the cost of repairs from the total cost, then Josh made $12250-2785=$11,350
Finally, if you add up all his costs, it comes to 12, 535+18 250=$ <<1235=12735>>127,35
The profit was 127, 35-20 200=$6,  which means Josh got a loss of 6,127 on his house
In total, Josh lost 6127-127=<<6127=-12737>>- 12737
His profit after paying the loss is $12738-$182425=$8,762
Subtracting the repair cost from his total profit, we get $8762-$1225= $<<8726-122500=-6726=6716>>67


In [5]:
!python app_v2.py


2025-06-05 15:38:57.998166: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749137938.308454    8624 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749137938.381673    8624 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Loading checkpoint shards: 100% 2/2 [00:00<00:00,  3.34it/s]
 * Serving Flask app 'app_v2'
 * Debug mode: off
 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass 

In [None]:
!python run_inference.py --prompt "How many centiliters is there is half a liter?"

Loading model from: ./model
2025-06-05 16:51:46.515044: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749142306.968495   26139 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749142307.085676   26139 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
