In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import requests
import re
from tqdm.auto import tqdm

# Keep any `device` already present in the global namespace (e.g. left by an
# earlier notebook cell); otherwise pick CUDA when available, else the CPU.
if "device" not in globals():
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
print(f"Using device: {device}")

# --- 1. Data download and loading ---
print("\n--- 1. Data Setup ---")
base_url = 'https://www.raphaelcousin.com/modules/data-science-practice/module8/exercise/'
# Both CSV files are addressed relative to the exercise folder above.
train_url = f"{base_url}train.csv"
test_url = f"{base_url}test.csv"

def download_file(url, filename):
    """Download *url* and write the response body to *filename*.

    Request failures (connection errors, timeouts, HTTP error statuses) are
    reported on stdout instead of being raised, so a failed download does not
    abort the run. The outcome is returned so callers can react to a failure
    instead of unknowingly reading a missing or stale file.

    Args:
        url: Address of the file to fetch.
        filename: Local path the downloaded bytes are written to.

    Returns:
        True if the file was downloaded and written, False if the request
        failed.
    """
    # Only the network calls can raise RequestException; keep the try narrow.
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {filename}: {e}")
        return False

    # The file is only touched once a successful response is in hand.
    with open(filename, 'wb') as f:
        f.write(response.content)
    return True

# Fetch both CSV files first, then load them into DataFrames.
for _source_url, _local_name in ((train_url, 'train.csv'), (test_url, 'test.csv')):
    download_file(_source_url, _local_name)

train_data, test_data = pd.read_csv('train.csv'), pd.read_csv('test.csv')

print(f"Train set size: {len(train_data)}")
print(f"Test set size: {len(test_data)}")

# --- 2. Fonctions Utilitaires (Nouvelle Extraction Simplifiée) ---

def check_accuracy(predictions, ground_truth, tolerance=0.01):
    """Return the fraction of predictions that match their ground truth.

    A prediction counts as correct when it equals the truth after rounding
    both to two decimals, or when the absolute difference is within
    *tolerance*.

    Args:
        predictions: Sequence of predicted numbers.
        ground_truth: Sequence of expected numbers, paired with
            *predictions* by position.
        tolerance: Largest absolute error still accepted as correct.

    Returns:
        Number of correct pairs divided by ``len(predictions)``; 0.0 when
        *predictions* is empty (instead of raising ZeroDivisionError).
    """
    total = len(predictions)
    if total == 0:
        return 0.0

    correct = sum(
        1
        for pred, truth in zip(predictions, ground_truth)
        if round(pred, 2) == round(truth, 2) or abs(pred - truth) <= tolerance
    )
    return correct / total

# Optional sign, then either digits grouped by thousands separators
# ("1,234,567") or a plain digit run, then an optional decimal part.
# The (?!\d) guard rejects malformed groupings such as "1,2345".
_NUMBER_PATTERN = re.compile(r"-?(?:\d{1,3}(?:,\d{3})+(?!\d)|\d+)(?:\.\d+)?")


def extract_numeric_answer(response):
    """Extract the first number (integer or decimal) from a model response.

    Thousands separators are understood, so ``"1,234.5"`` yields ``1234.5``
    rather than stopping at the first comma.

    Args:
        response: Text produced by the model.

    Returns:
        The first number found, as a float, or None when *response* is not a
        string or contains no number.
    """
    if not isinstance(response, str):
        return None

    match = _NUMBER_PATTERN.search(response)
    if match is None:
        return None
    # float() does not accept thousands separators, so drop them first.
    return float(match.group().replace(",", ""))


# Name used by the evaluation loops for the extraction routine.
extract_number = extract_numeric_answer


# --- 3. Model loading (Qwen 2.5-Math-1.5B, half precision) ---
print("\n--- 3. Model Loading (Qwen 2.5-Math-1.5B OPTIMISÉ) ---")
model_name = "Qwen/Qwen2.5-Math-1.5B"

print(f"Loading {model_name} in Float16 (FP16)...")

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Weights are requested in FP16; device placement is delegated to the library
# through device_map="auto".
_load_options = {"torch_dtype": torch.float16, "device_map": "auto"}
model = AutoModelForCausalLM.from_pretrained(model_name, **_load_options)

# Use the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print(f"{model_name} loaded")
param_count = sum(p.numel() for p in model.parameters())
print(f"Model size: {param_count / 1e6:.1f}M params")

# --- 4. Fonction de Génération (Simplification du Prompt) ---

def _build_prompt(problem, prompt_template):
    """Return the prompt text for *problem* under the named template."""
    if prompt_template == "simple":
        return f"{problem}\nAnswer:"

    if prompt_template == "instruction":
        return f"Solve this math problem and provide only the numerical answer.\n\nProblem: {problem}\nAnswer:"

    if prompt_template == "cot":
        # Plain chain-of-thought prompt.
        return f"Solve this math problem step by step, then provide the final numerical answer.\n\nProblem: {problem}\nSolution:\n"

    if prompt_template == "few_shot":
        # Up to five worked examples from the start of the training set,
        # in a plain Q/A layout (no chat template).
        shots = train_data.head(5)
        examples_text = "\n\n".join(
            f"Q: {ex_problem.strip()}\nA: {ex_solution}"
            for ex_problem, ex_solution in zip(shots["problem"], shots["solution"])
        )
        return (
            "You are a helpful math assistant. "
            "Read each problem carefully and answer only the last problem with the numeric result.\n\n"
            f"Here are some examples:\n{examples_text}\n\n"
            f"Now solve the following problem, step by step:\nQ: {problem}\nA:"
        )

    # Unknown template names fall back to the bare problem text.
    return problem


def generate_answer(problem, prompt_template="simple", max_new_tokens=50, temperature=0.1):
    """Generate the model's answer to *problem* using a prompt template.

    Args:
        problem: Text of the math problem.
        prompt_template: One of "simple", "instruction", "cot" or "few_shot";
            any other value sends the problem text unchanged.
        max_new_tokens: Maximum number of tokens to generate. The value given
            by the caller is honoured (it used to be overwritten with 50).
        temperature: Sampling temperature. A value above 0 enables sampling;
            0 (or None) selects greedy decoding. The value given by the
            caller is honoured (it used to be overwritten with 0.1).

    Returns:
        The generated continuation only (prompt excluded), stripped of
        surrounding whitespace.
    """
    prompt = _build_prompt(problem, prompt_template)

    # NOTE(review): truncation caps the prompt at 512 tokens; a longer
    # few-shot prompt could lose its final question if the tokenizer
    # truncates on the right -- confirm prompts stay under the limit.
    # Inputs follow the model's own device, since device_map="auto" (not the
    # global `device`) decides where the weights live.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = temperature
    else:
        # Temperature is meaningless for greedy decoding, so it is not passed.
        generation_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # Drop the prompt by token count rather than by character length of the
    # decoded text: the decoded prompt is not guaranteed to match the original
    # string (truncation, tokenizer normalisation).
    prompt_token_count = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

    return response.strip()

# --- 5. Evaluation on the validation split ---
print("\n--- 5. Validation Evaluation ---")
# The plain Q/A few-shot template is the one used for evaluation.
best_template = "few_shot"
# The last 50 training rows act as the validation set.
val_data = train_data.tail(50)
ground_truth = val_data['solution'].tolist()
predictions = []

print(f"Evaluating on {len(val_data)} validation problems using template '{best_template}'...")

for problem in tqdm(val_data['problem'], total=len(val_data), desc="Validation"):
    parsed = extract_number(generate_answer(problem, prompt_template=best_template))
    # A response without any number is scored as 0.0.
    predictions.append(0.0 if parsed is None else parsed)

accuracy = check_accuracy(predictions, ground_truth)
print(f"\nValidation Accuracy: {accuracy:.2%}")
print(f"Need to achieve: 70% on test set")

# --- 6. Prediction generation on the test set ---
print("\n--- 6. Test Predictions Generation ---")
print(f"Generating predictions on {len(test_data)} test problems...")

test_predictions = []

for idx, row in tqdm(test_data.iterrows(), total=len(test_data), desc="Test Prediction"):
    problem = row['problem']

    response = generate_answer(problem, prompt_template=best_template)
    prediction = extract_number(response)

    if prediction is None:
        # Fall back to 0.0 so the submission stays numeric, and always report
        # it: the warning used to be skipped for every 10th problem and
        # required an integer index for the `idx + 1` arithmetic.
        prediction = 0.0
        print(f"\n⚠️ Warning: No number extracted for problem {idx}: {problem[:50]}...")

    test_predictions.append(prediction)

print("\nAll test predictions generated!")

# --- 7. Submission file creation ---
print("\n--- 7. Submission File Creation ---")
# One row per test problem: its id next to the predicted value.
submission = pd.DataFrame({'id': test_data['id']})
submission['solution'] = test_predictions

submission.to_csv('submission.csv', index=False)

print("Submission file created: submission.csv")
print("\nSubmission preview:")
print(submission.head())

# Final check: count missing (NaN) entries in the prediction column.
non_numeric = submission['solution'].isna().sum()
if non_numeric <= 0:
    print("\n✓ All predictions are numerical")
else:
    print(f"\n⚠️ WARNING: {non_numeric} predictions are not numerical!")

Using device: cuda

--- 1. Data Setup ---
Train set size: 900
Test set size: 100

--- 3. Model Loading (Qwen 2.5-Math-1.5B OPTIMISÉ) ---
Loading Qwen/Qwen2.5-Math-1.5B in Float16 (FP16)...
Qwen/Qwen2.5-Math-1.5B loaded
Model size: 1543.7M params

--- 5. Validation Evaluation ---
Evaluating on 50 validation problems using template 'few_shot'...


Validation:   0%|          | 0/50 [00:00<?, ?it/s]


Validation Accuracy: 90.00%
Need to achieve: 70% on test set

--- 6. Test Predictions Generation ---
Generating predictions on 100 test problems...


Test Prediction:   0%|          | 0/100 [00:00<?, ?it/s]


All test predictions generated!

--- 7. Submission File Creation ---
Submission file created: submission.csv

Submission preview:
   id  solution
0   0      98.1
1   1     314.0
2   2     224.0
3   3      96.5
4   4     102.0

✓ All predictions are numerical
