In [2]:
import boto3
import os
import tempfile
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch

def load_roberta_from_s3(bucket_path):
    """Download a RoBERTa checkpoint directory from S3 and load it.

    Every object stored under the given prefix is copied into a fresh
    temporary directory, which is then passed to ``from_pretrained`` for both
    the tokenizer and the sequence-classification model.

    Args:
        bucket_path: Checkpoint location written as ``"bucket/prefix"``. A
            leading ``"s3://"`` and trailing ``"/"`` characters are tolerated.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        ValueError: If ``bucket_path`` has no ``/`` separator, has an empty
            bucket name, or if an object key would resolve to a location
            outside the temporary directory.
        FileNotFoundError: If no downloadable object exists under the prefix.

    Note:
        The temporary directory is left on disk after loading; its path is
        printed so the caller can remove it once it is no longer needed.
    """
    # Accept full S3 URIs as well as the bare "bucket/prefix" form.
    if bucket_path.startswith("s3://"):
        bucket_path = bucket_path[len("s3://"):]

    # Split bucket and prefix.
    if "/" not in bucket_path:
        raise ValueError("Le chemin doit être de la forme 'bucket/prefix'")
    bucket_name, prefix = bucket_path.split("/", 1)
    if not bucket_name:
        raise ValueError("Le chemin doit être de la forme 'bucket/prefix'")
    prefix = prefix.rstrip("/")  # drop any trailing slash

    # List with a directory-style prefix so that e.g. "checkpoint-1800" does
    # not also match sibling prefixes such as "checkpoint-18000/".
    list_prefix = f"{prefix}/" if prefix else ""

    s3 = boto3.client("s3")

    # A single list_objects_v2 call returns a limited number of keys per
    # response; the paginator follows continuation tokens to get all of them.
    paginator = s3.get_paginator("list_objects_v2")
    keys = [
        obj["Key"]
        for page in paginator.paginate(Bucket=bucket_name, Prefix=list_prefix)
        for obj in page.get("Contents", [])
        if not obj["Key"].endswith("/")  # skip "folder" placeholder objects
    ]
    if not keys:
        raise FileNotFoundError(f"Aucun fichier trouvé dans s3://{bucket_name}/{prefix}")

    tmp_dir = tempfile.mkdtemp()
    print(f"Dossier temporaire: {tmp_dir}")
    root = os.path.abspath(tmp_dir)

    # Download every file, mirroring the key layout below the prefix.
    for key in keys:
        relative_key = key[len(list_prefix):]
        local_path = os.path.normpath(os.path.join(root, *relative_key.split("/")))
        # Refuse keys (e.g. containing "..") that would escape the temp dir.
        if not local_path.startswith(root + os.sep):
            raise ValueError(f"Clé S3 hors du dossier temporaire: {key}")
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        print(f"Téléchargement: s3://{bucket_name}/{key} -> {local_path}")
        s3.download_file(bucket_name, key, local_path)

    # Load tokenizer and model from the downloaded files.
    tokenizer = RobertaTokenizer.from_pretrained(tmp_dir)
    model = RobertaForSequenceClassification.from_pretrained(tmp_dir)

    return tokenizer, model

# ===== Usage =====
# Checkpoint location in "bucket/prefix" form, as expected by
# load_roberta_from_s3.
bucket_path = (
    "sagemaker-studio-oxs6vznjds/"
    "writing_task_models/accuracy/checkpoint-1800/"
)

# Download the checkpoint files and load the tokenizer and model from them.
tokenizer, model = load_roberta_from_s3(bucket_path)


Dossier temporaire: /tmp/tmpl2e7aqm0
Téléchargement: s3://sagemaker-studio-oxs6vznjds/writing_task_models/accuracy/checkpoint-1800/config.json -> /tmp/tmpl2e7aqm0/config.json
Téléchargement: s3://sagemaker-studio-oxs6vznjds/writing_task_models/accuracy/checkpoint-1800/merges.txt -> /tmp/tmpl2e7aqm0/merges.txt
Téléchargement: s3://sagemaker-studio-oxs6vznjds/writing_task_models/accuracy/checkpoint-1800/model.safetensors -> /tmp/tmpl2e7aqm0/model.safetensors
Téléchargement: s3://sagemaker-studio-oxs6vznjds/writing_task_models/accuracy/checkpoint-1800/optimizer.pt -> /tmp/tmpl2e7aqm0/optimizer.pt
Téléchargement: s3://sagemaker-studio-oxs6vznjds/writing_task_models/accuracy/checkpoint-1800/rng_state.pth -> /tmp/tmpl2e7aqm0/rng_state.pth
Téléchargement: s3://sagemaker-studio-oxs6vznjds/writing_task_models/accuracy/checkpoint-1800/scaler.pt -> /tmp/tmpl2e7aqm0/scaler.pt
Téléchargement: s3://sagemaker-studio-oxs6vznjds/writing_task_models/accuracy/checkpoint-1800/scheduler.pt -> /tmp/tmpl2e7a

In [39]:
# One hand-written sample used to exercise the inference cells below.
# The long texts are split into adjacent string literals, which Python
# concatenates into the same single strings.
example_input = {
    "ef_level": 10,
    # Note the trailing space at the end of the instructions.
    "activity_instructions": (
        "Read the email from your manager. "
        "Then respond with an email that has several ideas to help her solve the budget problem. "
        "Type in the input box. "
        "Write between 80 and 100 words. "
        "Use your own words where possible. "
    ),
    # NOTE(review): the text already begins with "Response: ", which
    # format_text_inference also prepends -- confirm this duplication matches
    # the format used at training time.
    # The "$300 000" amount contains a non-breaking space (U+00A0), written
    # here as an explicit escape.
    "student_submission": (
        "Response: Hi Carla,\n\n"
        "The financial report was shocking. We have a budget crisis and I have a list of options how to deal with this crisis on a long-team basis. \n\n"
        "-First I would recommend that we would cut down everyone’s working hours. The company would save about $10000 per worker each year. \n"
        "-Secondly we should think about offering older workers a large retirement bonus if they accept our resignation package. If we lay off senior workers we could save about $300\xa0000 every year.\n"
        "-Thirdly I would also recommend updating our offices to present-day. We have many offices which are too huge and expensive and old-fashioned. If we move office space to another location we could save money in rent. By changing location we could possibly save about $10000"
    ),
}

In [40]:
def format_text_inference(ef_level, activity_instructions, student_submission):
    """Build the single-string model input for one submission.

    The three fields are each given a textual label and joined with the
    literal text " [SEP] ".

    Args:
        ef_level: Level value inserted after "Prompt Level: ".
        activity_instructions: Task instructions inserted after "Prompt: ".
        student_submission: Student text inserted after "Response: ".

    Returns:
        The formatted string.
    """
    # NOTE(review): "[SEP]" is inserted as plain text here; whether the
    # tokenizer treats it as a special token is not visible from this cell.
    segments = (
        f"Prompt Level: {ef_level}",
        f"Prompt: {activity_instructions}",
        f"Response: {student_submission}",
    )
    return " [SEP] ".join(segments)

# The keys of example_input match the parameter names of
# format_text_inference, so the dict can be unpacked as keyword arguments.
formatted_text = format_text_inference(**example_input)

# Bare expression so the notebook displays the formatted string.
formatted_text

'Prompt Level: 10 [SEP] Prompt: Read the email from your manager. Then respond with an email that has several ideas to help her solve the budget problem. Type in the input box. Write between 80 and 100 words. Use your own words where possible.  [SEP] Response: Response: Hi Carla,\n\nThe financial report was shocking. We have a budget crisis and I have a list of options how to deal with this crisis on a long-team basis. \n\n-First I would recommend that we would cut down everyone’s working hours. The company would save about $10000 per worker each year. \n-Secondly we should think about offering older workers a large retirement bonus if they accept our resignation package. If we lay off senior workers we could save about $300\xa0000 every year.\n-Thirdly I would also recommend updating our offices to present-day. We have many offices which are too huge and expensive and old-fashioned. If we move office space to another location we could save money in rent. By changing location we could 

In [41]:
# Tokenize the formatted example into PyTorch tensors, truncating to at most
# 512 tokens.
inputs = tokenizer(
    formatted_text,
    max_length=512,
    truncation=True,
    padding=True,
    return_tensors="pt",
)

# Forward pass with gradient tracking disabled.
with torch.no_grad():
    outputs = model(**inputs)

# presumably shaped [1, num_classes] for this single example -- TODO confirm
logits = outputs.logits

# Softmax over the class dimension, then drop the singleton dimensions.
probs = logits.softmax(dim=1).squeeze()

# Print the probability of each class.
for i, p in enumerate(probs):
    print(f"Class {i} : {p.item():.2f}")

# Index of the most probable class.
predicted_class = probs.argmax().item()
print(f"\nPredicted Class : {predicted_class}")

Class 0 : 0.00
Class 1 : 0.00
Class 2 : 0.00
Class 3 : 0.11
Class 4 : 0.88
Class 5 : 0.00

Predicted Class : 4
