## Download models from s3

In [None]:
#!pip install onnxruntime
#!pip install transformers

In [2]:
import boto3
import onnxruntime as ort

# Define S3 bucket and model key
bucket_name = 'sagemaker-studio-oxs6vznjds'
model_key = 'writing_task_models/coherence/model_1200_roberta_large.onnx'
local_model_path = '/tmp/roberta-large-ft-coh-writing-task-1200.onnx'  # or wherever you want to save temporarily

# Initialize boto3 S3 client
s3 = boto3.client('s3')

# Download the ONNX model from S3 to local path
s3.download_file(bucket_name, model_key, local_model_path)

# Load the ONNX model using onnxruntime
session_coh_model = ort.InferenceSession(local_model_path)

print("ONNX coherence model loaded successfully.")

ONNX coherence model loaded successfully.


In [3]:
# Define S3 bucket and model key
model_key = 'writing_task_models/accuracy/model_1800_quantized_roberta_large.onnx'
local_model_path = '/tmp/roberta-large-ft-acc-writing-task-1800-quant.onnx'  # or wherever you want to save temporarily

# Initialize boto3 S3 client
s3 = boto3.client('s3')

# Download the ONNX model from S3 to local path
s3.download_file(bucket_name, model_key, local_model_path)

# Load the ONNX model using onnxruntime
session_acc_model = ort.InferenceSession(local_model_path)

print("ONNX accuracy model loaded successfully.")

ONNX accuracy model loaded successfully.


## Validation data

In [4]:
import pandas as pd

In [5]:
df = pd.read_csv("validation_global_score.csv")
df.rename(columns={'majority_value': 'global_score'}, inplace=True)
df.head()

Unnamed: 0,recordId,gpt4o_judge_score,nova_judge_score,llama3_judge_score,majority_value,agreement_percentage,writing_id,task_id,cefr_level,ef_level,activity_instructions,student_submission
0,CALL0001329,0.0,0.0,0.0,0.0,100.0,36580a25-45fe-47c5-8735-760e25aad34c,bf43a9ca-521f-4f13-bdbe-414077f8b33f,B1,2.0,Now write up the research proposal. Use the no...,Ticket:\nJan 17: Miami (MIA) 6am departure\nto...
1,CALL0000415,5.0,3.0,5.0,5.0,66.666667,61123e53-d0d3-4511-9d11-8a85b8a28f3a,8dab61c3-5199-4396-87e2-1618ae59b47a,B1,7.0,Read the class assignment. Then write your res...,I disagree with the proposal to cut physical e...
2,CALL0001781,4.0,4.0,4.0,4.0,100.0,5fb32e87-e250-44b7-9dc4-fb6ea9a29de8,de7a1159-0c06-49bf-a273-7bdd195d6a32,B2,11.0,You are applying for a position as a manager a...,"Dear Ms. Rivers,\n\nI am excited to apply for ..."
3,CALL0000367,1.0,1.0,1.0,1.0,100.0,51067947-db59-410b-a8e4-b3718b983a3f,f5795954-97ae-4728-b576-81b7571f3755,B1,2.0,Listen to Nicholson describe where he wants to...,He wants to be in a senior management position...
4,CALL0001895,0.0,0.0,0.0,0.0,100.0,3519e5b9-b7da-443e-9646-5ef3ef288b50,78f976f6-a505-4597-877b-94d9bca3b217,B1,2.0,You're asked to prepare the script for the ope...,"Hi,\n\nAs you know, we're planning a presentat..."


In [6]:
df.shape

(1674, 12)

## inference

In [33]:
from scipy.special import softmax
import numpy as np

def predict_class(session, level, prompt, response):
    # Construire le texte d'entrée
    text = f"Prompt Level: {level} [SEP] Prompt: {prompt} [SEP] Response: {response}"
    
    # Tokenisation
    inputs = tokenizer(text, padding=True, truncation=True, max_length=256, return_tensors="pt")
    input_ids = inputs["input_ids"].numpy()
    attention_mask = inputs["attention_mask"].numpy()
    
    # Inférence ONNX
    onnx_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
    logits = session.run(None, onnx_inputs)[0]

    # Classe prédite (celle avec le logit maximal)
    pred = np.argmax(logits[0])

    return pred


In [12]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-large")

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [37]:
level = "Facile"
prompt = "Complète la phrase suivante"
response = "Le chat dort sur le canapé."

# Appel de la fonction
pred = predict_class(session_acc_model, level, prompt, response)


In [41]:
from tqdm import tqdm

# Activer la barre de progression pour pandas
tqdm.pandas()

# Appliquer la fonction avec barre de progression
df["coh_score"] = df.progress_apply(
    lambda row: predict_class(
        session_coh_model, 
        row["ef_level"], 
        row["activity_instructions"], 
        row["student_submission"]
    ),
    axis=1
)


100%|██████████| 1674/1674 [12:45<00:00,  2.19it/s]


In [42]:
from tqdm import tqdm

# Activer la barre de progression pour pandas
tqdm.pandas()

# Appliquer la fonction avec barre de progression
df["acc_score"] = df.progress_apply(
    lambda row: predict_class(
        session_acc_model, 
        row["ef_level"], 
        row["activity_instructions"], 
        row["student_submission"]
    ),
    axis=1
)


100%|██████████| 1674/1674 [10:37<00:00,  2.62it/s]


In [43]:
df.to_csv('validation_global_score_with_coh_and_acc.csv', index=False)  # index=False pour ne pas sauvegarder l'index


## Learn Weights

In [44]:
from sklearn.linear_model import LinearRegression

In [46]:
# Variables explicatives
X = df[["coh_score", "acc_score"]].values

# Variable cible
y = df["majority_value"].values

# Régression linéaire
model = LinearRegression()
model.fit(X, y)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [49]:
# Poids optimaux
w1, w2 = model.coef_
b = model.intercept_

print(f"Poids optimal : coh_score={w1:.3f}, acc_score={w2:.3f}, biais={b:.3f}")

# Prédiction avec les poids optimaux
df["pred_global_score"] = model.predict(X)

Poids optimal : coh_score=0.405, acc_score=0.570, biais=-0.059


In [52]:
total = w1 + w2
w1_norm = w1 / total
w2_norm = w2 / total

print(round(w1_norm, 2), round(w2_norm, 2))

0.42 0.58


In [54]:
from sklearn.metrics import mean_squared_error, r2_score

mse = mean_squared_error(y, df["pred_global_score"])
r2 = r2_score(y, df["pred_global_score"])

print(f"MSE: {mse:.2f}, R2: {r2:.2f}")

MSE: 0.33, R2: 0.91


In [58]:
# Target variable
y = df["majority_value"].values

# 1️⃣ Both features together
X_both = df[["coh_score", "acc_score"]].values
model_both = LinearRegression().fit(X_both, y)
y_pred_both = model_both.predict(X_both)
print("With coh_score + acc_score:")
print(f"MSE = {mean_squared_error(y, y_pred_both):.4f}, R2 = {r2_score(y, y_pred_both):.4f}")

# 2️⃣ Coh_score only
X_coh = df[["coh_score"]].values
model_coh = LinearRegression().fit(X_coh, y)
y_pred_coh = model_coh.predict(X_coh)
print("With coh_score only:")
print(f"MSE = {mean_squared_error(y, y_pred_coh):.4f}, R2 = {r2_score(y, y_pred_coh):.4f}")

# 3️⃣ Acc_score only
X_acc = df[["acc_score"]].values
model_acc = LinearRegression().fit(X_acc, y)
y_pred_acc = model_acc.predict(X_acc)
print("With acc_score only:")
print(f"MSE = {mean_squared_error(y, y_pred_acc):.4f}, R2 = {r2_score(y, y_pred_acc):.4f}")


With coh_score + acc_score:
MSE = 0.3336, R2 = 0.9103
With coh_score only:
MSE = 0.4835, R2 = 0.8700
With acc_score only:
MSE = 0.4142, R2 = 0.8886
