In [1]:
#!pip install onnxruntime

In [2]:
#!pip install gradio

## Load models from S3

In [3]:
import boto3
import onnxruntime as ort

# --- Coherence model: download the ONNX file from S3 and open a runtime session ---

# S3 location of the exported model
bucket_name = 'sagemaker-studio-oxs6vznjds'
model_key = 'writing_task_models/coherence/model_1200_roberta_large.onnx'

# Local destination for the downloaded file (temporary storage)
local_model_path = '/tmp/roberta-large-ft-coh-writing-task-1200.onnx'

# Fetch the file with a boto3 S3 client
s3 = boto3.client('s3')
s3.download_file(Bucket=bucket_name, Key=model_key, Filename=local_model_path)

# Open the downloaded file with ONNX Runtime
session_coh = ort.InferenceSession(local_model_path)

print("ONNX coherence model loaded successfully.")

ONNX coherence model loaded successfully.


In [4]:
# --- Accuracy model: download the quantized ONNX file from S3 and open a runtime session ---

# Object key in the bucket and local destination (temporary storage)
model_key = 'writing_task_models/accuracy/model_1800_quantized_roberta_large.onnx'
local_model_path = '/tmp/roberta-large-ft-acc-writing-task-1800-quant.onnx'

# Fetch the file with a boto3 S3 client
s3 = boto3.client('s3')
s3.download_file(Bucket=bucket_name, Key=model_key, Filename=local_model_path)

# Open the downloaded file with ONNX Runtime
session_acc = ort.InferenceSession(local_model_path)

print("ONNX accuracy model loaded successfully.")

ONNX accuracy model loaded successfully.


In [5]:
from transformers import AutoTokenizer
# Tokenizer loaded from the "FacebookAI/roberta-large" checkpoint; inference() uses it to encode the input text.
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/roberta-large")

In [6]:
max_length = 256  # Adjust to the maximum sequence length of your model
import torch.nn.functional as F # for softmax
import torch  
import numpy as np

def format_text_inference(ef_level, activity_instructions, student_submission):
    """Build the single text string that is passed to the tokenizer.

    The three fields are labelled and joined with a literal " [SEP] " marker:
    "Prompt Level: <ef_level> [SEP] Prompt: <instructions> [SEP] Response: <submission>".
    """
    segments = (
        f"Prompt Level: {ef_level}",
        f"Prompt: {activity_instructions}",
        f"Response: {student_submission}",
    )
    return " [SEP] ".join(segments)

def inference(input_json, onnx_model):
    """Score one student submission with an ONNX classification model.

    Args:
        input_json: mapping with the keys "ef_level" (converted with int()),
            "activity_instructions" and "student_submission".
        onnx_model: object whose ``run(None, feed)`` returns a sequence with the
            logits as its first element (e.g. an onnxruntime InferenceSession).

    Returns:
        dict with the predicted class index ("cefr_scoring"), its 0-100 mapping
        ("cefr_scoring_100"), the softmax probability of that class rounded to
        two decimals ("predicted_probability") and static "scorer" metadata.
    """
    level = int(input_json["ef_level"])
    text = format_text_inference(
        level,
        input_json["activity_instructions"],
        input_json["student_submission"],
    )

    # Tokenize as PyTorch tensors, then convert to numpy arrays for the ONNX feed.
    encoded = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    feed = {
        field: encoded[field].cpu().numpy()
        for field in ("input_ids", "attention_mask")
    }

    # The first output of the model holds the logits.
    logits = onnx_model.run(None, feed)[0]

    # Winning class of the first row, and its softmax probability.
    best_class = int(np.argmax(logits, axis=1)[0])
    class_probs = F.softmax(torch.tensor(logits), dim=1).numpy().squeeze()
    best_prob = float(class_probs[best_class])

    return {
        "cefr_scoring": best_class,
        "cefr_scoring_100": map_score_linear(best_class),
        "predicted_probability": round(best_prob, 2),
        "scorer": {
            "version": "roberta_large_onnx_scorer",
            "release": "0.1",
        },
    }

def map_score_linear(score):
    """Map a class index 0..5 to a score on the 0-100 scale.

    Returns 17, 33, 50, 67, 83 or 100 for indices 0 to 5, and None for any
    other value.
    """
    # Index i of this tuple is the score for class i.
    scale = (17, 33, 50, 67, 83, 100)
    return dict(enumerate(scale)).get(score)


In [7]:
# Sample payload containing the three fields read by inference():
# "ef_level", "activity_instructions" and "student_submission".
example_input = {
    "ef_level": 10,
    "activity_instructions": "Read the email from your manager. Then respond with an email that has several ideas to help her solve the budget problem. Type in the input box. Write between 80 and 100 words. Use your own words where possible. ",
    "student_submission": "Response: Hi Carla,\n\nThe financial report was shocking. We have a budget crisis and I have a list of options how to deal with this crisis on a long-team basis. \n\n-First I would recommend that we would cut down everyone’s working hours. The company would save about $10000 per worker each year. \n-Secondly we should think about offering older workers a large retirement bonus if they accept our resignation package. If we lay off senior workers we could save about $300 000 every year.\n-Thirdly I would also recommend updating our offices to present-day. We have many offices which are too huge and expensive and old-fashioned. If we move office space to another location we could save money in rent. By changing location we could possibly save about $10000"
}

# Score the same payload with the accuracy session and the coherence session.
result_acc = inference(example_input, onnx_model=session_acc)
result_coh = inference(example_input, onnx_model=session_coh)

# Show both result dictionaries.
print(result_acc)
print(result_coh)

{'cefr_scoring': 4, 'cefr_scoring_100': 83, 'predicted_probability': 0.66, 'scorer': {'version': 'roberta_large_onnx_scorer', 'release': '0.1'}}
{'cefr_scoring': 4, 'cefr_scoring_100': 83, 'predicted_probability': 0.94, 'scorer': {'version': 'roberta_large_onnx_scorer', 'release': '0.1'}}


## Demo

In [8]:
import gradio as gr
import numpy as np
from scipy.special import softmax

In [24]:
def format_text_inference(ef_level, activity_instructions, student_submission):
    """Assemble the model input text from the prompt level, prompt and response.

    The result has the form
    "Prompt Level: <ef_level> [SEP] Prompt: <instructions> [SEP] Response: <submission>".
    """
    template = "Prompt Level: {} [SEP] Prompt: {} [SEP] Response: {}"
    return template.format(ef_level, activity_instructions, student_submission)

def inference(input_json, onnx_model):
    """Score one student submission with an ONNX classification model (demo variant).

    Args:
        input_json: mapping with the keys "ef_level" (converted with
            int(float(...)), so 10, 10.0 and "10.0" are all accepted),
            "activity_instructions" and "student_submission". A "cefr_level"
            entry may be present but is not used for scoring, so it is not
            required.
        onnx_model: object whose ``run(None, feed)`` returns a sequence with the
            logits as its first element (e.g. an onnxruntime InferenceSession).

    Returns:
        dict with the predicted class index ("cefr_scoring") and its mapping to
        the 0-100 scale ("score_100").

    Raises:
        KeyError: if one of the three required keys is missing.
        ValueError / TypeError: if "ef_level" cannot be converted to a number.
    """
    ef_level = int(float(input_json["ef_level"]))
    # "cefr_level" is deliberately not read: it never influenced the result, and
    # requiring it made payloads without that key fail with KeyError.
    activity_instructions = input_json["activity_instructions"]
    student_submission = input_json["student_submission"]

    formatted_text = format_text_inference(ef_level, activity_instructions, student_submission)

    # Tokenize as PyTorch tensors, then convert to numpy arrays for the ONNX feed.
    inputs = tokenizer(
        formatted_text,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt"
    )

    input_ids = inputs["input_ids"].cpu().numpy()
    attention_mask = inputs["attention_mask"].cpu().numpy()
    onnx_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}

    # The first output of the model holds the logits.
    logits = onnx_model.run(None, onnx_inputs)[0]
    probs = F.softmax(torch.tensor(logits), dim=1).numpy().squeeze()

    predicted_class = int(np.argmax(probs))
    mapped_score = map_score_linear(predicted_class)

    return {
        "cefr_scoring": predicted_class,
        "score_100": mapped_score
    }

def map_score_linear(score):
    """Translate a class index (0 to 5) into its score on the 0-100 scale.

    Indices 0..5 give 17, 33, 50, 67, 83 and 100 respectively; any other value
    gives None.
    """
    values = (17, 33, 50, 67, 83, 100)
    lookup = {index: value for index, value in enumerate(values)}
    return lookup.get(score)


In [28]:
# Score on the 0-100 scale for each CEFR band, listed from lowest to highest.
# score_to_cefr() walks this mapping in insertion order.
evp_to_score = dict(
    A1=17,
    A2=33,
    B1=50,
    B2=67,
    C1=83,
    C2=100,
)

# Hex colour associated with each CEFR band
cefr_colors = dict(
    A1="#F44336",  # red
    A2="#FF9800",  # orange
    B1="#FFEB3B",  # yellow
    B2="#CDDC39",  # lime
    C1="#4CAF50",  # green
    C2="#2E7D32",  # dark green
)

def score_to_cefr(score):
    """Return the first CEFR band whose value in ``evp_to_score`` is >= ``score``.

    Bands are examined in the mapping's insertion order; "C2" is returned when
    ``score`` is above every value in the mapping.
    """
    return next(
        (band for band, ceiling in evp_to_score.items() if score <= ceiling),
        "C2",
    )


def predict(ef_level, cefr_level, prompt, response):
    """Run both scorers on one submission and format the results for the UI.

    Returns a 4-tuple: the accuracy score, the coherence score (both on the
    0-100 scale), then a Markdown string for each that shows the score and its
    CEFR band.
    """
    payload = dict(
        ef_level=ef_level,
        cefr_level=cefr_level,
        activity_instructions=prompt,
        student_submission=response,
    )

    # Accuracy first, then coherence.
    results = [inference(payload, session) for session in (session_acc, session_coh)]
    acc_score, coh_score = (result["score_100"] for result in results)

    acc_band = score_to_cefr(acc_score)
    coh_band = score_to_cefr(coh_score)

    # Slider values followed by the Markdown label texts
    return (
        acc_score,
        coh_score,
        f"**Accuracy Score:** {acc_score} ({acc_band})",
        f"**Coherence Score:** {coh_score} ({coh_band})",
    )


In [29]:
import pandas as pd
# Read the first 300 rows of the sample file and keep only the rows where
# ef_level is not null.
df_sample_data = (
    pd.read_csv("/home/ec2-user/workspace/llm_fine_tuning/slm/writing_task_class_ai_acc/data/acc_data.csv", nrows=300)
    .loc[lambda frame: frame["ef_level"].notnull()]
)
print(df_sample_data.shape)

def get_random_example():
    """Pick one random row of ``df_sample_data``.

    Returns a tuple (ef_level, cefr_level, activity_instructions,
    student_submission) taken from that row.
    """
    row = df_sample_data.sample(1).iloc[0]
    fields = ("ef_level", "cefr_level", "activity_instructions", "student_submission")
    return tuple(row[name] for name in fields)

# Quick check: display one randomly drawn example.
get_random_example()

(174, 13)


(10.0,
 'B2',
 'Your manager sent you an email, asking you to do the company update. In your own words, write an outline for your presentation. Your outline should include an introduction, the main points, a summary and next steps. Type in the input box. Write 80-120 words. Use your own words where possible.',
 'Outline for Company Update Presentation\n\n1. Introduction\n\nWelcome the team and thank everyone for attending.\n\nBriefly explain the purpose of the update: to share key achievements, challenges, and upcoming priorities.\n\n\n2. Main Points\n\nPerformance Highlights: Review recent successes, including metrics and milestones achieved.\n\nCurrent Challenges: Discuss any obstacles and how the team is addressing them.\n\nUpcoming Projects: Outline key initiatives and goals for the next quarter.\n\n\n3. Summary\n\nRecap the key takeaways: progress made, challenges faced, and priorities ahead.\n\n\n4. Next Steps\n\nShare specific actions for the team, including deadlines and collab

In [30]:
# Build the Gradio demo: four text inputs, three buttons, two read-only score
# sliders and two Markdown labels, wired to get_random_example() and predict().
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 ONNX Scorer (Accuracy & Coherence)\n"
                "Enter the prompt level, prompt text, and student response. "
                "Use Random Example to fill inputs automatically.")

    # Input fields: prompt level, prompt CEFR level, prompt text, student response
    with gr.Row():
        ef_level = gr.Textbox(label="Prompt Level", placeholder="e.g. 2", max_lines=1)
        cefr_level = gr.Textbox(label="Prompt CEFR Level", placeholder="e.g. A1", max_lines=1)
        prompt = gr.Textbox(label="Prompt", placeholder="Enter the prompt text", lines=2)
        response = gr.Textbox(label="Response", placeholder="Enter the student response", lines=2)

    # Helper buttons: fill the inputs from a random sample row, or clear them
    with gr.Row():
        example_btn = gr.Button("🎲 Random Example")
        reset_btn = gr.Button("♻️ Reset")

    predict_btn = gr.Button("Predict")

    # Non-interactive sliders that display the two scores on a 0-100 range
    with gr.Row():
        score_acc = gr.Slider(label="Accuracy Score", minimum=0, maximum=100, interactive=False)
        score_coh = gr.Slider(label="Coherence Score", minimum=0, maximum=100, interactive=False)

    # Markdown labels for the score/CEFR text returned by predict()
    # (plain Markdown; no colour is applied to them here)
    acc_label = gr.Markdown("")
    coh_label = gr.Markdown("")

    # Random Example fills the four inputs; Reset writes empty strings into them;
    # Predict sends the four inputs to predict() and shows its four return values.
    example_btn.click(fn=get_random_example, outputs=[ef_level, cefr_level, prompt, response])
    reset_btn.click(fn=lambda: ("", "", "", ""), outputs=[ef_level, cefr_level,  prompt, response])
    predict_btn.click(fn=predict, 
                      inputs=[ef_level, cefr_level, prompt, response], 
                      outputs=[score_acc, score_coh, acc_label, coh_label])

# NOTE(review): share=True requests a public share link in addition to the local URL —
# confirm that exposing the demo publicly is intended.
demo.launch(share=True)


* Running on local URL:  http://127.0.0.1:7864
* Running on public URL: https://9eda6bdd0d4aad090a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


