In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, MarianTokenizer, MarianMTModel
import torch, random
import numpy as np



In [None]:
# Load the extractive question-answering model.
# `device` must exist before any `.to(device)` call; define it here so the
# notebook runs from a fresh kernel. Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

qa_model_name = "deepset/roberta-base-squad2"
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name, use_fast=True)
qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name).to(device)
print("Loaded QA model:", qa_model_name)


In [None]:
# Load the English -> French translation checkpoint and its tokenizer
trans_model_name = "Helsinki-NLP/opus-mt-en-fr"
trans_tokenizer = MarianTokenizer.from_pretrained(trans_model_name)
trans_model = MarianMTModel.from_pretrained(trans_model_name)
trans_model = trans_model.to(device)  # place the weights on the chosen compute device
print("Loaded translation model:", trans_model_name)


In [None]:
# Function to extract an answer from the given context
def answer_question(question, context, max_length=512):
    """Extract the answer span for `question` from `context` with the QA model.

    Args:
        question: The question text.
        context: The passage in which to look for the answer.
        max_length: Maximum tokenized length; longer inputs are truncated.

    Returns:
        The decoded answer string, or "" when the predicted start position
        comes after the predicted end position (no valid span).
    """
    inputs = qa_tokenizer(question, context, return_tensors="pt", truncation=True, max_length=max_length)
    # The model may live on a GPU; the inputs must be on the same device.
    inputs = inputs.to(qa_model.device)
    with torch.no_grad():
        outputs = qa_model(**inputs)
    # Bring the logits back to the CPU before converting to NumPy
    # (calling .numpy() on a CUDA tensor raises).
    start_logits = outputs.start_logits[0].cpu().numpy()
    end_logits = outputs.end_logits[0].cpu().numpy()

    start_index = int(np.argmax(start_logits))
    end_index = int(np.argmax(end_logits))

    if start_index > end_index:
        return ""

    # .tolist() works regardless of which device the tensor is on.
    input_ids = inputs["input_ids"][0].tolist()
    answer = qa_tokenizer.decode(input_ids[start_index:end_index + 1], skip_special_tokens=True).strip()
    return answer


In [None]:
# Function to translate text from English to French
def translate_to_french(text):
    """Translate `text` with the loaded translation model.

    Args:
        text: The string to translate.

    Returns:
        The decoded translation, or "" when `text` is None, empty, or
        whitespace-only.
    """
    if not text or not text.strip():
        return ""
    inputs = trans_tokenizer([text], return_tensors="pt", padding=True, truncation=True)
    # Keep the inputs on the same device as the model (it may be on a GPU).
    inputs = inputs.to(trans_model.device)
    with torch.no_grad():
        translated = trans_model.generate(**inputs)
    return trans_tokenizer.decode(translated[0], skip_special_tokens=True)


In [None]:

def english_question_to_french_answer(question, context):
    """Answer `question` from `context`, then translate the answer.

    Returns a dict with keys "english_answer" and "french_answer". When no
    answer is extracted, "english_answer" is "" and "french_answer" holds a
    fixed fallback message.
    """
    extracted = answer_question(question, context)
    if extracted != "":
        return {
            "english_answer": extracted,
            "french_answer": translate_to_french(extracted),
        }
    # Nothing was extracted: skip translation and report the fallback.
    return {"english_answer": "", "french_answer": "No clear answer found."}


In [None]:
# Quick sanity check of the full pipeline on a hand-written passage
question = "Who founded Apple Inc.?"
context = (
    "Apple Inc. was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in April 1976. "
    "It became one of the biggest technology companies in the world."
)

result = english_question_to_french_answer(question, context)

# Show the question followed by the extracted and translated answers
print("Question:", question)
print("English Answer:", result["english_answer"])
print("French Answer:", result["french_answer"])


In [None]:
# Now trying random samples from the dataset.
# Loads the "train" split of "ag_news"; records are read below via their "text" field.
dataset = load_dataset("ag_news", split="train")

def get_random_contexts(n=3, seed=None, data=None):
    """Pick up to `n` distinct random "text" entries from a dataset.

    Args:
        n: Number of texts to return. Values above the dataset size are
            clamped; zero or negative values give an empty list.
        seed: Optional seed for a reproducible draw. When None, the global
            `random` module state is used.
        data: Optional indexable collection of records with a "text" key.
            Defaults to the module-level `dataset`.

    Returns:
        A list of text strings taken from distinct records.
    """
    source = dataset if data is None else data
    rng = random if seed is None else random.Random(seed)
    total = len(source)
    sample_size = max(0, min(n, total))
    # Sample indices rather than records so no record is drawn twice.
    picked = rng.sample(range(total), sample_size)
    return [source[i]["text"] for i in picked]

# The same question is asked of every article, so define it once up front
# instead of rebinding it on each iteration.
question = "What is this text about?"
contexts = get_random_contexts(3)
for ctx in contexts:
    result = english_question_to_french_answer(question, ctx)
    print("Context:", ctx)
    print("Question:", question)
    print("English Answer:", result["english_answer"])
    print("French Answer:", result["french_answer"])
    print("-" * 100)
