In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Location of the processed OET sample set, relative to this notebook.
data_path = "../data/processed/oet_samples_small.csv"

df = pd.read_csv(data_path)

# Fail fast on a wrong or outdated CSV: the retrieval cells below read exactly
# these three columns, and a missing one would otherwise only surface as a
# KeyError deep inside ask_tutor().
_missing_columns = {"text", "label", "task"} - set(df.columns)
if _missing_columns:
    raise ValueError(
        f"{data_path} is missing required column(s): {sorted(_missing_columns)}"
    )

df.head()

In [None]:
# Corpus for retrieval. Missing texts become empty strings, and every cell is
# coerced to str so that a stray non-string value (e.g. a numeric-looking row)
# cannot break the vectorizer's string preprocessing.
texts = df["text"].fillna("").astype(str).tolist()

# Fit the vocabulary and vectorize the corpus in one pass; row i of
# tfidf_matrix corresponds to texts[i].
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(texts)
tfidf_matrix.shape

In [None]:
def ask_tutor(query, top_k=3):
    """Return the ``top_k`` corpus rows most similar to ``query``.

    The query is vectorized with the module-level ``vectorizer`` and compared
    against every row of ``tfidf_matrix`` using cosine similarity.

    Parameters
    ----------
    query : str
        Free-text question to match against the corpus.
    top_k : int, default 3
        Maximum number of matches to return.

    Returns
    -------
    list of dict
        One dict per match, ordered from most to least similar, with keys
        ``"score"`` (similarity as a Python float), ``"text"``, ``"label"``
        and ``"task"`` (the latter three read from ``df``).
    """
    query_vec = vectorizer.transform([query])
    # cosine_similarity returns a (1, n_rows) array; keep the single row.
    sims = cosine_similarity(query_vec, tfidf_matrix)[0]
    # argsort is ascending, so reverse it before taking the best top_k.
    top_idx = sims.argsort()[::-1][:top_k]

    results = []
    for idx in top_idx:
        # idx is a row *position* in tfidf_matrix, so look it up positionally.
        # Label-based .loc only happens to work while df keeps its default
        # RangeIndex and would return wrong rows (or raise) after filtering.
        item = {
            "score": float(sims[idx]),
            "text": df["text"].iloc[idx],
            "label": df["label"].iloc[idx],
            "task": df["task"].iloc[idx],
        }
        results.append(item)
    return results

In [None]:
def chat_with_tutor(query, top_k=3):
    """Look up matches for ``query``, print a readable report, and return them.

    The report starts with the student's question, followed by one section per
    match (rank, similarity to three decimals, text, label and task), with a
    72-character dashed rule after the header and after every section.

    Returns the list produced by ``ask_tutor(query, top_k=top_k)`` unchanged.
    """
    rule = "-" * 72
    matches = ask_tutor(query, top_k=top_k)

    print("Student question: {}".format(query))
    print(rule)

    rank = 0
    for hit in matches:
        rank += 1
        print("Match {} | similarity: {:.3f}".format(rank, hit["score"]))
        print("Text: {}".format(hit["text"]))
        print("Label: {} | Task: {}".format(hit["label"], hit["task"]))
        print(rule)

    return matches

In [None]:
def build_context_snippet(results):
    """Format retrieved matches as a numbered, plain-text block for a prompt.

    Each entry of ``results`` (a dict with ``"text"``, ``"label"`` and
    ``"task"`` keys) becomes three lines -- ``Example N:``, ``Text: ...`` and
    ``Label: ... (task: ...)`` -- with a blank line between consecutive
    examples and a trailing newline after the last one. An empty ``results``
    yields an empty string.
    """
    def _render(position, entry):
        # One example, terminated by a newline so that joining blocks with
        # "\n" leaves a blank line between them.
        return (
            f"Example {position}:\n"
            f"Text: {entry['text']}\n"
            f"Label: {entry['label']} (task: {entry['task']})\n"
        )

    return "\n".join(
        _render(position, entry)
        for position, entry in enumerate(results, start=1)
    )

In [None]:
import os

from openai import OpenAI

# Never commit credentials in a notebook: read the key from the environment
# and stop with a clear message if it has not been configured.
_api_key = os.environ.get("OPENAI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this cell."
    )

client = OpenAI(api_key=_api_key)
client is not None

In [None]:
def generate_tutor_response(query, top_k=3):
    """Answer a student question using retrieved examples as context.

    Retrieves the ``top_k`` most similar labeled examples with ``ask_tutor``,
    formats them with ``build_context_snippet``, and sends them together with
    the question to the chat-completions API through the module-level
    ``client``.

    Parameters
    ----------
    query : str
        The student's question.
    top_k : int, default 3
        Number of examples to retrieve and include in the prompt.

    Returns
    -------
    tuple
        ``(answer_text, retrieved)``: the content of the first completion
        choice, and the list of retrieved matches that was used as context.
    """
    retrieved = ask_tutor(query, top_k=top_k)
    context = build_context_snippet(retrieved)

    system_prompt = (
        "You are Moonlight OET AI Tutor, an expert in OET Reading and Listening. "
        "You give short, clear, encouraging explanations. "
        "Use the examples and their labels (correct / incorrect / partial) to explain "
        "what the student should do and give practical OET tips."
    )

    # The en dash in "1–2" was previously stored as mojibake ("â€“"), which was
    # sent verbatim to the model.
    user_prompt = f"""A student asked this question:

{query}

Here are some labeled OET-style statements from past exercises:

{context}

Using these examples:
1. Answer the student's question.
2. If relevant, explain how the examples show correct vs incorrect or partial answers.
3. Give 1–2 practical tips the student can apply in future OET tasks.

Keep it simple, friendly, and OET-focused.
"""

    response = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.3,
    )

    answer_text = response.choices[0].message.content
    return answer_text, retrieved

In [None]:
# End-to-end demo: retrieve four examples for one question and print the reply.
demo_question = "How can I avoid losing marks for partial answers in OET listening?"
answer, retrieved = generate_tutor_response(demo_question, top_k=4)

print(answer)