In [17]:
import os
from openai import OpenAI
import gradio as gr
from dotenv import load_dotenv
import fitz
from pathlib import Path
from docx import Document

In [18]:
# Load settings from a local .env file; override=True lets values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# The stock OpenAI client is pointed at Groq's OpenAI-style endpoint and
# authenticated with the key read from GROQ_API_KEY (None when it is unset).
groq_url = "https://api.groq.com/openai/v1"
groq_api_key = os.environ.get("GROQ_API_KEY")
groq = OpenAI(api_key=groq_api_key, base_url=groq_url)

In [19]:
# Model identifiers offered in the UI dropdown.
models = ["openai/gpt-oss-120b"]

# Lookup from model identifier to the API client used to call it.
clients = {models[0]: groq}

In [20]:
def extract_from_docs(file):
    """Extract the plain text of a PDF, DOCX, or other text file.

    The file suffix (compared case-insensitively) selects the reader:
    ``.pdf`` is read page by page, ``.docx`` is read via its paragraphs and
    tables, and anything else is read as UTF-8 text with undecodable bytes
    ignored.

    Args:
        file: Path of the document to read (anything ``Path`` accepts).

    Returns:
        A list of strings:
        - ``.pdf``: one entry per page.
        - ``.docx``: a single entry holding the non-empty paragraphs followed
          by the non-empty table rows (cells joined with ``" | "``), all
          separated by blank lines. Tables therefore appear after the
          paragraphs rather than at their original position.
        - otherwise: a single entry holding the whole file content.
    """
    path = Path(file)
    ext = path.suffix.lower()

    if ext == ".pdf":
        with fitz.open(file) as pdf:
            return [page.get_text() for page in pdf]

    if ext == ".docx":
        doc = Document(file)
        parts = []

        for para in doc.paragraphs:
            stripped = para.text.strip()
            if stripped:
                parts.append(stripped)

        # Tables are walked once, after the paragraphs. Nesting this loop
        # inside the paragraph loop would repeat every table row once per
        # paragraph and drop tables entirely for paragraph-less documents.
        for table in doc.tables:
            for row in table.rows:
                cells = [cell.text.strip() for cell in row.cells]
                line = " | ".join(cell for cell in cells if cell)
                if line:
                    parts.append(line)

        return ["\n\n".join(parts)]

    return [path.read_text(encoding="utf-8", errors="ignore")]

In [1]:
# System message sent as the first message of every model request in this
# notebook. It restricts the assistant to summarizing the supplied text and
# answering questions from it, and fixes the refusal sentence and the output
# format. The text is runtime data: edit it only to change model behavior.
system_prompt = """
You are a text summarization and question-answering assistant.

Your role is STRICTLY LIMITED to:
1. Summarizing the provided text into clear, concise bullet points.
2. Answering questions ONLY using information explicitly present in the provided text.

Rules:
- Ignore and refuse any request that is not related to summarizing the text or answering questions about it.
- Do NOT perform analysis, rewriting, translation, coding, creativity, or opinions.
- Do NOT add external knowledge, assumptions, or explanations beyond the text.
- If a question cannot be answered using the provided text, respond with:
  "The provided text does not contain this information."

Summarization rules:
- Use clear bullet points.
- Preserve key facts, numbers, entities, and definitions.
- Remove redundancy and filler.
- Do not interpret or expand beyond the text.

Question answering rules:
- Answer briefly and precisely.
- Quote or paraphrase only what exists in the text.
- Never speculate.

Output format:
- If summarizing: return bullet points only.
- If answering a question: return a short direct answer.
"""

In [25]:
def stream_chat_completion(client, model, messages):
    """Stream a chat completion, yielding the text accumulated so far.

    Args:
        client: Object exposing ``chat.completions.create``.
        model: Model identifier passed through to the API call.
        messages: Chat messages passed through to the API call.

    Yields:
        The concatenation of all content pieces received so far, once per
        chunk that carries non-empty content. Chunks without choices, without
        a delta, or with empty content are skipped.
    """
    response = client.chat.completions.create(model=model, messages=messages, stream=True)
    so_far = ""
    for event in response:
        choices = getattr(event, "choices", None)
        if choices:
            delta = getattr(choices[0], "delta", None)
            token = getattr(delta, "content", None) if delta else None
            if token:
                so_far += token
                yield so_far

In [36]:
def call_ai_summarizer(file, model, history, context_text):
    """Stream a summary of the uploaded file into the chat and summary views.

    Args:
        file: Path of the uploaded document, or None/empty when nothing has
            been uploaded.
        model: Key into ``clients`` selecting the API client; also passed as
            the model identifier of the request.
        history: Chat history as a list of ``{"role", "content"}`` dicts, or
            None for an empty history.
        context_text: Previously stored document text. It is only passed back
            unchanged when no file is provided; otherwise it is replaced by
            the newly extracted text.

    Yields:
        ``(history, summary, document_text, history)`` tuples, one per
        streamed update, matching the four UI outputs. ``document_text`` is a
        single string containing the extracted text.
    """
    history = history or []

    # Clicking the button before uploading would otherwise crash inside the
    # extractor; report it in the summary pane and keep the stored context.
    if not file:
        yield history, "Please upload a file before summarizing.", context_text, history
        return

    client = clients[model]

    # The extractor returns a list of text chunks (e.g. one per PDF page).
    # Join them so the prompt and the stored context hold the text itself
    # rather than the Python repr of a list (brackets, quotes, escaped "\n").
    extracted = extract_from_docs(file)
    text = extracted if isinstance(extracted, str) else "\n\n".join(extracted)

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Here is the text : {text}"},
    ]

    history.append({"role": "user", "content": "Summarize the uploaded file."})
    # Placeholder assistant turn, filled in place as the stream progresses.
    history.append({"role": "assistant", "content": ""})

    for partial in stream_chat_completion(client, model, messages):
        history[-1]["content"] = partial
        yield history, partial, text, history
    


In [39]:
def ask_question(question, model, history, context_text):
    """Answer a question about the stored document text, streaming the reply.

    Args:
        question: The user's question.
        model: Key into ``clients`` selecting the API client; also passed as
            the model identifier of the request.
        history: Chat history as a list of ``{"role", "content"}`` dicts, or
            None for an empty history.
        context_text: The document text to ground answers in, either a single
            string or a list of text chunks.

    Yields:
        ``(history, history, "")`` tuples, one per streamed update: the chat
        display, the stored history, and an empty string that clears the
        question box.
    """
    history = history or []

    # Nothing to ask: leave the conversation untouched instead of sending an
    # empty user turn to the model and recording it in the history.
    if not (question or "").strip():
        yield history, history, ""
        return

    client = clients[model]

    # The stored context may be a list of chunks; join it so the prompt holds
    # the text itself rather than the Python repr of a list.
    if isinstance(context_text, (list, tuple)):
        context_text = "\n\n".join(str(part) for part in context_text)

    # Build messages: system + context + recent conversation + new question.
    # The context is injected on every call so answers stay grounded in it.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Use ONLY this text as your source:\n{context_text}"},
    ]
    # Only the 12 most recent history entries are replayed to the model.
    messages += history[-12:]
    messages.append({"role": "user", "content": question})

    history.append({"role": "user", "content": question})
    # Placeholder assistant turn, filled in place as the stream progresses.
    history.append({"role": "assistant", "content": ""})

    for partial in stream_chat_completion(client, model, messages):
        history[-1]["content"] = partial
        yield history, history, ""

In [40]:
# Build the Gradio UI: one tab to upload a file and stream its summary, and
# one tab to chat about the uploaded file's text.
with gr.Blocks() as demo:
    # NOTE(review): the title mentions an interview coach, while the callbacks
    # below summarize and answer questions about an uploaded file. Confirm the
    # intended title.
    gr.Markdown("# Interview English Coach (MVP)")

    # States
    chat_state = gr.State([])     # conversation history (messages format)
    context_state = gr.State("")  # original extracted text (source of truth)
    # Model selector shared by both tabs; defaults to the first configured model.
    model = gr.Dropdown(choices=models, value=models[0], show_label=False)
    with gr.Tab("Upload & Summarize"):
        file = gr.File(label="Upload File")
        btn_extract = gr.Button("SUMMARIZE")
        summary = gr.Markdown(label="AI summarize")

    with gr.Tab("Chat on FILE"):
        chat = gr.Chatbot(height=450)
        q = gr.Textbox(label="Ask about the FILE text", placeholder="e.g., What is the file about?")
        btn_ask = gr.Button("Ask")
    
    # Summarize: each tuple yielded by call_ai_summarizer updates, in order,
    # the chat display, the summary pane, the stored document text, and the
    # stored history.
    btn_extract.click(
        call_ai_summarizer,
        inputs=[file, model, chat_state, context_state],
        outputs=[chat, summary, context_state, chat_state],
    )
    
    # Ask: each tuple yielded by ask_question updates the chat display and the
    # stored history, and writes its third value back into the question box.
    btn_ask.click(
        ask_question,
        inputs=[q, model, chat_state, context_state],
        outputs=[chat, chat_state, q],
    )
# Start the app; inbrowser=True asks Gradio to open it in a browser tab.
demo.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7870
* To create a public link, set `share=True` in `launch()`.


