<a href="https://colab.research.google.com/github/pythonWolf59/smart-research-companion/blob/dev/Research_Assistant_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
# Install required libraries
!pip install openai -q  gradio PyMuPDF feedparser fpdf duckduckgo_search arxiv requests


In [None]:
import os
import gradio as gr
import fitz  # PyMuPDF
import requests
from openai import OpenAI
from datetime import datetime
from arxiv import Search as ArxivSearch, SortCriterion
from fpdf import FPDF
import arxiv
import requests

# --- Groq client setup ---
# The API key is read from the Colab secret store under the name
# 'OPENAI_API_KEY'; the stock OpenAI client is then pointed at Groq's
# OpenAI-compatible endpoint instead of the default OpenAI base URL.
from google.colab import userdata

groq_api_key = userdata.get('OPENAI_API_KEY')
client = OpenAI(api_key=groq_api_key, base_url="https://api.groq.com/openai/v1")

# --- PDF Text Extraction ---
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object that exposes the path as its ``.name`` attribute (such as
            an uploaded-file wrapper).

    Returns:
        A single string holding the text of all pages.

    Raises:
        AttributeError: If ``pdf_file`` is neither a path nor has ``.name``.
        OSError: If the file cannot be opened or read.
    """
    if isinstance(pdf_file, (str, os.PathLike)):
        path = pdf_file
    else:
        path = pdf_file.name

    with open(path, "rb") as f:
        data = f.read()

    # Open from bytes with an explicit filetype so parsing does not depend on
    # the file extension; the context manager closes the document, which the
    # previous implementation never did.
    with fitz.open(stream=data, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)

# --- Chunk and Summarize ---
def chunk_text(text, max_tokens=3000):
    """Split *text* into newline-terminated chunks of bounded size.

    Paragraphs (lines separated by ``"\\n"``) are packed greedily into chunks.
    A paragraph that is too long to fit in a chunk on its own is sliced into
    smaller pieces so that no chunk exceeds the budget.

    Args:
        text: The text to split.
        max_tokens: Maximum chunk length. Despite the name, the budget is
            measured in characters (``len``), not model tokens.

    Returns:
        A list of chunk strings. Empty or whitespace-only chunks are omitted,
        so an empty *text* yields an empty list.
    """
    # A piece must be shorter than max_tokens to fit in an empty chunk once
    # its trailing newline is added; clamp to 1 so slicing always advances.
    step = max(1, max_tokens - 1)

    chunks = []
    parts, size = [], 0
    for para in text.split("\n"):
        # An empty paragraph still yields one empty piece so that blank lines
        # inside a chunk are preserved.
        pieces = [para[i:i + step] for i in range(0, len(para), step)] or [""]
        for piece in pieces:
            if size + len(piece) >= max_tokens:
                chunk = "".join(parts)
                if chunk.strip():
                    chunks.append(chunk)
                parts, size = [], 0
            parts.append(piece + "\n")
            size += len(piece) + 1

    tail = "".join(parts)
    if tail.strip():
        chunks.append(tail)
    return chunks

# Summarize with AI

def summarize_with_groq(text, model="meta-llama/llama-4-scout-17b-16e-instruct"):
    """Summarize *text* chunk by chunk with the chat-completions client.

    The text is split with ``chunk_text``; each non-blank chunk is sent as a
    single user message and the per-chunk summaries are joined with a
    Markdown horizontal rule.

    Args:
        text: The text to summarize.
        model: Model identifier passed to the chat-completions endpoint.

    Returns:
        The joined Markdown summaries, or an empty string when there is
        nothing to summarize or the model returned no content.
    """
    instructions = (
        "You are an AI assistant helping with academic research.\n"
        "Summarize the following research abstract in a clean, readable format with:\n"
        "- **Bold section titles** like Introduction, Method, Results (if applicable)\n"
        "- Bullet points for key insights\n"
        "- Paragraph breaks\n"
        "- Markdown format output\n\n"
    )

    summaries = []
    for chunk in chunk_text(text):
        # Skip blank chunks instead of spending an API call on them.
        if not chunk.strip():
            continue
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": f"{instructions}{chunk}"}],
        )
        # The message content is optional in the response schema; treat a
        # missing or blank reply as "no summary" rather than crashing.
        content = response.choices[0].message.content
        if content and content.strip():
            summaries.append(content.strip())

    return "\n\n---\n\n".join(summaries)

# Search Papers

# --- Multi-source search & caching ---
# Module-level state shared by search_all_sources(), which rebuilds both
# dicts on every search, and fetch_summary(), which looks a selected title up
# in title_to_url and then reads the matching entry from cached_abstracts.
cached_abstracts = {}   # URL → abstract
title_to_url = {}       # Title → URL

_SEARCH_TIMEOUT = 15  # seconds allowed for each HTTP search request


def _get_json(url, params):
    """GET *url* with *params* and return the decoded JSON body."""
    response = requests.get(url, params=params, timeout=_SEARCH_TIMEOUT)
    response.raise_for_status()
    return response.json()


def _search_arxiv(query, limit):
    """Return (title, url, abstract) tuples for arXiv matches, by relevance."""
    search = ArxivSearch(query=query, max_results=limit, sort_by=SortCriterion.Relevance)
    # Client.results(search) is the supported way to run a search;
    # Search.results() is deprecated.
    return [
        (paper.title, paper.entry_id, paper.summary)
        for paper in arxiv.Client().results(search)
    ]


def _search_semantic_scholar(query, limit):
    """Return (title, url, abstract) tuples from Semantic Scholar."""
    payload = _get_json(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        {"query": query, "limit": limit, "fields": "title,url,abstract"},
    )
    papers = payload.get("data") or []
    return [(p.get("title"), p.get("url"), p.get("abstract")) for p in papers]


def _search_core(query, limit):
    """Return (title, url, abstract) tuples from CORE."""
    payload = _get_json(
        "https://api.core.ac.uk/v3/search/works",
        {"q": query, "limit": limit},
    )
    # The v3 search response lists hits under "results"; "data" is still
    # accepted as a fallback because that is the key this code read before.
    works = payload.get("results") or payload.get("data") or []
    found = []
    for work in works:
        # "id" is a numeric identifier, not a link. Prefer the record's
        # display link and otherwise build the work page URL from the id.
        # NOTE(review): the "links"/"display" shape and the /works/<id> URL
        # pattern are taken from the CORE v3 docs - confirm against a live
        # response.
        links = work.get("links") or []
        url = next(
            (link.get("url") for link in links if link.get("type") == "display"),
            None,
        )
        if not url and work.get("id"):
            url = f"https://core.ac.uk/works/{work['id']}"
        found.append((work.get("title"), url, work.get("abstract")))
    return found


def _search_pubmed(query, limit):
    """Return (title, url, abstract) tuples from the NCBI literature API."""
    # NOTE(review): this endpoint looks like a citation exporter keyed by id
    # rather than a term search; it is kept as-is (now with a URL-encoded
    # query) and simply reports a failure if the response is not usable.
    payload = _get_json(
        "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/",
        {"format": "json", "tool": "mytool", "email": "me@example.com", "term": query},
    )
    found = []
    for record in payload.get("records", [])[:limit]:
        pmcid = record.get("pmcid")
        url = f"https://www.ncbi.nlm.nih.gov/pmc/{pmcid}" if pmcid else None
        found.append((record.get("title"), url, record.get("abstractText")))
    return found


def search_all_sources(query, max_results=25):
    """Search several paper indexes and refresh the module-level caches.

    Each source is queried independently; a source that fails is reported in
    the result list instead of aborting the whole search. The module-level
    ``cached_abstracts`` and ``title_to_url`` dicts are replaced with the
    results of this search.

    Args:
        query: Free-text research topic.
        max_results: Maximum number of results requested from each source.

    Returns:
        A 2-tuple ``(html, dropdown_update)``: an HTML string with one link
        per result, and a ``gr.update`` that sets the dropdown choices to the
        result titles and clears its value.
    """
    import html  # local import: only this function needs it

    global cached_abstracts, title_to_url

    query = (query or "").strip()
    if not query:
        cached_abstracts, title_to_url = {}, {}
        return "Please enter a research topic.", gr.update(choices=[], value=None)

    sources = (
        ("arXiv", _search_arxiv),
        ("SemScholar", _search_semantic_scholar),
        ("CORE", _search_core),
        ("PubMed", _search_pubmed),
    )

    abstracts, urls_by_title, lines = {}, {}, []
    for label, fetch in sources:
        try:
            papers = fetch(query, max_results)
        except Exception as exc:  # source boundary: report and keep going
            lines.append(
                f"⚠️ [{label}] search failed ({html.escape(type(exc).__name__)})"
            )
            continue

        for title, url, abstract in papers:
            if not (title and url):
                continue
            title = str(title).strip()
            url = str(url)
            abstract = (abstract or "").strip()

            # Titles and URLs come from external services, so escape them
            # before embedding them in markup.
            lines.append(
                f"🔗 [{label}] <a href='{html.escape(url)}' target='_blank'>"
                f"{html.escape(title)}</a>"
            )

            # If two sources return the same title, keep the entry that
            # actually has an abstract to summarize.
            known_url = urls_by_title.get(title)
            if known_url is None or not abstracts.get(known_url):
                urls_by_title[title] = url
            if abstract or url not in abstracts:
                abstracts[url] = abstract

    # Publish the caches only once the whole search has been assembled.
    cached_abstracts, title_to_url = abstracts, urls_by_title

    if not lines:
        lines.append("No results found.")

    # Newlines collapse in rendered HTML, so separate entries with <br>.
    return "<br>\n".join(lines), gr.update(choices=list(title_to_url), value=None)

def fetch_summary(title):
    """Summarize the cached abstract of the paper selected by *title*.

    Args:
        title: A title previously offered by ``search_all_sources``, or
            ``None`` / empty when nothing is selected.

    Returns:
        An empty string when nothing is selected, a fixed notice when the
        selected paper has no cached abstract, otherwise the summary text.
    """
    # A search resets the dropdown value to None; return nothing in that case
    # instead of claiming that a paper has no abstract.
    if not title:
        return ""

    url = title_to_url.get(title)
    abstract = cached_abstracts.get(url) or ""
    if not abstract.strip():
        return "No abstract found for this paper."
    return summarize_with_groq(abstract)

# --- PDF Summarization ---
def handle_pdf(pdf_file):
    """Extract the text of an uploaded PDF and return its summary.

    Args:
        pdf_file: The uploaded file as accepted by ``extract_text_from_pdf``,
            or ``None`` when nothing has been uploaded.

    Returns:
        The summary text, or a short notice when there is no upload or the
        PDF yields no text.
    """
    if pdf_file is None:
        return "Please upload a PDF file first."

    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        return "No extractable text found in this PDF."
    return summarize_with_groq(text)

# --- Gradio Interface ---

def launch_app():
    """Assemble the two-tab Gradio UI, wire its events, and launch it."""
    soft_theme = gr.themes.Soft()

    with gr.Blocks(theme=soft_theme) as demo:
        gr.Markdown("# 🧠 Research Assistant AI\nUpload PDF or Search Research Papers")

        # Tab 1: upload a PDF and summarize it.
        with gr.Tab("📄 Upload PDF"):
            with gr.Row():
                pdf_upload = gr.File(file_types=[".pdf"], label="Upload PDF")
                pdf_button = gr.Button("Summarize")
            pdf_summary_box = gr.Textbox(lines=20, label="Summary")

        # Tab 2: search for papers, then pick one to summarize.
        with gr.Tab("🔍 Search Papers"):
            with gr.Row():
                topic_box = gr.Textbox(label="Enter your research topic")
                topic_button = gr.Button("Search")
            results_html = gr.HTML(label="Search Results")
            paper_picker = gr.Dropdown(choices=[], label="Select Paper URL to Summarize")
            paper_summary_box = gr.Textbox(lines=15, label="Paper Summary")

        # Event wiring.
        pdf_button.click(
            fn=handle_pdf,
            inputs=pdf_upload,
            outputs=pdf_summary_box,
        )
        topic_button.click(
            fn=search_all_sources,
            inputs=topic_box,
            outputs=[results_html, paper_picker],
        )
        paper_picker.change(
            fn=fetch_summary,
            inputs=paper_picker,
            outputs=paper_summary_box,
        )

    demo.launch(debug=True)

# Build the UI and start serving it as soon as this cell/script runs.
launch_app()