<a href="https://colab.research.google.com/github/pythonWolf59/smart-research-companion/blob/dev/Research_Assistant_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraries
!pip install openai -q  gradio PyMuPDF feedparser fpdf duckduckgo_search arxiv

In [1]:
import os
import gradio as gr
import fitz  # PyMuPDF
from openai import OpenAI
from duckduckgo_search import DDGS
from fpdf import FPDF
from datetime import datetime
from arxiv import Search
import re

# --- Get Groq API key ---

# The key is read from the Colab secrets store (this import only resolves
# inside Google Colab).
from google.colab import userdata
# NOTE(review): the secret is looked up under the name 'OPENAI_API_KEY' even
# though the value is sent to the Groq endpoint configured below.
groq_api_key = userdata.get('OPENAI_API_KEY')

# OpenAI SDK client pointed at Groq's OpenAI-compatible base URL; shared by
# every completion request in this file.
client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=groq_api_key
)

# --- PDF Text Extraction ---
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_file: Either an upload handle exposing the file's path as
            ``.name`` (what ``handle_pdf`` receives from the file input),
            or a plain filesystem path.

    Returns:
        The text of all pages, in page order, joined without a separator.
    """
    # Accept both an object carrying ``.name`` and a bare path string.
    path = getattr(pdf_file, "name", pdf_file)
    with open(path, "rb") as f:
        data = f.read()
    # Open the document as a context manager so it is closed once the text
    # has been read, instead of lingering until garbage collection.
    with fitz.open(stream=data, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)

# --- Chunk and Summarize ---
def chunk_text(text, max_tokens=3000):
    """Group the lines of *text* into chunks of bounded size.

    The text is split on newlines and consecutive lines are packed into a
    chunk while the running length stays below the limit. Each line keeps a
    trailing newline inside its chunk.

    Args:
        text: The text to split.
        max_tokens: Size limit per chunk. Despite the name, the limit is
            measured in characters (``len``), not model tokens.

    Returns:
        A list of non-empty chunk strings. A single line that is itself
        longer than the limit is kept whole in a chunk of its own.
    """
    chunks, chunk = [], ""
    for para in text.split("\n"):
        if len(chunk) + len(para) < max_tokens:
            chunk += para + "\n"
            continue
        # Flush only when something was accumulated; otherwise an oversized
        # first line would emit an empty chunk.
        if chunk:
            chunks.append(chunk)
        chunk = para + "\n"
    if chunk:
        chunks.append(chunk)
    return chunks

def summarize_with_groq(text):
    """Summarize *text* chunk by chunk and return the combined summary.

    The text is split with ``chunk_text``; each chunk is sent as a single
    user message to the chat-completions endpoint of the module-level
    ``client``, and the stripped replies are joined with blank lines.
    """
    partial_summaries = []
    for piece in chunk_text(text):
        request_messages = [
            {"role": "user", "content": f"Summarize the following research content:\n{piece}"}
        ]
        completion = client.chat.completions.create(
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            messages=request_messages,
        )
        reply = completion.choices[0].message.content
        partial_summaries.append(reply.strip())
    return "\n\n".join(partial_summaries)

# --- ArXiv Search and Citation ---
def search_arxiv(query):
    """Search arXiv for *query* and describe the top five matches.

    Args:
        query: Search string passed to the arXiv API, sorted by relevance.

    Returns:
        A ``(papers, citations)`` tuple of strings. ``papers`` lists each
        result's title, URL and the first 500 characters of its abstract;
        ``citations`` holds the output of ``generate_citation`` for each
        result. Entries are separated by blank lines.
    """
    # The file only does `from arxiv import Search`, so the package name
    # itself is not bound at module level; import it here so that
    # `arxiv.Search` / `arxiv.SortCriterion` resolve.
    import arxiv

    search = arxiv.Search(
        query=query,
        max_results=5,
        sort_by=arxiv.SortCriterion.Relevance,
    )
    papers, citations = [], []
    # Fetch through a Client rather than the deprecated Search.results().
    for result in arxiv.Client().results(search):
        paper_info = f"**{result.title}**\nURL: {result.entry_id}\nSummary: {result.summary[:500]}..."
        papers.append(paper_info)
        citations.append(generate_citation(result))
    return "\n\n".join(papers), "\n\n".join(citations)

def generate_citation(result):
    """Build an APA line and a BibTeX entry for one search result.

    Args:
        result: Object exposing ``published`` (with a ``.year``), ``authors``
            (a sequence of objects with ``.name``), ``title`` and
            ``entry_id`` (a URL whose last path segment is used as the
            BibTeX key).

    Returns:
        A string of the form ``"APA:\\n<apa>\\n\\nBibTeX:\\n<bibtex>"``.
    """
    year = result.published.year
    names = [a.name for a in result.authors]
    # Guard against an empty author list instead of raising IndexError.
    lead = names[0] if names else "Unknown"
    authors = lead + (" et al." if len(names) > 1 else "")
    apa = f"{authors} ({year}). {result.title}. {result.entry_id}"

    cite_key = result.entry_id.split('/')[-1]
    # In an f-string `{{` is a literal brace, so the key needs three braces:
    # one literal `{` followed by the `{cite_key}` substitution. The entry is
    # closed only by the final `}`.
    bibtex = (
        f"@article{{{cite_key},\n"
        f"  title={{ {result.title} }},\n"
        f"  author={{ {' and '.join(names)} }},\n"
        f"  journal={{arXiv preprint}},\n"
        f"  year={{ {year} }},\n"
        f"  url={{ {result.entry_id} }}\n"
        f"}}"
    )
    return f"APA:\n{apa}\n\nBibTeX:\n{bibtex}"

# --- Export Helpers ---
def export_to_pdf(content):
    """Write *content* to a timestamped PDF in the working directory.

    Args:
        content: Text to export; each newline-separated line becomes its
            own ``multi_cell`` in the document.

    Returns:
        The generated file name, ``summary_<YYYYmmddHHMMSS>.pdf``.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for line in content.split("\n"):
        # The built-in (core) fonts only cover Latin-1; replace anything
        # outside it so that model output containing e.g. emoji or smart
        # quotes does not abort the export with an encoding error.
        printable = line.encode("latin-1", "replace").decode("latin-1")
        pdf.multi_cell(0, 10, printable)
    # `datetime` is the class (from `from datetime import datetime`), so
    # `now()` is called on it directly.
    filename = f"summary_{datetime.now().strftime('%Y%m%d%H%M%S')}.pdf"
    pdf.output(filename)
    return filename

def export_to_txt(content):
    """Write *content* to a timestamped UTF-8 text file.

    Args:
        content: Text to write verbatim.

    Returns:
        The generated file name, ``citation_<YYYYmmddHHMMSS>.txt``, created
        in the current working directory.
    """
    # `datetime` is the class (from `from datetime import datetime`), so
    # `now()` is called on it directly.
    filename = f"citation_{datetime.now().strftime('%Y%m%d%H%M%S')}.txt"
    # Explicit encoding so non-ASCII titles and author names are written
    # the same way regardless of the platform's default locale.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
    return filename

# --- Gradio Interface ---
def handle_pdf(pdf_file):
    """Summarize an uploaded PDF for the "Summarize" button.

    Args:
        pdf_file: The value of the file input, or ``None`` when the button
            is clicked before anything has been uploaded.

    Returns:
        The summary text, or a short prompt asking for a file when none
        was provided.
    """
    # Without this guard a click with no upload fails with an
    # AttributeError from inside the extraction step.
    if pdf_file is None:
        return "Please upload a PDF file first."
    text = extract_text_from_pdf(pdf_file)
    return summarize_with_groq(text)

def handle_prompt(prompt):
    """Run an arXiv search for the "Search" button.

    Args:
        prompt: The research topic typed by the user.

    Returns:
        A ``(papers, citations)`` tuple of strings for the two output
        boxes. For a blank prompt, a short hint is returned in the first
        slot and the second is empty.
    """
    query = (prompt or "").strip()
    if not query:
        return "Please enter a research topic.", ""
    # Pass the query through unchanged: the arxiv client URL-encodes its
    # parameters itself, so pre-replacing spaces with "+" would send literal
    # plus signs as part of the search terms.
    papers, citations = search_arxiv(query)
    return papers, citations

def launch_app():
    """Build the two-tab Gradio interface and start serving it.

    One tab summarizes an uploaded PDF, the other searches arXiv. Each tab
    has export buttons that write the contents of a text box to a file via
    ``export_to_pdf`` / ``export_to_txt``.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("""# 🧠 Research Assistant AI\nUpload PDF or Search Research Papers""")

        # Tab 1: upload a PDF and show its summary.
        with gr.Tab("📄 Upload PDF"):
            with gr.Row():
                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
                summarize_btn = gr.Button("Summarize")
            summary_output = gr.Textbox(label="Summary", lines=20)
            with gr.Row():
                export_pdf = gr.Button("Export Summary as PDF")
                export_cite = gr.Button("Export Citations as TXT")

        # Tab 2: free-text arXiv search with paper list and citations.
        with gr.Tab("🔍 Search Papers"):
            with gr.Row():
                prompt_input = gr.Textbox(label="Enter your research topic")
                search_btn = gr.Button("Search")
            paper_output = gr.Textbox(label="Papers", lines=10)
            citation_output = gr.Textbox(label="Citations", lines=10)
            with gr.Row():
                export_paper_pdf = gr.Button("Export Papers as PDF")
                export_paper_cite = gr.Button("Export Citations as TXT")

        # Event wiring is done after both tabs exist because the handlers
        # below reference components from either tab.
        summarize_btn.click(fn=handle_pdf, inputs=pdf_input, outputs=summary_output)
        search_btn.click(fn=handle_prompt, inputs=prompt_input, outputs=[paper_output, citation_output])

        # Each export handler gets its own new gr.File() output component.
        export_pdf.click(fn=export_to_pdf, inputs=summary_output, outputs=gr.File())
        # NOTE(review): this button lives in the upload tab but exports
        # citation_output, which is only filled by a search in the other tab.
        export_cite.click(fn=export_to_txt, inputs=citation_output, outputs=gr.File())

        export_paper_pdf.click(fn=export_to_pdf, inputs=paper_output, outputs=gr.File())
        export_paper_cite.click(fn=export_to_txt, inputs=citation_output, outputs=gr.File())

    demo.launch(debug=True)

# Build the interface and start it as soon as this cell/script runs.
launch_app()


ModuleNotFoundError: No module named 'fitz'