In [2]:
#@title Research Analyzer
#@markdown This is part of the Research Analyzer project.

In [None]:
#@title Installing dependancies
%%capture
!pip install marker-pdf
!pip install gradio
!pip install scidownl
!pip install langchain

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

from prompts import system_prompt_1, system_prompt_2

import re
import os 
from IPython.display import Markdown

from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq



# setup google gemini api keys
import os
from dotenv import load_dotenv
import platform

# Resolve the API keys used by the chat models below.
#
# Colab is detected by trying to import its `userdata` helper instead of
# inspecting the operating system name: a plain Linux machine is not Colab
# (the import would fail there), and macOS can use the .env route just like
# Windows.
try:
    from google.colab import userdata
except ImportError:
    # Not running inside Colab: read the keys from the process environment,
    # after loading a local .env file if one is present.
    load_dotenv()
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
else:
    # Running inside Colab: the keys are stored as notebook secrets.
    GOOGLE_API_KEY = userdata.get('GOOGLE_AI_STUDIO2')
    GROQ_API_KEY = userdata.get('GROQ_API_KEY')


# Two Gemini chat clients that share the same API key; they differ only in
# the model name and the sampling temperature.
pro = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.4,
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)
flash = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.3,
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)

# Groq chat client, configured with temperature 0.
# Alternative model names kept for reference (swap into `model_name`):
#   "mixtral-8x7b-32768", "llama3-70b-8192", "llama-3.1-70b-versatile",
#   "llama3-8b-8192", "gemma2-9b-it"
llm = ChatGroq(
    model_name="llama-3.1-8b-instant",
    groq_api_key=GROQ_API_KEY,
    temperature=0,
)

In [None]:
#@title setup Gradio APP

import os
import sys

# Directory that contains the local helper modules. This is a placeholder:
# edit this single value to match your environment.
LOCAL_MODULES_DIR = '/path/to/your/local/modules'

# Change the working directory to the directory containing the local modules
os.chdir(LOCAL_MODULES_DIR)

# Add the directory to the system path. The membership check keeps sys.path
# from accumulating duplicate entries when this cell is re-run.
if LOCAL_MODULES_DIR not in sys.path:
    sys.path.append(LOCAL_MODULES_DIR)

# Now the local modules can be imported
import summary  # local module; its summarize() is called by generate_summary

import gradio as gr
import subprocess
import shutil
from scidownl import scihub_download
from google.colab import files

def process_pdf(input_path, output_root="/content/output"):
    """Extract the content of a PDF by running the ``marker_single`` CLI.

    Args:
        input_path: Path of the PDF file to process.
        output_root: Directory under which the per-document output folder is
            created. Defaults to the location the notebook has always used.

    Returns:
        The output directory, ``<output_root>/<PDF file name without extension>``.

    Raises:
        ValueError: If no folder name can be derived from ``input_path``.
        RuntimeError: If ``marker_single`` exits with a non-zero status.
    """
    base_name = os.path.splitext(os.path.basename(input_path))[0]
    if not base_name:
        # Without this guard the clean-up below would wipe output_root itself.
        raise ValueError(f"Cannot derive an output folder name from {input_path!r}")
    output_dir = os.path.join(output_root, base_name)

    # Clean and create output directory. The clean step matters because the
    # folder name depends only on the PDF's file name: without it, files left
    # by an earlier run would end up in the next archive.
    shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir, exist_ok=True)

    # Run the marker_single command.
    # NOTE(review): this argument layout (positional output folder,
    # --batch_multiplier/--max_pages/--langs) may be specific to older
    # marker-pdf releases -- confirm against the installed version.
    completed = subprocess.run([
        "marker_single", input_path, output_dir,
        "--batch_multiplier", "2",
        "--max_pages", "20",
        "--langs", "English"
    ], capture_output=True, text=True)

    # The output is captured, so a failure would otherwise be invisible and
    # the caller would go on to package an empty folder.
    if completed.returncode != 0:
        details = (completed.stderr or "").strip()
        raise RuntimeError(
            f"marker_single failed with exit code {completed.returncode}: {details}"
        )

    return output_dir

def generate_summary(output_dir, file_path, llm):
    """Summarise a document and store the result as ``summary.md``.

    Args:
        output_dir: Existing directory in which ``summary.md`` is written.
        file_path: Path handed to ``summary.summarize``.
        llm: Model object handed to ``summary.summarize``.

    Returns:
        Path of the written ``summary.md`` file.
    """
    results = summary.summarize(file_path, llm)
    summary_path = os.path.join(output_dir, "summary.md")
    # Write UTF-8 explicitly: the platform default encoding (e.g. cp1252 on
    # Windows) cannot represent every character a summary may contain.
    with open(summary_path, "w", encoding="utf-8") as f:
        f.write(results)
    return summary_path

def create_zip(output_dir):
    """Pack the contents of ``output_dir`` into ``<output_dir>.zip``.

    The archive is created next to the directory, with the directory itself
    used as the archive root. Returns the ``<output_dir>.zip`` path.
    """
    shutil.make_archive(base_name=output_dir, format='zip', root_dir=output_dir)
    archive_path = "{}.zip".format(output_dir)
    return archive_path

def download_paper(doi, output_path="/content/paper.pdf"):
    """Download the paper identified by ``doi`` via ``scihub_download``.

    Args:
        doi: DOI of the paper to fetch.
        output_path: Where the PDF is stored. Defaults to the location the
            notebook has always used.

    Returns:
        ``output_path``, once a file exists there.

    Raises:
        FileNotFoundError: If no file exists at ``output_path`` after the
            download attempt.
    """
    # The target path is reused for every request. Remove any earlier file so
    # a failed download cannot be mistaken for a successful one and hand the
    # previous paper to the rest of the pipeline.
    if os.path.exists(output_path):
        os.remove(output_path)

    scihub_download(doi, paper_type="doi", out=output_path)

    # Verify the result instead of trusting the call to have produced a file.
    if not os.path.isfile(output_path):
        raise FileNotFoundError(f"Could not download a PDF for DOI {doi!r}.")
    return output_path

def gradio_app(file, doi):
    """Gradio handler: obtain a PDF, extract it, summarise it, package it.

    Args:
        file: The uploaded PDF as delivered by the ``gr.File`` input, or a
            falsy value when nothing was uploaded.
        doi: Text of the DOI field. When non-blank it takes precedence over
            an uploaded file.

    Returns:
        ``[summary_path, zip_path]`` for the two ``gr.File`` outputs.

    Raises:
        gr.Error: If neither a DOI nor a file was provided.
    """
    # Treat a missing or whitespace-only DOI as "no DOI given".
    doi = (doi or "").strip()

    if doi:
        input_path = download_paper(doi)
    elif file:
        # Accept both a plain path string and an object exposing the path as
        # `.name`, whichever the File component delivers.
        uploaded_path = getattr(file, "name", file)
        input_path = "/content/paper.pdf"
        shutil.move(uploaded_path, input_path)
    else:
        # The interface declares two file outputs, so returning one message
        # string is not a valid result; gr.Error shows the message in the UI.
        raise gr.Error("Please provide a PDF file or a DOI.")

    output_dir = process_pdf(input_path)
    # Use the chat model configured earlier in the notebook; the previous
    # code passed a `None` placeholder here.
    summary_path = generate_summary(output_dir, input_path, llm=llm)
    zip_path = create_zip(output_dir)
    return [summary_path, zip_path]




In [10]:
#@title Gradio App

# Input widgets: a PDF upload and a DOI text field.
pdf_upload = gr.File(label="Upload PDF")
doi_field = gr.Textbox(label="Enter DOI", placeholder="10.1234/example.doi")

# Output widgets: one download per value returned by gradio_app.
summary_download = gr.File(label="Download Summary")
content_download = gr.File(label="Download Extracted Content")

iface = gr.Interface(
    fn=gradio_app,
    inputs=[pdf_upload, doi_field],
    outputs=[summary_download, content_download],
    title="PDF Extractor",
    description="Upload a PDF or enter a DOI and download the extracted content and summary.",
)

# share=True also serves the app through a temporary public URL.
iface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://711d6debbb98cdb8df.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


