In [2]:
#@title Research Analyzer
#@markdown This is part of the Research Analyzer project.

In [None]:
#@title Installing dependancies
%%capture
!pip install marker-pdf
!pip install gradio
!pip install scidownl
!pip install langchain
!pip install langchain-google-genai
!pip install langchain-groq
!git clone https://github.com/mohamedfadlalla/Research-Analyzer.git


In [5]:
#@title Setup
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

import re
import os 
from IPython.display import Markdown

from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq



# setup google gemini api keys
import os
from dotenv import load_dotenv
import platform

# Load the API keys.
# Colab is detected by whether `google.colab` can be imported instead of by the
# OS name: a Linux machine is not necessarily Colab, and any other machine
# (Windows, macOS, plain Linux) can read its keys from the environment / .env.
try:
    from google.colab import userdata
except ImportError:
    # Not running in Colab: read the keys from the environment or a local .env file.
    load_dotenv()
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
else:
    import sys

    # Make the cloned repository importable and use it as the working directory.
    repo_dir = '/content/Research-Analyzer'
    if repo_dir not in sys.path:  # avoid duplicate entries when the cell is re-run
        sys.path.append(repo_dir)
    os.chdir(repo_dir)

    # Keys are stored as Colab secrets.
    GOOGLE_API_KEY = userdata.get('GOOGLE_AI_STUDIO2')
    GROQ_API_KEY = userdata.get('GROQ_API_KEY')


# Gemini chat models; both authenticate with the same Google API key.
pro = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.4,
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)
flash = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.3,
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)

# Groq-hosted chat model.
# Other model names that were tried here: mixtral-8x7b-32768, llama3-70b-8192,
# llama3-8b-8192, llama-3.1-8b-instant, gemma2-9b-it.
llm = ChatGroq(
    model_name="llama-3.1-70b-versatile",
    temperature=0,
    groq_api_key=GROQ_API_KEY,
)

import gradio as gr
import summary  # Make sure the 'summary' module is in your Python path
import functions  # Make sure the 'functions' module is in your Python path


# Define the function to summarize the file, generate the workflow graph, and save the results to 'summary.md'
def process_file(file_path):
    """Summarize a document, save the summary to disk, and build its workflow graph.

    Args:
        file_path: Path of the document handed to ``summary.summarize`` and
            ``functions.get_workflow``, or ``None`` when nothing was provided.

    Returns:
        A 3-tuple ``(summary_text, summary_file, graph_file)``. When
        ``file_path`` is ``None`` it is ``("No file uploaded", None, None)``;
        otherwise the two file entries are ``"summary.md"`` and ``"graph.png"``.
    """
    if file_path is None:
        return "No file uploaded", None, None

    results = summary.summarize(file_path, llm)

    # Write as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on
    # Windows) cannot represent every character the summary may contain.
    with open("summary.md", "w", encoding="utf-8") as f:
        f.write(results)

    # Generate the workflow graph.
    # NOTE(review): presumably this writes "graph.png" in the working directory,
    # which is the path returned below -- confirm in functions.get_workflow.
    functions.get_workflow(file_path, llm)

    return results, "summary.md", "graph.png"




In [None]:
#@title setup Gradio APP

import gradio as gr
import subprocess
import shutil
import os
from scidownl import scihub_download
from google.colab import files

def process_pdf(input_path):
    """Extract a PDF with the ``marker_single`` CLI and zip the extracted output.

    The output is written to ``/content/output/<name>``, where ``<name>`` is the
    PDF file name without its extension.

    Args:
        input_path: Path of the PDF file to extract.

    Returns:
        Path of the zip archive created from the output directory.

    Raises:
        RuntimeError: If ``marker_single`` exits with a non-zero status.
    """
    base_name = os.path.splitext(os.path.basename(input_path))[0]
    output_dir = f"/content/output/{base_name}"

    # Create the output directory (existing content is left in place).
    os.makedirs(output_dir, exist_ok=True)

    # Run the marker_single command
    completed = subprocess.run([
        "marker_single", input_path, output_dir,
        "--batch_multiplier", "2",
        "--max_pages", "20",
        "--langs", "English"
    ], capture_output=True, text=True)

    # Surface extraction failures here; otherwise they only show up later as a
    # missing markdown file.
    if completed.returncode != 0:
        raise RuntimeError(
            f"marker_single failed with exit code {completed.returncode}:\n{completed.stderr}"
        )

    # Create zip file of the extracted content; make_archive returns its path.
    zip_path = shutil.make_archive(output_dir, 'zip', output_dir)

    return zip_path

def download_paper(doi):
    """Request the paper for ``doi`` via ``scihub_download`` and return the target path.

    Args:
        doi: DOI string identifying the paper.

    Returns:
        The fixed path ``/content/paper.pdf`` passed to ``scihub_download`` as
        its output location.
    """
    pdf_destination = "/content/paper.pdf"
    scihub_download(doi, out=pdf_destination, paper_type="doi")
    return pdf_destination

def gradio_app(file, doi):
    """Gradio callback: obtain a paper, extract it, and analyze the extracted text.

    A DOI takes precedence over an uploaded file when both are given.

    Args:
        file: Uploaded file object from Gradio (its ``.name`` is used as the
            path), or ``None``.
        doi: DOI entered by the user; empty, ``None`` or whitespace-only means
            no DOI was given.

    Returns:
        A 3-tuple ``(summary, summary_file, graph)`` as returned by
        ``process_file``. When neither input is provided, the first element is
        an instruction message and the other two are ``None``.
    """
    # Treat a whitespace-only DOI the same as an empty one.
    doi = doi.strip() if doi else ""

    if doi:
        input_path = download_paper(doi)
    elif file:
        input_path = file.name
        shutil.move(input_path, "/content/paper.pdf")
        input_path = "/content/paper.pdf"
    else:
        # Always return three values so the result shape is the same on every path.
        return "Please provide a PDF file or a DOI.", None, None

    process_pdf(input_path)
    # The PDF is always stored as /content/paper.pdf, so this path is fixed.
    markdown_path = '/content/output/paper/paper/paper.md'
    print('started analysis, and finished extraction')
    summary, summary_file, graph = process_file(markdown_path)

    return summary, summary_file, graph


#@title Gradio App

# Single Gradio interface for the app. On success gradio_app returns three
# values (summary text, summary file path, graph image path), so three output
# components are declared, in that order.
interface = gr.Interface(
    fn=gradio_app,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(label="Enter DOI", placeholder="10.1234/example.doi"),
    ],
    outputs=[
        gr.Markdown(label="Summary"),
        gr.File(label="Download Summary"),
        gr.Image(label="Workflow Graph"),
    ],
    title="Research Analyzer",
    description="Upload a PDF or enter a DOI to get a summary and a workflow graph of the paper.",
)

interface.launch(share=True)
