# Preparations

In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

from prompts import system_prompt_1, system_prompt_2

import re
import os 
from IPython.display import Markdown

from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq



# setup google gemini api keys
import os
from dotenv import load_dotenv
import platform

# Decide where the API keys come from by probing for the Colab runtime itself
# instead of looking at the OS name: a regular Linux machine has no
# `google.colab` package, and macOS can read a .env file just like Windows.
try:
    from google.colab import userdata
except ImportError:
    # Local run (any OS): read the keys from a .env file / the process environment.
    load_dotenv()
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
else:
    # Colab run: fetch the keys through Colab's `userdata` API.
    # Note the Google key is stored there under the name 'GOOGLE_AI_STUDIO2'.
    GOOGLE_API_KEY = userdata.get('GOOGLE_AI_STUDIO2')
    GROQ_API_KEY = userdata.get('GROQ_API_KEY')


# Gemini clients: same key and system-message handling, different model/temperature.
pro = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.4,
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)
flash = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.3,
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)

# Groq client at temperature 0.
# Other model ids that were tried in this slot: mixtral-8x7b-32768,
# llama3-70b-8192, llama-3.1-70b-versatile, llama3-8b-8192, gemma2-9b-it.
llm = ChatGroq(
    groq_api_key=GROQ_API_KEY,
    model_name="llama-3.1-8b-instant",
    temperature=0,
)

# PDF Acquisition

In [None]:
%%capture
#@title Dependencies
# `%%capture` has to be the very first line of the cell, otherwise IPython does
# not treat it as a cell magic. `%pip` (rather than `!pip`) installs into the
# environment of the running kernel.
%pip install -U scidownl
# LangChain and Groq
%pip install langchain
%pip install langchain_groq

# Marker (PDF -> markdown conversion)
%pip install marker-pdf

In [4]:
from scidownl import scihub_download
import pandas as pd
import os
from google.colab import files

def download_paper():
    """Prompt for a DOI and hand it to scidownl, saving the result as ./paper.pdf."""
    doi = input("Please enter the DOI of the paper here: ")
    # If a proxy is required, build a mapping such as
    #     {'http': 'socks5://127.0.0.1:7890'}
    # and pass it as the `proxies` keyword argument of the call below.
    scihub_download(doi, paper_type="doi", out="./paper.pdf")

def upload_paper():
    """Let the user upload a PDF through Colab and store it as 'paper.pdf'.

    Only the first uploaded file is considered.

    Raises:
        ValueError: if nothing was uploaded, or the first uploaded file does
            not have a ``.pdf`` extension (checked case-insensitively).
    """
    uploaded = files.upload()
    # A cancelled dialog yields an empty mapping; fail with a clear message
    # instead of an IndexError.
    if not uploaded:
        raise ValueError('No file was uploaded.')

    filename = next(iter(uploaded))
    # Accept '.PDF' / '.Pdf' as well.
    if not filename.lower().endswith('.pdf'):
        raise ValueError('Uploaded file is not a PDF file.')

    # os.replace overwrites an existing 'paper.pdf' on every platform
    # (os.rename raises on Windows when the target exists).
    os.replace(filename, 'paper.pdf')
    print("Paper uploaded successfully as 'paper.pdf'")

def main():
    """Ask how the paper should be obtained and run the matching routine."""
    selection = input("Choose an option (1 for DOI download, 2 for PDF upload): ")

    # Reject anything other than the two known options up front.
    if selection not in ('1', '2'):
        print("Invalid choice. Please enter 1 or 2.")
        return

    if selection == '1':
        download_paper()
    else:
        upload_paper()


# Run the interactive prompt when the cell/script is executed directly.
if __name__ == "__main__":
    main()


KeyboardInterrupt: 

# Markdown Extraction with Marker

In [None]:
#@title Extracting markdown
# Shell out to Marker's `marker_single` command on /content/paper.pdf, writing
# into /content/output/ (at most 20 pages, language set to English).
# NOTE(review): later cells read 'manuscript.md' from the working directory —
# confirm how Marker's output ends up under that name.
!marker_single /content/paper.pdf /content/output/ --batch_multiplier 2 --max_pages 20 --langs English

# Extracting the Methodology Section
**Objective:** Isolate the methodology section from the segmented markdown for further analysis.
- **Action Items:**
  - From the parsed sections, specifically extract the text under the "Methodology" heading.
  - Handle variations in heading titles like "Methods" or "Materials and Methods."


In [None]:
import re

def extract_outline(file_path):
    """Collect the markdown headings of a file in document order.

    A heading is a line that starts with one or more ``#`` characters followed
    by whitespace.

    Args:
        file_path: Path of the markdown file; it is read as UTF-8.

    Returns:
        A list of ``(hashes, title)`` tuples, e.g. ``("##", "Methods")``.
    """
    heading_re = re.compile(r'^(#+)\s+(.*)')
    outline = []
    # Explicit UTF-8: the platform default (e.g. cp1252 on Windows) fails or
    # garbles markdown that contains non-ASCII characters.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            header_match = heading_re.match(line)
            if header_match:
                outline.append((header_match.group(1), header_match.group(2)))
    return outline

def print_outline(outline):
    """Print each ``(hashes, title)`` pair as one ``hashes title`` line."""
    for marker, heading in outline:
        print(' '.join((marker, heading)))

# Build and show the heading outline of the converted manuscript.
# `outline` is reused by the later cell that asks the model for the methods heading.
# NOTE(review): 'manuscript.md' is resolved relative to the working directory — confirm it exists there.
file_path = 'manuscript.md'
outline = extract_outline(file_path)
print_outline(outline)


In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

from prompts import system_prompt_1, system_prompt_2, get_markdown

import re
import os 
from IPython.display import Markdown

from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq



# setup google gemini api keys
from dotenv import load_dotenv
# Read the keys from .env / the environment.
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Gemini clients.
gem1 = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.4,
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)
flash = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.3,
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)

GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Groq client at temperature 0.
# Other model ids that were tried in this slot: mixtral-8x7b-32768,
# llama3-70b-8192, llama-3.1-70b-versatile, llama3-8b-8192, gemma2-9b-it.
groq_mx = ChatGroq(
    groq_api_key=GROQ_API_KEY,
    model_name="llama-3.1-8b-instant",
    temperature=0,
)

ImportError: cannot import name 'get_markdown' from 'prompts' (C:\Users\DELL\Desktop\Research Projects\Research-Analyzer\prompts.py)

In [None]:




system = """
###instructions###
You will receive an outline of a scientific paper in the format shown in the expected input. Extract and present only the highest level heading corresponding to the methods section, with no other output.

**Expected Input:**
#abstract
#introduction
#method
##method 1
#results
#conclusion

**Expected Output:**
#method

"""

# Render the outline one heading per line ("## Title"), which is the format the
# system prompt describes, rather than sending the repr of a list of tuples.
text = "\n".join(f"{hashes} {title}" for hashes, title in outline)

# Pass the outline in as a template variable. Embedding it directly in the
# template makes any "{" or "}" in a heading get parsed as a placeholder, and
# `invoke({})` then fails with a missing-variable error.
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{outline_text}")])

# Use `llm` from the setup cell: `chat` is only created in a later cell, so this
# cell raised NameError on a fresh kernel.
chain = prompt | llm
response = chain.invoke({"outline_text": text})
print(response.content)

# Chapter Two: Materials and Methods


In [None]:
#@title Extracting the method section from the Markdown file
import re

def extract_section(file_path, section_title):
    """Return the text of one markdown section, heading line included.

    The section starts at the heading equal to ``section_title`` and runs until
    the next heading of the same or a higher level (fewer or equal ``#``), or
    to the end of the file. Nested sub-headings are part of the section.

    The comparison ignores surrounding whitespace and the amount of whitespace
    between the hashes and the title, so an LLM reply such as ``"# Methods\\n"``
    or ``"#Methods"`` still matches the heading ``# Methods``.

    Args:
        file_path: Path of the markdown file; it is read as UTF-8.
        section_title: Heading to look for, including its leading hashes.

    Returns:
        The section as a single string, or ``''`` when no heading matches.
    """
    heading_re = re.compile(r'^(#+)\s+(.*)')

    def _normalize(hashes, title):
        # Canonical "<hashes> <title>" form without stray whitespace.
        return f"{hashes} {title.strip()}".rstrip()

    # Normalize the requested heading; the space after the hashes is optional
    # here because model replies often omit it.
    wanted = section_title.strip()
    wanted_match = re.match(r'^(#+)\s*(.*)', wanted)
    if wanted_match:
        wanted = _normalize(wanted_match.group(1), wanted_match.group(2))

    section_content = []
    inside_section = False
    section_level = None

    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # fails or garbles markdown containing non-ASCII characters.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            header_match = heading_re.match(line)
            if header_match:
                hashes, title = header_match.group(1), header_match.group(2)
                level = len(hashes)

                # A heading at the same or a higher level closes the section.
                if inside_section and level <= section_level:
                    break
                if _normalize(hashes, title) == wanted:
                    inside_section = True
                    section_level = level

            if inside_section:
                section_content.append(line)

    return ''.join(section_content)


file_path = 'manuscript.md'
# Model replies usually carry trailing whitespace or a newline; strip it so the
# heading can be compared with the headings in the file.
section_title = response.content.strip()
method = extract_section(file_path, section_title)
# An empty result means the heading was not found; say so instead of silently
# continuing with an empty methods section.
if not method:
    print(f"Warning: no section titled {section_title!r} found in {file_path}")

## Comprehensive Summary

In [1]:
from dotenv import load_dotenv
import os
# Load variables from a .env file into the process environment, then read both API keys.
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")


# os.getenv returns None when the variable is not set.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")


[REDACTED: a Groq API key was printed in this cell output. Revoke and rotate that key, and clear the output before sharing the notebook.]


In [1]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq




In [25]:
from prompts import system_prompt_1, system_prompt_2
from input import method_1, method_2

In [20]:
#@title creation of nodes and edges

# Groq chat model that turns the methods text into a list of nodes and edges.
chat = ChatGroq(
    temperature=0,
    model_name="mixtral-8x7b-32768",

    max_tokens=1024,
    groq_api_key=GROQ_API_KEY  # May be omitted when GROQ_API_KEY is set in the environment
)

system = system_prompt_1

text = str(method_2)
# Supply the methods text as a template variable. Putting it directly into the
# template makes every "{" / "}" in the text (LaTeX, sets, units) be parsed as
# a placeholder, and `invoke({})` then fails with a missing-variable error.
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{method_text}")])

chain = prompt | chat
nande = chain.invoke({"method_text": text})


In [21]:
from prompts import system_prompt_1, system_prompt_2
from input import method_1, method_2

#@title writing the code for the graphviz

# Groq chat model that writes Graphviz code from the nodes/edges description.
chat = ChatGroq(
    temperature=0,
    model_name="gemma2-9b-it",
    max_tokens=1024,
    groq_api_key=GROQ_API_KEY  # May be omitted when GROQ_API_KEY is set in the environment
)

system = system_prompt_2

text = str(nande.content)
# Supply the previous model output as a template variable. It is free-form
# text that may contain "{" / "}" (dict- or JSON-like node lists); embedded
# directly in the template those would be parsed as placeholders and
# `invoke({})` would fail with a missing-variable error.
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{nodes_and_edges}")])

chain = prompt | chat
llm_response = chain.invoke({"nodes_and_edges": text})


In [22]:
#@title extract the code form the response and run it
import re

def extract_code(llm_response):
    """Extract the first fenced Python code block from a model reply.

    The opening fence may be tagged ``python``, ``python3`` or ``py`` in any
    letter case.

    Args:
        llm_response: Reply text; ``None`` or an empty string is treated as
            "no code".

    Returns:
        The code inside the first matching fence with surrounding whitespace
        stripped, or the message ``"No Python code found in the response."``
        when there is no such block.
    """
    not_found = "No Python code found in the response."
    if not llm_response:
        return not_found

    # `python3` is tried before `python` so the trailing "3" does not end up in
    # the captured code; a bare `py` tag must not be followed by another letter.
    pattern = re.compile(
        r'```(?:python3|python|py(?![A-Za-z]))(.*?)```',
        re.DOTALL | re.IGNORECASE,
    )

    match = pattern.search(llm_response)
    if match is None:
        return not_found
    return match.group(1).strip()

# Pull the generated script out of the model's reply.
code = extract_code(llm_response.content)
# extract_code reports failure with a plain-English message; feeding that to
# exec() only produces a confusing SyntaxError, so stop with a clear error.
if code == "No Python code found in the response.":
    raise ValueError(code)
# SECURITY: exec() runs model-generated code with the full privileges of this
# kernel. Inspect `code` before executing it on a machine you care about.
exec(code)

In [26]:
# Show the second system prompt (imported from the local `prompts` module) for inspection.
print(system_prompt_2)


### instructions ###
Your job is to write Python code that:
1. Reads the provided list of nodes and edges.
2. Utilizes Graphviz to create a directed graph.
3. Adds nodes to the graph with appropriate labels.
4. Adds edges between the nodes with descriptions of their relationships.
5. Sets aesthetic properties such as layout direction, size, shape, or other visual attributes.
7. Very important to output the results to high quality png image name "graph.png". 




# Comprehensive Summary

In [11]:
# Groq client used for summarization, temperature 0.
# Other model ids that were tried in this slot: mixtral-8x7b-32768,
# llama3-70b-8192, llama-3.1-70b-versatile, llama-3.1-8b-instant, gemma2-9b-it.
llm = ChatGroq(
    groq_api_key=GROQ_API_KEY,
    model_name="llama3-8b-8192",
    temperature=0,
)

In [None]:
# Rebinds `llm` to a different Groq model, temperature 0.
# Other model ids that were tried in this slot: mixtral-8x7b-32768,
# llama3-70b-8192, llama-3.1-70b-versatile, llama3-8b-8192, gemma2-9b-it.
llm = ChatGroq(
    groq_api_key=GROQ_API_KEY,
    model_name="llama-3.1-8b-instant",
    temperature=0,
)

In [69]:
## Summarize a manuscript with the local `summary` module
import summary
import importlib
# Re-import `summary` so edits made to the module since the last run take effect.
importlib.reload(summary)

file_path = 'Examples/systematic_review/systematic_review.md'
# presumably returns markdown text (it is rendered with Markdown() below and written with f.write later) — confirm in summary.py
results = summary.summarize(file_path, llm)
# Must stay the last expression of the cell so the notebook renders it.
Markdown(results)


##  Background and Objectives   
The COVID-19 pandemic, caused by the SARS-CoV-2 virus, has become a global health crisis. The study aims to investigate the potential of repurposing FDA-approved drugs against SARS-CoV-2 virus using molecular docking and pharmacophore modeling.
##  Methods                     
**Methodology Summary:**

* Design: Systematic search of scientific studies on Google Scholar
* Sample: All studies published by the end of 2020/12/30
* Tools/Techniques: 
  - Keywords: (COVID19 OR SARS-CoV-2) AND (Drug repurposing OR Drug repositioning OR Drug re-profling OR Drug rediscovery) AND (Docking AND Molecular dynamic)
  - Database: Google Scholar
##  Main Findings               
* 1,444 studies on repurposing drugs for COVID-19 were collected from Google Scholar by the end of 2020/12/30.
##  Analysis and Interpretation 
The authors used a systematic search strategy in Google Scholar to collect studies on drug repurposing for COVID-19, using a combination of keywords. The search was limited to studies published by the end of 2020/12/30.

The authors did not mention any specific statistical methods used in the analysis. However, the results of the search strategy are likely to be presented in the form of a list of studies, which may be analyzed using descriptive statistics (e.g., frequency, percentage) to summarize the findings.

The implications of the results are not explicitly stated, but the authors likely aim to provide an overview of the existing literature on drug repurposing for COVID-19, which can inform future research and clinical decisions.
##  Contributions and Novelty   
The paper contributes to the field of COVID-19 research by providing a comprehensive search strategy for identifying studies on repurposing drugs for COVID-19. The novel approach proposed is the use of a combination of keywords in the Google Scholar database to efficiently collect relevant studies.
##  Limitations                 
The study may be limited by the reliance on Google Scholar, which may not be comprehensive in capturing all relevant studies, particularly those published in non-English languages or in non-indexed journals. Additionally, the search strategy may be biased towards studies that used docking and molecular dynamics, potentially overlooking other repurposing methods.
##  Conclusions and Recommendations 
The researchers conclude that antiviral drugs, particularly those targeting the main protease, show promise in the fight against COVID-19. Tetracyclines and antivirals, originally protease inhibitors, are recommended for further investigation. The models developed in this study can be used for virtual screening to aid in finding effective therapeutic agents against COVID-19.
##  Key References and Citations 
Number of citaiton: 108

In [52]:
# Persist the generated summary. Write as UTF-8 explicitly: model output often
# contains non-ASCII characters, which the platform default encoding (e.g.
# cp1252 on Windows) cannot always represent.
with open('file.txt', 'w', encoding='utf-8') as f:
    f.write(results)