In [42]:
from dotenv import load_dotenv
import os

load_dotenv()


True

In [None]:
# import sys
# print(sys.executable)    # To check which Python interpreter is being used

In [23]:
import arxiv

def search_arxiv(query: str, top_k: int = 5):
    """
    Search arXiv for a query and return top_k results.
    """
    search = arxiv.Search(
        query=query,
        max_results=top_k,
        sort_by=arxiv.SortCriterion.Relevance
    )

    papers = []
    for result in search.results():
        papers.append({
            "title": result.title,
            "authors": [str(a) for a in result.authors],
            "published": result.published,
            "summary": result.summary,
            "pdf_url": result.pdf_url,
        })
    return papers

if __name__ == "__main__":
    query = "machine learning in healthcare"
    papers = search_arxiv(query, top_k=3)
    for p in papers:
        print(papers.index(p)+1)
        print(f"PDF URL: {p['pdf_url']}")
        print(f"Title: {p['title']}")
        print(f"Authors: {' '.join(p['authors'])}")
        print(f"Published: {p['published']}")
        print(f"Summary: {p['summary']}")
        print('*****************')

  for result in search.results():


1
PDF URL: https://arxiv.org/pdf/2303.15563v1
Title: Privacy-preserving machine learning for healthcare: open challenges and future perspectives
Authors: Alejandro Guerra-Manzanares L. Julian Lechuga Lopez Michail Maniatakos Farah E. Shamout
Published: 2023-03-27 19:20:51+00:00
Summary: Machine Learning (ML) has recently shown tremendous success in modeling various healthcare prediction tasks, ranging from disease diagnosis and prognosis to patient treatment. Due to the sensitive nature of medical data, privacy must be considered along the entire ML pipeline, from model training to inference. In this paper, we conduct a review of recent literature concerning Privacy-Preserving Machine Learning (PPML) for healthcare. We primarily focus on privacy-preserving training and inference-as-a-service, and perform a comprehensive review of existing trends, identify challenges, and discuss opportunities for future research directions. The aim of this review is to guide the development of private 

In [34]:
idx=int(input("Enter the paper number to ask any question about:"))
qa_paper=papers[idx-1]['pdf_url']

In [None]:
input_paper=qa_paper+'.pdf'

In [40]:
input_paper

'https://arxiv.org/pdf/2303.15563v1'

In [38]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# Step 1: Load PDF
loader = PyPDFLoader(input_paper)
docs = loader.load()

# Step 2: Split into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)

# Step 3: Create embeddings
embedding_model = HuggingFaceEmbeddings()


  embedding_model = HuggingFaceEmbeddings()


In [3]:
# Step 4: Build vector database
vectordb1 = FAISS.from_documents(documents, embedding_model)

# Step 5: Create retriever
retriever1 = vectordb1.as_retriever()

In [4]:
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """
you are a expert in research paper analysis. Answer the following question based only on the provided context:
<context>
{context}
</context>
"""
)


In [43]:
import os
import certifi
os.environ['SSL_CERT_FILE'] = certifi.where()

In [44]:
from langchain_google_genai import GoogleGenerativeAI

In [45]:
# from getpass import getpass

# api_key = getpass()
# import os

api_key=os.getenv('GOOGLE_API_KEY')


In [48]:
llm = GoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=api_key)
print(
    llm.invoke(
        "What are some of the pros and cons of Python as a programming language?"
    )
)

Python is one of the most popular programming languages today, known for its versatility and readability. Like any tool, it has its strengths and weaknesses.

Here's a breakdown of the pros and cons of Python:

---

### Pros of Python

1.  **Ease of Learning and Readability:**
    *   **Pro:** Python has a simple, clean, and highly readable syntax that resembles natural language. This makes it very easy for beginners to pick up and for experienced developers to understand code quickly. The use of indentation for code blocks enforces a consistent style.

2.  **Extensive Libraries and Frameworks ("Batteries Included"):**
    *   **Pro:** Python boasts a vast ecosystem of libraries and frameworks that cater to almost every imaginable task.
        *   **Web Development:** Django, Flask, FastAPI
        *   **Data Science & Machine Learning:** NumPy, Pandas, Scikit-learn, TensorFlow, PyTorch
        *   **Scientific Computing:** SciPy, Matplotlib
        *   **Automation & Scripting:** Bui

In [None]:
from langchain_classic.chains import StuffDocumentsChain, LLMChain

In [18]:
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
document_chain=create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nyou are a expert in research paper analysis. Answer the following question based only on the provided context:\n<context>\n{context}\n</context>\n'), additional_kwargs={})])
| GoogleGenerativeAI(google_api_key=SecretStr('**********'), model='models/gemini-2.5-flash', client=ChatGoogleGenerativeAI(profile={'max_input_tokens': 1048576, 'max_output_tokens': 65536, 'image_inputs': True, 'audio_inputs': True, 'pdf_inputs': True, 'video_inputs': True, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': True, 'tool_calling': True, 'structured_output': Tr

In [None]:
from langchain_classic.chains import create_retrieval_chain

retrieval_chain=create_retrieval_chain(retriever1,document_chain)
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000254B4368B00>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nyou are a expert in research paper analysis. Answer the following question based only on the provided context:\n<context>\n{context}\n</contex

In [None]:
retrieval_chain.invoke({'input':"In mathematics, the term homomorphic refers to what?"})

{'input': 'In mathematics, the term homomorphic refers to what?',
 'context': [Document(id='87379ae6-5e32-46b2-927b-acebc2119306', metadata={'producer': 'dvips + GPL Ghostscript GIT PRERELEASE 9.22', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-03-28T21:00:13-04:00', 'moddate': '2023-03-28T21:00:13-04:00', 'title': '', 'subject': '', 'author': '', 'keywords': '', 'source': 'https://arxiv.org/pdf/2303.15563v1.pdf', 'total_pages': 13, 'page': 2, 'page_label': '3'}, page_content='ICLR 2023 W orkshop on Trustworthy Machine Learning for Heal thcare\nas the added noise hinders the inference of actual knowledge about the training data by the attacker.\nHowever, adding too much noise (i.e., high privacy budget) c an hamper learning and negatively\nimpact the model accuracy (Chilukoti et al., 2022).\n2.3 H O M O M O RP H IC EN CRY P T IO N\nIn mathematics, the term homomorphic refers to the transformation of a given set into another whil e\npreserving the relation between the element