# RAG application built on Gemini

In [1]:
from langchain_community.document_loaders import PyPDFLoader
# Load the source paper from disk; `data` holds the Document objects
# returned by the loader.
pdf_path = "rev_farming.pdf"
loader = PyPDFLoader(pdf_path)
data = loader.load()

In [2]:
# Number of Document objects produced by loader.load().
len(data)

6

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded documents into smaller chunks (chunk_size=1000) so each
# piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
docs = text_splitter.split_documents(data)

# Report how many chunks the splitter produced.
chunk_count = len(docs)
print("Total number of documents: ", chunk_count)

Total number of documents:  39


In [4]:
# Inspect one chunk (index 7) to see its metadata and page_content.
docs[7]

Document(metadata={'producer': 'Microsoft® Word 2016; modified using iText® Core 7.2.4 (AGPL version) ©2000-2022 iText Group NV', 'creator': 'Microsoft® Word 2016', 'creationdate': '2024-03-29T04:13:22-04:00', 'meeting starting date': '8 Feb. 2024', 'moddate': '2024-04-02T07:26:48-04:00', 'ieee article id': '10486501', 'ieee issue id': '10486060', 'subject': '2024 2nd International Conference on Computer, Communication and Control (IC4);2024; ; ;10.1109/IC457434.2024.10486501', 'ieee publication id': '10486055', 'title': '&#x0022;Revolutionizing Farming: GAN-Enhanced Imaging, CNN Disease Detection, and LLM Farmer Assistant&#x0022;', 'meeting ending date': '10 Feb. 2024', 'source': 'rev_farming.pdf', 'total_pages': 6, 'page': 0, 'page_label': '1'}, page_content='2024 2nd International Conference on Computer, Communication and Control (IC4) | 979-8-3503-8793-3/24/$31.00 ©2024 IEEE | DOI: 10.1109/IC457434.2024.10486501\nAuthorized licensed use limited to: Bozok Universitesi. Downloaded on

In [5]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from dotenv import load_dotenv
# Load settings from the local .env file.
# A Google AI API key is required: generate one at
# https://ai.google.dev/gemini-api/docs/api-key and paste it into the .env file.
load_dotenv()

# Embedding model used throughout the notebook. Other embedding integrations:
# https://python.langchain.com/v0.1/docs/integrations/text_embedding/
EMBEDDING_MODEL_NAME = "models/embedding-001"
embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL_NAME)

# Smoke test: embed a short string and display the first five components.
vector = embeddings.embed_query("hello, world!")
vector[:5]

[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734263241291,
 0.01813093200325966]

In [6]:
# Embed every chunk in `docs` and index it in a Chroma vector store.
# Reuse the `embeddings` client created in the previous cell instead of
# constructing a second, identical GoogleGenerativeAIEmbeddings instance;
# this keeps the model name defined in exactly one place.
# NOTE(review): the execution counts jump after this cell, so the notebook
# has been re-run out of order. Confirm that re-running this cell in the same
# kernel does not append the same chunks to the collection a second time.
vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)

In [14]:
# Expose the vector store as a similarity-search retriever configured
# with k=10.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10},
)

# Try the retriever on a sample question before wiring it into a chain.
retrieved_docs = retriever.invoke("What is new in farming?")


In [15]:
# Count the retrieved documents; the retriever was configured with k=10.
len(retrieved_docs)

10

In [16]:
# Show the raw text of one retrieved chunk (index 5) as a sanity check.
sample_doc = retrieved_docs[5]
print(sample_doc.page_content)

2024 2nd International Conference on Computer, Communication and Control (IC4)  
979-8-3503-8793-3/24/$31.00 ©2024 IEEE 
"Revolutionizing Farming: GAN-Enhanced 
Imaging, CNN Disease Detection, and LLM Farmer 
Assistant" 
Chhaya Dhavale  
Information Technology Department 
Xavier Institute of Engineering 
Mumbai, India 
chhaya.n@xavier.ac.in 
Shubham Pole 
Information Technology Department 
Xavier Institute of Engineering 
Mumbai, India 
shubhampole2003@gmail.com
Trupti Pawar 
Information Technology Department 
Xavier Institute of Engineering 
Mumbai, India 
tcpawar43@gmail.com 
Krishna Sabat 
Information Technology Department 
Xavier Institute of Engineering 
Mumbai, India 
sabatkrishna241@gmail.com
Aradhana Singh 
Information Technology Department 
Xavier Institute of Engineering 
Mumbai, India 
aradhana.singh200603@gmail.com 
Abstract - Crop disease recognition is a crucial aspect of 
modern agriculture that can significantly impact crop yield,


In [17]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model used to generate answers: temperature 0.3,
# max_tokens set to 500.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.3,
    max_tokens=500,
)

In [18]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System message for the QA chain: the instructions (one sentence per entry,
# joined by single spaces), then a blank line, then the {context} placeholder
# that the prompt template fills in.
system_prompt = "\n\n".join(
    [
        " ".join(
            [
                "You are an assistant for question-answering tasks.",
                "Use the following pieces of retrieved context to answer the question.",
                "If you don't know the answer, say that you don't know.",
                "Use three sentences maximum and keep the answer concise.",
            ]
        ),
        "{context}",
    ]
)

# Two-message chat template: the system instructions (with the {context}
# placeholder) followed by the user's question in {input}.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [19]:
# Chain that passes the documents it is given to the LLM through `prompt`.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: the retriever feeds the question-answering chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [21]:
# Ask a question through the RAG chain and print only the generated answer.
question = "what is new in farming?"
response = rag_chain.invoke({"input": question})
print(response["answer"])

Generative Adversarial Networks (GANs) are expanding datasets for disease identification, improving the accuracy of Convolutional Neural Networks (CNNs) used for disease recognition.  Large Language Models (LLMs) like LangChain and LLAMA are being integrated into chatbots to provide farmers with easily accessible information and support.  This combined approach aims to improve crop yields and create a more sustainable agricultural sector.
