In [60]:
from langchain_google_genai import GoogleGenerativeAI,GoogleGenerativeAIEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import mongodb_atlas
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
import datetime
from dotenv import load_dotenv
import hashlib
import os

In [43]:
load_dotenv()

True

In [None]:
embedding_model = OllamaEmbeddings(model="embeddinggemma:300m")
llm = GoogleGenerativeAI(model="gemini-2.5-flash-lite")

In [45]:
def generate_embedding(text):
    embedding = embedding_model.embed_query(text)
    return embedding

In [46]:
result = generate_embedding("Saksham Kulshrestha")
result

[-0.15868421,
 -0.06113359,
 -0.02958526,
 0.029409356,
 0.042204127,
 0.058823958,
 -0.0027849262,
 0.010787206,
 0.0297307,
 -0.010123012,
 -0.03298487,
 -0.042351812,
 -0.022406967,
 -0.06050899,
 0.05497947,
 -0.039569568,
 0.05746339,
 -0.029299628,
 -0.061230447,
 0.017460207,
 0.0035171276,
 -0.03546718,
 -0.020311931,
 -0.004988538,
 -0.009850741,
 -0.0016508608,
 0.020677047,
 0.06793262,
 -0.01755061,
 -0.018425021,
 0.028430719,
 -0.024167772,
 -0.0026543948,
 0.005986733,
 0.038694542,
 0.06641955,
 0.04031563,
 -0.017091362,
 0.033054262,
 0.004482337,
 -0.057588153,
 0.023863403,
 0.020007217,
 0.00884083,
 0.054430097,
 -0.018050024,
 0.012851073,
 0.0044125943,
 -0.051933013,
 0.026838982,
 0.010387406,
 -0.010946711,
 -0.015932202,
 -0.008988353,
 -0.022748632,
 -0.02200942,
 -0.067471236,
 -0.010000793,
 0.023444507,
 0.017611519,
 -0.016579239,
 -0.04108012,
 -0.07372018,
 0.018739907,
 -0.030406374,
 -0.057287898,
 -0.029194027,
 0.033670083,
 0.026110413,
 0.213873

In [47]:
len(result)

768

In [48]:
data = TextLoader('central_schemes.txt',encoding='utf-8').load()
data

[Document(metadata={'source': 'central_schemes.txt'}, page_content='scheme name - Electronics Hardware Technology Park Scheme\nministry - Ministry of Electronics and Information Technology (MeitY)\ndepartment - \nkey sectors - Hardware\nbrief - The STP (Software Technology Park) and EHTP (Electronic Hardware Technology Park) schemes are initiatives aimed at promoting software and electronics hardware development for exports in India. These schemes provide specific benefits and eligibility criteria for interested units....\neligibility - \nbenefits - The scheme offers benefits such as duty-free import of capital goods and raw materials, simplified customs procedures, and tax incentives for eligible businesses., Units operating under the STP and EHTP schemes are entitled to import specified goods duty-free, reducing operational costs and promoting the development of software and electronics hardware for exports., For specific details about the scheme and its benefits, please refer to the

In [49]:
len(data)

1

In [50]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=20)
documents = text_splitter.split_documents(data)
documents

[Document(metadata={'source': 'central_schemes.txt'}, page_content='scheme name - Electronics Hardware Technology Park Scheme\nministry - Ministry of Electronics and Information Technology (MeitY)\ndepartment - \nkey sectors - Hardware'),
 Document(metadata={'source': 'central_schemes.txt'}, page_content='brief - The STP (Software Technology Park) and EHTP (Electronic Hardware Technology Park) schemes are initiatives aimed at promoting software and electronics hardware development for exports in India. These schemes provide specific benefits and eligibility criteria for interested units....\neligibility -'),
 Document(metadata={'source': 'central_schemes.txt'}, page_content='benefits - The scheme offers benefits such as duty-free import of capital goods and raw materials, simplified customs procedures, and tax incentives for eligible businesses., Units operating under the STP and EHTP schemes are entitled to import specified goods duty-free, reducing operational costs and promoting the

In [51]:
len(documents)

433

In [52]:
for doc in documents:
    print(doc.page_content)

scheme name - Electronics Hardware Technology Park Scheme
ministry - Ministry of Electronics and Information Technology (MeitY)
department - 
key sectors - Hardware
brief - The STP (Software Technology Park) and EHTP (Electronic Hardware Technology Park) schemes are initiatives aimed at promoting software and electronics hardware development for exports in India. These schemes provide specific benefits and eligibility criteria for interested units....
eligibility -
benefits - The scheme offers benefits such as duty-free import of capital goods and raw materials, simplified customs procedures, and tax incentives for eligible businesses., Units operating under the STP and EHTP schemes are entitled to import specified goods duty-free, reducing operational costs and promoting the development of software and electronics hardware for exports., For specific
For specific details about the scheme and its benefits, please refer to the official document.
benefit tags - Regulatory
tenure - Active


In [53]:
docs_to_insert = [{
    "hash":hashlib.sha256(doc.page_content.encode()).hexdigest(),
    "text": doc.page_content,
    "embedding": generate_embedding(doc.page_content),
    "time_added": datetime.datetime.now()
} for doc in documents]

In [None]:
# MongoDB Atlas connection
from pymongo import MongoClient
client = MongoClient(os.environ['CONNECTION_URI'])
collection =  client["rag_db"]["schemes"]

# Insert documents into the collection
result = collection.insert_many(docs_to_insert)

In [58]:
# after creating the search index in mongodb atlas

def get_query_results(query):
  """Gets results from a vector search query."""

  query_embedding = generate_embedding(query)
  print(query_embedding)
  pipeline = [
      {
            "$vectorSearch": {
              "index": "vector_index",
              "queryVector": query_embedding,
              "path": "embedding",
              "numCandidates":100,
              "limit": 5
            }
      }, {
            "$project": {
              "_id": 0,
              "text": 1
         }
      }
  ]

  results = collection.aggregate(pipeline)
  print(results)

  array_of_results = []
  for doc in results:
      array_of_results.append(doc)
  return array_of_results

In [59]:
get_query_results("What is the eligibility criteria for PM Kisan Yojana?")

[-0.08939518, -0.018883472, 0.013672885, 0.020768998, 0.05478808, 0.049221028, -0.034972087, -0.0034641684, 0.05995898, 0.0013725911, 0.024070857, -0.017182946, 0.0450976, -0.008019469, 0.07396615, -0.014516951, 0.074510984, -0.022944972, 0.0007082326, 0.023178041, 0.02272476, 0.041856226, 0.0036915878, 0.020119041, 0.0006824796, 0.02141984, 0.01989887, 0.031266388, 0.049620833, 0.02815637, 0.0010503368, -0.002546943, 0.0036300432, -0.059251983, 0.0459273, 0.0073740724, 0.02088045, -0.053572502, -0.04389238, -0.0430246, -0.03576636, 0.06297702, 0.052591495, -0.060679857, 0.0004156956, -0.039389733, -0.005754843, -0.05211587, -0.009362835, 0.0068998546, -0.02349425, 0.009107219, -0.036993638, 0.0064068157, -0.005023754, -0.03685387, -0.03383881, -0.024924854, -0.0012859466, -0.026780061, -0.025471956, -0.023438498, -0.03454624, 0.09363298, -0.0066658976, 0.0031694274, -0.036230363, 0.09475672, 0.007044265, 0.11538213, 0.035064954, 0.001212741, 0.012402153, -0.031732887, 0.16790812, 0.04

[{'text': 'eligibility -'},
 {'text': 'eligibility -'},
 {'text': 'eligibility - The conditions necessary for getting funded are as follows:, Be economically viable, Provide access to markets for the poor, Be socially relevant and impact the poor as customers, producers, or employees, The enterprises must have plans to expand operations in any or all the following states – Bihar, Chhattisgarh, Odisha, Uttar Pradesh, West Bengal, Madhya Pradesh, Jharkhand, and'},
 {'text': 'eligibility - First-generation entrepreneurs, existing entrepreneurs, proprietary & partnership concerns and companies. Technical qualification of the promoter in the relevant field is a pre-requisite.'},
 {'text': 'eligibility - Women Entrepreneurs are eligible for this scheme.'}]

In [61]:
query = "what are the schemes from the government for farmers?"
context_docs = get_query_results(query)
context_string = " ".join([doc["text"] for doc in context_docs])

prompt = f"""Use the following pieces of context to answer the question at the end.
    {context_string}
    Question: {query}
"""
llm.invoke(prompt)


[-0.10138701, 0.008240459, 0.0052452483, 0.0021839552, 0.026230656, 0.024729539, -0.017498795, -0.024123397, 0.035310656, 0.028185295, -0.04123175, -0.057824615, 0.05823831, -0.043038722, 0.048593804, 0.013938374, 0.044336483, 0.030322684, 0.007074326, 0.039818272, 0.0061959703, 0.028782869, 0.001770549, 0.025081905, -0.017013611, -0.0068621784, 0.018502695, -0.00092058966, 0.017719936, 0.04003056, -0.011053067, -0.02994204, -0.007792014, -0.061654795, 0.0725284, 0.025453994, 0.018599968, -0.060129933, -0.0037139207, 0.025039012, -0.036073484, 0.022141559, 0.043328047, -0.031379018, 0.030016795, -0.05143603, -0.033051413, -0.066771306, -0.03951073, -0.020027904, -0.029225774, -0.012396734, -0.03829652, -0.019137507, 0.013619013, -0.0061367867, -0.043948896, 0.01813799, -0.020311778, 0.033825975, 0.001249606, -0.06971919, -0.04425341, 0.025356662, -0.008025556, -0.030202042, 0.03402939, 0.042736195, 0.024322659, 0.12607601, 0.025894746, -0.03169234, 0.025494467, 0.034697577, 0.1620433, 

'Based on the provided context, here are the government schemes for farmers:\n\n*   **Agriculture Infrastructure Fund for Agripreneurs**: This is a scheme from the Ministry of Agriculture.\n*   **NABVENTURES Fund**: This scheme is from the National Bank for Agriculture and Rural Development (NABARD).'

In [62]:
def final_answer(query):
    context_docs = get_query_results(query)
    context_string = " ".join([doc["text"] for doc in context_docs])

    prompt = f"""Use the following pieces of context to answer the question at the end.
        {context_string}
        Question: {query}
    """
    result = llm.invoke(prompt)
    return result

In [63]:
final_answer("which schemes are for tech sector?")

[-0.11775042, 0.012972833, 0.026561286, 0.017510701, 0.051614836, 0.030594198, 0.0021028498, -0.02641566, 0.037350446, -0.011524088, -0.035430368, -0.060456894, 0.021443835, -0.024349706, 0.016203295, -0.02369167, 0.02065446, 0.00033815758, 0.024824629, 0.02587359, -0.00032904244, 0.04225179, -0.04374245, 0.0025868157, -0.064702794, 0.009116389, 0.045050837, 0.01788533, -0.012818224, 0.06694642, 0.01697156, -0.0251817, -0.015686538, -0.051620323, 0.0053762738, 0.025312314, 0.021864424, -0.059858758, 0.0444987, -0.019103749, -0.044536106, 0.07590703, 0.029788213, -0.0013554528, -0.01182471, -0.059781432, -0.039011184, -0.034470636, 0.023978246, 0.005167791, -0.044170685, 0.011316362, -0.071242794, 0.029773131, -0.010609401, -0.06335683, -0.100552574, -0.013760665, -0.02314998, 0.03927448, 0.0054546166, 0.021390691, -0.079855636, 0.022002602, 0.046881042, -0.03897976, 0.00034717433, 0.0011005988, -0.028379729, 0.12386566, -0.002418895, -0.043602206, -0.0011661362, 0.028487721, 0.18133941

'Based on the provided context, the following schemes are for the tech sector:\n\n*   **Technology Development Programme**\n*   **Genesis**'