In [1]:
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import faiss 
import numpy as np
import pickle

In [3]:
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [4]:
class semanticEmbedding:
    def __init__(self, model_name='sentence-transformers/all-mpnet-base-v2'):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)

    def mean_pooling(self, model_output, attention_mask):
        token_embeddings = model_output[0] #First element of model_output contains all token embeddings
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    def get_embedding(self, sentences):
    # Tokenize sentences
        encoded_input = self.tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
        with torch.no_grad():
            model_output = self.model(**encoded_input)
        # Perform pooling
        sentence_embeddings = self.mean_pooling(model_output, encoded_input['attention_mask'])

        # Normalize embeddings
        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
        return sentence_embeddings.detach().numpy()

In [15]:
class FaissIdx:

    def __init__(self, model, dim=768):
        self.index = faiss.IndexFlatIP(dim) # need to load the pickle model in the final file
        # self.index = faiss
        # Maintaining the document data
        self.doc_map = dict()
        self.model = model
        self.ctr = 0

    def add_doc(self, document_text):
        self.index.add(self.model.get_embedding(document_text))
        self.doc_map[self.ctr] = document_text # store the original document text
        self.ctr += 1

    def search_doc(self, query, k=3):
        D, I = self.index.search(self.model.get_embedding(query), 5)
        return [{self.doc_map[idx]: score} for idx, score in zip(I[0], D[0]) if idx in self.doc_map]
    def save_index(self, index_filename, doc_map_filename):
        # Save Faiss index to file
        faiss.write_index(self.index, index_filename)

        # Save document map to file using pickle
        with open(doc_map_filename, 'wb') as f:
            pickle.dump(self.doc_map, f)

    def load_index(self, index_filename, doc_map_filename):
        # Load Faiss index from file
        self.index = faiss.read_index(index_filename)

        # Load document map from file using pickle
        with open(doc_map_filename, 'rb') as f:
            self.doc_map = pickle.load(f)

In [16]:
# creating an instance of the class
model = semanticEmbedding()

In [17]:
index = FaissIdx(model)

In [18]:
index.load_index('index.bin', 'doc_map.pkl')

In [77]:
query = "I want to study abroad"

In [78]:
query_result = index.search_doc(query)

In [79]:
context = ''

In [80]:
for i in range(5):
    for key, value in query_result[i].items():
        if(value>0.23):
            context += key

In [81]:
context

'Scheme of National Overseas Scholarship = The Central Sector Scheme of National Overseas Scholarship is to facilitate the low income students belonging to the Scheduled Castes, Denotified Nomadic and Semi-Nomadic Tribes, Landless Agricultural Labourers and Traditional Artisans category to obtain higher education viz., Master degree or Ph. D courses by studying abroad thereby improving their Economic and Social status. During Selection Year 2022-23, there are 125 fresh awards, subject to availability of funds. Out of 125 Awards, 115 Awards are for Scheduled Castes candidates, 06 Awards are for Denotified, Nomadic and Semi-Nomadic...Scheme of National Overseas Scholarship = The Central Sector Scheme of National Overseas Scholarship is to facilitate the low income students belonging to the Scheduled Castes, Denotified Nomadic and Semi-Nomadic Tribes, Landless Agricultural Labourers and Traditional Artisans category to obtain higher education viz., Master degree or Ph. D courses by studyi

In [82]:
content = ""

In [73]:
if context == '':
    print('Is there anything else I can help you with')
else:
    content = f'Give the response in proper english with numbering and all Consider the query and use the context given followed by query in the curly braces and generate a response accordingly. This is the query: {query_result} and the context is: {context}'


In [74]:
content

"Give the response in proper english with numbering and all Consider the query and use the context given followed by query in the curly braces and generate a response accordingly. This is the query: [{'Scheme of National Overseas Scholarship = The Central Sector Scheme of National Overseas Scholarship is to facilitate the low income students belonging to the Scheduled Castes, Denotified Nomadic and Semi-Nomadic Tribes, Landless Agricultural Labourers and Traditional Artisans category to obtain higher education viz., Master degree or Ph. D courses by studying abroad thereby improving their Economic and Social status. During Selection Year 2022-23, there are 125 fresh awards, subject to availability of funds. Out of 125 Awards, 115 Awards are for Scheduled Castes candidates, 06 Awards are for Denotified, Nomadic and Semi-Nomadic...': 0.4516108}, {'Scheme of National Overseas Scholarship = The Central Sector Scheme of National Overseas Scholarship is to facilitate the low income students 

In [46]:
import google.generativeai as genai

In [47]:
genai.configure(api_key=os.environ.get('GOOGLE_API_KEY'))

In [48]:
model = genai.GenerativeModel('gemini-pro')

In [75]:
response = model.generate_content(content)

In [76]:
print(response.text)

1. **Scheme of National Overseas Scholarship**

   The Central Sector Scheme of National Overseas Scholarship aims to facilitate low-income students from the Scheduled Castes, Denotified Nomadic and Semi-Nomadic Tribes, Landless Agricultural Labourers, and Traditional Artisans categories to pursue higher education abroad. The objective is to enhance their economic and social status by providing them with opportunities to obtain Master's degrees or Ph. D. courses. During the Selection Year 2022-23, there are 125 fresh awards available, subject to the availability of funds. Out of these, 115 Awards are reserved for Scheduled Castes candidates, 06 Awards are for Denotified, Nomadic and Semi-Nomadic Tribes, and the remaining awards are distributed among other eligible categories.

2. **National Overseas Scholarships for Scheduled Tribe Students**

   This Central Scheme of National Overseas Scholarships for Scheduled Tribe (ST) Students specifically supports ST students by providing financ