In [1]:
import os

# Run from the project root so the relative "data/..." paths used later resolve.
# The root can be overridden with the QUANTIZED_RAG_HOME environment variable;
# when it is unset, the original hard-coded location is used unchanged.
os.chdir(os.environ.get("QUANTIZED_RAG_HOME", r"/home/kumail/Quantized-RAG"))

# https://python.langchain.com/v0.1/docs/integrations/vectorstores/faiss/

In [2]:
from transformers import AutoTokenizer, AutoModel
import torch
import faiss
import numpy as np
import pickle

class Vector:
    """Embed text with a Hugging Face model and look documents up in a FAISS index.

    Attributes set by ``__init__``:
        embed_tokenizer, embed_model: loaded with ``AutoTokenizer`` / ``AutoModel``.
        faiss_index: index read from ``index_file`` with ``faiss.read_index``.
        docs: object unpickled from ``documents_file``. It is indexed with the
            integer labels FAISS returns, so it is presumably a list aligned
            with the index rows -- confirm against the code that built the index.
    """

    def __init__(self, model_name, index_file, documents_file) :
        """Load the embedding model, the FAISS index and the documents, then print "Vector"."""
        self.embed_tokenizer, self.embed_model=self.load_model(model_name)
        self.faiss_index = self.load_index(index_file)
        self.docs = self.load_embedded_documents(documents_file)

        print("Vector")

    def load_model(self, model_name):
        """Return ``(tokenizer, model)`` loaded from the pretrained checkpoint ``model_name``."""
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name)
        return tokenizer, model

    def load_index(self, index_file):
        """Read and return the FAISS index stored at ``index_file``."""
        return faiss.read_index(index_file)

    def load_embedded_documents(self, documents_file):
        """Unpickle and return the documents stored at ``documents_file``."""
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only point this at files you created yourself or otherwise trust.
        with open(documents_file, 'rb') as f:
            return pickle.load(f)

    def compute_embeddings(self, texts):
        """Return one mean-pooled embedding per text as a NumPy array.

        Args:
            texts: list of strings to embed.

        Returns:
            Array with one row per input text, averaged over the token axis of
            the model's ``last_hidden_state``.
        """
        inputs = self.embed_tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        with torch.no_grad():
            model_output = self.embed_model(**inputs)
        hidden = model_output.last_hidden_state
        # Average over real tokens only. With padding=True, shorter texts in a
        # batch are padded, and a plain mean over dim=1 would mix the padding
        # positions into the embedding. For a single text nothing is padded,
        # so this equals the plain mean.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1.0)  # guard against division by zero
        embeddings = (hidden * mask).sum(dim=1) / token_counts
        return embeddings.numpy()

    def cosine_search(self, query_embedding, k):
        """Return the FAISS label matrix for the ``k`` best matches of each query row.

        NOTE(review): the name says "cosine", but the metric is whatever the
        loaded index was built with -- confirm.
        """
        # FAISS expects a C-contiguous float32 matrix.
        query = np.ascontiguousarray(query_embedding, dtype=np.float32)
        _distances, indices = self.faiss_index.search(query, k)
        return indices

    def search(self, text, k):
        """Return up to ``k`` documents retrieved for ``text``.

        Args:
            text: query string.
            k: number of neighbours requested from the index.

        Returns:
            List of entries from ``self.docs``; shorter than ``k`` when the
            index yields fewer results.
        """
        query_embedding = self.compute_embeddings([text])
        retrieved_indexes = self.cosine_search(query_embedding, k)
        # FAISS pads missing results with the label -1. Indexing docs with -1
        # would silently return the last document, so those slots are skipped.
        return [self.docs[int(i)] for i in retrieved_indexes[0] if i >= 0]


# Retrieval settings: on-disk index and documents, plus the embedding checkpoint.
index_file = "data/faiss_index.bin"
documents_file = "data/documents.pkl"
model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Build the retriever (loads the model, the FAISS index and the pickled documents).
vec = Vector(model_name, index_file, documents_file)

# Retrieve the top-2 documents for a sample question.
ask = "What is the revenue of apple"
data = vec.search(text=ask, k=2)

  from .autonotebook import tqdm as notebook_tqdm


Vector


In [6]:
from langchain_core.prompts.prompt import PromptTemplate

class PromotEngineer:
    """Renders a prompt from a template with ``data`` and ``question`` placeholders."""

    def __init__(self, prompt_template):
        """Keep the raw template string and print "PE" as a construction marker."""
        print("PE")
        self.prompt_template = prompt_template

    def prompt(self, essentials):
        """Return the template filled in with the values from ``essentials``.

        Args:
            essentials: mapping whose items are passed as keyword arguments to
                ``PromptTemplate.format``; the template declares the variables
                ``data`` and ``question``.
        """
        template = PromptTemplate(
            template=self.prompt_template,
            input_variables=["data", "question"],
        )
        rendered = template.format(**essentials)
        return rendered



PROMPT_TEMPLATE = """Only few words answer
{data}
{question}
"""

prompt_setter = PromotEngineer(prompt_template=PROMPT_TEMPLATE)

# Use a separate name for the template inputs instead of rebinding `data`.
# Rebinding made the cell non-idempotent: a second run would wrap the previous
# dict inside a new one, and `data` would no longer hold the retrieved documents.
prompt_inputs = {
    "data": data,
    "question": ask
}

prompted_ask = prompt_setter.prompt(prompt_inputs)

PE


In [4]:
prompted_ask

'Only few words answer\n["Apple Inc.\'s total revenue in 2022 was $394.33 billion.", "Apple\'s Revenue in the last fiscal year was 385.71 billion USD."]\nWhat is the revenue of apple\n'

In [5]:
from langchain.llms import Ollama

class LLM:
    """Thin client that sends a prompt to a model served by Ollama."""

    def __init__(self, llm_url, llm_model_name):
        """Store the Ollama base URL and model name, then print "LLM"."""
        self.llm_url = llm_url
        self.llm_model_name = llm_model_name
        print("LLM")


    def ask_to_llm(self, ask):
        """Send the prompt string ``ask`` to the configured model and return its reply.

        A new ``Ollama`` client is created on every call from the stored URL
        and model name.
        """
        ollama = Ollama(base_url=self.llm_url, model=self.llm_model_name)
        # Calling the LLM object directly (`ollama(ask)`) goes through the
        # deprecated BaseLLM.__call__; `invoke` is the supported entry point
        # and returns the same completion string.
        return ollama.invoke(ask)


url = "http://localhost:11434"
# Use a dedicated name for the LLM model so the embedding checkpoint stored in
# `model_name` by the retrieval cell is not overwritten.
llm_model_name = "phi3"

llm = LLM(llm_url=url, llm_model_name=llm_model_name)

response = llm.ask_to_llm(prompted_ask)

LLM


  warn_deprecated(


In [7]:
response

'$394.33 billion.'

In [13]:
class Orchestration(Vector, PromotEngineer, LLM):
    """End-to-end pipeline: retrieve documents, build the prompt, query the LLM.

    Combines the three base classes so a single object exposes ``search``,
    ``prompt`` and ``ask_to_llm``.
    """

    def __init__(self, model_name, index_file, documents_file, prompt_template, llm_url, llm_model_name):
        """Initialise each base class with its own arguments.

        The bases take different constructor arguments and do not chain via
        ``super()``, so each ``__init__`` is invoked explicitly.
        """
        Vector.__init__(self, model_name=model_name, index_file=index_file, documents_file=documents_file)
        PromotEngineer.__init__(self, prompt_template=prompt_template)
        LLM.__init__(self, llm_url=llm_url, llm_model_name=llm_model_name)

    def run(self, ask, k=2):
        """Answer the question ``ask`` using retrieved context.

        Args:
            ask: the user's question.
            k: number of documents to retrieve as context (defaults to 2, the
                value that was previously hard-coded).

        Returns:
            Whatever ``ask_to_llm`` returns for the rendered prompt.
        """
        # Data retrieval from the vector index.
        data = self.search(ask, k)

        # Fill the prompt template with the retrieved context and the question.
        prompted_ask = self.prompt({"data": data, "question": ask})

        # Query the LLM through this instance. The previous code referenced the
        # module-level `orchestration` variable, which fails before that name
        # exists and targets the wrong object for any other instance.
        return self.ask_to_llm(prompted_ask)

# Example usage

# Retrieval settings
index_file = "data/faiss_index.bin"
documents_file = "data/documents.pkl"
model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Prompt settings
PROMPT_TEMPLATE = """Only few words answer
{data}
{question}
"""
ask = "What is the revenue of apple"

# LLM endpoint settings
llm_model_name = "phi3"
url = "http://localhost:11434"

# Arguments are passed positionally, in the order of Orchestration.__init__.
orchestration = Orchestration(
    model_name,
    index_file,
    documents_file,
    PROMPT_TEMPLATE,
    url,
    llm_model_name,
)

# Last expression of the cell, so the notebook displays the answer.
orchestration.run(ask)

# # Perform operations using the orchestration object
# data = orchestration.search(ask, 2)
# data_dict = {"data": data, "question": ask}
# prompted_ask = orchestration.prompt(data_dict)
# response = orchestration.ask_to_llm(prompted_ask)


Vector
PE
LLM


'$394.33 billion\n\n$385.71 billion'

In [11]:
response

'$394.33 billion'

In [None]:
``