# Build and deploy LLM-based applications

In [None]:
from langchain.embeddings.base import Embeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import WikipediaLoader
from sentence_transformers import SentenceTransformer

# Directory the FAISS index is saved to and later loaded from.
FAISS_INDEX_PATH = "faiss_index_local"

# Wikipedia search queries; one loader per query, each capped at 20 documents.
topics = ["The Eras Tour", "2023 XFL season"]
loaders = [
    WikipediaLoader(query=topic, load_max_docs=20)
    for topic in topics
]

# Splitter configured with a chunk size of 300 and an overlap of 20, both
# measured with ``len`` (i.e. in characters).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300, chunk_overlap=20, length_function=len
)

In [None]:
from operator import add

# Load every topic's documents and flatten them into a single list. A nested
# comprehension works for any number of loaders, whereas a two-operand
# concatenation only works when there are exactly two of them.
docs = [doc for loader in loaders for doc in loader.load()]
print([d.metadata["title"] for d in docs])

# Split each page's text into chunks, passing the page's metadata alongside
# its text so the splitter can attach it to the resulting chunks.
chunks = text_splitter.create_documents(
    [doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs]
)

In [None]:
class LocalHuggingFaceEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter backed by a local SentenceTransformer.

    Both embedding methods return plain Python lists of floats so that the
    values match their declared return types.
    """

    def __init__(self, model_id):
        """Load the sentence-transformers model identified by ``model_id``."""
        self.model = SentenceTransformer(model_id)

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed a batch of texts.

        Args:
            texts: The strings to embed.

        Returns:
            One embedding vector per input text, as nested lists of floats.
        """
        # ``encode`` returns a NumPy array by default; ``tolist`` converts it
        # to the nested-list shape promised by the annotation.
        return self.model.encode(texts).tolist()

    def embed_query(self, text: str) -> list[float]:
        """Embed a single query string.

        Args:
            text: The query to embed.

        Returns:
            The embedding vector as a flat list of floats.
        """
        return self.model.encode(text).tolist()

In [None]:
# Embed every chunk with the local model, build a FAISS index from the
# result, and persist the index to disk.
embeddings = LocalHuggingFaceEmbeddings(model_id="multi-qa-mpnet-base-dot-v1")
db = FAISS.from_documents(documents=chunks, embedding=embeddings)
db.save_local(folder_path=FAISS_INDEX_PATH)

In [None]:
import torch
from typing import Optional, Any
from langchain import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from transformers import pipeline as hf_pipeline

In [None]:
class StableLMPipeline(HuggingFacePipeline):
    """``HuggingFacePipeline`` variant with fixed generation settings.

    This class is a temporary shim; the aim is to work with the LangChain
    authors so that it becomes unnecessary.
    """

    def _call(self, prompt: str, stop: Optional[list[str]] = None) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: The full prompt text sent to the pipeline.
            stop: Optional stop sequences. When given, the completion is cut
                at the earliest occurrence of any of them (the stop sequence
                itself is not included). Empty strings are ignored.

        Returns:
            The generated text with the leading prompt removed.
        """
        response = self.pipeline(
            prompt, temperature=0.1, max_new_tokens=256, do_sample=True
        )
        print(f"Response is: {response}")
        # Assumes ``generated_text`` starts with the prompt itself, so slicing
        # by the prompt length leaves only the newly generated continuation.
        text = response[0]["generated_text"][len(prompt) :]
        if stop:
            # Honour the caller's stop sequences instead of silently ignoring
            # them: truncate at whichever one appears first.
            cut_points = [
                position
                for position in (text.find(token) for token in stop if token)
                if position != -1
            ]
            if cut_points:
                text = text[: min(cut_points)]
        return text

    @classmethod
    def from_model_id(
        cls,
        model_id: str,
        task: str,
        device: Optional[str] = None,
        model_kwargs: Optional[dict] = None,
        **kwargs: Any,
    ):
        """Build the wrapper from a Hugging Face model id.

        Args:
            model_id: Model identifier passed to the transformers pipeline.
            task: Pipeline task name (e.g. ``"text-generation"``).
            device: Optional device passed through to the pipeline.
            model_kwargs: Optional keyword arguments forwarded to the pipeline
                as ``model_kwargs`` and stored on the instance.
            **kwargs: Extra keyword arguments forwarded to the class
                constructor.

        Returns:
            A new instance of ``cls`` wrapping the created pipeline.
        """
        pipeline = hf_pipeline(
            model=model_id,
            task=task,
            device=device,
            model_kwargs=model_kwargs,
        )
        return cls(
            pipeline=pipeline,
            model_id=model_id,
            model_kwargs=model_kwargs,
            **kwargs,
        )

In [None]:
# Prompt laid out with StableLM-Tuned's <|SYSTEM|> / <|USER|> / <|ASSISTANT|>
# tags. The user section carries the instructions, the retrieved context and
# the question; the text ends on the assistant tag. The string is sent to the
# model verbatim, so its whitespace (including trailing spaces) is significant.
template = """
<|SYSTEM|># StableLM Tuned (Alpha version)
- You are a helpful, polite, fact-based agent for answering questions. 
- Your answers include enough detail for someone to follow through on your suggestions. 
<|USER|>
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
Please answer the following question using the context provided. 

CONTEXT: 
{context}
=========
QUESTION: {question} 
ANSWER: <|ASSISTANT|>"""

# ``context`` and ``question`` are the two placeholders filled in at query time.
PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])

In [None]:
class QALocal:
    """Answer questions from the saved FAISS index with a local StableLM model."""

    def __init__(self):
        """Load the embedding model, the FAISS index, the LLM and the QA chain."""
        # Retrieval side: embedding model plus the index persisted on disk.
        self.embeddings = LocalHuggingFaceEmbeddings("multi-qa-mpnet-base-dot-v1")
        self.db = FAISS.load_local(FAISS_INDEX_PATH, self.embeddings)

        # Generation side: model-loading options handed to the pipeline.
        llm_kwargs = {
            "torch_dtype": torch.float16,
            "device_map": "auto",
            "cache_dir": "/mnt/local_storage",
        }
        self.llm = StableLMPipeline.from_model_id(
            model_id="stabilityai/stablelm-tuned-alpha-7b",
            task="text-generation",
            model_kwargs=llm_kwargs,
        )

        # "stuff" chain: the retrieved documents are passed to PROMPT together
        # with the question.
        self.chain = load_qa_chain(llm=self.llm, chain_type="stuff", prompt=PROMPT)

    def qa(self, query):
        """Retrieve documents similar to ``query`` and return the chain's answer.

        Prints the retrieved documents and the raw chain result along the way.
        """
        matches = self.db.similarity_search(query)
        print(f"Results from db are: {matches}")

        chain_inputs = {"input_documents": matches, "question": query}
        outcome = self.chain(chain_inputs)
        print(f"Result is: {outcome}")

        return outcome["output_text"]

In [None]:
# Build the QA object once; its constructor loads the index and both models.
local_qa = QALocal()

In [None]:
# A question unrelated to the two indexed topics; the prompt tells the model
# to say it doesn't know rather than make up an answer.
local_qa.qa("How many people live in San Francisco?")

In [None]:
# A question about the first indexed topic ("The Eras Tour").
local_qa.qa("When did Taylor Swift's Eras tour start?")

In [None]:
# A question about the second indexed topic ("2023 XFL season").
local_qa.qa("Can you tell me about the XFL 2023 season?")

In [None]:
# Drop the notebook's reference to the QA object (and the models it holds)
# so that they become eligible for cleanup.
del local_qa

In [None]:
from accelerate import Accelerator

# Ask Accelerate to free memory now that the QA object has been deleted.
# NOTE(review): see the Accelerator.free_memory documentation for exactly
# what is released — confirm it covers the model loaded above.
accelerator = Accelerator()
accelerator.free_memory()