In [None]:
"""Wrapper around Oobabooga APIs."""
from typing import Any, Dict, List, Mapping, Optional

import requests
from pydantic import Extra, root_validator

from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from langchain.utils import get_from_dict_or_env

VALID_TASKS = ("text2text-generation", "text-generation")

class OobaboogaEndpoint(LLM):
    """LLM wrapper for an Oobabooga text-generation-webui HTTP endpoint.

    Each call POSTs a JSON generation request and returns the ``text`` of
    the first entry in the ``results`` list of the JSON response.

    Example:
        .. code-block:: python

            llm = OobaboogaEndpoint(
                endpoint_url="http://localhost:5000/api/v1/generate"
            )
    """

    endpoint_url: str = ""
    """Endpoint URL to use."""
    task: Optional[str] = None
    """Task name reported in the identifying parameters; not sent in the request."""
    model_kwargs: Optional[dict] = None
    """Key word arguments merged over the default generation parameters."""

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Return the field values unchanged; nothing is validated here."""
        return values

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        _model_kwargs = self.model_kwargs or {}
        return {
            **{"endpoint_url": self.endpoint_url, "task": self.task},
            **{"model_kwargs": _model_kwargs},
        }

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "oobabooga_endpoint"

    @staticmethod
    def _trim_trailing_stop(text: str, stopping_strings: List[str]) -> str:
        """Drop a stop marker left dangling at the very end of ``text``.

        A marker is a stopping string with surrounding whitespace removed,
        with or without its trailing colon (``"\\nObservation:"`` matches a
        trailing ``Observation:`` or ``Observation``). It is only removed
        when it stands on its own, i.e. is preceded by whitespace or is the
        whole text. If no marker matches, ``text`` is returned unchanged.
        """
        trimmed = text.rstrip()
        for stop_string in stopping_strings:
            marker = stop_string.strip()
            for candidate in (marker, marker.rstrip(":")):
                if not candidate or not trimmed.endswith(candidate):
                    continue
                head = trimmed[: -len(candidate)]
                if not head or head[-1].isspace():
                    return head.rstrip()
        return text

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Call out to the Oobabooga text-generation-webui endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating. They
                are added to the request's ``stopping_strings`` and the
                returned text is cut at the first occurrence of any of them.

        Returns:
            The string generated by the model, without a dangling stop
            marker at its end.

        Raises:
            ValueError: If the endpoint answers with a status other than 200.

        Example:
            .. code-block:: python

                llm = OobaboogaEndpoint(
                    endpoint_url="http://localhost:5000/api/v1/generate"
                )
                response = llm("Tell me a joke.")
        """
        # Default generation parameters; ``model_kwargs`` may override any.
        request = {
            'max_new_tokens': 1024,
            'do_sample': True,
            'temperature': 0.1,
            'top_p': 0.95,
            'typical_p': 1,
            'repetition_penalty': 1.0,
            'top_k': 40,
            'min_length': 0,
            'no_repeat_ngram_size': 0,
            'num_beams': 1,
            'penalty_alpha': 0,
            'length_penalty': 1,
            'early_stopping': False,
            'seed': -1,
            'add_bos_token': True,
            'truncation_length': 2048,
            'ban_eos_token': False,
            'skip_special_tokens': True,
            'stopping_strings': ["\nObservation:", "\n### Assistant:", "\n### Human:"],
        }
        request.update(self.model_kwargs or {})
        # Set last so model_kwargs can never replace the actual prompt.
        request['prompt'] = prompt

        stopping_strings = list(request.get('stopping_strings') or [])
        if stop:
            stopping_strings.extend(s for s in stop if s not in stopping_strings)
        request['stopping_strings'] = stopping_strings

        # Prefer the configured endpoint. Instances created without one keep
        # the previous behaviour of using the module-level ``URI``.
        url = self.endpoint_url or URI
        response = requests.post(url, json=request)

        if response.status_code != 200:
            # Fail loudly rather than reaching ``return`` with no result bound.
            raise ValueError(
                f"Oobabooga API request to {url} failed with status "
                f"{response.status_code}: {response.text}"
            )

        result = response.json()['results'][0]['text']
        if stop:
            result = enforce_stop_tokens(result, stop)
        return self._trim_trailing_stop(result, stopping_strings)
    
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import DirectoryLoader

import os

# Data folders live next to the current working directory's parent:
# abspath('') is the working directory and dirname() steps one level up.
ABS_PATH = os.path.dirname(os.path.abspath(''))
DB_DIR = os.path.join(ABS_PATH, "db")
FILE_DIR = os.path.join(ABS_PATH, "docs")

# Address of the text-generation-webui generate API.
HOST = "localhost:5000"
URI = f"http://{HOST}/api/v1/generate"

endpoint_url = URI

embeddings = HuggingFaceEmbeddings()

# Pass the endpoint explicitly so the wrapper's ``endpoint_url`` field (and
# its identifying parameters) reflect the server that is actually queried.
llm = OobaboogaEndpoint(endpoint_url=endpoint_url)

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# Load every .txt file under FILE_DIR (recursively) and split it into chunks.
loader = DirectoryLoader(FILE_DIR, glob="**/*.txt", show_progress=True)
docs = loader.load()
texts = text_splitter.split_documents(docs)

# Embed the chunks into a Chroma collection stored under DB_DIR.
db = Chroma.from_documents(
    texts,
    embeddings,
    collection_name="docstore",
    persist_directory=DB_DIR,
)

retriever = db.as_retriever()
# NOTE(review): confirm the Chroma retriever actually honours
# 'distance_metric'; it is kept here exactly as before.
retriever.search_kwargs['distance_metric'] = 'cos'
retriever.search_kwargs['k'] = 4

# "stuff" chain: retrieved chunks are placed into a single prompt for the LLM.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
)


In [None]:
# For interactive Q&A, read the question from stdin instead:
#   query = input("Enter query:")

# Run the retrieval chain on a single question and show its raw output.
ans = qa(
    {"query": "your query"},
)
print(ans)