In [3]:
import os
from pydantic import Extra
import requests
from typing import Any, List, Mapping, Optional

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

from langchain.prompts import PromptTemplate
# Run chain
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS # type: ignore
from langchain_ollama import OllamaEmbeddings # type: ignore
import warnings
warnings.filterwarnings('ignore')

In [4]:
os.environ["token"] = "Bearer token|123675a6-95f6-4fb7-bc95-30095472ae3a|02de311fd83421a7fd637bf34dc8f959caa29f39888d2919e4b9640a2220224b"
token = os.environ["token"]

In [5]:
from typing import ClassVar

parser = StrOutputParser()

class LlamaLLM(LLM):
    llm_url: ClassVar[str] = 'https://api.lab45.ai/v1.1/skills/completion/query'
    
    backend:        Optional[str]   = 'gpt-35-turbo-16k'
    temp:           Optional[float] = 0.7
    top_p:          Optional[float] = 0.1
    top_k:          Optional[int]   = 40
    n_batch:        Optional[int]   = 8
    n_threads:      Optional[int]   = 4
    n_predict:      Optional[int]   = 256
    max_tokens:     Optional[int]   = 256
    repeat_last_n:  Optional[int]   = 64
    repeat_penalty: Optional[float] = 1.18

    class Config:
        extra = Extra.forbid

    @property
    def _llm_type(self) -> str:
        return "gpt-35-turbo-16k"
    
    @property
    def _get_model_default_parameters(self):
        return {
            "max_tokens": self.max_tokens,
            #"n_predict": self.n_predict,
            "top_k": self.top_k,
            "top_p": self.top_p,
            "temperature": self.temp,
            #"n_batch": self.n_batch,
            #"repeat_penalty": self.repeat_penalty,
            #"repeat_last_n": self.repeat_last_n,
        }

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        if stop is not None:
            raise ValueError("stop kwargs are not permitted.")

        payload = {
        "messages": [
            {
            "content": prompt,
            "role": "user"
            }
        ],
        "skill_parameters": {
            "model_name": "gpt-35-turbo-16k",
            "max_output_tokens": 256,
            "temperature": 0,
            "top_k": 5
        },
        "stream_response": False
        }

        headers = {"Content-Type": "application/json","Authorization": token}

        response = requests.post(self.llm_url, json=payload, headers=headers, verify=False)
        response.raise_for_status()

        # print("API Response:", response.json())

        return response.json()  # get the response from the API

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "llmUrl": self.llm_url,
            'model_parameters': self._get_model_default_parameters
            }

In [6]:
llm = LlamaLLM()

In [7]:
""" #Testing
prompt = "[INST] Question: Who is Albert Einstein? \n Answer: [/INST]"
result = llm._call(prompt)
parsed_result = result['data']['content'] # type: ignore
parser.invoke(parsed_result)
 """

' #Testing\nprompt = "[INST] Question: Who is Albert Einstein? \n Answer: [/INST]"\nresult = llm._call(prompt)\nparsed_result = result[\'data\'][\'content\'] # type: ignore\nparser.invoke(parsed_result)\n '

In [8]:
prompt = ChatPromptTemplate.from_messages(
    [("system", "you are a bot {name}"), ("human", "{input}")]
)
chain = prompt | llm



In [10]:

# Build prompt

template = """[INST] <<SYS>>

Answer the question base on the context below.

<</SYS>>

Context: {context}
Question: {question}
Answer:
[/INST]"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)


qa_chain = RetrievalQA.from_chain_type(llm,
                                       verbose=True,
                                       retriever=None,
                                       #retriever=custom_retriever,
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})

ValidationError: 1 validation error for RetrievalQA
retriever
  Input should be a valid dictionary or instance of BaseRetriever [type=model_type, input_value=None, input_type=NoneType]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type

In [11]:
question = "Is probability a class topic?"
result = qa_chain({"query": question})
result["result"]


NameError: name 'qa_chain' is not defined