In [1]:
from typing import Any, List, Mapping, Optional
import json

import requests

from operator import itemgetter
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

In [2]:
class CustomLLM(LLM):
    """LangChain ``LLM`` wrapper around an HTTP text-completions endpoint.

    Each call POSTs the prompt to ``api_url`` as a JSON body and returns the
    ``text`` of the first entry in the response's ``choices`` list.
    """

    # Forwarded verbatim as the "max_tokens" field of every request.
    max_tokens: int
    # URL that every completion request is POSTed to.
    api_url: str

    @property
    def _llm_type(self) -> str:
        """Return the identifier string for this LLM implementation."""
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to the completions endpoint and return the generated text.

        Args:
            prompt: Fully formatted prompt string, sent unchanged.
            stop: Must be ``None``; caller-supplied stop sequences are rejected
                because the request always uses the fixed ``["</s>"]`` stop list.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The ``text`` field of the first element of ``choices`` in the JSON
            response.

        Raises:
            ValueError: If ``stop`` is not ``None``.
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        if stop is not None:
            raise ValueError("stop kwargs are not permitted.")

        # Kept on purpose: the notebook relies on this to show the exact prompt sent.
        print("[DEBUG] prompt: ", prompt)

        payload = {
            "prompt": prompt,
            "stop": ["</s>"],
            "max_tokens": self.max_tokens,
        }
        # `json=` serializes the payload and sets the JSON Content-Type header.
        # The long timeout (seconds) is unchanged from the original request.
        r = requests.post(self.api_url, json=payload, timeout=1200)

        # Fail loudly on HTTP errors instead of surfacing a confusing KeyError
        # or JSON decoding error from an error page further down.
        r.raise_for_status()

        return r.json()["choices"][0]["text"]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "max_tokens": self.max_tokens,
            "api_url": self.api_url
        }

# Instantiate the wrapper for the completions endpoint with a 500-token cap.
model = CustomLLM(
    api_url="http://192.168.58.9:3000/v1/completions",
    max_tokens=500,
)

In [3]:
# Chat prompt layout: system instruction, then the prior turns injected through
# the "memory" placeholder, then the new human input.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "[INST] <<SYS>>You are a helpful chatbot <</SYS>>"),
        MessagesPlaceholder(variable_name="memory"),
        ("human", "[INST] {input} [/INST]"),
    ]
)

# Render the template once with an empty history to inspect the resulting messages.
messages = prompt.format_messages(memory=[], input="What is your name?")
messages

[SystemMessage(content='[INST] <<SYS>>You are a helpful chatbot <</SYS>>'),
 HumanMessage(content='[INST] What is your name? [/INST]')]

In [4]:
# Conversation buffer that will hold the running chat history.
memory = ConversationBufferMemory(return_messages=True)

# Load the memory variables and keep only the "history" entry.
history_loader = RunnableLambda(memory.load_memory_variables) | itemgetter("history")

# Pass the input through unchanged and attach the loaded history under "memory".
chain = RunnablePassthrough.assign(memory=history_loader)

inputs = dict(input="hi im bob")
chain.invoke(inputs)

{'input': 'hi im bob', 'memory': []}

In [5]:
# Full pipeline: attach history under "memory", format the chat prompt, call the model.
chain = (
    RunnablePassthrough.assign(
        memory=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
    )
    | prompt
    | model
)

In [6]:
# First turn: run the chain, then store the exchange in memory for later turns.
inputs = dict(input="hi im bob")
response = chain.invoke(inputs)
memory.save_context(inputs, dict(output=response))
response

[DEBUG] prompt:  System: [INST] <<SYS>>You are a helpful chatbot <</SYS>>
Human: [INST] hi im bob [/INST]


"  Hello Bob! *smiling* It's nice to meet you. Is there something I can help you with or would you like to chat?"

In [None]:
# NOTE(review): this cell carries no execution count, so it was presumably not
# run in the recorded session - running it adds one more turn to the history.
inputs = dict(input="please tell me my name")
response = chain.invoke(inputs)
memory.save_context(inputs, dict(output=response))
response

In [7]:
# Next turn: ask a general question, then store the exchange in memory.
inputs = dict(input="what is large language model?")
response = chain.invoke(inputs)
memory.save_context(inputs, dict(output=response))
response

[DEBUG] prompt:  System: [INST] <<SYS>>You are a helpful chatbot <</SYS>>
Human: hi im bob
AI:   Hello Bob! *smiling* It's nice to meet you. Is there something I can help you with or would you like to chat?
Human: [INST] what is large language model? [/INST]


"  Ah, a great question, Bob! A Large Language Model (LLM) is a type of artificial intelligence (AI) model that is trained on vast amounts of text data to generate language outputs that are coherent and natural-sounding. These models are designed to learn the patterns and structures of language, allowing them to produce text that is often indistinguishable from human-generated content.\n\nLLMs have been around for a while, but they've gained significant attention in recent years due to their impressive capabilities and potential applications. Some of the key features of LLMs include:\n\n1. **Generative capacity**: LLMs can generate text, images, or even audio, based on the input they receive. This makes them useful for a wide range of applications, such as language translation, content creation, and chatbots like me!\n2. **Improved accuracy**: As LLMs are trained on larger datasets, they become increasingly accurate in their predictions and generation capabilities. This is because they

In [8]:
# Recall check: ask about the earlier conversation, then store this exchange too.
inputs = dict(input="what did I ask you recently?")
response = chain.invoke(inputs)
memory.save_context(inputs, dict(output=response))
response

[DEBUG] prompt:  System: [INST] <<SYS>>You are a helpful chatbot <</SYS>>
Human: hi im bob
AI:   Hello Bob! *smiling* It's nice to meet you. Is there something I can help you with or would you like to chat?
Human: what is large language model?
AI:   Ah, a great question, Bob! A Large Language Model (LLM) is a type of artificial intelligence (AI) model that is trained on vast amounts of text data to generate language outputs that are coherent and natural-sounding. These models are designed to learn the patterns and structures of language, allowing them to produce text that is often indistinguishable from human-generated content.

LLMs have been around for a while, but they've gained significant attention in recent years due to their impressive capabilities and potential applications. Some of the key features of LLMs include:

1. **Generative capacity**: LLMs can generate text, images, or even audio, based on the input they receive. This makes them useful for a wide range of applications

"  AI: Oh, Bob! *chuckles* I'm just an AI, I don't have access to personal information like your name. But it's nice to chat with you, Bob! Is there something else you would like to talk about or ask?"