In [89]:
from typing import Optional

import json
import requests

# A flow of messages is defined as a segment in this system.
# Segments are stored in JSON format.

# A segment is a list of string-to-string dicts; the helpers below load it
# from and dump it to JSON files.
type segment = list[dict[str, str]]


def from_segment_file(filename: str) -> segment:
    """Load a segment from a JSON file.

    Args:
        filename: Path of the UTF-8 encoded JSON file to read.

    Returns:
        The value decoded from the file by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # The context manager closes the handle deterministically, even when
    # decoding fails, instead of leaving it to the garbage collector.
    with open(filename, "r", encoding="utf-8") as file:
        return json.load(file)


def make_segment_file(filename: str, segment: segment) -> None:
    """Write a segment to a JSON file, replacing any existing content.

    The output is indented by two spaces and non-ASCII characters are
    written as-is rather than escaped.

    Args:
        filename: Path of the file to create or overwrite (UTF-8).
        segment: The data to serialize.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``segment`` contains values ``json.dump`` cannot encode.
    """
    # The context manager guarantees the buffered output is flushed and the
    # handle is closed before returning, so readers see the complete file.
    with open(filename, "w", encoding="utf-8") as file:
        json.dump(segment, file, indent=2, ensure_ascii=False)


def to_segment_file(filename: str, segment: segment) -> None:
    """Append a segment to the one already stored in a JSON file.

    The current file content is read first, the new entries are concatenated
    after it, and the combined list is written back to the same file.

    Args:
        filename: Path of the existing segment file to extend.
        segment: Entries to add after the stored ones.
    """
    stored = from_segment_file(filename=filename)
    combined = stored + segment
    return make_segment_file(filename=filename, segment=combined)


# Messages are usually categorized into three types:
# system messages, user messages and assistant messages.


# A message is a single string-to-string dict; make_message below builds it
# with "role" and "content" keys.
type message = dict[str, str]
# A message list has exactly the same shape as a segment.
type message_list = segment


def make_message(role: str, content: str) -> message:
    """Build a message dict from a role and its text content.

    Args:
        role: Value stored under the ``"role"`` key.
        content: Value stored under the ``"content"`` key.

    Returns:
        A new dict with the keys ``"role"`` and ``"content"``, in that order.
    """
    return dict(role=role, content=content)


def make_system_message(content: str) -> message:
    """Wrap ``content`` in a message whose role is ``"system"``."""
    role = "system"
    return make_message(role, content)


def make_user_message(content: str) -> message:
    """Wrap ``content`` in a message whose role is ``"user"``."""
    role = "user"
    return make_message(role, content)


def make_assistant_message(content: str) -> message:
    """Wrap ``content`` in a message whose role is ``"assistant"``."""
    role = "assistant"
    return make_message(role, content)


# Any request to the LLM should be made with a message list.


def config_llm(
    temperature: Optional[float] = 0.4,
    max_tokens: Optional[int] = 512,
    max_buffer_tokens: Optional[int] = 1024,
) -> dict:
    """Bundle the LLM settings into a dict.

    Args:
        temperature: Stored under ``"temperature"``.
        max_tokens: Stored under ``"max_tokens"``.
        max_buffer_tokens: Stored under ``"max_buffer_tokens"``.

    Returns:
        A new dict holding the three values under the keys named above.
    """
    return dict(
        temperature=temperature,
        max_tokens=max_tokens,
        max_buffer_tokens=max_buffer_tokens,
    )


def get_completion_from_response(response: dict) -> str:
    """Return the message content of the first choice in ``response``.

    Raises:
        KeyError: If ``"choices"``, ``"message"`` or ``"content"`` is missing.
        IndexError: If the ``"choices"`` list is empty.
    """
    first_choice = response["choices"][0]
    return first_choice["message"]["content"]


def get_prompt_tokens_from_response(response: dict) -> int:
    """Return the ``"prompt_tokens"`` value from the response's ``"usage"``.

    Raises:
        KeyError: If ``"usage"`` or ``"prompt_tokens"`` is missing.
    """
    usage = response["usage"]
    return usage["prompt_tokens"]


def get_completion_tokens_from_response(response: dict) -> int:
    """Return the ``"completion_tokens"`` value from the response's ``"usage"``.

    Raises:
        KeyError: If ``"usage"`` or ``"completion_tokens"`` is missing.
    """
    usage = response["usage"]
    return usage["completion_tokens"]


# def get_response_params(message_list: message_list) -> dict:
#     source: str = "http://localhost:8000/v1/chat/completions"
#     headers: dict = {
#         "accept": "application/json",
#         "Content-Type": "application/json",
#     }
#     params: dict = {
#         "messages": message_list,
#         "model": "string",
#         "do_sample": True,
#         "stream": False,
#         "max_tokens": config_llm()["max_tokens"],
#         "temperature": config_llm()["temperature"],
#         "top_p": 0,
#         "n": 1,
#     }
#     response: dict = requests.post(
#         source,
#         headers=headers,
#         data=json.dumps(params),
#     ).json()
#     return {
#         "completion": get_completion_from_response(response),
#         "prompt_tokens": (prompt_tokens := get_prompt_tokens_from_response(response)),
#         "completion_tokens": (
#             completion_tokens := get_completion_tokens_from_response(response)
#         ),
#         "total_tokens": prompt_tokens + completion_tokens,
#     }


def get_response_params(message_list: message_list) -> dict:
    """Return a canned response for ``message_list`` without calling an LLM.

    The completion is always the string form of a one-element list holding a
    system message whose content is "0123456789". Token counts are
    approximated by character lengths of the stringified prompt and of the
    completion.

    Returns:
        A dict with the keys ``"completion"``, ``"prompt_tokens"``,
        ``"completion_tokens"`` and ``"total_tokens"``.
    """
    completion = str([make_system_message(content="0123456789")])
    prompt_length = len(str(message_list))
    completion_length = len(completion)
    return {
        "completion": completion,
        "prompt_tokens": prompt_length,
        "completion_tokens": completion_length,
        # Length of the concatenation equals the sum of both lengths.
        "total_tokens": prompt_length + completion_length,
    }


def get_completion_from_response_params(response_params: dict) -> str:
    """Return the value stored under ``"completion"`` in ``response_params``.

    Raises:
        KeyError: If the ``"completion"`` key is missing.
    """
    completion = response_params["completion"]
    return completion


# It is easy to implement the buffer memory technique from LangChain.


def get_total_tokens_from_response_params(message_list: message_list) -> int:
    """Return the ``"total_tokens"`` reported for ``message_list``.

    Note that this takes a message list, not response params: it calls
    ``get_response_params`` itself and reads the total from the result.
    """
    response_params = get_response_params(message_list)
    return response_params["total_tokens"]


def is_enough_buffer_tokens(message_list: message_list) -> bool:
    """Tell whether ``message_list`` fits inside the configured buffer.

    The total token count reported for the list plus the configured
    ``max_tokens`` must not exceed ``max_buffer_tokens``.
    """
    used_tokens = get_total_tokens_from_response_params(
        message_list=message_list,
    )
    reserved_tokens = config_llm()["max_tokens"]
    buffer_limit = config_llm()["max_buffer_tokens"]
    return used_tokens + reserved_tokens <= buffer_limit


def process_buffer_message_list(message_list: message_list) -> message_list:
    """Drop the oldest messages until the list fits in the token buffer.

    Messages are removed from the front one at a time while
    ``is_enough_buffer_tokens`` rejects the remaining list.

    Args:
        message_list: Messages ordered from oldest to newest.

    Returns:
        ``message_list`` itself when it already fits, otherwise a new list
        holding the longest suffix that fits. An empty list is returned when
        no non-empty suffix fits.
    """
    # Iterate instead of recursing: one stack frame per dropped message would
    # raise RecursionError on long histories, and would never terminate if
    # even the empty list were rejected by the buffer check.
    trimmed = message_list
    while trimmed and not is_enough_buffer_tokens(message_list=trimmed):
        trimmed = trimmed[1:]
    return trimmed


# Since large input length can break the context buffer,
# this situation is handled separately.


def is_enough_input_length(message_list: message_list) -> bool:
    """Tell whether the last message is short enough to be sent.

    The character length of the last message's ``"content"`` is compared
    against the configured ``max_tokens``.

    Raises:
        IndexError: If ``message_list`` is empty.
    """
    latest_length = len(message_list[-1]["content"])
    return latest_length <= config_llm()["max_tokens"]


def process_buffer(message_list: message_list) -> str:
    """Return the completion for ``message_list`` after buffer trimming.

    The length of the last message is checked on the list as given, before
    any trimming. If it is too long, an error string is returned instead of
    a completion. Otherwise the list is trimmed with
    ``process_buffer_message_list`` and the completion obtained for the
    trimmed list is returned.
    """
    # Guard first: the input-length check runs on the untrimmed list.
    if not is_enough_input_length(message_list=message_list):
        return "[ERROR] not enough input length!"

    trimmed = process_buffer_message_list(message_list=message_list)
    response_params = get_response_params(message_list=trimmed)
    return get_completion_from_response_params(response_params)

In [90]:
resp = get_response_params(message_list=[make_user_message("Hello")])

In [91]:
resp

{'completion': "[{'role': 'system', 'content': '0123456789'}]",
 'prompt_tokens': 38,
 'completion_tokens': 45,
 'total_tokens': 83}

In [101]:
lst = [make_user_message("1234567890")] * 100

In [102]:
lst

[{'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '12

In [103]:
resp = process_buffer(message_list=lst)
resp

"[{'role': 'system', 'content': '0123456789'}]"

In [104]:
process_buffer_message_list(message_list=message_list)

[{'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'},
 {'role': 'user', 'content': '1234567890'}]

In [105]:
get_total_tokens_from_response_params(message_list=message_list)

475

In [99]:
get_total_tokens_from_response_params(
    message_list=process_buffer_message_list(message_list=message_list)
)

475

In [100]:
message_list[-1]["content"]

'1234567890'

In [121]:
lst = [1, 2, 3, 4, 5]
lst[2:]

[3, 4, 5]

In [120]:
lst.pop(0)

1