In [1]:
import json

import requests


RULM_URL = 'https://api.rulm.alexkuk.ru/v1'


class RulmError(Exception):
    """Error raised by the RuLM client helpers.

    Raised when a request comes back with a non-200 status (the message is
    the response body) or when a streamed item carries an ``error`` field.
    """


def rulm_models():
    """Query the ``/models`` endpoint and return its decoded JSON body.

    Returns:
        Whatever JSON value the server sends back for ``GET {RULM_URL}/models``.

    Raises:
        RulmError: If the server answers with a status other than 200; the
            message is the raw response body.
    """
    response = requests.get(f'{RULM_URL}/models')
    # Same contract as the other helpers: report API failures as RulmError
    # rather than decoding (or failing to decode) an error payload.
    if response.status_code != 200:
        raise RulmError(response.text)

    return response.json()


def rulm_tokenize(text, model='saiga-7b-q4'):
    """POST ``text`` to the ``/tokenize`` endpoint and return the decoded JSON.

    Args:
        text: Value sent in the ``text`` field of the request body.
        model: Value sent in the ``model`` field of the request body.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RulmError: If the response status is not 200; the message is the
            raw response body.
    """
    payload = {'text': text, 'model': model}
    reply = requests.post(f'{RULM_URL}/tokenize', json=payload)
    if reply.status_code == 200:
        return reply.json()
    raise RulmError(reply.text)


def rulm_chat_complete_stream(messages, model='saiga-7b-q4', max_tokens=128, temperature=0.2):
    """Stream items from the ``/chat_complete`` endpoint.

    This is a generator: the HTTP request is sent when iteration starts, and
    each non-empty line of the response is decoded as JSON and yielded.

    Args:
        messages: Value sent in the ``messages`` field of the request body.
        model: Value sent in the ``model`` field.
        max_tokens: Value sent in the ``max_tokens`` field.
        temperature: Value sent in the ``temperature`` field.

    Yields:
        The decoded JSON object of each streamed line.

    Raises:
        RulmError: If the response status is not 200 (message is the response
            body), or if a streamed item has a truthy ``error`` field
            (message is that field).
    """
    payload = {
        'messages': messages,
        'model': model,
        'max_tokens': max_tokens,
        'temperature': temperature
    }
    # Using the response as a context manager releases the connection even
    # when the consumer stops iterating early or an error is raised mid-stream.
    with requests.post(f'{RULM_URL}/chat_complete', json=payload, stream=True) as response:
        if response.status_code != 200:
            raise RulmError(response.text)

        for line in response.iter_lines():
            # iter_lines() may yield empty keep-alive lines; json.loads would
            # fail on them, so skip them.
            if not line:
                continue
            item = json.loads(line)
            error = item.get('error')
            if error:
                raise RulmError(error)
            yield item


def show_rulm_stream(items):
    """Print a stream of items as they arrive and return the joined text.

    Items with a truthy ``text`` are printed without a trailing newline and
    collected. Otherwise, an item with ``prompt_progress`` is printed as a
    whole-number percentage followed by a space; once the progress equals 1,
    a blank line is printed after it.

    Args:
        items: Iterable of dict-like items exposing ``.get``.

    Returns:
        The concatenation of every truthy ``text`` value, in order.
    """
    chunks = []
    for item in items:
        text = item.get('text')
        if text:
            chunks.append(text)
            print(text, flush=True, end='')
            continue

        prompt_progress = item.get('prompt_progress')
        # An item with neither text nor progress (e.g. an empty text chunk)
        # has nothing to display; multiplying None by 100 would raise TypeError.
        if prompt_progress is None:
            continue

        print(f'{prompt_progress * 100:.0f}%', flush=True, end=' ')
        if prompt_progress == 1:
            print('\n', flush=True)
    return ''.join(chunks)


def rulm_chat_complete(messages, **kwargs):
    """Run a chat completion and return the whole generated text at once.

    Args:
        messages: Passed through to ``rulm_chat_complete_stream``.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``rulm_chat_complete_stream``.

    Returns:
        The concatenation of the truthy ``text`` values of all streamed items.
    """
    stream = rulm_chat_complete_stream(messages, **kwargs)
    # Items without text (or with empty text) contribute nothing.
    return ''.join(item['text'] for item in stream if item.get('text'))

In [2]:
# Ask a question through the streaming helper, printing progress and text
# as the items arrive.
prompt = '9 мая 2023 года это какой день недели?'
model = 'saiga-7b-q4'

items = rulm_chat_complete_stream(
    [prompt],
    model=model,
    max_tokens=64,
    temperature=0,
)
output = show_rulm_stream(items)

0% 24% 47% 71% 94% 100% 

Выходные данные: 9 мая 2023 года будет вторник.

In [3]:
# Same request through the collecting wrapper: the text is printed only
# after the whole stream has been consumed.
output = rulm_chat_complete(
    [prompt],
    model=model,
    max_tokens=64,
    temperature=0,
)
print(output)

Выходные данные: 9 мая 2023 года будет вторник.
