In [None]:
from langchain_community.llms import LlamaCpp

import abc


In [None]:

class Model(abc.ABC):
    """Abstract interface for a text-generation backend.

    Subclasses implement :meth:`send_request`, which takes a prompt and
    returns the generated text.
    """

    @abc.abstractmethod
    def send_request(self, X, break_word: str = " name:") -> str:
        """Generate a completion for the prompt ``X``.

        Args:
            X: Prompt text sent to the model.
            break_word: Marker at which generation should stop.

        Returns:
            The generated text.
        """
        raise NotImplementedError()


class LlamaCPP(Model):
    """:class:`Model` implementation backed by LangChain's ``LlamaCpp`` wrapper."""

    # Prompts with more tokens than this are rejected by ``send_request``.
    MAX_PROMPT_TOKENS = 1500

    # Substrings removed, in this order, once the break word has been seen.
    _NAME_MARKERS = (" ; name:", ";name:", " ;name:", "; name:", " name:", "name:")

    # Separator variants removed, in this order, from every response.
    # Longer variants come first so surrounding spaces go with the semicolon.
    _SEPARATORS = ("  ;  ", "  ; ", "  ;", " ; ", "; ", " ;", ";")

    def __init__(self, model_path="models/llama-13b-hf_q8_0.gguf", n_gpu_layers=41, n_batch=1024, n_ctx=2048) -> None:
        """Store the settings and build the underlying ``LlamaCpp`` model.

        Args:
            model_path: Path of the model file, forwarded to ``LlamaCpp``.
            n_gpu_layers: Forwarded to ``LlamaCpp`` unchanged.
            n_batch: Forwarded to ``LlamaCpp`` unchanged.
            n_ctx: Forwarded to ``LlamaCpp`` unchanged.
        """
        super().__init__()
        self.model_path = model_path
        self.n_gpu_layers = n_gpu_layers
        self.n_batch = n_batch
        self.n_ctx = n_ctx

        self.model = LlamaCpp(
            model_path=model_path,
            n_gpu_layers=n_gpu_layers,
            n_batch=n_batch,
            n_ctx=n_ctx,
            f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
            verbose=True,
        )

    def send_request(self, X, break_word: str = " name:") -> str:
        """Stream a completion for ``X`` and stop once ``break_word`` appears.

        The accumulated output is searched for ``break_word`` after every
        streamed chunk, so the marker is found however the model splits it
        across chunks. Anything generated after the marker is discarded.

        Args:
            X: Prompt text.
            break_word: Marker that ends generation. An empty string disables
                early stopping.

        Returns:
            The generated text with the marker and all ``;`` separators
            (plus adjacent spaces, per ``_SEPARATORS``) removed.

        Raises:
            ValueError: If the prompt has more than ``MAX_PROMPT_TOKENS`` tokens.
        """
        tok_len = self.model.get_num_tokens(X)
        print(tok_len)
        if tok_len > self.MAX_PROMPT_TOKENS:
            raise ValueError(
                f"Request exceeds preferable {self.MAX_PROMPT_TOKENS} tokens. Has: {tok_len}"
            )

        res = ""
        for token in self.model.stream(X, echo=False):
            res += token
            if not break_word:
                continue
            # Earlier text was already checked, so only the tail that the new
            # chunk could have completed needs to be scanned.
            search_from = max(0, len(res) - len(token) - len(break_word) + 1)
            cut = res.find(break_word, search_from)
            if cut == -1:
                continue
            # Drop anything the chunk carried past the marker, then strip the
            # marker together with the separator that precedes it.
            res = res[: cut + len(break_word)]
            for marker in self._NAME_MARKERS:
                res = res.replace(marker, "")
            # No-op for the default marker; removes a custom one.
            res = res.replace(break_word, "")
            break

        for separator in self._SEPARATORS:
            res = res.replace(separator, "")
        return res

    def get_info(self) -> dict:
        """Return the settings this instance was created with.

        The ``"model"`` entry is a fixed label; it is not derived from
        ``model_path``.
        """
        return {
            "model": "LLama-13b-hf-q8_0",
            "model_path": self.model_path,
            "n_gpu_layers": self.n_gpu_layers,
            "n_batch": self.n_batch,
            "n_ctx": self.n_ctx,
        }


In [None]:
# test libs

from pprint import pprint
import pandas as pd
import numpy as np
# from transformers import LlamaTokenizerFast

In [None]:
# test request

# Load the table whose rows are turned into prompts by the cells below.
# NOTE(review): compose() reads columns by position (2, 3, 4) — confirm the CSV column order.
df = pd.read_csv("train.csv")

def compose(gdf):
    """Build a few-shot prompt from the rows of ``gdf``.

    Columns are read by position: 2 is used as the name, 4 as the
    ingredients and 3 as the preparation. Every row except the last is
    rendered in full; the last row ends with an empty ``preparation:`` slot
    for the model to fill in. Entries are joined with ``" ; "``.
    """
    rows = gdf.values
    entries = []
    for row in rows[:-1]:
        entries.append(f"name: {row[2]} ; ingredients: {row[4]} ; preparation:{row[3]}")
    target = rows[-1]
    entries.append(f"name: {target[2]} ; ingredients: {target[4]} ; preparation:")
    return " ; ".join(entries)

# req = compose(df.iloc[[11, 15, 112, 122, 133, 144, 155, 8]])
# req = "The second planet in the solar system is "

In [None]:
# Create the wrapper with its default arguments; its constructor builds the underlying LlamaCpp model.
model = LlamaCPP()


In [None]:
import random

# Pick 4 random row positions (duplicates allowed) and build a prompt from them.
# randrange excludes len(df); randint(0, len(df)) includes it, which is one
# past the last valid position and makes df.iloc raise IndexError.
sample_positions = [random.randrange(len(df)) for _ in range(4)]
req = compose(df.iloc[sample_positions])
print(req)
res = model.send_request(req)  # example send_request call
print(res)

In [None]:
import random
import time

# Number of prompt/response rounds to time.
I_ITER = 100


reqs = []
ress = []

# perf_counter is monotonic, so the measurement is unaffected by system clock changes.
start = time.perf_counter()
for _ in range(I_ITER):
    # randrange excludes len(df); randint(0, len(df)) could return an
    # out-of-range position and make df.iloc raise IndexError.
    sample_positions = [random.randrange(len(df)) for _ in range(8)]
    req = compose(df.iloc[sample_positions])

    # NOTE: send_request raises when the prompt is over its token limit,
    # which aborts the whole run.
    res = model.send_request(req)  # example send_request call

    reqs.append(req)
    ress.append(res)
    print("----------------------------------------------------------------------------------------------------------------------------------------")
    print(req)
    print(res)
end = time.perf_counter()
# Mean seconds per round (prompt building, generation and printing).
print((end - start)/I_ITER)


In [None]:
# Rebind `df` to an empty DataFrame, dropping the reference to the loaded CSV data.
# NOTE(review): presumably done to free memory — confirm. Once this has run, the
# sampling cells above fail until the CSV is loaded again.
df = pd.DataFrame()

In [None]:
# Show the list of collected model responses as the cell output.
ress