In [1]:
## Imports
import os
import time

from typing import List

from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms import LlamaCpp 


## Define text feedback function
def generate_text(input_prompts: List[str],
                  system_prompt: str = "You are a helpful assistant.",
                  model_path: str = "./model/Meta-Llama-3-8B-Instruct-Q3_K_S.gguf",
                  temperature: float = 0.3,
                  n_ctx: int = 512,
                  max_tokens: int = 256) -> dict:
    """
    Generate text for a list of prompts using a local LlamaCpp model.

    The model is loaded once per call and every prompt is sent through the
    same system prompt / user prompt template.

    Args:
        input_prompts (List[str]): List of prompts to generate text for.
        system_prompt (str): System prompt for the model. Inserted verbatim,
            so literal curly braces are safe.
        model_path (str): Path to the model file.
        temperature (float): Sampling temperature for model output.
        n_ctx (int): Number of tokens in the context window.
        max_tokens (int): Maximum tokens to generate in a single call.

    Returns:
        dict: A dictionary with the keys
            "system_prompt" (str): the system prompt that was used,
            "prompts" (List[str]): the input prompts as passed in,
            "responses" (list): one generated response per prompt,
            "elapsed" (float): seconds spent in the batch generation call
                (model loading is not included).

    Raises:
        FileNotFoundError: If `model_path` does not exist.
    """

    ## Ensure model path exists
    # Explicit raise rather than `assert`: assertions are removed under
    # `python -O`, and this message names the offending path.
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file not found: {model_path}")

    ## Nothing to generate: skip the expensive model load entirely
    if len(input_prompts) == 0:
        return {"system_prompt": system_prompt,
                "prompts": input_prompts,
                "responses": [],
                "elapsed": 0.0}

    ## Set batch size (a quarter of the context window, but never 0)
    n_batch = max(1, n_ctx // 4)

    ## Load the LlamaCpp language model, adjust GPU usage based on your hardware
    llm = LlamaCpp(
        # Path to model
        model_path=model_path,
        # Number of tokens in the context window
        n_ctx=n_ctx,
        # Batch size for model processing
        n_batch=n_batch,
        # Maximum tokens to generate in a single call
        max_tokens=max_tokens,
        # Sampling temperature for model output
        temperature=temperature,
        # Tokens to indicate when to stop generating
        stop=["<|eot_id|>", "assistant\n\n"]
        )

    ## Define the prompt template
    # Both prompts are template variables, so braces inside `system_prompt`
    # or a user prompt are treated as plain text, not as placeholders.
    # NOTE(review): only the end-of-turn / assistant-header tokens are added
    # here; the system and user turns are not wrapped in Llama 3 header
    # tokens -- confirm this is the intended prompt format.
    prompt_end = "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
    template = ChatPromptTemplate.from_messages([
        ("system", "{system_prompt}"),
        ("human", "{user_input}" + prompt_end),
        ])

    ## Pipe the prompt template into the model
    chain = template | llm

    ## Invoke the chain on every prompt
    batch_prompts = [{"system_prompt": system_prompt, "user_input": p}
                     for p in input_prompts]

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()
    responses = chain.batch(batch_prompts)
    elapsed = time.perf_counter() - start

    ## Return output
    output = {"system_prompt": system_prompt,
              "prompts": input_prompts,
              "responses": responses,
              "elapsed": elapsed}
    return output


In [14]:

## Generate responses for basic prompts
# Path is relative to the project root (the notebook's working directory) and
# matches the default model_path of generate_text, so the notebook is not tied
# to one user's machine.
model1 = "./model/Meta-Llama-3-8B-Instruct-Q3_K_S.gguf"
basic_prompts = ["Tell me a joke", "Explain the big bang in simple terms"]
system_prompt = "You are a helpful assistant."
# NOTE(review): n_ctx (128) is smaller than max_tokens (256), so longer answers
# appear to be cut off once the context window fills -- presumably intentional
# for this "small" run; confirm.
basic_small = generate_text(basic_prompts,
                            system_prompt=system_prompt,
                            model_path=model1,
                            temperature=0.3,
                            n_ctx=128,
                            max_tokens=256)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 
Llama.generate: prefix-match hit


In [7]:
## Show the working directory that relative paths (such as the default model_path) resolve against
os.getcwd()

'C:\\Users\\pkress\\projects\\REL-DCPS-workshop\\REL-DCPS-NLP-public'

In [17]:
## Confirm that the model file passed to generate_text exists
# Check the same variable used in the call rather than a re-typed copy of the path.
os.path.exists(model1)

True

In [18]:

## Show the full result dict, then each response on its own
print(basic_small)
# Plain loop instead of a list comprehension: printing is a side effect, and a
# comprehension would build a throwaway list that is echoed as [None, None].
for response in basic_small["responses"]:
    print(response)

{'system_prompt': 'You are a helpful assistant.', 'prompts': ['Tell me a joke', 'Explain the big bang in simple terms'], 'responses': ["\n\nHere's one:\n\nWhy couldn't the bicycle stand up by itself?\n\n(Wait for it...)\n\nBecause it was two-tired!\n\nHope that made you smile!", '\n\nHello there!\n\nSo, you want to know about the Big Bang? Well, let me tell you all about it!\n\nThe Big Bang was the massive explosion that marked the beginning of our universe.\n\nAbout 13.8 billion years ago, a single point called the singularity existed. This singularity contained all the matter and energy that would eventually make up our universe.\n\nThen, suddenly and explosively, this singularity expanded rapidly, releasing an enormous amount of energy in the process.\n\nThis explosion marked the beginning of our'], 'elapsed': 31.375819206237793}


Here's one:

Why couldn't the bicycle stand up by itself?

(Wait for it...)

Because it was two-tired!

Hope that made you smile!


Hello there!

So, you

[None, None]

In [None]:

## Now read in the generate text function from the primary script
from primary.generate_text import generate_text