# Text-generation

### Setup modules and download model

In [None]:
!pip install llama-cpp-python
!pip install huggingface-hub

llama.cpp enables LLM inference with minimal setup and 
state-of-the-art performance on a wide variety of hardware.
   
References: 

- https://github.com/abetlen/llama-cpp-python?tab=readme-ov-file
- https://github.com/ggerganov/llama.cpp#build
- https://llama-cpp-python.readthedocs.io/en/latest/api-reference/
- https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md

In [None]:
from llama_cpp import Llama
import os
import random

In [None]:
# Make a directory to save models.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling os.mkdir().
os.makedirs('../models', exist_ok=True)

**Download a quantized model and move it into the `models` folder.**

You can find quantization versions here:

- https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF
- https://huggingface.co/QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF

Info about GGUF format and quantization versions:

https://huggingface.co/docs/hub/en/gguf

Info about Meta-Llama-3.1-8B-Instruct:

https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct

In [None]:
# Model settings: the Hugging Face repository, the GGUF file inside it, and the
# local folder that holds the downloaded weights.
repo_id = "QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF"
model_name = "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf"
# Keep the trailing slash: the model path is built by plain string concatenation.
model_directory = "../models/"

In [None]:
# Optional: uncomment the lines below to pull the model directly from the
# Hugging Face Hub when the file is not yet present in the models folder.
# if not os.path.exists(model_directory + model_name):
#     llm = Llama.from_pretrained(
#         repo_id=repo_id,
#         filename=model_name,
#         local_dir=model_directory,
#     )

### Load model

In [None]:
# Load the quantized GGUF model from the local models folder.
llm = Llama(
    model_path=f"{model_directory}{model_name}",
    n_threads=8,    # Number of threads to use during generation.
    n_ctx=2048,     # Size of the prompt context window.
    seed=-1,        # RNG seed, -1 for random.
    verbose=False,  # Do not print verbose output to stderr.
)

### Story parameters

In [None]:
# Set story parameters.
# These values feed the initial prompt: the scalar ones are inserted verbatim,
# while the lists are sampled with random.choice when the prompt is built.
topic = "happy horses"
prompt_user = "The horses should fly."
language_name = "English"

# Target story lengths in words; the trailing note maps each entry to
# [1, 3, 5, 10, 15, 20, 30] min (presumably reading time -- confirm).
word_count = ["150", "450", "750", "1500", "2250", "3000", "4500"]

# Candidate protagonist names. Every name appears exactly once so that
# random.choice picks each with equal probability (a duplicated "Jackson"
# entry was removed).
main_character = ["Liam", "Olivia", "Noah", "Emma", "Aiden", "Amelia", "Sophia", "Jackson", "Ava",
                  "Lucas", "Mohammed", "Fatima", "Ali", "Aisha", "Hassan", "Aya", "Yusuf", "Mei", "Hiroshi",
                  "Sakura", "Ethan", "Mia", "James", "Harper", "Benjamin", "Evelyn", "Elijah", "Abigail",
                  "Logan", "Emily", "Alexander", "Ella", "Sebastian", "Elizabeth", "William", "Sofia",
                  "Daniel", "Avery", "Matthew", "Scarlett", "Henry", "Grace", "Michael", "Chloe",
                  "Victoria", "Samuel", "Riley", "David", "Aria", "José", "María", "Juan", "Ana", "Mateo",
                  "Santiago", "Valentina", "Lucía"]

# Candidate story locations, phrased so they can follow "The story takes place ...".
setting = ["in the forest", "on an island", "on the moon", "in a medieval village", "under the sea", "in a magical kingdom",
           "in a jungle", "in a spaceship", "in a circus", "in a pirate ship", "in a futuristic city", "in a candy land"]

# Index into the keys of age_groups_authors (insertion order):
# 0: "0-2", 1: "2-5", 2: "5-7", 3: "7-12"
age_range = 2

# Authors whose writing style is used as a reference for each age group.
age_groups_authors = {
    "0-2": ["Eric Carle", "Sandra Boynton", "Margaret Wise Brown", "Karen Katz", "Leslie Patricelli"],
    "2-5": ["Dr. Seuss", "Julia Donaldson", "Beatrix Potter", "Maurice Sendak", "Eric Carle"],
    "5-7": ["Roald Dahl", "Mo Willems", "Dav Pilkey", "E.B. White", "Beverly Cleary"],
    "7-12": ["J.K. Rowling", "Rick Riordan", "Jeff Kinney", "Roald Dahl", "C.S. Lewis"]
}

# Themes for the moral lesson of the story.
moral = ["friendship", "diversity", "empathy", "respect", "courage", "honesty", "teamwork", "kindness", "integrity"]

### Prompt generation

In [None]:
# Set initial prompt.
# Builds the "meta prompt" that lists the key points the enhanced prompt must contain.
# - Story length is the first entry of word_count.
# - The age group is the age_range-th key of age_groups_authors; the reference
#   author is picked at random from that group's list.
# - Main character, setting and moral are also picked with random.choice, so
#   re-running this cell can produce a different prompt.
# NOTE: whitespace inside the triple-quoted f-string is part of the prompt text.
prompt_initial = f"""    
    Develop a prompt that enables large language models to create engaging and age-appropriate stories for children in {language_name}.
    Generate an enhanced prompt with the following key points and do not ignore these: 
    - Generate an entire story with approximately {word_count[0]} words for children aged {list(age_groups_authors.keys())[age_range]} about {topic} with a playful tone and narrative writing style like {random.choice(age_groups_authors[list(age_groups_authors.keys())[age_range]])}. 
    - {prompt_user}
    - Start with a meaningful title.
    - The main character is {random.choice(main_character)}. 
    - The story takes place {random.choice(setting)}.  
    - The story should be set in a world that is both familiar and unknown to the child reader. 
    - The story should incorporate a moral lesson about the importance of {random.choice(moral)}.
    - End the story with the saying: "The end!"
"""

In [None]:
# print() renders the line breaks of the multi-line prompt; a bare expression would show the escaped repr.
print(prompt_initial)

In [None]:
# Prompt generation: the system message restricts the model to writing prompts,
# and the user message carries the key points assembled in prompt_initial.
output = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": """
         You are an assistant specialized in creating prompts for large language models. 
         Your focus is on generating prompts that helps large language models craft stories specifically for children.
         Your task is to generate prompts exclusively. Do not write stories and do not ask questions.
         Just create the prompt within quotation marks and do not write something like: "Here is a prompt that meets the requirements" or "This prompt should enable the large language model to generate a story that meets all the requirements, including the tone, style, and key elements specified.".
         """},
        {"role": "user", "content": prompt_initial},
    ],
    # Optional sampling overrides -- uncomment individual lines to experiment.
    # temperature=0.9,     # Randomness of the generated text.
    # top_p=0.95,          # Sample only from tokens whose cumulative probability stays within P.
    # top_k=50,            # Sample only from the K most probable tokens.
    # min_p=0.05,          # https://github.com/ggerganov/llama.cpp/pull/3841
    # typical_p=1.0,       # https://arxiv.org/abs/2202.00666
    # repeat_penalty=1.1,  # Discourages repetitive or monotonous text (1.0 = disabled).
    # seed=-1,
)

# The reply text sits in the message of the first returned choice.
prompt = output["choices"][0]["message"]["content"]

In [None]:
# Show the enhanced prompt extracted from the model's reply.
print(prompt)

### Story generation

In [None]:
# Story generation: the system message sets the content and safety rules for
# the story writer, and the user message is the prompt produced in the previous step.
output_1 = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": """
         You are a creative story writing assistant dedicated to crafting appropriate stories for children. 
         Your goal is to write narratives with surprising twists and happy endings.
         Easy to follow and understand, with a clear beginning, middle, and end.  
         Use only child-appropriate sources, and ensure the content is gender-neutral, inclusive, and ethically sound. 
         Adhere to ethical guidelines and avoid perpetuating harmful biases.
         Ensure that all produced stories exclude content related to hate, self-harm, sexual themes, and violence.
         Only generate the story, nothing else and always begin with a title for the story. 
         Start directly with the title and do not write something like this: "Here is a 200-word story for children aged 2-5 with a playful tone:"
         """},
        {"role": "user", "content": prompt},
    ],
    # Optional sampling overrides -- uncomment individual lines to experiment.
    # temperature=0.9,     # Randomness of the generated text.
    # top_p=0.95,          # Sample only from tokens whose cumulative probability stays within P.
    # top_k=100,           # Sample only from the K most probable tokens.
    # min_p=0.05,          # https://github.com/ggerganov/llama.cpp/pull/3841
    # typical_p=1.0,       # https://arxiv.org/abs/2202.00666
    # repeat_penalty=1.1,  # Discourages repetitive or monotonous text (1.0 = disabled).
    seed=-1,  # Presumably "random seed", matching the note on the model's own seed -- confirm in the API reference.
)

# The story text sits in the message of the first returned choice.
story = output_1["choices"][0]["message"]["content"]

In [None]:
# Show the generated story.
print(story)

### Simple inference example

In [None]:
# Simple inference example: a plain text completion without chat roles.
# Calling the Llama object directly generates a completion (create_completion
# can be called instead).
output_2 = llm(
    "Listen children. Happy llamas don't spit! But, they",
    max_tokens=100,  # Set to None to generate up to the end of the context window.
    stop=["The end", "The rest is for tomorrow."],  # Generation stops just before either of these strings would be produced.
    echo=True,  # Echo the prompt back in the output.
)

# Text completions expose the generated text under "text". Only chat
# completions nest it under "message" -> "content", so indexing ["message"]
# here raised a KeyError.
story_2 = output_2["choices"][0]["text"]

In [None]:
# Show the text produced by the simple inference example.
print(story_2)

## Translation

In [None]:
# Translate the generated story to German with an explicit set of sampling parameters.
output_3 = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": """
        You are a translation assistant. You translate the English input into German.
         """},
        {"role": "user", "content": story},
    ],
    temperature=0.8,     # Randomness of the generated text.
    top_p=0.90,          # Sample only from tokens whose cumulative probability stays within P.
    top_k=40,            # Sample only from the K most probable tokens.
    min_p=0.05,          # https://github.com/ggerganov/llama.cpp/pull/3841
    typical_p=1.0,       # https://arxiv.org/abs/2202.00666
    repeat_penalty=1.0,  # Penalty against repetitive text; 1.0 = disabled.
)

# The translated text sits in the message of the first returned choice.
translation = output_3["choices"][0]["message"]["content"]

In [None]:
# Show the German translation of the story.
print(translation)

## General info about prompt engineering

https://promptdrive.ai/prompt-engineering/

https://www.megrisoft.com/blog/prompt-engineering-guide

https://www.youtube.com/watch?v=1c9iyoVIwDs

https://www.youtube.com/watch?v=jC4v5AS4RIM