In [None]:
# Install all the required packages -- takes about 2-3 minutes.
#
# The first command sets CMAKE_ARGS="-DLLAMA_CUBLAS=on" and FORCE_CMAKE=1 for the
# pip invocation and pins llama-cpp-python and numpy; --force-reinstall and
# --no-cache-dir make pip reinstall both rather than reuse installed/cached copies.
# NOTE(review): the cuBLAS flag presumably yields a GPU-enabled build, which the
# n_gpu_layers setting used when loading the model would rely on -- confirm.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.1.78 numpy==1.23.4 --force-reinstall --upgrade --no-cache-dir --verbose
!pip install huggingface_hub
# NOTE(review): the next two commands request the same pinned versions as the
# first command, so they are presumably no-ops -- confirm before removing.
!pip install llama-cpp-python==0.1.78
!pip install numpy==1.23.4

# Text-to-speech package, pinned; -U upgrades if needed and -qq quiets pip output.
!pip install -Uqq WhisperSpeech==0.7.2

# NOTE(review): gradio (like huggingface_hub above) is installed without a
# version pin, so the resolved version can differ between runs.
!pip install gradio

In [None]:
# Import All the Required Libraries
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

import torch
import torch.nn.functional as F
from whisperspeech.pipeline import Pipeline

import gradio as gr


In [None]:
# Hugging Face repository and file name of the Llama-2 chat model to use
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
# model_name_or_path = "meta-llama/Meta-Llama-3-8B"
model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"  # the model is in bin format

# Download the model file; the returned path is what Llama() loads below
model_path = hf_hub_download(
    filename=model_basename,
    repo_id=model_name_or_path,
)

# Load the LLM with GPU offloading -- takes about 1 min.
# The name is reset first, presumably so that re-running this cell drops the
# previously loaded model before a new one is created.
lcpp_llm = None
lcpp_llm = Llama(
    model_path=model_path,
    n_gpu_layers=32,  # change this value based on your model and your GPU VRAM pool
    n_batch=512,      # should be between 1 and n_ctx; consider the amount of VRAM in your GPU
    n_threads=2,      # CPU cores
)

# Show the number of GPU layers recorded in the model parameters
print(lcpp_llm.params.n_gpu_layers)

In [None]:
# Create the WhisperSpeech text-to-speech pipeline -- takes about 30sec
pipe = Pipeline(
    s2a_ref="collabora/whisperspeech:s2a-q4-tiny-en+pl.model",
)


In [None]:
def generate_story(prompt):
  """Generate a story for ``prompt`` and narrate it into a WAV file.

  The LLM is called with ``echo=True``, so the returned text starts with the
  prompt itself. The echoed prompt is removed explicitly, and then everything
  up to the first blank line of the completion (if there is one) is dropped,
  which matches the original "text after the first blank line" behaviour for
  single-paragraph prompts.

  Args:
    prompt: Text typed by the user; forwarded unchanged to ``lcpp_llm``.

  Returns:
    A ``(story, audio_path)`` tuple. ``audio_path`` is ``'testing2.wav'`` when
    audio was generated and ``None`` when the model produced no story text
    (in that case no audio file is written).
  """
  response = lcpp_llm(prompt=prompt, max_tokens=256, temperature=0.5, top_p=0.95,
                      repeat_penalty=1.2, top_k=150,
                      echo=True)
  full_text = response["choices"][0]["text"].strip()

  # Strip the echoed prompt by prefix rather than relying only on a blank line:
  # a prompt that itself contains a blank line would otherwise leak into the
  # story, and a completion without any blank line would yield an empty story.
  echoed = prompt.strip()
  if echoed and full_text.startswith(echoed):
    completion = full_text[len(echoed):]
  else:
    completion = full_text

  # Keep only what follows the first blank line of the completion; when the
  # model emitted no blank line, use the whole completion instead of nothing.
  _, separator, story = completion.partition("\n\n")
  if not separator:
    story = completion.strip()

  if not story:
    # Nothing to narrate: skip speech synthesis and report no audio.
    return story, None

  audio_path = 'testing2.wav'
  with torch.no_grad():
    # A custom voice can be passed via `speaker=` (left disabled), e.g.
    # speaker='https://upload.wikimedia.org/wikipedia/commons/7/75/Winston_Churchill_-_Be_Ye_Men_of_Valour.ogg'
    pipe.generate_to_file(audio_path, story)
  return story, audio_path


In [None]:
# Define CSS styles for the Gradio app; this string is handed to
# gr.Blocks(css=...) when the demo is built.
# It sets a full-page background image on .gradio-container, styling rules for
# .gr-textbox / .gr-body / .center-text, and a set of grid helper classes
# (.grid, .full-width, .wrapper, .left-edge, .right-wrapper).
# NOTE(review): inside .gr-textbox, "#width: ..." is not CSS comment syntax and
# "padding:100" has no unit -- presumably the browser ignores both
# declarations; confirm the intended values.
# NOTE(review): the components created in this notebook are not given any of
# these class names explicitly (no elem_classes) -- verify the selectors match.
css = """
div {justify-content: center; /* Center elements horizontally */}
.gradio-container {
  background-image: url("https://images.unsplash.com/reserve/LJIZlzHgQ7WPSh5KVTCB_Typewriter.jpg?q=80&w=1896&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D");
  background-size: cover;
  background-repeat: no-repeat;
  background-position: center;  /* Optional: adjust image position */
  justify-content: center;

}

.gr-textbox {
  #width: 100px;  /* Set width to 300 pixels */
  opacity: 0.2;
  background: none;
  top:5500px;
  padding:100;
  margin: 30px;
  justify-content: center;
}
.gr-body {
  margin: 30px;
}
.center-text {
text-align: center;
justify-content: center;
align-items: center;
}
.grid {
  display: grid;
  grid-template-columns: minmax(20px, 1fr) repeat(6, minmax(0, 60px)) minmax(20px, 1fr);
  grid-gap: 10px;
}

.full-width {
  grid-column: 1 / -1;
}

.wrapper {
  grid-column: 2 / -2;
}

.left-edge {
  grid-column: 1 / -2;
}

.right-wrapper {
  grid-column: 4 / -2;
}
"""


In [None]:
# Build the Gradio demo app: a prompt box and button on the left, the generated
# story text and its narration on the right.
# The original passed `js=js` to gr.Blocks, but no `js` variable is defined
# anywhere in this notebook, so the cell raised NameError on a fresh kernel.
# Define a `js` string and pass it back in here if custom JavaScript is needed.
with gr.Blocks(css=css) as demo:
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(lines=3, label="Enter your prompt!", min_width=100)
            btn = gr.Button("Generate story")
        with gr.Column():
            story_text = gr.Textbox(label="Here's your story")
            story_audio = gr.Audio(label="Listen to your story!")

    # generate_story returns (story text, audio file path), matching the
    # order of the two output components.
    btn.click(
        generate_story,
        inputs=prompt,
        outputs=[story_text, story_audio],
    )
demo.launch()