# Import Libraries

In [1]:
from __future__ import annotations
from typing import Iterable
import gradio as gr
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes

from llama_cpp import Llama

# Load the GPT4All model

In [2]:
# Load the quantized GPT4All weights from the local ./models directory.
# The seed is pinned to 0 (presumably for reproducible sampling -- confirm
# against the llama-cpp-python documentation).
llm = Llama(
    seed=0,
    model_path="./models/gpt4all-lora-quantized-ggml.bin",
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 


# Define the prompt template and the streaming generation function

In [3]:
# Prompt template: the user's instruction is substituted for the single
# `{}` placeholder via str.format, and the text ends right after the
# "### Response:" header line.
ins = (
    "### Instruction:\n"
    "{}\n"
    "### Response:\n"
)

def generate(instruction, max_tokens=256, temperature=0.75, top_p=0.9, repeat_penalty=1.1, top_k=40):
    """Stream a model response for ``instruction``.

    The instruction is inserted into the module-level ``ins`` prompt
    template and sent to the module-level ``llm`` in streaming mode.

    Args:
        instruction: User question/instruction placed into the prompt.
        max_tokens: Forwarded to ``llm`` as ``max_tokens``.
        temperature: Forwarded to ``llm`` as ``temperature``.
        top_p: Forwarded to ``llm`` as ``top_p``.
        repeat_penalty: Forwarded to ``llm`` as ``repeat_penalty``.
        top_k: Forwarded to ``llm`` as ``top_k``.

    Yields:
        The cumulative response text after each streamed chunk, so a
        consumer can simply display the latest yielded value.
    """
    prompt = ins.format(instruction)
    stream = llm(
        prompt,
        max_tokens=max_tokens,
        # Must be forwarded explicitly: without it the model falls back to
        # its own default and the caller's temperature has no effect.
        temperature=temperature,
        top_p=top_p,
        repeat_penalty=repeat_penalty,
        top_k=top_k,
        # Generation is cut off if the model starts a new instruction
        # header or emits the end marker.
        stop=['### Instruction:', '### End'],
        stream=True,
    )

    result = ""
    for chunk in stream:
        result += chunk['choices'][0]['text']
        yield result

# Gradio Interface

In [17]:
# Build the Gradio UI: a question box, sampling controls, an answer panel
# and a "Generate" button, all wired to the streaming `generate` function.
with gr.Blocks(
    theme=gr.themes.Soft(
        font=[fonts.GoogleFont("Quicksand"), "ui-sans-serif", "sans-serif"]
    ),
    analytics_enabled=False,
) as gpt4all_interface:
    with gr.Column():
        # Introductory text shown at the top of the page.
        gr.Markdown(
            """ ## GPT4ALL
            
            An ecosystem of open-source chatbots trained on a massive collections of clean assistant data including code, stories and dialogue
            
            Type your question in the 'Question box' below and click the 'Generate' button to generate answers to your questions!
            
      """
        )

        with gr.Row():
            with gr.Column(scale=3):
                # Primary inputs: the question and the two main sampling knobs.
                instruction = gr.Textbox(
                    placeholder="Enter your question here",
                    label="Question",
                    elem_id="q-input",
                )
                max_tokens = gr.Slider(
                    10,
                    512,
                    label="Max Tokens",
                    interactive=True,
                    step=2,
                    value=250,
                    info="Length of sequences/tokens to be generated",
                )
                temperature = gr.Slider(
                    0.1,
                    1.0,
                    label="Creativity Quotient",
                    interactive=True,
                    step=0.02,
                    value=0.8,
                    info="Temperature for sampling - Higher the temperature, higher the randomness in content",
                )

                # Less frequently tuned sampling parameters, collapsed by default.
                with gr.Accordion(label='Advanced options', open=False):
                    top_p = gr.Slider(
                        0.0,
                        1.0,
                        label="Top P",
                        step=0.02,
                        value=0.95,
                        info="Top-p value for sampling",
                    )
                    repeat_penalty = gr.Slider(
                        0.0,
                        3.0,
                        label="Repeat Penalty",
                        step=0.1,
                        value=1.1,
                        info="Penalty for repeated tokens",
                    )
                    top_k = gr.Slider(
                        0,
                        80,
                        label="Top K",
                        step=2,
                        value=40,
                        info="Top_k value for sampling",
                    )

                # Answer panel: a heading plus the Markdown area that
                # receives the generated text.
                with gr.Box():
                    gr.Markdown("**Answer**")
                    output = gr.Markdown(elem_id="q-output")
                submit = gr.Button("Generate", variant="primary")

    # Clicking the button and pressing Enter in the question box both run
    # `generate` with the same inputs and write to the same output.
    for register_event in (submit.click, instruction.submit):
        register_event(
            generate,
            inputs=[instruction, max_tokens, temperature, top_p, repeat_penalty, top_k],
            outputs=[output],
        )

# Enable the request queue before launching; concurrency_count=1 presumably
# keeps a single generation running at a time -- confirm against Gradio docs.
gpt4all_interface.queue(concurrency_count=1).launch(
    debug=True,
    inbrowser=True,
)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.


