In [None]:
import subprocess
import sys

# Shell script that installs the llama dependencies. The path is relative, so it
# is resolved against the directory the kernel is running in.
llama_deps_script_path = "../session/install-deps/install_llama_deps.sh"

try:
    # check=True turns a non-zero exit status into CalledProcessError;
    # capture_output/text collect stdout and stderr as strings.
    result = subprocess.run(
        ["sh", llama_deps_script_path],
        capture_output=True,
        text=True,
        check=True,
    )
except subprocess.CalledProcessError as e:
    # Show everything the script printed before it failed, not just stderr,
    # then stop with the script's own exit status.
    if e.stdout:
        print("Output:", e.stdout)
    print("Error:", e.stderr)
    sys.exit(e.returncode)
else:
    print("Output:", result.stdout)
    # A successful run can still write warnings to stderr; do not drop them.
    if result.stderr:
        print("Stderr:", result.stderr)

In [None]:
!pip install -r ../session/install-deps/requirements.txt

In [None]:
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import warnings
warnings.filterwarnings("ignore")
import json
import os


# Model to load: the Hugging Face Hub repository id and the file inside that
# repository. Both are passed to hf_hub_download() by load_llama_model().
GEN_AI_MODEL_REPO = "TheBloke/Llama-2-13B-chat-GGUF"
GEN_AI_MODEL_FILENAME = "llama-2-13b-chat.Q5_0.gguf"

def load_llama_model(n_gpu_layers=64, n_ctx=2000):
    """Fetch the configured model file from the Hub and load it with llama-cpp.

    The file is identified by the module-level GEN_AI_MODEL_REPO and
    GEN_AI_MODEL_FILENAME constants.

    Args:
        n_gpu_layers: Forwarded to ``Llama(n_gpu_layers=...)``. Defaults to 64,
            the value this notebook previously hard-coded.
        n_ctx: Forwarded to ``Llama(n_ctx=...)``. Defaults to 2000, the value
            this notebook previously hard-coded.

    Returns:
        The constructed ``Llama`` instance.
    """
    gen_ai_model_path = hf_hub_download(
        repo_id=GEN_AI_MODEL_REPO,
        filename=GEN_AI_MODEL_FILENAME,
    )
    # Show where the downloaded file lives on disk.
    print("path is:")
    print(gen_ai_model_path)
    return Llama(
        model_path=gen_ai_model_path,
        n_gpu_layers=n_gpu_layers,
        n_ctx=n_ctx,
    )


# Load the model once when this cell runs; generate_response() uses this
# module-level name instead of reloading the model per request.
llama2_model = load_llama_model()

# Build a "question + context" prompt from the request and pass it to the LLM
def generate_response(json_input, model=None):
    """Answer ``prompt`` from ``context`` using the loaded Llama model.

    Args:
        json_input: The request, either as a mapping or as a JSON string/bytes
            that decodes to one. Keys read: ``prompt`` (str), ``context``
            (converted with ``str()``), ``temperature`` (converted with
            ``float()``) and ``max_tokens`` (converted with ``int()``).
            Any other keys are ignored.
        model: Optional callable used instead of the module-level
            ``llama2_model``. It is called as
            ``model(prompt=..., temperature=..., max_tokens=...)`` and must
            return a mapping whose ``['choices'][0]['text']`` is the answer.

    Returns:
        The generated text of the first choice. If anything goes wrong
        (missing key, bad value, model failure) the exception is printed and
        the exception object is *returned*, not raised, so callers that need
        to detect failure must check ``isinstance(result, Exception)``.
    """
    try:
        # Accept an already-decoded mapping as-is; only decode real JSON text.
        if isinstance(json_input, (str, bytes, bytearray)):
            data = json.loads(json_input)
        else:
            data = json_input

        # The prompt wording (including spacing) is part of what the model
        # sees, so it is kept exactly as it was.
        question = "Answer this question based on given context: " + data['prompt'] + " "
        context = " Here is the context: " + str(data['context'])
        question_and_context = question + context

        params = {
            "temperature": float(data['temperature']),
            "max_tokens": int(data['max_tokens'])
        }

        llm = llama2_model if model is None else model
        response = llm(prompt=question_and_context, **params)

        return response['choices'][0]['text']

    except Exception as e:
        # Best-effort contract kept for existing callers: report and hand back
        # the error rather than raising.
        print(e)
        return e


In [None]:
# Example request: the question, the context to answer it from, and the
# sampling settings. generate_response() does not read the "user" entry.
input_data = dict(
    prompt="What is Cloudera Machine Learning?",
    temperature=1,
    max_tokens=50,
    context="Cloudera Machine Learning is a platform for machine learning and analytics that runs in the public cloud or on-premises.",
    user="genius",
)

# Send the example through the model, then show what came back
result = generate_response(input_data)
print(result)