<a href="https://colab.research.google.com/github/priteshshah96/HaikuGenerator/blob/main/haiku_generator_running.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Colab setup: mount Google Drive, create the model folder and install dependencies.
# NOTE: later cells are labelled "after the runtime restart", so restart the
# runtime once this cell has finished and before running the rest of the notebook.
from google.colab import drive
import os

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

# Folder in Google Drive where the model is saved
model_save_path = '/content/drive/My Drive/HaikuGenerator'

# Create the folder if needed; exist_ok=True makes this safe to re-run
os.makedirs(model_save_path, exist_ok=True)

# Install additional Python packages
!pip install huggingface_hub sentence-transformers
# Build llama-cpp-python from the robgon-art fork with the LLAMA_CUBLAS CMake option switched on
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install git+https://github.com/robgon-art/llama-cpp-python.git --force-reinstall --upgrade --no-cache-dir --verbose
# Replace whichever numpy is installed at this point with numpy 1.25.2 (kept last on purpose)
!pip uninstall -y numpy
!pip install numpy==1.25.2


Mounted at /content/drive
Collecting huggingface_hub
  Downloading huggingface_hub-0.19.1-py3-none-any.whl (311 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.1/311.1 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence-transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers<5.0.0,>=4.6.0 (from sentence-transformers)
  Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece (from sentence-transformers)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31

In [1]:
# Redefine the model save path: the runtime restart cleared the variable set in the setup cell
model_save_path = '/content/drive/My Drive/HaikuGenerator'


In [2]:
# Imported in this cell because the runtime was restarted after the setup cell
from huggingface_hub import hf_hub_download

# Hugging Face repository and the GGML weights file to fetch from it
model_name_or_path = "robgonsalves/llama-2-13b-deep-haiku-GGML"
model_basename = "llama-2-13b-deep-haiku.ggml.q5_k_m.ggml"

# Fetch the weights, using the Google Drive folder as the download cache;
# the returned value is the local path of the downloaded file.
model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename,
    cache_dir=model_save_path,
)


Downloading (…)iku.ggml.q5_k_m.ggml:   0%|          | 0.00/9.23G [00:00<?, ?B/s]

In [3]:
# Hard-coded location of the GGML file inside the Google Drive cache folder.
# NOTE(review): when the download cell has been run, this overwrites the path it
# returned; presumably this lets a later session skip the download — confirm.
# The path embeds a snapshot id, so it must be updated if a different revision is downloaded.
model_path = '/content/drive/My Drive/HaikuGenerator/models--robgonsalves--llama-2-13b-deep-haiku-GGML/snapshots/f96462bed5819e070e227e03ae2fd6763cd794e7/llama-2-13b-deep-haiku.ggml.q5_k_m.ggml'


In [4]:
!pip install langchain
!pip install llama_index huggingface_hub sentence-transformers



Collecting langchain
  Downloading langchain-0.0.335-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.2-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langsmith<0.1.0,>=0.0.63 (from langchain)
  Downloading langsmith-0.0.63-py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.3/45.3 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)
  Downloading marshmallow-3.20.1-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langcha

# Model Initialization and Usage
This section loads the downloaded GGML model with `LlamaCPP` and uses it to generate haiku.

In [5]:
from langchain.embeddings import HuggingFaceEmbeddings
from llama_index import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    ServiceContext,
)
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt

In [6]:
# Gather the LlamaCPP settings in one place, then build the model from them
llm_settings = dict(
    model_path=model_path,  # GGML file stored on Google Drive
    temperature=0.7,
    max_new_tokens=75,
    context_window=3900,
    # NOTE(review): n_gpu_layers presumably sets how many layers run on the GPU — confirm
    model_kwargs={"n_gpu_layers": 43},
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

llm = LlamaCPP(**llm_settings)



AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 


In [7]:
# Build a haiku prompt for one topic and stream the model's answer
# NOTE(review): `llm` was created with completion_to_prompt, which may wrap this
# already [INST]-formatted string a second time — confirm against the llama_index version in use.
topic = "sun"
prompt = f"[INST] <> write a haiku in 5-7-5 syllable about the topic for me now. <> {topic} [/INST]"

# Print every streamed fragment as soon as it arrives, on a single line
response_iter = llm.stream_complete(prompt)
for chunk in response_iter:
    print(chunk.delta, end="", flush=True)

 The sun is out, oh. / A lovely day to be alive. / I'm so happy today. 

In [9]:
# Web-serving dependencies: flask and flask-ngrok are imported by the Flask app cell.
# NOTE(review): the `ngrok` Python package is not imported in any visible cell —
# confirm whether it is still needed.
!pip install flask
!pip install flask-ngrok
!pip install ngrok


Collecting ngrok
  Downloading ngrok-0.12.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ngrok
Successfully installed ngrok-0.12.0


In [12]:
# Download the ngrok Linux binary archive (-q quiet, -c resume a partial download, -nc keep an existing file)
!wget -q -c -nc https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
# Unpack it into the working directory (-qq quiet, -n never overwrite existing files)
!unzip -qq -n ngrok-stable-linux-amd64.zip

In [14]:
from google.colab import userdata

# Read the ngrok auth token from the Colab secret named 'authtoken'
authtoken = userdata.get('authtoken')  # Change 'authtoken' here if your secret is stored under another name

# Register the token with the ngrok binary in the working directory
!./ngrok authtoken {authtoken}

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [None]:
from flask import Flask, request
from flask_ngrok import run_with_ngrok

app = Flask(__name__)
# Let flask-ngrok hook into the app so it is served through ngrok once app.run() is called
run_with_ngrok(app)

@app.route("/generate_haiku", methods=['GET'])
def get_haiku():
    """Generate a haiku for the ``topic`` query parameter.

    Returns:
        ``{'haikus': [text]}`` holding the generated haiku, or
        ``({'error': message}, 400)`` when ``topic`` is missing or blank.
    """
    topic = request.args.get('topic')
    if topic is None or not topic.strip():
        # Without this guard a missing parameter was formatted into the prompt
        # as the literal text "None" and still sent to the model.
        return {'error': "Missing required query parameter 'topic'."}, 400

    prompt = f"[INST] <> Please Write me a haiku using 5, 7, 5 syllable format about a given topic using graphemes.<>{topic} [/INST]"
    # `llm` is the LlamaCPP model created earlier in the notebook
    response_iter = llm.stream_complete(prompt)

    # Concatenate the streamed fragments into the complete haiku text
    haikus = [''.join(response.delta for response in response_iter)]
    return {'haikus': haikus}

if __name__ == '__main__':
    app.run()



 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


 * Running on http://7a7a-104-198-100-206.ngrok-free.app
 * Traffic stats available on http://127.0.0.1:4040


INFO:werkzeug:127.0.0.1 - - [13/Nov/2023 18:29:54] "GET /generate_haiku?topic=summer HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [13/Nov/2023 18:30:38] "GET /generate_haiku?topic=home+away+from+home HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [13/Nov/2023 18:40:06] "GET /generate_haiku?topic=home+alone HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [13/Nov/2023 18:40:23] "GET /generate_haiku?topic=dark HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [13/Nov/2023 18:41:08] "GET /generate_haiku?topic=wisdom+of+old HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [13/Nov/2023 18:41:45] "GET /generate_haiku?topic=poem+of+joy HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [13/Nov/2023 18:42:10] "GET /generate_haiku?topic=winter+break HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [13/Nov/2023 18:42:24] "GET /generate_haiku?topic=chatgpt HTTP/1.1" 200 -
ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/google/colab/_debugpy_