## Install the Quantized Llama Model from Hugging Face

## Step 1: Install All Required Packages

In [5]:
# for GPU use
!CT_CUBLAS=1 pip install ctransformers --no-binary ctransformers

Collecting ctransformers
  Downloading ctransformers-0.2.27.tar.gz (376 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m376.1/376.1 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting huggingface-hub (from ctransformers)
  Downloading huggingface_hub-0.19.4-py3-none-any.whl (311 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.7/311.7 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: ctransformers
  Building wheel for ctransformers (pyproject.toml) ... [?25l[?25hdone
  Created wheel for ctransformers: filename=ctransformers-0.2.27-cp310-cp310-linux_x86_64.whl size=2338848 sha256=f5db187dcc082f516c161a2ccb16816ae6642ab0e2f65ac79d896e52862d6330
  Stored in directory: /root/.cache/pip/wheels/dd/54/e9/32364da8eee84a2b0b412394983c1

In [6]:
# Hugging Face Hub repository id handed to AutoModelForCausalLM.from_pretrained;
# per its name, the Llama-2 13B chat model in GGML format.
model_id = "TheBloke/Llama-2-13B-chat-GGML"

In [8]:
from ctransformers import AutoModelForCausalLM

# Generation settings passed to from_pretrained as keyword arguments.
# 'stream': True is set here; individual calls can override it (stream=False).
# check ctransformers doc for more configs
config = {'max_new_tokens': 256, 'repetition_penalty': 1.1,
          'temperature': 0.1, 'stream': True}

# Load the quantized model from the Hugging Face Hub repo named by `model_id`.
llm = AutoModelForCausalLM.from_pretrained(
      model_id,
      # Name the weights file explicitly (the q2_K file the recorded run
      # downloaded) so the quantization level used is reproducible rather
      # than left to the library's automatic file selection.
      model_file="llama-2-13b-chat.ggmlv3.q2_K.bin",
      model_type="llama",
      #lib='avx2', for cpu use
      gpu_layers=130, #110 for 7b, 130 for 13b
      **config
      )

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

(…)b6b562cd87ee3d7f07109b014dd0/config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

llama-2-13b-chat.ggmlv3.q2_K.bin:   0%|          | 0.00/5.51G [00:00<?, ?B/s]

In [9]:
# Recipe request sent to the model, assembled from adjacent string literals to
# keep lines short. The text is reproduced exactly as originally written.
prompt = (
    "Generate a recipe using the folowing ingredients ; "
    "Chicken, Veggie Stir-Fry with artichokes and cashews"
)

In [10]:
# Tokenize the prompt with the model's tokenizer; `tokens` is what the timing
# cell later passes to llm.generate().
tokens = llm.tokenize(prompt)

In [11]:
# One-shot generation: stream=False overrides the 'stream': True entry in
# `config`, so the generated text is returned and shown as this cell's output.
llm(prompt, stream=False)

'.\n\nHere is the recipe you requested:\n\nChicken Veggie Stir-Fry with Artichokes and Cashews\n\nIngredients:\n\n* 1 lb boneless, skinless chicken breasts, cut into bite-sized pieces\n* 2 medium artichokes, trimmed and chopped\n* 1/2 cup veggie stir-fry mix (such as broccoli, carrots, and bell peppers)\n* 1/4 cup unsalted cashews\n* 2 tablespoons olive oil\n* 1 teaspoon soy sauce\n* Salt and pepper to taste\n\nInstructions:\n\n1. Heat the olive oil in a large skillet or wok over medium-high heat.\n2. Add the chicken to the skillet and cook until browned, about 5 minutes. Remove from the skillet and set aside.\n3. Add the chopped artichokes to the skillet and cook for 2-3 minutes, or until tender.\n4. Add the veggie stir-fry mix to the skillet and cook for an'

In [13]:
import time

# Measure token throughput by streaming tokens from llm.generate() and timing
# the whole loop. perf_counter() is used instead of time() because it is a
# monotonic, high-resolution clock intended for measuring elapsed intervals.
start = time.perf_counter()
NUM_TOKENS = 0

print('-' * 4 + 'Start Generation' + '-' * 4)

# Assuming llm is your Llama model
for token in llm.generate(tokens):
    # Print each piece as soon as it is produced so progress is visible.
    print(llm.detokenize(token), end='', flush=True)
    NUM_TOKENS += 1

time_generate = time.perf_counter() - start

print('\n')
print('-' * 4 + 'End Generation' + '-' * 4)
print(f'Num of generated tokens: {NUM_TOKENS}')
print(f'Time for complete generation: {time_generate}s')
if NUM_TOKENS > 0:
    print(f'Tokens per second: {NUM_TOKENS / time_generate}')
    print(f'Time per token: {(time_generate / NUM_TOKENS) * 1000}ms')
else:
    # Avoid ZeroDivisionError when the model yields nothing (e.g. immediate stop).
    print('No tokens were generated; throughput statistics are unavailable.')


----Start Generation----
.

Here is the recipe you requested:

Chicken Veggie Stir-Fry with Artichokes and Cashews

Ingredients:

* 1 lb boneless, skinless chicken breasts, cut into bite-sized pieces
* 2 medium artichokes, trimmed and chopped
* 1/2 cup veggie stir-fry mix (such as broccoli, carrots, and bell peppers)
* 1/4 cup unsalted cashews
* 2 tablespoons olive oil
* 1 teaspoon soy sauce
* Salt and pepper to taste

Instructions:

1. Heat the olive oil in a large skillet or wok over medium-high heat.
2. Add the chicken to the skillet and cook until browned, about 5 minutes. Remove from the skillet and set aside.
3. Add the chopped artichokes to the skillet and cook for 2-3 minutes, or until tender.
4. Add the veggie stir-fry mix to the skillet and cook until tender, about 3-4 minutes.
5. Add the chicken back to the skillet and stir in the soy sauce. Cook for an additional 2-3 minutes, until heated through.
6. Serve hot, garnished with unsalted cashews.

This recipe is a quick and ea