In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [None]:
# One Hub checkpoint id is used for both the weights and the tokenizer.
checkpoint = "microsoft/Phi-3-mini-4k-instruct"

# Load the causal LM onto the CUDA device, letting the library choose the
# dtype ("auto"); trust_remote_code=False keeps remote code loading disabled.
model_options = {
    "device_map": "cuda",
    "torch_dtype": "auto",
    "trust_remote_code": False,
}
model = AutoModelForCausalLM.from_pretrained(checkpoint, **model_options)

# Matching tokenizer for the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [None]:
# Default generation settings for every call to `pipe`; individual calls may
# override them by passing keyword arguments.
generation_defaults = {
    "return_full_text": False,  # return only the newly generated text
    "max_new_tokens": 500,
    "do_sample": False,
}

# Text-generation pipeline wrapping the model and tokenizer loaded above.
pipe = pipeline(
    "text-generation", model=model, tokenizer=tokenizer, **generation_defaults
)

In [None]:
# Single-turn conversation reused by the following cells.
messages = [dict(role="user", content="Create a funny joke about chickens.")]

# Run the pipeline with its default settings and show only the generated text.
output = pipe(messages)
joke = output[0]["generated_text"]
print(joke)

In [None]:
# Inspect the prompt template: render `messages` through the tokenizer's chat
# template as a plain string (tokenize=False) instead of token ids.
chat_tokenizer = pipe.tokenizer
prompt = chat_tokenizer.apply_chat_template(messages, tokenize=False)
prompt

In [None]:
# Override the pipeline defaults for this call: enable sampling with temperature 1.
temperature_options = {"do_sample": True, "temperature": 1}
output = pipe(messages, **temperature_options)

# Raw pipeline result, a blank line, then just the generated text.
print(output, "", output[0]["generated_text"], sep="\n")

In [None]:
# Same conversation, this time sampling with top_p=1.
top_p_options = {"do_sample": True, "top_p": 1}
output = pipe(messages, **top_p_options)
output

## Prompt Components

In [None]:
# Building blocks of a structured prompt. Every component ends with a newline
# so the pieces can be concatenated directly.
persona = "Kamu adalah ahli dalam Large Language Model. Kamu unggul dalam memecah makalah rumit menjadi makalah yang mudah dimengerti.\n"
instruction = "Ringkaslah temuan utama dari makalah yang akan diberikan.\n"
context = "Rangkuman yang dibuat harus ada poin yang paling krusial yang bisa membantu peneliti mudah mengerti informasi paling vital dalam makalahnya.\n"
data_format = "Buatkan format ringkasannya dalam bentuk urutan poin-poin yang menjelaskan metodenya. Di setiap poinnya terdapat paragraf ringkas yang menjelaskan poin utamanya.\n"
audience = "Rangkuman ini ditujukan oleh peneliti sibuk yang butuh secara cepat memahami trend terbaru dalam Large Language Model.\n"
tone = "Nadanya haruslah terlihat professional dan jelas.\n"

# Placeholder for the document to summarize; `data` wraps it with a label.
text = "MY TEXT TO SUMMARIZE.\n"
data = f"Text to summarize: {text}"

# Assemble the full prompt. `data` already embeds `text`, so `text` is NOT
# appended separately; doing so would put the document in the prompt twice.
query = persona + instruction + context + data_format + audience + tone + data
query

## One-shot Example

In [None]:
# One-shot prompt: a worked example (user question + assistant answer)
# followed by the real question. The demonstration answer must use the
# "assistant" role so the chat template renders it as a previous model reply;
# tagging it "system" would present it as a system instruction instead.
one_shot_prompt = [
    {
        "role": "user",
        "content": "A 'Gigamuru' is a type of Japanese musical instrument. An example of a sentence that uses the word Gigamuru is:"
    },
    {
        "role": "assistant",
        "content": "I have a Gigamuru that my uncle gave me as a gift. I love to play it at home."
    },
    {
        "role": "user",
        "content": "To 'screeg' something is to swing a sword at it. An example of a sentence that uses the word screeg is: "
    }
]

# Show the rendered template so the role markers can be inspected.
print(tokenizer.apply_chat_template(one_shot_prompt, tokenize=False))
print()

# Generate the completion for the final user turn.
outputs = pipe(one_shot_prompt)
print(outputs[0]["generated_text"])

## Chain of Instruction

In [None]:
# First link of the chain: ask the model for a product name and slogan.
product_prompt = [
    dict(
        role="user",
        content="Buatkan nama dan slogan untuk sebuah chatbot yang memanfaatkan LLM.",
    ),
]

# The generated text is kept in `product_description` for the next cell.
outputs = pipe(product_prompt)
product_description = outputs[0]["generated_text"]
product_description

In [None]:
# Second link of the chain: feed the previously generated product
# description into a new prompt asking for a sales pitch.
sales_prompt = [
    {
        "role": "user",
        "content": f"Generate sebuah sales pitch untuk produk ini: {product_description}"
    }
]
outputs = pipe(sales_prompt)

# No trailing backslash here: a line continuation would glue this assignment
# to the display expression below and raise a SyntaxError.
sales_pitch = outputs[0]["generated_text"]
sales_pitch

## Chain of Thought

In [None]:
# Few-shot chain-of-thought: one solved example whose answer spells out the
# reasoning, followed by a new question of the same shape.
cot_turns = [
    (
        "user",
        "Si A punya 5 kelereng. Dia beli 2 bungkus kelereng, setiap bungkusnya ada 3 kelereng. Berapa banyak kelereng yang dia punya sekarang ?",
    ),
    (
        "assistant",
        "Awalnya si A punya 5 kelereng. Setiap bungkus kelereng berisi 3 kelereng. Si A beli 2 bungkus. 5 + (3 * 2) = 11.",
    ),
    (
        "user",
        "Si A beli gorengan 3. Dia beli lagi 2 bungkus gorengan, setiap bungkusnya ada 6 gorengan. Berapa gorengan si A sekarang ?",
    ),
]
cot_prompt = [{"role": role, "content": content} for role, content in cot_turns]

# Generate and display the answer to the final question.
outputs = pipe(cot_prompt)
outputs[0]["generated_text"]

## Zero-Shot Chain of Thought

In [None]:
# Zero-shot chain-of-thought: no worked example, only the trailing
# "Let's think step-by-step." cue appended to the question.
zeroshot_cot_prompt = [
    dict(
        role="user",
        content="The cafeteria had 23 apples. If they used 20 to make lunch and bought 6 more, how many apples do they have? Let's think step-by-step.",
    ),
]

# Display the raw pipeline result.
outputs = pipe(zeroshot_cot_prompt)
outputs

## Zero-Shot Tree of Thought

In [None]:
# Zero-shot tree-of-thought prompt: a single user message asking the model to
# role-play three experts who reason step by step, share their answers, and
# agree on a final one.
# NOTE: the triple-quoted literal is sent as written, so its leading newline
# and the indentation at the start of each line are part of the prompt.
zeroshot_tot_prompt = [
    {
        "role": "user",
        "content": """
        Bayangkan ada 3 orang ahli matematika yang berbeda mencoba menjawab 1 pertanyaan.
        Setiap ahli tersebut haruslah menuliskan langkah per langkah bagaimana dia berpikir untuk
        menyelesaikan permasalahannya dan harus membagikan jawabannya kepada ahli yang lain.
        Diakhir para ahli harus berdiskusi dan sepakat mana jawaban yang benar.

        Berikut pertanyaannya:
        1. Berapakah hasil dari x + 5 = 7
        """
    }
]

# Run the prompt and display the raw pipeline result.
outputs = pipe(zeroshot_tot_prompt)
outputs

## Output Verification

In [None]:
import gc
import torch

# Drop the references to the transformers objects before collecting: while
# these names are still bound, neither gc.collect() nor empty_cache() can
# release the memory they hold. pop() with a default (rather than `del`)
# keeps the cell re-runnable on a fresh kernel where the names do not exist.
for _name in ("model", "tokenizer", "pipe"):
    globals().pop(_name, None)

# Flush Memory
gc.collect()
torch.cuda.empty_cache()

In [1]:
!pip install llama-cpp-python --upgrade

Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.9.tar.gz (67.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.9/67.9 MB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting diskcache>=5.6.1 (from llama-cpp-python)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: llama-cpp-python
  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone
  Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.9-cp311-cp311-linux_x86_64.whl size=4127103 sha256=d

In [2]:
from llama_cpp.llama import Llama

# GGUF build of Phi-3 mini; the glob pattern selects the fp16 file in the repo.
gguf_source = {
    "repo_id": "microsoft/Phi-3-mini-4k-instruct-gguf",
    "filename": "*fp16.gguf",
}

# n_ctx=2048 is below the model's training context (4096), which is what the
# llama_context notice in this cell's output reports.
# n_gpu_layers=-1 presumably offloads every layer to the GPU (confirm against
# the llama-cpp-python docs); verbose=False disables verbose logging.
llm = Llama.from_pretrained(
    **gguf_source, n_gpu_layers=-1, n_ctx=2048, verbose=False
)

Phi-3-mini-4k-instruct-fp16.gguf:   0%|          | 0.00/7.64G [00:00<?, ?B/s]

llama_context: n_ctx_per_seq (2048) < n_ctx_train (4096) -- the full capacity of the model will not be utilized


In [8]:
# Ask for a JSON object (response_format) at temperature 0.
warrior_request = [
    {"role": "user", "content": "Create a warrior for an RPG in JSON format."}
]
completion = llm.create_chat_completion(
    messages=warrior_request,
    response_format={"type": "json_object"},
    temperature=0,
)

# Keep only the message text of the first choice.
output = completion["choices"][0]["message"]["content"]

In [10]:
import json

# Parsing verifies the model returned valid JSON (json.loads raises otherwise);
# re-serialising with a 4-space indent gives a readable printout.
parsed_output = json.loads(output)
json_output = json.dumps(parsed_output, indent=4)
print(json_output)

{
    "warrior": {
        "name": "Eldric Stormbringer",
        "class": "Warrior",
        "level": 5,
        "attributes": {
            "strength": 18,
            "dexterity": 10,
            "constitution": 16,
            "intelligence": 8,
            "wisdom": 10,
            "charisma": 12
        },
        "skills": [
            {
                "name": "Martial Arts",
                "proficiency": 18
            },
            {
                "name": "Heavy Armor",
                "proficiency": 16
            },
            {
                "name": "Swordsmanship",
                "proficiency": 17
            },
            {
                "name": "Shield Mastery",
                "proficiency": 15
            },
            {
                "name": "Survival",
                "proficiency": 12
            }
        ],
        "equipment": [
            {
                "name": "Iron Sword",
                "type": "Weapon",
                "damage": 12,
    