In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load model and tokenizer.
# The checkpoint id is defined once so the model and tokenizer always come
# from the same repository.
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs (slowly) instead of failing while loading the model.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map=DEVICE,
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Create a pipeline. These generation settings are the defaults for every
# later `pipe(...)` call in the notebook.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,  # return only the newly generated text
    max_new_tokens=500,      # upper bound on the length of each completion
    do_sample=False,         # no sampling
)

## Instruction-Based Prompting
This is the most straightforward form of prompting, where you give the model clear, direct instructions.

In [None]:
# PROMPT
# A plain, direct instruction (spelling fixed: "benefits").
query = "List three benefits of yoga"

# The pipeline takes a chat: a list of {"role", "content"} messages.
# A single user turn is all that instruction-based prompting needs.
messages = [
    {"role": "user", "content": query}
]

# Generate the output
outputs = pipe(messages)
print(outputs[0]["generated_text"])

## In-Context Learning
In-context learning means providing examples in the prompt to guide the model toward the desired format or tone.

In [None]:
# PROMPT
# The chat history must alternate user -> assistant. The task instruction is
# therefore placed in the same user turn as the first example, rather than in
# a separate user turn that would have no assistant reply.
instruction = "Classify the price of the item"

few_shot_prompt = [
    # Worked examples: each user item is followed by the expected label.
    {"role": "user", "content": f"{instruction}\nYacht"},
    {"role": "assistant", "content": "Expensive"},
    {"role": "user", "content": "Pencil"},
    {"role": "assistant", "content": "Cheap"},
    {"role": "user", "content": "Diamond"},
    {"role": "assistant", "content": "Expensive"},
    # The actual query: the model should answer in the format shown above.
    {"role": "user", "content": "Burger"}
]

# Generate the output
outputs = pipe(few_shot_prompt)
print(outputs[0]["generated_text"])

## Chain Prompting
Chain prompting is about breaking down a task into a sequence of smaller tasks, where each answer informs the next question.

In [None]:
# Chain step 1: ask for a problem worth building a startup around.
idea_request = "Suggest one real world problem to focus to build a startup"
messages = [{"role": "user", "content": idea_request}]

# Keep only the generated text; the next cell feeds it into its own prompt.
idea_outputs = pipe(messages)
startup_idea = idea_outputs[0]["generated_text"]
startup_idea

In [None]:
# Chain step 2: turn the idea from the previous cell into a name and slogan.
naming_request = f"Suggest a catchy name and slogan for the following startup idea: {startup_idea}"
messages = [{"role": "user", "content": naming_request}]

# Keep only the generated text; the pitch cell builds its prompt from it.
naming_outputs = pipe(messages)
startup_description = naming_outputs[0]["generated_text"]
startup_description

In [None]:
# Chain step 3: write a funding pitch from the name and slogan produced above.
pitch_request = f"Create a small pitch to get funding for {startup_description}"
messages = [{"role": "user", "content": pitch_request}]

# Final link of the chain: the generated pitch text.
pitch_outputs = pipe(messages)
startup_pitch = pitch_outputs[0]["generated_text"]
startup_pitch

## Chain of Thought Reasoning
Chain of thought (CoT) prompting encourages the model to "think aloud," or provide a step-by-step reasoning process.

In [None]:
# One-shot chain-of-thought: a worked example whose answer spells out its
# reasoning, followed by the new question the model should solve the same way.
example_question = "Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?"
example_reasoning = "Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11."
target_question = "The cafeteria had 23 apples. If they used 20 to make lunch and bought 6 more, how many apples do they have?"

cot_prompt = [
    {"role": "user", "content": example_question},
    {"role": "assistant", "content": example_reasoning},
    {"role": "user", "content": target_question},
]

# Run the model and show its answer.
outputs = pipe(cot_prompt)
print(outputs[0]["generated_text"])

## Tree of Thought Reasoning
Tree of thought (ToT) involves exploring multiple reasoning paths to reach the best outcome. Think of it like a decision tree.

In [None]:
# Zero-shot tree-of-thought: one prompt asks the model to simulate several
# experts who reason step by step and drop out when they find they are wrong.
# The text is split across adjacent literals for readability only; Python
# joins them into the same single string.
tot_instruction = (
    "Imagine two different experts are answering this question. "
    "All experts will write down 1 step of their thinking, then share it with the group. "
    "Then all experts will go on to the next step, etc. "
    "There shall be a maximum of 4 steps. "
    "If any expert realises they're wrong at any point then they leave. "
    "The question is 'Which company is better for investing 1000 dollars? Nvidia or AMD.' "
    "Make sure to discuss the results."
)
zeroshot_tot_prompt = [{"role": "user", "content": tot_instruction}]

# Run the model and show its answer.
outputs = pipe(zeroshot_tot_prompt)
print(outputs[0]["generated_text"])

## Self-Consistency
Self-consistency involves generating multiple responses and choosing the most common answer to reduce variability and improve reliability.

In [None]:
# Self consistency prompt
query = [
    {"role": "user", "content": "If 1+5 = -6 and 2+3 = -5, how much is 4+2? Do not explain, just give the answer"}
]

# Self-consistency only works if the runs can differ. The pipeline was created
# with do_sample=False, so sampling is switched on for these calls only.
NUM_SAMPLES = 3
SAMPLING_TEMPERATURE = 0.7

# Seed the sampler so that re-running the cell gives the same set of answers.
torch.manual_seed(42)

# Store all the responses after multiple runs. Surrounding whitespace is
# stripped so that otherwise identical answers count as the same vote.
responses = [
    pipe(query, do_sample=True, temperature=SAMPLING_TEMPERATURE)[0]["generated_text"].strip()
    for _ in range(NUM_SAMPLES)
]
print("Answers:", responses)

# Majority vote: keep the most frequent answer. On a tie, the answer that
# appeared first wins, because max() returns the first maximal element.
final_answer = max(responses, key=responses.count)
print("Most common answer:", final_answer)