In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_phi3_instruct(model_id: str = "microsoft/Phi-3-mini-4k-instruct"):
    """
    Load a causal language model checkpoint together with its tokenizer.

    Args:
        model_id: Identifier handed to both ``from_pretrained`` calls.

    Returns:
        A ``(tokenizer, model, device)`` tuple. ``device`` is the string
        ``"cuda"`` when ``torch.cuda.is_available()`` is true and ``"cpu"``
        otherwise; the model has already been moved to that device.
    """
    # Pick the device first so the model can be moved right after loading.
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    tok = AutoTokenizer.from_pretrained(model_id)

    # trust_remote_code=False: custom code from the checkpoint repository is
    # not enabled for this load.
    lm = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype="auto",
        trust_remote_code=False,
    )
    lm = lm.to(target_device)

    return tok, lm, target_device


def chat_with_model(tokenizer, model, device, chat_history, max_new_tokens: int = 128):
    """
    Run one completion for a chat conversation and return the reply text.

    Args:
        tokenizer: Object providing ``apply_chat_template``, ``__call__`` and
            ``decode`` (e.g. the tokenizer returned by ``from_pretrained``).
        model: Object providing ``generate``.
        device: Device the tokenized inputs are moved to before generation.
        chat_history: List of chat messages handed to the chat template.
        max_new_tokens: Forwarded to ``model.generate`` as the cap on new tokens.

    Returns:
        The decoded continuation, with special tokens skipped and leading and
        trailing whitespace stripped.
    """
    # Render the structured messages into one prompt string, ending with the
    # marker that asks the model to produce the next assistant turn.
    prompt_text = tokenizer.apply_chat_template(
        chat_history,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The chat template has already written every special token the prompt
    # needs, so the tokenizer must not add its own on top (otherwise tokens
    # such as BOS can end up duplicated).
    inputs = tokenizer(
        prompt_text,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(device)

    # Sampling is disabled, so the same inputs are decoded the same way each run.
    output_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,
    )

    # The output sequence starts with the prompt tokens; keep only what follows.
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = output_ids[0, prompt_length:]
    reply = tokenizer.decode(generated_ids, skip_special_tokens=True)

    return reply.strip()


if __name__ == "__main__":
    # Load the tokenizer and model once; `device` is where the model was placed
    # and is reused for the tokenized inputs.
    tokenizer, model, device = load_phi3_instruct()

    # A single-turn conversation in role/content message form.
    conversation = [{"role": "user", "content": "Whats is Generative AI."}]

    # One completion, capped at 100 new tokens.
    answer = chat_with_model(
        tokenizer,
        model,
        device,
        conversation,
        max_new_tokens=100,
    )

    # Only the model's reply is printed, not the prompt.
    print(answer)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Learning about Generative AI is important for several reasons:

1. Innovation and Creativity: Generative AI can help humans create new ideas, designs, and content by generating novel outputs. This can lead to innovation in various fields such as art, music, literature, and design.

2. Efficiency and Productivity: Generative AI can automate repetitive tasks, freeing up time for humans to focus on more complex and creative work
