# Lab 1: Introduction to Phi-3 Instruct

## Download the optimized Phi-3-mini-4k-instruct model

In [None]:
import huggingface_hub as hf_hub
from pathlib import Path

# Hugging Face Hub repository to fetch, and the local folder the snapshot is written to.
llm_model_id = "OpenVINO/Phi-3-mini-4k-instruct-int4-ov"
llm_model_path = "../model/phi-3-mini-4k-instruct-ov"

# Only hit the network when the target folder is not on disk yet.
local_model_dir = Path(llm_model_path)
if not local_model_dir.exists():
    hf_hub.snapshot_download(repo_id=llm_model_id, local_dir=llm_model_path)

## Text-completion with Optimum-intel

In [None]:
from transformers import AutoConfig, AutoTokenizer
from optimum.intel.openvino import OVModelForCausalLM

In [None]:
# OpenVINO runtime properties passed through to the model: a latency-oriented
# performance hint, a single stream, and an empty CACHE_DIR value.
# NOTE(review): an empty CACHE_DIR presumably leaves model caching off — confirm.
ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}

# Load the OpenVINO model from the local snapshot directory.
# The device is spelled "GPU" (upper case) so it matches the OpenVINO device name
# used for the GenAI pipeline further down in this notebook.
ov_model = OVModelForCausalLM.from_pretrained(
    llm_model_path,
    device="GPU",
    ov_config=ov_config,
    config=AutoConfig.from_pretrained(llm_model_path, trust_remote_code=True),
    trust_remote_code=True,
)

In [None]:
# Load the tokenizer from the same local directory the model is loaded from.
tok = AutoTokenizer.from_pretrained(llm_model_path, trust_remote_code=True)

In [None]:
# Extra keyword arguments for the tokenizer call: do not add special tokens automatically.
tokenizer_kwargs = {
    "add_special_tokens": False,
}

In [None]:
# Hand-written chat prompt: a system turn, a user turn, then an open assistant turn.
# The pieces are adjacent string literals, so they join into one unbroken string.
prompt = (
    "<|system|>You are a helpful AI assistant.<|end|>"
    "<|user|>can you introduce yourself?<|end|>"
    "<|assistant|>"
)

# Tokenize the prompt, requesting "pt" tensors and forwarding the extra tokenizer options.
input_tokens = tok(prompt, **tokenizer_kwargs, return_tensors="pt")

In [None]:
# Generate from the tokenized prompt, capped at 1024 newly generated tokens.
answer = ov_model.generate(**input_tokens, max_new_tokens=1024)

In [None]:
# Decode the first generated sequence back to text, dropping special tokens.
# As the last expression of the cell, the notebook displays the resulting string.
# NOTE(review): the generated ids likely start with the prompt tokens, so the text
# probably contains the prompt as well as the reply — confirm.
tok.batch_decode(answer, skip_special_tokens=True)[0]

## Text-completion with GenAI API

In [None]:
import openvino_genai as ov_genai

# Build an OpenVINO GenAI pipeline from the same local model directory, on the "GPU" device.
pipe = ov_genai.LLMPipeline(llm_model_path, "GPU")

In [None]:
# Hand-written chat prompt: a system turn, a user turn, then an open assistant turn.
# The pieces are adjacent string literals, so they join into one unbroken string.
prompt = (
    "<|system|>You are a helpful AI assistant.<|end|>"
    "<|user|>can you introduce yourself?<|end|>"
    "<|assistant|>"
)

# Generation settings for this request.
# NOTE(review): 32007 is presumably the id of the `<|end|>` token, and `max_length`
# presumably counts prompt tokens too — confirm against the tokenizer and GenAI docs.
generation_options = {"eos_token_id": 32007, "max_length": 200}

answer = pipe.generate(prompt, **generation_options)
print(answer)

### Streaming

In [None]:
def streamer(subword):
    """Echo one streamed text fragment to stdout as soon as it arrives.

    Args:
        subword: Piece of generated text handed to this callback.

    Returns:
        Always ``False``.
        NOTE(review): OpenVINO GenAI is understood to treat the callback's return
        value as a "stop generation" flag, so ``False`` should mean "keep going" —
        confirm against the installed version.
    """
    # No trailing newline and an immediate flush, so fragments read as running text.
    print(subword, end="", flush=True)
    stop_generation = False
    return stop_generation

# Same prompt and generation settings as the previous request, with `streamer`
# supplied as the callback that prints text fragments.
pipe.generate(prompt, eos_token_id=32007, max_length=200, streamer=streamer)

### More examples: [llm-chatbot](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/llm-chatbot)