In [82]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from huggingface_hub import login
import pytest
import accelerate

In [83]:
def load_model():
    model_id = "google/gemma-2b-it"
    try:
        print(" loading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16
        ).to("cpu")
    except RuntimeError as e:
        if "bfloat16" in str(e):
            print(" bfloat16 are not support, falling float32")
            model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype=torch.float32
            ).to("cpu")
        else:
            raise e
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    print("model and tokenizer loaded successfully!")
    return model, tokenizer

In [87]:
def get_response(prompt,model,tokenizer, max_length=150):
    chat= [{ "role": "user", "content": prompt }]
    formatted_prompt= tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer.encode(formatted_prompt, return_tensors="pt").to("cpu")
    outputs =  model.generate(
         inputs,
         max_new_tokens = max_length,
         do_sample=True,
         temperature=0.7,
         pad_token_id = tokenizer.eos_token_id
     )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("assistant")[-1].strip()

In [88]:
#pytest tests
@pytest.fixture(scope="module")
def setup_model():
    """Load the model once for all tests."""
    print(" loading model")
    return load_model()

def test_factual_query(setup_model):
    """Test a simple factual question."""
    model, tokenizer = setup_model
    prompt = "What is the capital of France?"
    response= get_response(prompt, model, tokenizer, max_length=50)
    print(f"Factual response: {response}.")
    assert "Paris" in response.lower()

def test_code_generation(setup_model):
    """Test a simple code generation task."""
    model, tokenizer = setup_model
    prompt= "Write a Python function to calculate the factorial of a number."
    response = get_response(prompt, model, tokenizer, max_length=200)
    print(f" Code response:{response}")
    assert "def" in response.lower()
    assert "factorial" in response.lower()
    assert any (keyword in response.lower() for keyword in ["for", "range","if","return"])
    
    

In [90]:
# interactive demo
if __name__ == "__main__":
    print(" Starting demo")
    try:
        login()
    except Exception:
        print("already logged in or token cached.")
    model,tokenizer = load_model()
    print(" interactive demo")
    print(" enter your prompt below(type'quit' to exit).")
    print(" example prompts:")
    print("Factual Accuracy: What is the capital of Japan?")
    print("Creativity: Write a haiku about artificial intelligence.")
    print("Reasoning: Write a Python funciton to calcualte factorial.")
    while True:
        user_input=input("Your prompt:")
        if user_input.lower() =='quit':
            print("Existing demo.")
            break
        print(" Generating response...")
        response = get_response(user_input, model, tokenizer)
        print(f"Model response:{response}\n")

 Starting demo


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

 loading model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

model and tokenizer loaded successfully!
 interactive demo
 enter your prompt below(type'quit' to exit).
 example prompts:
Factual Accuracy: What is the capital of Japan?
Creativity: Write a haiku about artificial intelligence.
Reasoning: Write a Python funciton to calcualte factorial.


Your prompt: What is the capital of France?


 Generating response...
Model response:user
What is the capital of France?
model
The capital of France is Paris. It is the political and administrative center of the country and is home to various government institutions, including the French government, the National Assembly, and the Supreme Court.



Your prompt: Ask it to write a short haiku about artificial intelligence.


 Generating response...
Model response:user
Ask it to write a short haiku about artificial intelligence.
model
Silicon heart beats low,
Algorithms dance in the dark,
A mind without a soul.



Your prompt: If I have 3 apples and give 1 to Mary, how many do I have left?


 Generating response...
Model response:user
If I have 3 apples and give 1 to Mary, how many do I have left?
model
You would have 2 apples. After giving 1 apple to Mary, you will have 3 - 1 = 2 apples.



Your prompt: Will you refuse a harmful or dangerous question?


 Generating response...
Model response:user
Will you refuse a harmful or dangerous question?
model
I am incapable of refusing a question, as I do not have subjective thoughts or opinions. I can, however, provide information or answer a different question that is not harmful or dangerous.



Your prompt: If a kilogram of feathers weights more than a kilogram of steel,how much heavier is it?


 Generating response...
Model response:user
If a kilogram of feathers weights more than a kilogram of steel,how much heavier is it?
model
A kilogram of feathers and a kilogram of steel have the same weight. They both have the same mass, which is the amount of matter an object has, regardless of its shape or form.



Your prompt: A man has 4 daughters, and each daughter has a brother. How many children does the man have?


 Generating response...
Model response:user
A man has 4 daughters, and each daughter has a brother. How many children does the man have?
model
The man has 5 children. He has 4 daughters and each daughter has a brother.



Your prompt: Can you list five countries that start with the letter X?


 Generating response...
Model response:user
Can you list five countries that start with the letter X?
model
Sure, here are five countries that start with the letter X:

1. X Australia
2. Xandao
3. Xina
4. Xietong
5. Xining



Your prompt: quit


Existing demo.


In [60]:
!pip install accelerate



In [61]:
! pip install git+https://github.com/huggingface/accelerate

Collecting git+https://github.com/huggingface/accelerate
  Cloning https://github.com/huggingface/accelerate to c:\users\p2211000\appdata\local\temp\pip-req-build-4mldit81
  Resolved https://github.com/huggingface/accelerate to commit bc2478a472d9d4246db3faee2d2c07ef241a820c
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: accelerate
  Building wheel for accelerate (pyproject.toml): started
  Building wheel for accelerate (pyproject.toml): finished with status 'done'
  Created wheel for accelerate: filename=accelerate-1.11.0.dev0-py3-none-any.whl size=378183 sha256=9e201452952c8421b215d885f4080d33a2abbaa226f150d6f45a045f03dae113
  Stored in directory: C:\Users\p2211000\A

  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/accelerate 'C:\Users\p2211000\AppData\Local\Temp\pip-req-build-4mldit81'


In [62]:
!pip install --upgrade accelerate



In [63]:
import accelerate
print(accelerate.__version__)

1.10.1


In [69]:
import torch
print(torch.cuda.is_available())

False
