In [1]:
# Shell escape: print the NVIDIA driver / GPU status table for the machine running this kernel.
!nvidia-smi

Tue Nov 18 21:02:21 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 581.57                 Driver Version: 581.57         CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1650      WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   54C    P8              3W /   50W |       0MiB /   4096MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [None]:
#%pip install -q --upgrade torch torchvision torchaudio
%pip install -q --upgrade "transformers[torch]"
%pip install -q --upgrade accelerate datasets
%pip install -q bitsandbytes trl[peft] loralib huggingface_hub
%pip install -q gradio

Note: you may need to restart the kernel to use updated packages.
^C
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import sys
import torch
import random
import numpy as np
import pandas as pd

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, PeftModel

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def set_seed(seed: int = 42):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Value handed to Python's `random`, NumPy and PyTorch.

    Returns:
        None. When CUDA is available, all CUDA devices are seeded as well and
        cuDNN is switched to deterministic mode with benchmarking disabled.
    """
    # CPU-side generators first, in the same order every time.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # Nothing GPU-related to configure without CUDA.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_seed(42)

In [4]:
from huggingface_hub import login
import os
from dotenv import load_dotenv

# Read key/value pairs from a local .env file (if any) into the process environment.
load_dotenv()
# Environment names are case-sensitive on POSIX systems. Try the spelling this notebook
# has always used first, then fall back to the canonical `HF_TOKEN` name.
hf_token = os.getenv('HF_Token') or os.getenv('HF_TOKEN')
# If neither variable is set, `hf_token` is None and is passed through unchanged.
login(token=hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [5]:
def add_generation_prompt(tokenizer):
    """Install a Gemma-style Jinja chat template on `tokenizer` and return it.

    The tokenizer is modified in place (its `chat_template` attribute is
    overwritten) and the same object is returned.

    What the template renders, as written below:
      * `bos_token` comes first.
      * If the first message has role 'system', its text (a plain string, or the
        `text` of its first content item) plus two newlines becomes a prefix of
        the first remaining turn; the system message itself is not a turn.
      * The remaining messages must alternate user/assistant starting with
        user, otherwise the template calls `raise_exception`.
      * Each turn is `<start_of_turn>` + role + newline, the content, then
        `<end_of_turn>` + newline. The 'assistant' role is written as 'model'.
      * Content may be a string or an iterable of items; 'image' items render
        as `<start_of_image>`, 'text' items render their trimmed text. Any other
        content type calls `raise_exception`.
      * Assistant turns (content and closing `<end_of_turn>`) are wrapped in
        `{% generation %}` ... `{% endgeneration %}`. Presumably this is what
        lets downstream tooling locate assistant tokens - confirm against the
        trainer configuration.
      * When `add_generation_prompt` is true, `<start_of_turn>model` + newline
        is appended at the end.

    NOTE(review): the whitespace between the assistant branch's `<end_of_turn>`
    expression and `{% endgeneration %}` is not covered by any `-` trim marker,
    unlike the rest of the template. Depending on the trim/lstrip settings of
    the Jinja environment that compiles this template, a line break may be
    rendered inside the generation span. Confirm by rendering a sample
    conversation.
    """
    generation_chat_template = """{{ bos_token }}
{%- if messages[0]['role'] == 'system' -%}
    {%- if messages[0]['content'] is string -%}
        {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}
    {%- else -%}
        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}
    {%- endif -%}
    {%- set loop_messages = messages[1:] -%}
{%- else -%}
    {%- set first_user_prefix = "" -%}
    {%- set loop_messages = messages -%}
{%- endif -%}
{%- for message in loop_messages -%}
    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
    {%- endif -%}
    {%- if (message['role'] == 'assistant') -%}
        {%- set role = "model" -%}
    {%- else -%}
        {%- set role = message['role'] -%}
    {%- endif -%}
    {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else "") }}
    {%- if message['role'] == 'assistant' -%}
        {% generation %}
        {%- if message['content'] is string -%}
            {{ message['content'] | trim }}
        {%- elif message['content'] is iterable -%}
            {%- for item in message['content'] -%}
                {%- if item['type'] == 'image' -%}
                    {{ '<start_of_image>' }}
                {%- elif item['type'] == 'text' -%}
                    {{ item['text'] | trim }}
                {%- endif -%}
            {%- endfor -%}
        {%- else -%}
            {{ raise_exception("Invalid content type") }}
        {%- endif -%}
        {{ '<end_of_turn>\n' }}
        {% endgeneration %}
    {%- else -%}
        {%- if message['content'] is string -%}
            {{ message['content'] | trim }}
        {%- elif message['content'] is iterable -%}
            {%- for item in message['content'] -%}
                {%- if item['type'] == 'image' -%}
                    {{ '<start_of_image>' }}
                {%- elif item['type'] == 'text' -%}
                    {{ item['text'] | trim }}
                {%- endif -%}
            {%- endfor -%}
        {%- else -%}
            {{ raise_exception("Invalid content type") }}
        {%- endif -%}
        {{ '<end_of_turn>\n' }}
    {%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {{'<start_of_turn>model
'}}
{%- endif -%}"""
    # Overwrite whatever template the tokenizer shipped with.
    tokenizer.chat_template = generation_chat_template
    return tokenizer


# Define a helper function to load and set up the model and tokenizer
def get_model_tokenizer(model_name, return_model=True, return_tokenizer=True,
                        pad_token_id=0, eos_token_id=1):
    """Load a 4-bit quantized causal LM and/or its tokenizer.

    Args:
        model_name: Hub id or local path passed to the `from_pretrained` calls.
        return_model: When True, load the model with an NF4 double-quantized
            BitsAndBytes config and run `prepare_model_for_kbit_training` on it.
        return_tokenizer: When True, load the tokenizer and install the chat
            template from `add_generation_prompt`.
        pad_token_id: Id assigned to `tokenizer.pad_token_id` (default 0, the
            value that used to be hard-coded).
        eos_token_id: Id assigned to `tokenizer.eos_token_id` (default 1, the
            value that used to be hard-coded).

    Returns:
        A `(model, tokenizer)` tuple; an element is None when it was not
        requested.

    Note:
        The pad/eos ids are applied whenever a tokenizer is loaded, so a
        tokenizer-only call yields the same tokenizer configuration as a
        combined call. The model's eos id is synced from the tokenizer only
        when both are loaded.
    """
    model = None
    tokenizer = None

    if return_tokenizer:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        tokenizer = add_generation_prompt(tokenizer)
        tokenizer.pad_token_id = pad_token_id
        tokenizer.eos_token_id = eos_token_id

    if return_model:
        # Set up the quantization config
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype="bfloat16",
        )
        # Load the model from Huggingface and apply quantization
        # SECURITY NOTE(review): trust_remote_code=True allows Python code shipped in the
        # model repository to run locally. Keep it only for repositories you trust.
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quant_config,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
        model = prepare_model_for_kbit_training(model)

        if tokenizer is not None:
            # Keep the model's end-of-sequence id aligned with the tokenizer's.
            model.eos_token_id = tokenizer.eos_token_id
            model.config.eos_token_id = tokenizer.eos_token_id

    return model, tokenizer

def apply_adapter(model, adapter_name):
    """Wrap `model` with the PEFT adapter identified by `adapter_name`.

    Args:
        model: Base model handed to `PeftModel.from_pretrained`.
        adapter_name: Adapter identifier handed to `PeftModel.from_pretrained`.

    Returns:
        Whatever `PeftModel.from_pretrained` returns, loaded with
        `device_map="auto"`.
    """
    return PeftModel.from_pretrained(model, adapter_name, device_map="auto")

In [6]:
# Set to True if you want to save the full model.
SAVE_FULL_MODEL = False

# Set to True if you want to save the model to your Google Drive.
SAVE_TO_DRIVE = False

# Path the checkpoints are saved to; modify it to the location you want.
CKPT_PATH = r"\HW7"

# CAUTION: If both SAVE_FULL_MODEL and SAVE_TO_DRIVE are set to True, make sure your
# Google Drive has sufficient space; otherwise you may well exceed your drive quota.

# Set to True if you want to use wandb.
USE_WANDB = False

## Phase 1: SFT - Instruction Tuning

In [7]:
from tqdm import tqdm
import gradio as gr
from peft import LoraConfig, get_peft_model
from datasets import load_dataset, DatasetDict, concatenate_datasets
from trl import SFTTrainer, SFTConfig

In [8]:
# Checkpoint identifier passed to the `from_pretrained` loaders inside get_model_tokenizer.
base_model_name = "jaxon3062/gemma-3-4b-pt-chat"
# Loads both the 4-bit quantized model and the tokenizer (with the custom chat template installed).
model, tokenizer = get_model_tokenizer(base_model_name)

Loading checkpoint shards: 100%|██████████| 4/4 [03:57<00:00, 59.28s/it]


In [9]:
def chat_interface(message, history):
    """Generate one reply for the Gradio chat UI using the global model/tokenizer.

    The prompt is the system prompt, every earlier turn and the new message
    concatenated as plain text (no chat template is applied).

    Args:
        message: The user's newest message.
        history: Earlier turns, either as `(user, assistant)` pairs or as
            dicts with a `content` entry.

    Returns:
        The newly generated text only, with special tokens removed and
        surrounding whitespace stripped.
    """
    SYSTEM_PROMPT = "You are a helpful assistant."

    # Format the chat history for the model
    pieces = [SYSTEM_PROMPT]
    for turn in history:
        if isinstance(turn, dict):
            # Dict-style history: one entry per message; only plain-text content is used.
            content = turn.get("content")
            if isinstance(content, str):
                pieces.append(content)
        else:
            human, assistant = turn
            # A side of a pair may be None (e.g. a turn without a reply yet).
            pieces.append(human or "")
            pieces.append(assistant or "")
    pieces.append(message)
    prompt = "".join(pieces)

    # Get the model response
    model.eval()
    with torch.no_grad():
        inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=True).to(model.device)
        out = model.generate(
            **inputs,
            max_new_tokens=64,
            do_sample=False,
            eos_token_id=tokenizer.convert_tokens_to_ids(["<eos>", "<end_of_turn>"])
        )

    # `input_ids` is (batch, seq_len): the prompt length is the second dimension.
    # `len()` would give the batch size and leave the prompt inside the reply.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(out[0][prompt_length:], skip_special_tokens=True).strip()

    return response

# Create the Gradio interface
# `chat_interface` is registered as the callback; the two `examples` entries are
# sample prompts handed to the ChatInterface constructor.
iface = gr.ChatInterface(
    fn=chat_interface,
    title="Gemma 3 4b Chat",
    description="Chat with the Gemma model.",
    examples=[
        ["Where is the capital of France?"],
        ["Who is Julius Caesar?"],
    ],
)

# Start the app with Gradio's `debug` flag turned off.
iface.launch(debug=False)

  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
