In [1]:
import os
import copy
from dataclasses import dataclass

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torch import Tensor

#from datasets import Dataset

from tqdm import tqdm

from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModel,
    AutoModelForCausalLM,
)

from peft import LoraModel, PeftModel, LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType

import ModelsUtils as Utils
import Configurations as Configs


In [2]:
# Report the PyTorch build and choose the compute device: the GPU when CUDA
# is usable, otherwise the CPU.
print(f'Torch version: {torch.__version__}')
print(f'Torch is build with CUDA: {torch.cuda.is_available()}')
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print('Torch device :', device)
print('------------------------------')

# To force CPU execution regardless of CUDA availability, uncomment:
# device = 'cpu'

Torch version: 2.5.1+cu118
Torch is build with CUDA: True
Torch device : cuda
------------------------------


## Config

In [3]:
# Build the project's ConfigManager from the config file name and select its
# `micro` attribute; every later cell reads its hyper-parameters and paths
# from `config`.
# NOTE(review): the path is relative, so it is presumably resolved against the
# notebook's working directory -- confirm in ConfigManager.
config_file = 'Configs.py'
manager = Configs.ConfigManager(config_file)
config = manager.micro

In [4]:
# LoRA adapter settings. Rank, scaling factor, dropout and bias mode are all
# taken from `config`.
lora_config = LoraConfig(
    # Adapters are attached only to the attention query/key/value projections.
    # Alternatives that were tried and left disabled: "all-linear", "self_attn",
    # and restricting the adapted layers via layers_to_transform/layers_pattern.
    target_modules=["q_proj", "k_proj", "v_proj"],
    r=config.lora_r,
    lora_alpha=config.lora_alpha,
    lora_dropout=config.lora_dropout,
    bias=config.lora_bias,
    # task_type is intentionally not set (FEATURE_EXTRACTION / SEQ_CLS were considered).
)

In [5]:
# Hugging Face access token passed to the tokenizer and base-model downloads.
# It is read from the HF_TOKEN environment variable so the secret never has to
# be pasted into (and accidentally saved or committed with) the notebook.
# When the variable is not set this falls back to the previous empty string.
access_token = os.environ.get("HF_TOKEN", "")

___________________________________________________________________________

## Tokenize

In [6]:
# Load the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    config.transformers_basemodel_path,
    token=access_token,
)
# Pad on the right-hand side of each sequence.
tokenizer.padding_side = "right"
# Ask the tokenizer to append <eos> at the end of every encoded sequence.
tokenizer.add_eos_token = True

## Model

In [7]:
# Optional 4-bit (NF4, double-quantised) loading, enabled through config.quantize.
# Any other value loads the model without a quantisation config.
if config.quantize == '4bit':
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
else:
    quantization_config = None

# Load the bare backbone (AutoModel, i.e. without a task head) onto the
# selected device.
# NOTE(review): torch_dtype is float16 while the 4-bit compute dtype is
# bfloat16 -- confirm this mix is intended.
model_base = AutoModel.from_pretrained(
    config.transformers_basemodel_path,
    token=access_token,
    quantization_config=quantization_config,
    device_map=device,
    torch_dtype=torch.float16,
)

# Show the module tree of the loaded model.
model_base

Gemma2Model(
  (embed_tokens): Embedding(256000, 2304, padding_idx=0)
  (layers): ModuleList(
    (0-25): 26 x Gemma2DecoderLayer(
      (self_attn): Gemma2Attention(
        (q_proj): Linear4bit(in_features=2304, out_features=2048, bias=False)
        (k_proj): Linear4bit(in_features=2304, out_features=1024, bias=False)
        (v_proj): Linear4bit(in_features=2304, out_features=1024, bias=False)
        (o_proj): Linear4bit(in_features=2048, out_features=2304, bias=False)
        (rotary_emb): Gemma2RotaryEmbedding()
      )
      (mlp): Gemma2MLP(
        (gate_proj): Linear4bit(in_features=2304, out_features=9216, bias=False)
        (up_proj): Linear4bit(in_features=2304, out_features=9216, bias=False)
        (down_proj): Linear4bit(in_features=9216, out_features=2304, bias=False)
        (act_fn): PytorchGELUTanh()
      )
      (input_layernorm): Gemma2RMSNorm((2304,), eps=1e-06)
      (post_attention_layernorm): Gemma2RMSNorm((2304,), eps=1e-06)
      (pre_feedforward_layernor

In [8]:
# Persist the base model and its tokenizer side by side under
# config.basemodel_path. This runs before the k-bit preparation and LoRA
# wrapping in the next cell, so what is written is the model as it was loaded.
save_path = config.basemodel_path

#save base model
model_base.save_pretrained(save_path)
# Last expression of the cell: the returned tuple of written tokenizer file
# paths is what the notebook displays.
tokenizer.save_pretrained(save_path)

('../BaseModel/gemma2_2b_unsloth_fp16_4bit\\tokenizer_config.json',
 '../BaseModel/gemma2_2b_unsloth_fp16_4bit\\special_tokens_map.json',
 '../BaseModel/gemma2_2b_unsloth_fp16_4bit\\tokenizer.json')

In [9]:
# Turn the base model into a LoRA-adapted PEFT model.
# use_cache is switched off on the base model before wrapping and again on the
# wrapped model's config.
model_base.config.use_cache = False
# NOTE(review): this is called unconditionally, also when config.quantize is
# not '4bit' -- confirm that is the intended behaviour for non-quantised runs.
model_base = prepare_model_for_kbit_training(model_base)
# NOTE(review): `model_base` is rebound above and then wrapped here, so
# re-running this cell without re-running the load cell would presumably wrap
# an already adapted model -- verify before re-executing out of order.
lora_model = get_peft_model(model_base, lora_config)
lora_model.config.use_cache = False
# Show the module tree with the injected LoRA layers.
lora_model

PeftModel(
  (base_model): LoraModel(
    (model): Gemma2Model(
      (embed_tokens): Embedding(256000, 2304, padding_idx=0)
      (layers): ModuleList(
        (0-25): 26 x Gemma2DecoderLayer(
          (self_attn): Gemma2Attention(
            (q_proj): lora.Linear4bit(
              (base_layer): Linear4bit(in_features=2304, out_features=2048, bias=False)
              (lora_dropout): ModuleDict(
                (default): Dropout(p=0.05, inplace=False)
              )
              (lora_A): ModuleDict(
                (default): Linear(in_features=2304, out_features=16, bias=False)
              )
              (lora_B): ModuleDict(
                (default): Linear(in_features=16, out_features=2048, bias=False)
              )
              (lora_embedding_A): ParameterDict()
              (lora_embedding_B): ParameterDict()
              (lora_magnitude_vector): ModuleDict()
            )
            (k_proj): lora.Linear4bit(
              (base_layer): Linear4bit(in_features=2

In [10]:
# Print how many parameters are trainable versus the total parameter count.
lora_model.print_trainable_parameters()

trainable params: 4,579,328 || all params: 2,618,921,216 || trainable%: 0.1749


In [11]:
# Display the PEFT-wrapped model's module tree (the same object that was
# already shown when `lora_model` was created).
lora_model

PeftModel(
  (base_model): LoraModel(
    (model): Gemma2Model(
      (embed_tokens): Embedding(256000, 2304, padding_idx=0)
      (layers): ModuleList(
        (0-25): 26 x Gemma2DecoderLayer(
          (self_attn): Gemma2Attention(
            (q_proj): lora.Linear4bit(
              (base_layer): Linear4bit(in_features=2304, out_features=2048, bias=False)
              (lora_dropout): ModuleDict(
                (default): Dropout(p=0.05, inplace=False)
              )
              (lora_A): ModuleDict(
                (default): Linear(in_features=2304, out_features=16, bias=False)
              )
              (lora_B): ModuleDict(
                (default): Linear(in_features=16, out_features=2048, bias=False)
              )
              (lora_embedding_A): ParameterDict()
              (lora_embedding_B): ParameterDict()
              (lora_magnitude_vector): ModuleDict()
            )
            (k_proj): lora.Linear4bit(
              (base_layer): Linear4bit(in_features=2

In [12]:
# Wrap the LoRA-adapted backbone in the project's PreferencePredictionModel.
# The feature/hidden dimensions and the number of classes come from `config`.
predictionModel_original = Utils.PreferencePredictionModel(
    gemma_model=lora_model,
    num_classes=config.num_classes,
    hidden_dim=config.hidden_dim,
    feature_dim=config.feature_dims,
)

In [13]:
# Display the full module tree of the assembled prediction model.
predictionModel_original

PreferencePredictionModel(
  (gemma_model): PeftModel(
    (base_model): LoraModel(
      (model): Gemma2Model(
        (embed_tokens): Embedding(256000, 2304, padding_idx=0)
        (layers): ModuleList(
          (0-25): 26 x Gemma2DecoderLayer(
            (self_attn): Gemma2Attention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2304, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2304, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
     

## Save

In [14]:
# Write a checkpoint of the freshly assembled model through the project helper.
# No training has been run in the cells above, so this presumably serves as the
# untrained baseline (hence the "Original_notrain" name).
# NOTE(review): the output location is decided inside
# Utils.custom_save_model_chkpt from `config` -- confirm there.
Utils.custom_save_model_chkpt(predictionModel_original, config, checkpointName="Original_notrain")