In [1]:
import os
import copy
from dataclasses import dataclass

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torch import Tensor

#from datasets import Dataset

from tqdm import tqdm

from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModel,
    AutoModelForCausalLM,
)

from peft import LoraModel, PeftModel, LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType

import ModelsUtils as Utils
import Configurations as Configs


In [None]:
# Resolve the compute device once: CUDA when torch reports it as available,
# CPU otherwise. Later cells use `device` when loading the model.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Environment report (torch version, CUDA availability, chosen device).
print('Torch version:', torch.__version__)
print('Torch is build with CUDA:', torch.cuda.is_available())
print(f'Torch device : {device}')
print('------------------------------')

#device = 'cpu'

## Config

In [3]:
# Path handed to ConfigManager.
# NOTE(review): the module imported above is `Configurations` (aliased `Configs`),
# while this path is 'Configs.py' — confirm a file with that name exists.
config_file = 'Configs.py'
manager = Configs.ConfigManager(config_file)
# Active profile for this run; later cells read LoRA settings, model paths,
# quantization mode and classifier dimensions from `config`.
config = manager.micro
# Alternative profile (swap with the line above to use it):
#config = manager.BGE_multi_gemma2_9b_fp16_4bit_h1536

In [4]:
# LoRA adapter settings. Rank, scaling factor, dropout and bias mode are all
# taken from the active `config`; no task_type is set.
lora_config = LoraConfig(
    # Adapters are attached to the query/key/value projections only.
    target_modules=["q_proj", "k_proj", "v_proj"],
    r=config.lora_r,
    lora_alpha=config.lora_alpha,
    lora_dropout=config.lora_dropout,
    bias=config.lora_bias,
    # Disabled alternatives kept for reference:
    #   target_modules=["all-linear"]
    #   target_modules=["self_attn"]
    #   layers_to_transform=[i for i in range(config.max_layers) if i >= config.freeze_layers]
    #   layers_to_transform=[0]
    #   layers_pattern="layers"
    #   task_type=TaskType.FEATURE_EXTRACTION  (or SEQ_CLS)
)

In [5]:
# Hugging Face access token used by the `from_pretrained` calls below.
# Never paste a real token into the notebook: export HF_TOKEN in the
# environment that launches the kernel instead.
# When the variable is unset or blank this resolves to None (not an empty
# string), so the loaders receive "no explicit token" rather than an empty
# credential and can apply their default token handling.
access_token = os.environ.get("HF_TOKEN", "").strip() or None

___________________________________________________________________________

## Tokenize

In [6]:
# Load the tokenizer from the same checkpoint path the base model is loaded from.
tokenizer = AutoTokenizer.from_pretrained(config.transformers_basemodel_path, token=access_token)
# Request that <eos> be appended to encoded sequences.
# NOTE(review): whether setting this attribute after loading takes effect depends
# on the tokenizer class — verify on an encoded sample.
tokenizer.add_eos_token = True      # We'll add <eos> at the end
# Padding tokens go after the real tokens.
tokenizer.padding_side = "right"

## Model

In [None]:
# Optional 4-bit NF4 quantization (double quantization, bfloat16 compute).
# It is only built when the active config asks for '4bit'; otherwise no
# quantization config is passed to the loader.
quantization_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    if config.quantize == '4bit'
    else None
)

# Load the checkpoint through AutoModel and place it on `device`.
# NOTE(review): weights are requested as float16 while the 4-bit compute dtype
# above is bfloat16 — confirm the mix is intended.
model_base = AutoModel.from_pretrained(
    config.transformers_basemodel_path,
    token=access_token,
    quantization_config=quantization_config,
    device_map=device,
    torch_dtype=torch.float16,
)

# Show the loaded module tree as the cell output.
model_base

In [None]:
# Target directory for the local copy of the base model, taken from the config.
save_path = config.basemodel_path

# Save the base model and its tokenizer side by side in `save_path`.
# This cell sits before the one that attaches the LoRA adapters; run it in that
# order so the object saved here is the model as loaded.
# NOTE(review): when config.quantize == '4bit' the instance saved here is the
# quantized one loaded above — confirm downstream loading expects that.
model_base.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

In [None]:
# Turn off the model config's `use_cache` flag before wrapping.
model_base.config.use_cache = False
# NOTE(review): this call runs even when `quantization_config` is None. Per the
# PEFT implementation it freezes the base weights and upcasts non-quantized
# half-precision parameters to float32 — confirm that is wanted for
# un-quantized profiles (verify against the installed PEFT version).
model_base = prepare_model_for_kbit_training(model_base)
# Wrap the prepared backbone with the adapters described by `lora_config`.
# NOTE(review): re-running this cell feeds the already-processed `model_base`
# through both calls again; reload the model first.
lora_model = get_peft_model(model_base, lora_config)
# Set the same flag on the wrapped model's config as well.
lora_model.config.use_cache = False
# Show the wrapped module tree as the cell output.
lora_model

In [None]:
# Print the trainable-parameter summary of the LoRA-wrapped model.
lora_model.print_trainable_parameters()

In [None]:
# Display the LoRA-wrapped model's module tree (the cell that creates
# `lora_model` already ends with this same expression).
lora_model

In [12]:
# Build the project's PreferencePredictionModel around the LoRA-wrapped model;
# feature/hidden dimensions and the class count come from the active config.
# NOTE(review): the inline comment used to read "remove causalLM head", but this
# call removes nothing — the backbone was loaded with AutoModel (not
# AutoModelForCausalLM). Confirm no further stripping is expected inside
# PreferencePredictionModel.
predictionModel_original = Utils.PreferencePredictionModel(
                gemma_model=lora_model, # LoRA-wrapped backbone created above
                feature_dim=config.feature_dims,
                hidden_dim=config.hidden_dim,
                num_classes=config.num_classes)

In [None]:
# Display the assembled prediction model's module tree.
predictionModel_original

## Save

In [14]:
# Save the assembled model through the project helper under the checkpoint name
# "Original_notrain". No training step appears in this notebook before this call.
# NOTE(review): the output location is decided inside custom_save_model_chkpt
# (presumably from `config`) — check the helper for the exact path.
Utils.custom_save_model_chkpt(predictionModel_original, config, checkpointName="Original_notrain")