In [3]:
!pip install peft

Collecting peft
  Downloading peft-0.12.0-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.12.0-py3-none-any.whl (296 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.4/296.4 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: peft
Successfully installed peft-0.12.0


In [4]:
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model
from datasets import load_dataset

In [5]:
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [32]:
# Checkpoint identifier shared by the model and its tokenizer.
MODEL_ID = "ytu-ce-cosmos/turkish-gpt2-large-750m-instruct-v0.1"

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map='auto')

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [7]:
# Switch off gradient tracking for every parameter currently on the model.
for frozen_weight in model.parameters():
    frozen_weight.requires_grad = False

In [8]:
# Keyword settings handed to LoraConfig, collected in one place so they can be
# inspected or tweaked before the config object is built.
lora_settings = dict(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)
config = LoraConfig(**lora_settings)

In [9]:
# Rebind `model` to the object returned by get_peft_model for the LoRA config above.
# NOTE(review): this cell both reads and overwrites `model`, so re-running it
# without first reloading the base model would presumably wrap an
# already-wrapped model — confirm before re-executing.
model = get_peft_model(model, config)



In [10]:
# Copy the directory tree at source_dir to destination_dir.
import shutil

# Source directory (left blank here; fill in before running)
source_dir = ""

# Destination directory (left blank here; fill in before running)
destination_dir = ""

# dirs_exist_ok=True lets the cell be re-run after the destination has been
# created; without it copytree raises FileExistsError on the second run.
shutil.copytree(source_dir, destination_dir, dirs_exist_ok=True)

'/kaggle/working/train-data-2'

In [11]:
# LOAD AND STRUCTURE DATA
# data_files is blank here; point it at the CSV before running.
data = load_dataset('csv', data_files="")
# Drop the 'Unnamed: 0' column (presumably a saved DataFrame index — confirm).
data = data.remove_columns('Unnamed: 0')
# Hold out 12% of the rows for evaluation. The fixed seed keeps the train/test
# partition identical across kernel restarts so runs are comparable.
data = data['train'].train_test_split(test_size=0.12, seed=42)

Generating train split: 0 examples [00:00, ? examples/s]

In [12]:
#data = data.map(lambda samples: tokenizer(samples['instruct_ytu']), batched=True)

def tokenize_function(examples):
    """Tokenize the "instruct" field of a batch of examples.

    Calls the notebook-level ``tokenizer`` with truncation enabled,
    ``max_length=64`` and ``padding="max_length"``, and returns whatever the
    tokenizer returns.
    """
    texts = examples["instruct"]
    return tokenizer(texts, padding="max_length", truncation=True, max_length=64)

# Tokenize in batches using two worker processes, dropping the 'instruct_ytu'
# column from the result.
tokenized_datasets = data.map(
    tokenize_function,
    batched=True,
    num_proc=2,
    remove_columns=['instruct_ytu'],
)

def copy_input_ids(example):
    """Attach a ``labels`` entry that is an independent copy of ``input_ids``.

    The example is modified in place and the same object is returned.
    """
    token_ids = example["input_ids"]
    example["labels"] = token_ids.copy()
    return example

# Apply copy_input_ids one example at a time (map is called without batched=True)
# so each row gains a `labels` column.
# NOTE(review): the Trainer cell uses DataCollatorForLanguageModeling(mlm=False),
# which may build its own labels from input_ids — confirm this column is still needed.
tokenized_datasets = tokenized_datasets.map(copy_input_ids)

Map (num_proc=2):   0%|          | 0/3256 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/445 [00:00<?, ? examples/s]

Map:   0%|          | 0/3256 [00:00<?, ? examples/s]

Map:   0%|          | 0/445 [00:00<?, ? examples/s]

In [13]:
def print_trainable_parameters(model):
    """
    Report how many of the model's parameters have ``requires_grad`` set.

    Walks ``model.named_parameters()`` once, recording each parameter's element
    count, then prints the trainable total, the overall total, and the
    trainable share as a percentage.
    """
    sizes = [(param.numel(), param.requires_grad) for _, param in model.named_parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, needs_grad in sizes if needs_grad)
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [15]:
# Print the trainable vs. total parameter counts for the current `model`.
print_trainable_parameters(model)

trainable params: 2949120 || all params: 776980480 || trainable%: 0.37956165900075123


In [16]:
# TRAINING
# Run configuration, built on its own so it can be inspected before use.
training_args = transformers.TrainingArguments(
    output_dir='outputs',

    # Optimisation settings.
    num_train_epochs=36,
    per_device_train_batch_size=32,
    gradient_accumulation_steps=32,
    # warmup_steps=100,
    learning_rate=1e-3,
    weight_decay=0.01,
    auto_find_batch_size=True,

    # Evaluate, log and checkpoint on the same step-based cadence.
    evaluation_strategy="steps",
    eval_steps=2,
    logging_strategy="steps",
    logging_steps=2,
    save_strategy="steps",
    save_steps=2,
    save_total_limit=10,
)

# Language-modeling collator with masked-LM mode switched off.
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets["test"],
    data_collator=causal_lm_collator,
)

# Disable use_cache on the model config before training starts.
model.config.use_cache = False

2024-08-07 07:34:16.270702: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-07 07:34:16.270822: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-07 07:34:16.398579: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [18]:
import os

# Set WANDB_MODE to "disabled" in this process's environment so the Weights &
# Biases integration stays off for the training run.
os.environ.update({"WANDB_MODE": "disabled"})

In [19]:
# Run the training loop configured on `trainer`; as the last expression in the
# cell, the returned value is displayed as the cell output.
trainer.train()



Step,Training Loss,Validation Loss
2,5.128,4.224212
4,4.1295,3.376271
6,3.3196,2.847066
8,2.8761,2.636115
10,2.7138,2.558121
12,2.5958,2.506381
14,2.539,2.482688
16,2.5012,2.46097
18,2.4932,2.44574
20,2.4591,2.436125


TrainOutput(global_step=108, training_loss=2.477675053808424, metrics={'train_runtime': 4389.7274, 'train_samples_per_second': 26.702, 'train_steps_per_second': 0.025, 'total_flos': 3.013657596002304e+16, 'train_loss': 2.477675053808424, 'epoch': 33.88235294117647})

In [20]:
# Persist the model's state dict, then load it back from the same path so the
# checkpoint that was just written is the one that gets verified. Using a
# single variable keeps the save and load targets from drifting apart.
checkpoint_path = 'lora_33.pt'
torch.save(model.state_dict(), checkpoint_path)
model.load_state_dict(torch.load(checkpoint_path))