In [8]:
%%capture
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

import pandas as pd
import numpy as np
import shutil
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import AutoTokenizer, TFXLMRobertaForSequenceClassification, AutoModelForCausalLM
from imblearn.over_sampling import SMOTE
from unsloth import FastLanguageModel, is_bfloat16_supported
from trl import SFTTrainer
from transformers import TrainingArguments


In [9]:
# Read the review dataset from the Kaggle input directory.
file_path = "/kaggle/input/deepseekversion/dataset (2).csv"
data = pd.read_csv(file_path)

# Coerce every review to a string so downstream text processing never sees a
# non-string value.
data['Review'] = data['Review'].astype(str)

# Map each distinct value of the 'Emotion' column to an integer id. The fitted
# encoder stays in scope as `label_encoder`.
label_encoder = LabelEncoder()
data['encoded_emotion'] = label_encoder.fit_transform(data['Emotion'])

# 80/20 split that preserves the per-class proportions of the encoded labels.
# NOTE(review): `train_data` / `test_data` are not referenced again in the
# visible cells -- a separate Hugging Face split is built later; confirm
# whether this split is still needed.
split_options = dict(test_size=0.2, random_state=42, stratify=data['encoded_emotion'])
train_data, test_data = train_test_split(data, **split_options)


In [10]:
from unsloth import FastLanguageModel

# Settings reused by later cells (tokenization and the trainer read
# `max_seq_length`).
max_seq_length = 512
dtype = None  # presumably lets Unsloth pick the dtype itself -- TODO confirm
load_in_4bit = True

# Keyword arguments for loading the pre-quantized DeepSeek-R1 distilled
# Qwen 1.5B checkpoint.
pretrained_options = dict(
    model_name="unsloth/DeepSeek-R1-Distill-Qwen-1.5B-unsloth-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Returns the model together with its matching tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(**pretrained_options)



==((====))==  Unsloth 2025.2.12: Fast Qwen2 patching. Transformers: 4.48.3.
   \\   /|    GPU: Tesla P100-PCIE-16GB. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 6.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [11]:
# Attach LoRA adapters to the attention and MLP projection layers.
# NOTE: this rebinds `model`, so the cell is not idempotent -- re-running it
# without reloading the base model would wrap the already-wrapped model again.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
    "gate_proj", "up_proj", "down_proj",     # MLP projections
]

lora_options = dict(
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

model = FastLanguageModel.get_peft_model(model, **lora_options)


In [12]:
from datasets import Dataset

# Keep only the review text and its encoded label, renamed to the
# 'text' / 'labels' column names used by the following cells.
labelled_reviews = data[['Review', 'encoded_emotion']].rename(
    columns={'Review': 'text', 'encoded_emotion': 'labels'}
)
dataset_hf = Dataset.from_pandas(labelled_reviews)

# Split dataset: first hold out 20% as the test set, then carve 10% of the
# remaining training portion off as a validation set. Both splits use the same
# fixed seed so they are reproducible.
dataset_hf = dataset_hf.train_test_split(test_size=0.2, seed=3407)
train_val_split = dataset_hf["train"].train_test_split(test_size=0.1, seed=3407)

train_dataset = train_val_split["train"]
val_dataset = train_val_split["test"]
test_dataset = dataset_hf["test"]

# Print dataset sizes
print(f"Train set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")
print(f"Test set size: {len(test_dataset)}")


Train set size: 56253
Validation set size: 6251
Test set size: 15626


In [13]:
# Tokenization function
def tokenize_function(examples):
    """Tokenize the ``"text"`` entries of a batch with the notebook's tokenizer.

    Every sequence is truncated and padded to exactly ``max_seq_length``
    tokens. Relies on the module-level ``tokenizer`` and ``max_seq_length``
    defined in the model-loading cell.

    Args:
        examples: Batch mapping that contains a ``"text"`` entry.

    Returns:
        Whatever the tokenizer call returns for that batch.
    """
    encoded = tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=max_seq_length,
    )
    return encoded

# Apply tokenization to all three splits with identical settings: batched
# mapping, 8 examples per call to `tokenize_function`.
map_options = dict(batched=True, batch_size=8)

train_dataset = train_dataset.map(tokenize_function, **map_options)
val_dataset = val_dataset.map(tokenize_function, **map_options)
test_dataset = test_dataset.map(tokenize_function, **map_options)


Map:   0%|          | 0/56253 [00:00<?, ? examples/s]

Map:   0%|          | 0/6251 [00:00<?, ? examples/s]

Map:   0%|          | 0/15626 [00:00<?, ? examples/s]

In [14]:
# Use bf16 when the GPU supports it and fall back to fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Optimisation settings. Each optimizer step sees 4 * 2 = 8 examples per device
# (per-device batch size times gradient-accumulation steps).
training_args = TrainingArguments(
    output_dir="outputs",
    num_train_epochs=2,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    warmup_steps=5,
    weight_decay=0.01,
    optim="adamw_8bit",
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=20,
    logging_dir="logs",
    report_to="none",
    seed=3407,
)

# Supervised fine-tuning on the "text" column of the training split.
# NOTE(review): no evaluation schedule is configured in `training_args`, so
# `eval_dataset` is presumably only used when evaluation is requested
# explicitly -- confirm that this is intended.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=training_args,
)

# Run training and keep the returned statistics object.
trainer_stats = trainer.train()


Applying chat template to train dataset (num_proc=2):   0%|          | 0/56253 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=2):   0%|          | 0/56253 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=2):   0%|          | 0/56253 [00:00<?, ? examples/s]

Applying chat template to eval dataset (num_proc=2):   0%|          | 0/6251 [00:00<?, ? examples/s]

Tokenizing eval dataset (num_proc=2):   0%|          | 0/6251 [00:00<?, ? examples/s]

Tokenizing eval dataset (num_proc=2):   0%|          | 0/6251 [00:00<?, ? examples/s]

Step,Training Loss
20,3.4496
40,3.4118
60,2.8307
80,2.4045
100,2.0392
120,2.1847
140,2.2438
160,2.4126
180,2.3771
200,2.1399


In [15]:
# Print every record the trainer logged.
for record in trainer.state.log_history:
    print(record)

# Extract step numbers and loss values. Only records that carry a "loss" key
# are kept, so the two lists stay aligned index by index.
loss_records = [record for record in trainer.state.log_history if "loss" in record]
steps = [record["step"] for record in loss_records]
losses = [record["loss"] for record in loss_records]




{'loss': 3.4496, 'grad_norm': 1.0441828966140747, 'learning_rate': 0.000199786613557152, 'epoch': 0.002844141069397042, 'step': 20}
{'loss': 3.4118, 'grad_norm': 0.8361836075782776, 'learning_rate': 0.00019950209830002136, 'epoch': 0.005688282138794084, 'step': 40}
{'loss': 2.8307, 'grad_norm': 1.0122020244598389, 'learning_rate': 0.0001992175830428907, 'epoch': 0.008532423208191127, 'step': 60}
{'loss': 2.4045, 'grad_norm': 1.0860599279403687, 'learning_rate': 0.00019893306778576, 'epoch': 0.011376564277588168, 'step': 80}
{'loss': 2.0392, 'grad_norm': 1.6354293823242188, 'learning_rate': 0.00019864855252862935, 'epoch': 0.01422070534698521, 'step': 100}
{'loss': 2.1847, 'grad_norm': 0.9926186203956604, 'learning_rate': 0.0001983640372714987, 'epoch': 0.017064846416382253, 'step': 120}
{'loss': 2.2438, 'grad_norm': 1.6499133110046387, 'learning_rate': 0.00019807952201436802, 'epoch': 0.019908987485779295, 'step': 140}
{'loss': 2.4126, 'grad_norm': 2.5881173610687256, 'learning_rate': 

NameError: name 'plt' is not defined

In [1]:
import matplotlib.pyplot as plt

# Training-loss curve. This cell needs the `steps` and `losses` lists built
# from the trainer's log history to exist in the current kernel session; it
# raises NameError if run on a fresh kernel before they are defined.
fig, ax = plt.subplots()
ax.plot(steps, losses, label="Training Loss", marker="o")
ax.set_xlabel("Training Steps")
ax.set_ylabel("Loss")
ax.set_title("Training Loss Curve")
ax.legend()
ax.grid()
plt.show()

NameError: name 'steps' is not defined

In [None]:
# Evaluate the fine-tuned model on the held-out test split and show the
# metrics returned by the trainer.
# NOTE(review): `test_dataset` was tokenized by this notebook's own
# `tokenize_function`, whereas the train/validation splits were also processed
# by the trainer when it was constructed -- confirm that the test split has the
# format the trainer expects.
test_results = trainer.evaluate(eval_dataset=test_dataset)
print("Test Results:", test_results)


In [None]:
# Accuracy is computed directly with NumPy. The previous implementation called
# `load_metric`, which is not imported in the visible cells (NameError on a
# fresh kernel) and is no longer provided by current `datasets` releases.
def compute_metrics(eval_pred):
    """Compute accuracy from model outputs and reference labels.

    Args:
        eval_pred: Either a ``(logits, labels)`` pair, or an object exposing
            ``predictions`` and ``label_ids`` attributes (for example the
            3-field output of ``trainer.predict``). The predicted class is
            the argmax over the last axis of the logits.

    Returns:
        ``{"accuracy": float}`` -- the fraction of scored positions whose
        predicted class equals the label. Positions labelled ``-100`` (the
        ignore index used by Hugging Face losses) are not scored. The value
        is ``nan`` when there is nothing to score.
    """
    # Prefer attribute access so that outputs carrying extra fields (such as
    # a trailing metrics dict) do not break a two-value unpack.
    if hasattr(eval_pred, "predictions") and hasattr(eval_pred, "label_ids"):
        logits, labels = eval_pred.predictions, eval_pred.label_ids
    else:
        logits, labels = eval_pred

    labels = np.asarray(labels)
    scored = labels != -100
    if not scored.any():
        # Empty input, or every position ignored: accuracy is undefined.
        return {"accuracy": float("nan")}

    # NOTE(review): for a causal language model the logits at position t
    # predict token t+1, so comparing them with unshifted labels is presumably
    # not a meaningful accuracy -- confirm what is being measured here.
    predictions = np.argmax(np.asarray(logits), axis=-1)
    accuracy = float(np.mean(predictions[scored] == labels[scored]))
    return {"accuracy": accuracy}

# Run inference on the test split. `trainer.predict` returns an output with
# three fields (predictions, label_ids, metrics), so it cannot be unpacked into
# the two values `compute_metrics` expects. Pass the logits and labels as an
# explicit pair instead.
# NOTE(review): this materialises the logits for the whole test split at once;
# for a language-model head that may be very large -- confirm it fits in memory.
predictions = trainer.predict(test_dataset)
accuracy_score = compute_metrics((predictions.predictions, predictions.label_ids))

print("Final Test Accuracy:", accuracy_score["accuracy"])


In [None]:
# Write the trained model and the tokenizer into one directory, then bundle
# that directory into `deepseekr1_model_checkpoint.zip` for download.
# NOTE(review): "outputs" is also the trainer's `output_dir`, so anything the
# trainer wrote there would end up in the archive too -- confirm that this is
# wanted.
model_output_dir = "outputs"

for artifact in (trainer.model, tokenizer):
    artifact.save_pretrained(model_output_dir)

shutil.make_archive(
    base_name="deepseekr1_model_checkpoint",
    format="zip",
    root_dir=model_output_dir,
)
print("Model and tokenizer saved successfully!")


In [None]:
# Reload the saved artifacts from disk. `model_path` is a placeholder and must
# be replaced with the directory the checkpoint archive was extracted to.
# NOTE(review): the saved model was wrapped with LoRA adapters, so the
# directory presumably contains adapter weights rather than a full model --
# confirm that `AutoModelForCausalLM` can load it in the target environment.
model_path = "path_to_extracted_folder"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
