In [2]:
!pip install -q transformers datasets accelerate peft bitsandbytes sentencepiece scikit-learn

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.4/485.4 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m107.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m86.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m57.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import os
import torch
import numpy as np
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
    TaskType
)
from sklearn.metrics import precision_recall_fscore_support, classification_report, f1_score

In [4]:
# CUDA caching-allocator options for PyTorch.
# NOTE(review): this is set after `import torch`; presumably it is still read
# before the first CUDA allocation -- confirm it takes effect in this runtime.
os.environ.update(
    {"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True,max_split_size_mb:128"}
)

# Ask cuDNN for deterministic algorithms and turn its autotuner off.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [5]:
def optimize_memory():
    """
    Release cached GPU memory, reset peak-memory statistics and print GPU usage.

    The float32 matmul precision is always set to 'medium'. All CUDA-specific
    work is skipped (with a message) when no CUDA device is available, so the
    function is safe to call on a CPU-only runtime.

    Returns:
        None. Memory figures are printed, not returned.
    """
    # Lets float32 matmuls use lower-precision internal math. This is a
    # speed/precision trade-off; it does not by itself shrink memory usage.
    torch.set_float32_matmul_precision('medium')

    if not torch.cuda.is_available():
        print("CUDA is not available; skipping GPU cache reset and memory report.")
        return

    # Clear CUDA cache and restart peak-usage bookkeeping
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()

    def print_torch_memory():
        # Fallback report based on PyTorch's own allocator counters.
        print(f"Allocated GPU Memory: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
        print(f"Cached GPU Memory: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")

    def print_gpu_utilization():
        try:
            import pynvml
        except ImportError:
            print("NVML not available. Using PyTorch memory tracking.")
            print_torch_memory()
            return

        # nvmlInit can fail even when the package imports (e.g. driver problems);
        # report it and fall back instead of aborting the whole notebook run.
        try:
            pynvml.nvmlInit()
        except Exception as init_error:
            print(f"Error initialising NVML: {init_error}")
            print_torch_memory()
            return

        try:
            # Queries NVML device index 0 only.
            handle = pynvml.nvmlDeviceGetHandleByIndex(0)
            info = pynvml.nvmlDeviceGetMemoryInfo(handle)

            print(f"GPU Total Memory: {info.total / 1024**2:.2f} MB")
            print(f"GPU Used Memory: {info.used / 1024**2:.2f} MB")
            print(f"GPU Free Memory: {info.free / 1024**2:.2f} MB")

        except Exception as device_error:
            print(f"Error getting GPU memory info: {device_error}")

        finally:
            # Only reached after a successful nvmlInit, so shutdown is always paired.
            pynvml.nvmlShutdown()

    # Print GPU utilization
    print_gpu_utilization()


In [6]:
def load_model_with_minimal_memory(model_id):
    """
    Load a causal LM in 4-bit NF4 quantization, preferring Flash Attention 2.

    The first attempt requests `attn_implementation="flash_attention_2"`. If
    that fails for any reason (for example the flash-attn package is missing),
    the error is printed and the model is loaded again with the library's
    default attention implementation. Previously both attempts were identical,
    so the fallback could never behave differently from the first try.

    Args:
        model_id: Hub id or local path passed to `from_pretrained`.

    Returns:
        The loaded model object returned by `AutoModelForCausalLM.from_pretrained`.

    Raises:
        Whatever the fallback `from_pretrained` call raises if it also fails.
    """
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )

    # Arguments shared by both attempts, kept in one place so they cannot drift apart.
    load_kwargs = dict(
        quantization_config=bnb_config,
        device_map="auto",  # let the library place weights on available devices
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True,
        use_cache=False,
    )

    try:
        return AutoModelForCausalLM.from_pretrained(
            model_id,
            attn_implementation="flash_attention_2",
            **load_kwargs,
        )
    except Exception as e:
        # Best-effort: Flash Attention is optional, so report and retry without it.
        print(f"Error loading model with Flash Attention: {e}")
        return AutoModelForCausalLM.from_pretrained(model_id, **load_kwargs)

In [7]:
def create_efficient_tokenizer(model_id):
    """
    Build the fast tokenizer for `model_id`, configured for right-side padding.

    BOS and EOS insertion are both requested at load time, and the EOS token
    is reused as the padding token.

    Args:
        model_id: Identifier passed straight to `AutoTokenizer.from_pretrained`.

    Returns:
        The configured tokenizer.
    """
    special_token_options = {"add_eos_token": True, "add_bos_token": True}
    tok = AutoTokenizer.from_pretrained(model_id, use_fast=True, **special_token_options)

    # Pad on the right, using EOS as the pad token.
    tok.padding_side = "right"
    tok.pad_token = tok.eos_token

    return tok


In [8]:
def prepare_dataset(dataset, tokenizer, max_length=512, num_proc=4, seed=42):
    """
    Shuffle `dataset` and tokenize it into prompt + answer training sequences.

    Each example becomes
    "<instruction>\\n\\nReview: <review>\\n\\nSentiment (0=Negative, 1=Positive): <label>".
    The answer is part of the text because this notebook trains with
    `DataCollatorForLanguageModeling(mlm=False)`, which derives the training
    targets from `input_ids`; without the answer in the sequence the model is
    never shown the sentiment. Over-long reviews are clipped *before* the
    prompt is assembled so that right-truncation cannot cut off the
    instruction suffix and the answer.

    Args:
        dataset: Object with `shuffle(seed=...)`, `column_names` and a batched
            `map(...)`; its batches must provide 'text' and 'label' columns.
        tokenizer: Callable tokenizer that also provides `decode`.
        max_length: Fixed length every sequence is padded/truncated to.
        num_proc: Worker processes forwarded to `dataset.map`.
        seed: Shuffle seed.

    Returns:
        The mapped dataset with the tokenizer outputs plus a 'labels' column
        holding the original class ids (kept for backward compatibility).

    Raises:
        ValueError: if `max_length` leaves no room for any review tokens.
    """
    prompt_head = "Sentiment Analysis Task: Classify the sentiment of the following movie review.\n\nReview: "
    prompt_tail = "\n\nSentiment (0=Negative, 1=Positive):"

    # Tokens consumed by everything except the review itself: instruction,
    # answer and whatever special tokens the tokenizer adds.
    overhead = len(tokenizer(f"{prompt_head}{prompt_tail} 0")["input_ids"])
    # Decoding a clipped review and re-tokenizing it inside the prompt may not
    # reproduce the exact token count, so keep a few tokens in reserve.
    slack = 4
    review_budget = max_length - overhead - slack
    if review_budget < 1:
        raise ValueError(
            f"max_length={max_length} is too small: the prompt template alone needs "
            f"{overhead} tokens"
        )

    def clip_review(text, token_ids):
        # Short reviews are passed through untouched to avoid a lossy decode round-trip.
        if len(token_ids) <= review_budget:
            return text
        return tokenizer.decode(token_ids[:review_budget], skip_special_tokens=True)

    def tokenize_function(examples):
        # Tokenize the bare reviews with one token more than the budget: a
        # result longer than the budget means the review must be clipped.
        review_ids = tokenizer(
            examples['text'],
            add_special_tokens=False,
            truncation=True,
            max_length=review_budget + 1,
        )["input_ids"]

        prompts = [
            f"{prompt_head}{clip_review(text, ids)}{prompt_tail} {label}"
            for text, ids, label in zip(examples['text'], review_ids, examples['label'])
        ]

        # Plain Python lists are returned; `map` stores them without needing tensors.
        tokenized_inputs = tokenizer(
            prompts,
            truncation=True,
            padding='max_length',
            max_length=max_length,
        )

        tokenized_inputs['labels'] = list(examples['label'])

        return tokenized_inputs

    shuffled = dataset.shuffle(seed=seed)
    return shuffled.map(
        tokenize_function,
        batched=True,
        remove_columns=shuffled.column_names,
        num_proc=num_proc,
    )

In [15]:

def create_memory_optimized_training_args():
    """
    Build the `TrainingArguments` used for the memory-constrained fine-tune.

    The evaluation-strategy keyword is chosen from the installed library's
    signature: newer transformers releases name it `eval_strategy`, older ones
    `evaluation_strategy`. The notebook installs unpinned packages, so
    hard-coding either spelling can fail with an unexpected-keyword error.

    Returns:
        A `TrainingArguments` instance.
    """
    import inspect

    accepted = inspect.signature(TrainingArguments.__init__).parameters
    eval_strategy_key = (
        "eval_strategy" if "eval_strategy" in accepted else "evaluation_strategy"
    )

    return TrainingArguments(
        output_dir="./deepseek_imdb_finetune",

        # One sample per step, accumulated to an effective batch of 16.
        per_device_train_batch_size=1,
        gradient_accumulation_steps=16,
        gradient_checkpointing=True,

        learning_rate=1e-4,
        weight_decay=0.01,

        max_grad_norm=0.3,
        # A positive max_steps takes precedence over num_train_epochs.
        max_steps=200,
        num_train_epochs=1,

        fp16=True,
        optim="adamw_torch_fused",

        logging_dir="./logs",
        logging_steps=10,

        # Evaluation and saving both run per epoch (they must match for
        # load_best_model_at_end); eval_steps only matters for a "steps" strategy.
        eval_steps=50,
        save_strategy="epoch",

        save_total_limit=3,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",

        dataloader_num_workers=4,
        dataloader_prefetch_factor=2,

        **{eval_strategy_key: "epoch"},
    )

In [16]:
def train_deepseek_on_imdb(
    model_id="deepseek-ai/deepseek-llm-7b-base",
    compute_metrics=None,
    final_model_dir="./final_deepseek_imdb_model",
):
    """
    Fine-tune a 4-bit quantized causal LM on IMDB with LoRA adapters.

    Steps: reset GPU memory state, load model and tokenizer, wrap the model
    with LoRA, tokenize the IMDB train/test splits, train with `Trainer`, and
    save the result.

    Args:
        model_id: Model to load; defaults to the DeepSeek 7B base model.
        compute_metrics: Optional callable forwarded to `Trainer`. It defaults
            to None so the function no longer relies on a global
            `compute_metrics` having been defined in some other cell.
        final_model_dir: Directory passed to `trainer.save_model`.

    Returns:
        The `Trainer` after training, for later evaluation/inspection.
    """
    optimize_memory()

    model = load_model_with_minimal_memory(model_id)
    tokenizer = create_efficient_tokenizer(model_id)

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=16,  # LoRA rank
        lora_alpha=32,
        lora_dropout=0.1,
        # Adapters on the attention projections and the MLP projections.
        target_modules=[
            "q_proj", "k_proj", "v_proj",
            "o_proj", "gate_proj",
            "down_proj", "up_proj"
        ]
    )

    # Prepare the quantized model for training first, then attach the adapters.
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, peft_config)

    dataset = load_dataset("imdb")

    train_dataset = prepare_dataset(dataset['train'], tokenizer)
    eval_dataset = prepare_dataset(dataset['test'], tokenizer)

    # mlm=False selects the causal (next-token) language-modeling objective.
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )

    training_args = create_memory_optimized_training_args()

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        data_collator=data_collator,
        compute_metrics=compute_metrics
    )

    trainer.train()

    trainer.save_model(final_model_dir)

    return trainer

In [None]:
# Run the whole fine-tuning pipeline and keep the returned Trainer so the
# following cells can inspect its logs and evaluate it.
trainer = train_deepseek_on_imdb()

In [None]:
# Prints the trainer's raw log history in full. The plotting cell below filters
# these records by a 'loss' key, which suggests not every record is a loss entry.
print("Training Loss History:", trainer.state.log_history)

In [None]:
# Evaluate with the trainer's configured eval dataset; the result is stored in
# `eval_results` but not displayed by this cell.
eval_results = trainer.evaluate()

In [None]:
import matplotlib.pyplot as plt

# Keep only the log records that carry a training loss.
loss_entries = [entry for entry in trainer.state.log_history if 'loss' in entry]
training_losses = [entry['loss'] for entry in loss_entries]
# Plot against the logged global step so the x-axis really is "Training Steps";
# fall back to the record's position if a record has no 'step' field.
training_steps = [entry.get('step', index) for index, entry in enumerate(loss_entries)]

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(training_steps, training_losses)
ax.set_title('Training Loss Over Steps')
ax.set_xlabel('Training Steps')
ax.set_ylabel('Loss')
plt.show()

In [None]:
MODEL_ID = "deepseek-ai/deepseek-llm-7b-base"
tokenizer = create_efficient_tokenizer(MODEL_ID)
dataset = load_dataset("imdb", split="train+test")

# Tokenize with prepare_dataset (which also shuffles with a fixed seed).
# The previous call passed (dataset, tokenizer) to create_efficient_tokenizer,
# which accepts a single model id and therefore raised a TypeError.
tokenized_dataset = prepare_dataset(
        dataset,
        tokenizer
    )

# First 80% of the shuffled rows for training, remaining 20% for evaluation.
# NOTE(review): this pool merges the IMDB train and test splits, while
# train_deepseek_on_imdb() trains on the full train split, so eval_dataset can
# contain reviews the model was trained on -- confirm this overlap is intended.
split_index = int(len(tokenized_dataset) * 0.8)
train_dataset = tokenized_dataset.select(range(split_index))
eval_dataset = tokenized_dataset.select(range(split_index, len(tokenized_dataset)))

In [None]:
test_dataset = eval_dataset  # reuse the held-out slice built in the previous cell
# NOTE(review): `trainer` wraps an AutoModelForCausalLM and its batches come
# from DataCollatorForLanguageModeling(mlm=False), so `predictions.predictions`
# is presumably per-token vocabulary logits and `label_ids` per-token targets,
# not one 0/1 sentiment per review -- confirm the array shapes before trusting
# the accuracy/F1 numbers below.
predictions = trainer.predict(test_dataset)

logits = predictions.predictions
# argmax over the last axis picks the highest-scoring index per position.
predicted_labels = np.argmax(logits, axis=-1)
true_labels = predictions.label_ids

# accuracy_score is first imported here; f1_score and classification_report are
# already imported in the notebook's import cell, and classification_report is
# not used in this cell.
from sklearn.metrics import accuracy_score, f1_score, classification_report

accuracy = accuracy_score(true_labels, predicted_labels)
# average='binary' scores the positive class only.
f1 = f1_score(true_labels, predicted_labels, average='binary')

print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")