In [1]:
pip install bitsandbytes numba scikit-learn

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-

In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification, \
                         TrainingArguments, Trainer, DataCollatorWithPadding, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, make_scorer
import os
import torch
import pdb

# Tokenizing

## Label encoding

# Build the label encoder and fit it on the 'complexity' column of train_set
# (train_set is defined outside this section). fit() returns the encoder
# itself, so construction and fitting can be a single expression.
labelEncoder = LabelEncoder().fit(train_set['complexity'])

## Feature tokenizing

In [5]:
def tokenize_data(samples, tokenizer):
    """Tokenize a batch of samples and attach encoded class labels.

    Args:
        samples: Mapping with a 'code' entry (handed to the tokenizer) and a
            'complexity' entry (handed to the module-level ``labelEncoder``).
        tokenizer: Callable tokenizer; invoked with truncation enabled and
            ``max_length=512``.

    Returns:
        The tokenizer's output with an additional 'labels' entry holding the
        transformed 'complexity' values.
    """
    code_batch = samples['code']
    encoded = tokenizer(code_batch, truncation=True, max_length=512)
    label_ids = labelEncoder.transform(samples['complexity'])
    encoded['labels'] = label_ids
    return encoded


def set_tokenizer(checkpoint):
    """Load a tokenizer for ``checkpoint`` and tokenize the train/eval sets.

    If the tokenizer cannot be loaded from ``checkpoint``, the name is cut
    down to its first two hyphen-separated pieces and loading is retried
    with that shorter name.

    Args:
        checkpoint: Identifier or path handed to ``AutoTokenizer.from_pretrained``.

    Returns:
        Tuple ``(tokenizer, data_collator, X_train, X_eval)`` where the last
        two are ``train_set`` and ``test_set`` mapped through ``tokenize_data``.
    """
    pad_token = "<pad>"

    try:
        tokenizer = AutoTokenizer.from_pretrained(checkpoint, pad_token=pad_token)
    except Exception as e:
        print(f"Failed to load {checkpoint}: {e}")
        checkpoint = "-".join(checkpoint.split("-")[:2])
        # The fallback must register the same pad token as the primary path:
        # the padding collator below needs a pad token to exist on the
        # tokenizer, and both paths should agree on what it is.
        tokenizer = AutoTokenizer.from_pretrained(checkpoint, pad_token=pad_token)
        print(f"Falling back to {checkpoint}")

    def _tokenize_batch(batch):
        # Bind the freshly loaded tokenizer for use with Dataset.map.
        return tokenize_data(batch, tokenizer)

    X_train = train_set.map(_tokenize_batch, batched=True)
    X_eval = test_set.map(_tokenize_batch, batched=True)

    # Collator for batch padding
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    return tokenizer, data_collator, X_train, X_eval

# Model

## Device

In [6]:
# Use CUDA when PyTorch reports it as available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## Checkpoint

In [7]:
# Model identifier passed to the from_pretrained(...) loaders in this notebook
# (tokenizer, config, and base model).
checkpoint = "deepseek-ai/DeepSeek-Coder-V2-Lite-Base"

## Quantizing

In [8]:
# 4-bit quantization settings; set_model() passes this object to
# from_pretrained(...) as `quantization_config`.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    # Quantization scheme
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    # Dtypes used for compute and for storing the quantized weights
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_storage=torch.bfloat16,
)

## Model loading

In [9]:
def set_model(checkpoint, tokenizer=None):
    """Load the quantized base model and align it with the tokenizer.

    Args:
        checkpoint: Identifier or path handed to ``AutoModel.from_pretrained``.
        tokenizer: Tokenizer whose pad token id and vocabulary size the model
            should match. When omitted, one is loaded from ``checkpoint`` with
            ``pad_token="<pad>"`` (the same call the notebook uses elsewhere),
            so the function no longer depends on a global ``tokenizer`` that
            happens to exist in the kernel.

    Returns:
        The loaded model, with ``config.pad_token_id`` set and its token
        embeddings resized to ``len(tokenizer)``.
    """
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(checkpoint, pad_token="<pad>")

    model = AutoModel.from_pretrained(
        checkpoint,
        # A torch dtype object rather than the string 'bfloat16': the object
        # form is accepted regardless of the installed transformers release.
        torch_dtype=torch.bfloat16,
        num_labels=7,
        trust_remote_code=True,
        device_map='auto',
        quantization_config=quant_config,
    )

    # Configure the padding token in case it is absent from the model config.
    model.config.pad_token_id = tokenizer.pad_token_id
    # Resize the embeddings to accommodate the added *pad* token.
    model.resize_token_embeddings(len(tokenizer))

    return model

## Classifier head

In [10]:
import pdb
from transformers import AutoModelForCausalLM, AutoConfig, PreTrainedModel

class DeepseekV2ForSequenceClassification(PreTrainedModel):
    """Sequence classifier: a base model from ``set_model`` plus a linear head.

    The base model produces per-token hidden states; the hidden state of the
    last attended token of each sequence is fed to a linear layer that emits
    ``config.num_labels`` logits.
    """

    config_class = AutoConfig
    # PreTrainedModel exposes `base_model` as a read-only property that returns
    # getattr(self, base_model_prefix, self). Storing the backbone under an
    # attribute literally named `base_model` is shadowed by that property and
    # resolves back to `self`, so calling it recurses into forward() forever.
    # Keep the backbone in `self.model` and point the prefix at it instead;
    # `self.base_model` then resolves to the backbone.
    base_model_prefix = "model"

    def __init__(self, checkpoint, config):
        """Build the classifier.

        Args:
            checkpoint: Identifier handed to ``set_model`` to load the backbone.
            config: Configuration providing ``num_labels`` and ``hidden_size``.
        """
        super().__init__(config)
        self.num_labels = config.num_labels
        self.model = set_model(checkpoint)

        # `torch.nn` is used explicitly: the notebook imports `torch` but never `nn`.
        self.dense = torch.nn.Linear(config.hidden_size, config.num_labels)
        self.post_init()

    def forward(self, input_ids, attention_mask, labels=None):
        """Compute classification logits for a batch.

        Args:
            input_ids: Token id tensor, indexed as (batch, sequence).
            attention_mask: Mask tensor of the same shape as ``input_ids``;
                non-zero marks tokens to attend to.
            labels: Accepted but not used; no loss is computed here.
                NOTE(review): a training loop that expects the model to return
                a loss will need one added — confirm against the training cell.

        Returns:
            Logits tensor of shape (batch, num_labels).
        """
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        # last_hidden_state is a single tensor (batch, sequence, hidden);
        # it must not be tuple-unpacked.
        hidden_states = outputs.last_hidden_state

        # In a causal decoder the first position only sees itself, so pooling
        # index 0 would ignore the input. Pool the last attended token, located
        # as the largest position whose mask entry is non-zero (this works for
        # both left- and right-padded batches).
        batch_size, seq_len = hidden_states.shape[0], hidden_states.shape[1]
        positions = torch.arange(seq_len, device=attention_mask.device)
        last_token = (positions * attention_mask.to(positions.dtype)).argmax(dim=1)
        batch_index = torch.arange(batch_size, device=hidden_states.device)
        pooled_outputs = hidden_states[batch_index, last_token.to(hidden_states.device)]

        # The backbone may run in a different dtype / on a different device
        # than the freshly created head; align the activations with the head.
        pooled_outputs = pooled_outputs.to(
            device=self.dense.weight.device, dtype=self.dense.weight.dtype
        )
        logits = self.dense(pooled_outputs)
        return logits



In [None]:
# Configuration for the classifier wrapper (7 output labels).
# NOTE(review): device_map and quantization_config are model-loading options;
# confirm AutoConfig actually uses them rather than silently ignoring them.
config = AutoConfig.from_pretrained(
    checkpoint,
    num_labels=7,
    trust_remote_code=True,
    device_map='auto',
    quantization_config=quant_config,
)

# Tokenizer with "<pad>" registered as the padding token. It must exist
# before the model is built, because set_model() reads its pad token id
# and vocabulary size.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, pad_token="<pad>")

model = DeepseekV2ForSequenceClassification(checkpoint, config)

modeling_deepseek.py:   0%|          | 0.00/78.7k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Base:
- modeling_deepseek.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/480k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-000004.safetensors:   0%|          | 0.00/8.59G [00:00<?, ?B/s]

model-00001-of-000004.safetensors:   0%|          | 0.00/8.59G [00:00<?, ?B/s]

model-00004-of-000004.safetensors:   0%|          | 0.00/5.64G [00:00<?, ?B/s]

model-00002-of-000004.safetensors:   0%|          | 0.00/8.59G [00:00<?, ?B/s]

In [None]:
# Smoke test: run one short string through the classifier.
testing_sample = "Hello world"
# return_tensors="pt" makes the tokenizer return batched torch tensors; without
# it the model would receive plain Python lists. The encoding is moved to the
# model's device so inputs and weights live in the same place.
inputs = tokenizer(testing_sample, return_tensors="pt").to(model.device)
# Inference only, so skip gradient tracking. Pass just the two keys forward()
# accepts, in case the tokenizer emits additional fields.
with torch.no_grad():
    outputs = model(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
    )

# Flushing CUDA

In [None]:
!pip install GPUtil

import torch
from GPUtil import showUtilization as gpu_usage
from numba import cuda

def free_gpu_cache():
    """Release cached GPU memory and print utilization before and after.

    Steps: print current utilization, run Python garbage collection, empty
    PyTorch's CUDA cache, reset the Numba CUDA context on device 0, then
    print utilization again. Does nothing beyond printing a notice when
    PyTorch reports no CUDA device.

    Returns:
        None.
    """
    import gc  # local import: the notebook's import cell does not bring in gc

    if not torch.cuda.is_available():
        # Without a GPU, select_device(0) below would raise.
        print("No CUDA device available; nothing to free")
        return

    print("Initial GPU Usage")
    gpu_usage()

    # Drop unreachable Python objects first so that any tensors they hold are
    # returned to the caching allocator before the cache is emptied.
    gc.collect()
    torch.cuda.empty_cache()

    # Tear down and re-create the Numba CUDA context on device 0.
    # NOTE(review): live torch CUDA tensors are presumably unusable after the
    # context is closed; confirm this is only run when the model is discarded.
    cuda.select_device(0)
    cuda.close()
    cuda.select_device(0)

    print("GPU Usage after emptying the cache")
    gpu_usage()

free_gpu_cache()