In [None]:
# Hugging Face playground: load a small causal LM, list the adapters published
# for it, inspect how each prompt is tokenized, then sample one reply per prompt.

import os
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from adapters import AutoAdapterModel, list_adapters

login(token=os.getenv('HF_READ_TOKEN'))
model_id = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B'

# Downloaded files are cached locally; inspect with: ls -la ~/.cache/huggingface
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')
model.eval()

adapter_infos = list_adapters(model_name=model_id)
for adapter_info in adapter_infos:
    print('Id:', adapter_info.adapter_id)
    print('Model name:', adapter_info.model_name)
    print('Uploaded by:', adapter_info.username)
# model = AutoAdapterModel.from_pretrained(model_id)
# adapter_name = model.load_adapter('AdapterHub/roberta-base-pf-imdb', source='hf')
# model.active_adapters = adapter_name

system_message = 'You are a experienced DevOps Engineer with robust skills in Azure and Python'
user_messages = [
    'How to write a unit tests on Python project?',
    'Once upon a time...',
]

# One conversation per user message. Hand-written `<|system|>` / `</s>` markers
# are not this model's format (the tokenizer split them into plain pieces such
# as '<', '|', 'system'), so the roles are expressed as chat messages instead.
conversations = [
    [
        {'role': 'system', 'content': system_message},
        {'role': 'user', 'content': user_message},
    ]
    for user_message in user_messages
]

# Rendered prompt strings, produced by the tokenizer's own chat template and
# ending with the marker that asks the model for an assistant turn.
prompt = [
    tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    for conversation in conversations
]

# Inspect every prompt on its own; tokenizing the whole list at once merges the
# prompts into a single flat token sequence.
for text in prompt:
    tokens = tokenizer.tokenize(text)
    ids = tokenizer.convert_tokens_to_ids(tokens)
    print(f'tokens: {tokens}')
    print(f'ids: {ids}')
    # The rendered template already carries its special tokens, so none are
    # added again here. No tensors are requested, hence no `.to(device)` call.
    print(f'summary: {tokenizer(text, add_special_tokens=False)}')
    print(f'decoded: {tokenizer.decode(ids)}')

pipe = pipeline(task='text-generation', model=model, tokenizer=tokenizer)
# Passing the conversations lets the pipeline apply the chat template itself.
result = pipe(
    conversations,
    temperature=0.5,
    top_p=0.85,
    max_new_tokens=256,
    do_sample=True,
    return_full_text=False
)

# `result` holds one list of candidates per conversation; print the first one.
for reply in result:
    generated_text = reply[0]['generated_text']
    print(f'>>> {generated_text}')

Device set to use cpu


tokens: ['Ċ', '<', '|', 'system', '|', '>', 'ĠYou', 'Ġare', 'Ġa', 'Ġexperienced', 'ĠDev', 'Ops', 'ĠEngineer', 'Ġwith', 'Ġrobust', 'Ġskills', 'Ġin', 'ĠAzure', 'Ġand', 'ĠPython', 'Ġ</', 's', '><', '|', 'user', '|', '>', 'ĠHow', 'Ġto', 'Ġwrite', 'Ġa', 'Ġunit', 'Ġtests', 'Ġon', 'ĠPython', 'Ġproject', '?', 'Ġ</', 's', '><', '|', 'assistant', '|', '>Ċ', 'Ċ', '<', '|', 'system', '|', '>', 'ĠYou', 'Ġare', 'Ġa', 'Ġexperienced', 'ĠDev', 'Ops', 'ĠEngineer', 'Ġwith', 'Ġrobust', 'Ġskills', 'Ġin', 'ĠAzure', 'Ġand', 'ĠPython', 'Ġ</', 's', '><', '|', 'user', '|', '>', 'ĠOnce', 'Ġupon', 'Ġa', 'Ġtime', '...', 'Ġ</', 's', '><', '|', 'assistant', '|', '>Ċ']
ids: [198, 27, 91, 8948, 91, 29, 1446, 525, 264, 10321, 6040, 38904, 28383, 448, 21765, 7361, 304, 34119, 323, 13027, 690, 82, 1784, 91, 872, 91, 29, 2585, 311, 3270, 264, 4982, 7032, 389, 13027, 2390, 30, 690, 82, 1784, 91, 77091, 91, 397, 198, 27, 91, 8948, 91, 29, 1446, 525, 264, 10321, 6040, 38904, 28383, 448, 21765, 7361, 304, 34119, 323, 13027, 6

In [None]:
# bitsandbytes quantization: load an instruct model in 4-bit, sample one reply,
# and report the memory footprint of the quantized weights.

import os
import bitsandbytes
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline

login(token=os.getenv('HF_READ_TOKEN'))
model_id = 'mistralai/Mistral-7B-Instruct-v0.2'
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

# Downloaded files are cached locally; inspect with: ls -la ~/.cache/huggingface
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model_4bit = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config, device_map='auto')

system_message = 'You are a experienced DevOps Engineer with robust skills in Azure and Python'
user_message = 'How to write a unit tests on Python project?'

# Send a chat message so the pipeline formats the turn with the tokenizer's own
# chat template instead of hand-written `<|system|>` / `</s>` markers.
# NOTE(review): the persona text is folded into the user turn because it is not
# confirmed that this model's template accepts a separate system role.
messages = [
    {'role': 'user', 'content': f'{system_message}\n\n{user_message}'},
]

pipe = pipeline(task='text-generation', model=model_4bit, tokenizer=tokenizer)
result = pipe(
    messages,
    temperature=0.5,
    top_p=0.85,
    max_new_tokens=256,
    do_sample=True,
    return_full_text=False
)

# Same "[<footprint>] >>> <text>" output as before, built without reusing the
# enclosing quote type inside the f-string (a SyntaxError before Python 3.12).
memory_footprint = model_4bit.get_memory_footprint()
generated_text = result[0]['generated_text']
print(f'[{memory_footprint}] >>> {generated_text}')

In [None]:
# plot sigmoid function

import numpy as np
import matplotlib.pyplot as plt

# Sample the logistic curve f(x) = 1 / (1 + e^-x) at 400 evenly spaced points
# on [-10, 10].
x = np.linspace(-10, 10, 400)
y = 1 / (1 + np.exp(-x))

# Draw the curve on an explicit figure/axes pair and label it so the figure
# can be read on its own.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(x, y, linewidth=2)
ax.set_title(r'Plot of $f(x) = \frac{1}{1 + e^{-x}}$ (Sigmoid Function)')
ax.set_xlabel('x')
ax.set_ylabel('f(x)')
ax.grid(True)
plt.show()

In [None]:
# vLLM usage when using quantized model
# NOTE(review): no quantization option is passed to `LLM(...)` below, so the
# model is loaded as float16 - confirm whether a quantized checkpoint or a
# `quantization=` argument was intended.

# Imported here as well so the cell runs on a fresh kernel without relying on
# names left behind by earlier cells.
import os
from huggingface_hub import login
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

login(token=os.getenv('HF_READ_TOKEN'))
model_id = 'ibm-granite/granite-4.0-1b'

# Downloaded files are cached locally; inspect with: ls -la ~/.cache/huggingface
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

engine = LLM(
    model=model_id,
    max_num_seqs=8,
    max_num_batched_tokens=512,
    dtype="float16",
    gpu_memory_utilization=0.9,
    enforce_eager=False,
    enable_chunked_prefill=True
)

prompt = 'Once upon a time'

# Generation limits are carried by SamplingParams; `generate` itself does not
# take a `max_tokens` keyword.
sampling_params = SamplingParams(max_tokens=256)
result = engine.generate(prompt, sampling_params)

# One RequestOutput per prompt; print the text of its first completion.
for request_output in result:
    print(request_output.outputs[0].text)

In [None]:
# LoRA fine-tuning of GPT-2 as a causal language model on tweet text.
# NOTE: no quantization config is passed when the base model is loaded, so this
# is plain LoRA rather than QLoRA.

import hf_xet
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset

# Errors are deliberately not caught here: a failed run should surface with its
# full traceback instead of being reduced to a one-line message.

print('>>> Load LLM')
model_name = 'gpt2'
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse EOS as the padding token so `padding='max_length'` can be used below.
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto')

peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    fan_in_fan_out=True,
    r=8,
    lora_alpha=16,
    lora_dropout=0.05
)
model = get_peft_model(model, peft_config)


def tokenize_fn(examples):
    """Tokenize a batch of rows, truncating/padding each `text` to 512 tokens."""
    return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=512)


print('>>> Tokenize loaded datasets')
dataset = load_dataset('mteb/tweet_sentiment_extraction')
# Drop the original columns (including the integer sentiment `label`) so only
# tokenizer outputs reach the model; the collator below builds the LM labels.
tokenized_datasets = dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=dataset['train'].column_names
)

# mlm=False -> causal-LM objective: labels are a copy of input_ids with padding
# positions masked out of the loss.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir='./LoRA_output',
    per_device_train_batch_size=4,
    num_train_epochs=3,
    save_steps=100,
    seed=42,
    # Mixed precision is only requested when a CUDA device is present.
    fp16=torch.cuda.is_available(),
    dataloader_pin_memory=False,
    dataloader_num_workers=0
)

trainer = Trainer(
    model=model,
    args=training_args,
    processing_class=tokenizer,
    data_collator=data_collator,
    train_dataset=tokenized_datasets['train'],
    # `evaluate()` needs a dataset to run on.
    # NOTE(review): assumes the dataset exposes a 'test' split - confirm.
    eval_dataset=tokenized_datasets['test']
)

print('>>> Training')
trainer.train()

print('>>> Validating')
eval_metrics = trainer.evaluate()
print(eval_metrics)