In [7]:
# Hugging Face playground

import os
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

# Authenticate only when a token is actually configured. Passing token=None to
# login() falls back to an interactive prompt, which blocks "Restart & Run All".
hf_token = os.getenv('HF_READ_TOKEN')
if hf_token:
    login(token=hf_token)
else:
    print('HF_READ_TOKEN is not set; continuing without authentication')

model_id = 'ibm-granite/granite-4.0-h-350m'

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model.eval()

# Sampling is stochastic: fix the seed so re-running the cell gives the same text.
torch.manual_seed(42)

pipe = pipeline('text-generation', model=model, tokenizer=tokenizer)
res = pipe(
    'Moh is obviously my lovely cat',
    # temperature and num_return_sequences > 1 only take effect when sampling;
    # greedy decoding ignores the former and rejects the latter.
    do_sample=True,
    temperature=0.7,
    # Bound the number of *generated* tokens. max_length also counts the prompt
    # and conflicts with the pipeline's own max_new_tokens default.
    max_new_tokens=30,
    num_return_sequences=2,
)
print(res)

tokenizer_config.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/681M [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
# plot sigmoid function

import numpy as np
import matplotlib.pyplot as plt

# Sample the logistic (sigmoid) curve at 400 evenly spaced points on [-10, 10]
x = np.linspace(start=-10, stop=10, num=400)
y = np.divide(1, 1 + np.exp(-x))

# Draw the curve through the explicit Figure/Axes interface
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(x, y, linewidth=2)
ax.set_title(r'Plot of $f(x) = \frac{1}{1 + e^{-x}}$ (Sigmoid Function)')
ax.set_xlabel('x')
ax.set_ylabel('f(x)')
ax.grid(True)
plt.show()

In [None]:
# LoRA fine-tuning

import hf_xet
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset

# Fine-tune GPT-2 with LoRA adapters on tweet text as a causal-LM task.
# Failures are reported with their full traceback rather than only the message,
# so the cell stays best-effort but remains debuggable.
import traceback

try:
    import torch  # local import: this cell's import list does not include torch

    print('>>> Load LLM')
    model_name = 'gpt2'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # GPT-2 has no padding token; reuse EOS so padding='max_length' works.
    tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto')

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        fan_in_fan_out=True,
        r=8,
        lora_alpha=16,
        lora_dropout=0.05
    )
    model = get_peft_model(model, peft_config)

    def tokenize_fn(examples):
        """Tokenize a batch of tweets and attach causal-LM labels.

        Args:
            examples: a batch dict from `datasets.map(batched=True)`; only the
                'text' column is read.

        Returns:
            The tokenizer output (input_ids, attention_mask) plus a 'labels'
            entry that copies input_ids and replaces padded positions with
            -100 so they are excluded from the loss.
        """
        encoded = tokenizer(examples['text'], truncation=True, padding='max_length', max_length=512)
        # pad_token == eos_token, so padding is identified via the attention
        # mask rather than by token id.
        encoded['labels'] = [
            [token if keep else -100 for token, keep in zip(ids, mask)]
            for ids, mask in zip(encoded['input_ids'], encoded['attention_mask'])
        ]
        return encoded

    print('>>> Tokenize loaded datasets')
    dataset = load_dataset('mteb/tweet_sentiment_extraction')
    # Drop the raw columns: the dataset's integer sentiment `label` column must
    # not reach the model in place of the language-modeling labels built above.
    # NOTE(review): assumes every split has the same columns as 'train'.
    tokenized_datasets = dataset.map(
        tokenize_fn,
        batched=True,
        remove_columns=dataset['train'].column_names,
    )

    # evaluate() needs an evaluation set; use the 'test' split when it exists.
    eval_dataset = tokenized_datasets['test'] if 'test' in tokenized_datasets else None

    training_args = TrainingArguments(
        output_dir='./LoRA_output',
        per_device_train_batch_size=4,
        num_train_epochs=3,
        save_steps=100,
        seed=42,
        # Mixed precision requires a CUDA device; fall back to fp32 on CPU.
        fp16=torch.cuda.is_available(),
        dataloader_pin_memory=False,
        dataloader_num_workers=0
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        processing_class=tokenizer,
        train_dataset=tokenized_datasets['train'],
        eval_dataset=eval_dataset
    )

    print('>>> Training')
    trainer.train()

    if eval_dataset is not None:
        print('>>> Validating')
        eval_metrics = trainer.evaluate()
        print(eval_metrics)
    else:
        print('>>> Skipping validation: dataset has no test split')
except Exception:
    # Keep the cell non-fatal, but show where the failure happened.
    traceback.print_exc()

In [None]:
# Dynamic int8 quantization with PyTorch (toy model; no GGUF file is produced here)


# Import necessary libraries
import torch
import torch.nn as nn
import torch.quantization as quant

# Step 1: Define a simple neural network model in PyTorch
class SimpleModel(nn.Module):
    """Small fully connected network: 10 -> 50 -> 20 -> 5.

    ReLU is applied after the first two linear layers; the last layer's
    output is returned without an activation.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=10, out_features=50)  # input -> hidden 1
        self.fc2 = nn.Linear(in_features=50, out_features=20)  # hidden 1 -> hidden 2
        self.fc3 = nn.Linear(in_features=20, out_features=5)   # hidden 2 -> output

    def forward(self, x):
        """Run `x` through fc1 and fc2 (each followed by ReLU), then fc3."""
        hidden = nn.functional.relu(self.fc1(x))
        hidden = nn.functional.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Step 2: Initialize the model and switch to evaluation mode
from pathlib import Path  # local import: used only to create the output directory

# Fix the seed so the random weights and the dummy input are reproducible.
torch.manual_seed(42)
model = SimpleModel()
model.eval()

# Step 3: Save the float model. torch.save does not create missing parent
# directories, so make sure the target directory exists first.
output_dir = Path('./LLMs')
output_dir.mkdir(parents=True, exist_ok=True)

# NOTE: this pickles the whole module object (not just a state_dict), so loading
# it later requires the SimpleModel class to be importable.
torch.save(model, output_dir / 'simple_model.pth')

# Step 4: Dynamically quantize the nn.Linear layers to int8 and save the result
quantized_model = torch.ao.quantization.quantize_dynamic(
    model, {nn.Linear},
    dtype=torch.qint8
)
torch.save(quantized_model, output_dir / 'quantized_simple_model.pth')

# Step 5: Compare both models on the same input. Gradients are not needed for
# inference, so disable autograd while running the forward passes.
dummy_input = torch.randn(1, 10)
print('Dummy input', dummy_input)
with torch.no_grad():
    print('Model output:', model(dummy_input))
    print('Quantized model output:', quantized_model(dummy_input))