In [5]:
import os
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import lightning as L

In [6]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear -> Softmax.

    Args:
        input_size: Size of the last dimension of the input.
        hidden_size: Width of the hidden layer.
        output_size: Number of output units.

    Note:
        ``forward`` returns values normalised by a softmax over ``dim=1``
        (probabilities), not raw logits.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names and creation order are kept stable so that
        # ``state_dict`` keys and seeded initialisation do not change.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.fin = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax (over ``dim=1``) of the two-layer projection of ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fin(self.fc2(hidden))

In [7]:
# MNIST stored under the current working directory (downloaded when
# missing); every image goes through the ToTensor transform.
dataset = MNIST(
    root=os.getcwd(),
    download=True,
    transform=transforms.ToTensor(),
)
# Only the dataset is passed, so the loader runs with its default settings.
train_loader = DataLoader(dataset=dataset)

In [43]:
# Inspect the shape of the image tensor in the first batch.
first_batch = next(iter(train_loader))
first_batch[0].shape

torch.Size([1, 1, 28, 28])

In [38]:
# One-element dataset: the first entry of the raw `data` tensor paired with
# its target (taken from the stored tensors, not via dataset[0]).
first_image = dataset.data[0]
first_target = dataset.targets[0]
ds = [(first_image, first_target)]

In [39]:
# Loader over the single-sample list built above.
shallow_loader = DataLoader(dataset=ds, shuffle=True, batch_size=1)

In [41]:
# Pull one batch to see what the loader yields for the raw tensors.
shallow_batches = iter(shallow_loader)
next(shallow_batches)

[tensor([[[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
             0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
          [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
             0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
          [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
             0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
          [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
             0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
          [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
             0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
          [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,
            18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0],
          [  0,   0,  

# Lightning test (unverified: the run below was interrupted during epoch 0)

In [32]:
class LitMLP(L.LightningModule):
    """Lightning wrapper that trains a probability-emitting MLP.

    Args:
        mlp: Module that maps flattened inputs to class probabilities. The
            ``MLP`` defined in this notebook ends with a softmax layer, so
            its output is already normalised; the loss below relies on that.
    """

    # Lower bound applied before the log so that a probability of exactly 0
    # cannot turn the loss into ``inf``.
    _PROB_FLOOR = 1e-12

    def __init__(self, mlp):
        super().__init__()
        self.mlp = mlp

    def training_step(self, batch, batch_idx):
        """Return the negative log-likelihood for one ``(inputs, targets)`` batch."""
        x, y = batch
        # Flatten everything after the batch dimension.
        x = x.view(x.size(0), -1)

        probs = self.mlp(x)

        # ``F.cross_entropy`` applies log-softmax internally and expects raw
        # logits. Feeding it softmax outputs normalises twice, which squashes
        # the loss range and weakens the gradients. The wrapped model already
        # returns probabilities, so take their log and use NLL instead.
        log_probs = torch.log(probs.clamp_min(self._PROB_FLOOR))
        loss = F.nll_loss(log_probs, y)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with ``lr=1e-3``."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)

In [122]:
# model
# MNIST labels are the ten digits, so the classifier needs 10 outputs. The
# ~101 K parameter count in the summary corresponds to 784 -> 128 -> 10;
# an output width of 784 would roughly double it and leave most outputs unused.
fin = 784
hid = 128
fout = 10
mlp_lit = LitMLP(MLP(fin, hid, fout))

# train model
# Bound the run explicitly: without a limit the Trainer falls back to its
# large default epoch count and has to be interrupted by hand.
trainer = L.Trainer(max_epochs=1)
trainer.fit(model=mlp_lit, train_dataloaders=train_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name | Type | Params
------------------------------
0 | mlp  | MLP  | 101 K 
------------------------------
101 K     Trainable params
0         Non-trainable params
101 K     Total params
0.407     Total estimated model params size (MB)


Epoch 0:   1%|▍                                                          | 400/60000 [00:02<06:05, 163.17it/s, v_num=8]

/home/nikita/.local/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


# W&B integration sample

In [4]:
import os
from itertools import islice

from dotenv import load_dotenv, find_dotenv

# Load environment variables from "<ENV>.env" (ENV defaults to "var").
__ENV_FILE = find_dotenv(f'{os.getenv("ENV", "var")}.env')
load_dotenv(__ENV_FILE)

import wandb
wandb_api_key = os.environ.get('WANDB_API_KEY')
wandb.login(key=wandb_api_key)

# Not read by the loop below; kept so any cell that refers to them still works.
epochs = 10
lr = 0.01

input_size = 784
hidden_size = 128
output_size = 10
num_epochs = 10
batch_size = 784  # not used here: the batch size is whatever `train_loader` yields
learning_rate = 1e-3
max_batches_per_epoch = 10  # keeps this sample run short

run = wandb.init(
    project="huggingface",
    config={
        "learning_rate": learning_rate,
        "epochs": num_epochs,
    },
)

model = MLP(input_size, hidden_size, output_size)
# MLP already ends with a softmax. CrossEntropyLoss expects raw logits and
# would apply (log-)softmax a second time, so the loss is NLL on the log of
# the model's probabilities instead.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
dataloader = train_loader

for epoch in range(num_epochs):
    # islice stops after the first `max_batches_per_epoch` batches.
    for batch, labels in islice(dataloader, max_batches_per_epoch):
        # Flatten the whole batch at once. Iterating over the batch sample by
        # sample only lined up with `labels` when the batch size was 1.
        inputs = batch.view(batch.size(0), -1)

        probs = model(inputs)
        # The clamp keeps log() finite if a probability underflows to 0.
        loss = criterion(torch.log(probs.clamp_min(1e-12)), labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log a plain float rather than a tensor still attached to the graph.
        wandb.log({"loss": loss.item()})

[34m[1mwandb[0m: Currently logged in as: [33mmalcevnik99[0m ([33mstaff[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/nikita/.netrc


# Transfer PyTorch model to Hugging Face

In [12]:
import os

from huggingface_hub import login

# Never inline the access token in the notebook: anyone who can read the file
# or its history can use it. The token that used to be written here has been
# exposed and should be revoked in the Hugging Face account settings.
# The token is read from the HF_TOKEN environment variable instead; when it is
# unset, login() receives None and asks for the token interactively.
login(token=os.environ.get("HF_TOKEN"))

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /home/nikita/.cache/huggingface/token
Login successful


In [26]:
import torch.nn as nn
from transformers import PreTrainedModel, PretrainedConfig
from transformers import AutoModel, AutoConfig

class MLPConfig(PretrainedConfig):
    """Configuration holding the layer sizes of the MLP.

    Args:
        fin: Input feature count, stored as ``input_size``.
        hid: Hidden layer width, stored as ``hidden_size``.
        fout: Output unit count, stored as ``output_size``.
        **kwargs: Forwarded to ``PretrainedConfig``. The stored attribute
            names (``input_size``, ``hidden_size``, ``output_size``) are also
            accepted and take precedence over ``fin`` / ``hid`` / ``fout``.
    """
    model_type = 'mlp'

    def __init__(self,
                 fin=784,
                 hid=128,
                 fout=10,
                 **kwargs):
        # A saved config is serialised under its attribute names and rebuilt
        # by passing those names back to __init__. They do not match
        # fin/hid/fout, so they arrive in **kwargs; without this handling the
        # defaults above would overwrite the loaded values and any
        # non-default size would be lost on reload.
        input_size = kwargs.pop("input_size", fin)
        hidden_size = kwargs.pop("hidden_size", hid)
        output_size = kwargs.pop("output_size", fout)
        super().__init__(**kwargs)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
class MLPModel(PreTrainedModel):
    """``PreTrainedModel`` wrapper around the plain ``MLP``.

    The layer sizes are read from the ``input_size``, ``hidden_size`` and
    ``output_size`` attributes of the supplied config.
    """
    config_class: MLPConfig = MLPConfig

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        layer_sizes = dict(
            input_size=config.input_size,
            hidden_size=config.hidden_size,
            output_size=config.output_size,
        )
        self.model = MLP(**layer_sizes)

    def forward(self, input):
        """Run the wrapped MLP on ``input`` and return its output unchanged."""
        output = self.model(input)
        return output

In [27]:
# Layer sizes for the exported model.
input_size, hidden_size, output_size = 784, 128, 10

config = MLPConfig(fin=input_size, hid=hidden_size, fout=output_size)
model = MLPModel(config)

# Save the model under this path, then load it back from the same path.
pretrained_path = 'Malecc/mlp'
model.save_pretrained(pretrained_path)

new_model = MLPModel.from_pretrained(pretrained_path)
new_model

MLPModel(
  (model): MLP(
    (fc1): Linear(in_features=784, out_features=128, bias=True)
    (relu): ReLU()
    (fc2): Linear(in_features=128, out_features=10, bias=True)
    (fin): Softmax(dim=1)
  )
)

In [42]:
# Take the first batch and feed its image, reshaped to a single 784-wide
# row, to the reloaded model.
x, y = next(iter(train_loader))
flat_image = x.squeeze().view(1, 28 * 28)
new_model(flat_image)

tensor([[0.1030, 0.0924, 0.0983, 0.0958, 0.0965, 0.1020, 0.0962, 0.0968, 0.1161,
         0.1030]], grad_fn=<SoftmaxBackward0>)

# Evaluation harness

У lm-evaluation-harness понятная поддержка моделей в формате Hugging Face (HF) — именно поэтому выше модель и переносится в HF.
Дальше запуск выглядит примерно как в ячейке ниже; там же есть поддержка W&B для этих прогонов (через дополнительные флаги), и ещё нужно подтянуть некоторые зависимости.

In [None]:
# lm_eval --model hf \
#     --model_args pretrained=Malecc/mlp \
#     --tasks hellaswag \
#     --device cuda:0 \
#     --batch_size 8