Инструкция по скачиванию датасета с Kaggle — [тут](https://github.com/Kaggle/kaggle-api).

In [1]:
import os
# Kaggle API credentials exposed through environment variables.
# The values are intentionally blank here: fill them in locally and never
# commit real credentials with the notebook.
os.environ.update({'KAGGLE_USERNAME': "", 'KAGGLE_KEY': ""})

In [8]:
# Shell escape: fetch the `slothkong/10-monkey-species` dataset archive with the
# Kaggle CLI. The recorded output shows it saves `10-monkey-species.zip` into the
# directory the notebook runs from.
!kaggle datasets download -d slothkong/10-monkey-species

Downloading 10-monkey-species.zip to /home/mark/stuff
 99%|████████████████████████████████████████▌| 542M/547M [00:04<00:00, 136MB/s]
100%|█████████████████████████████████████████| 547M/547M [00:04<00:00, 131MB/s]


In [9]:
import zipfile
# Unpack every member of the downloaded archive into the current working
# directory; the context manager closes the file handle afterwards.
with zipfile.ZipFile('10-monkey-species.zip', mode='r') as archive:
    archive.extractall(path=None)

In [2]:
from matplotlib import pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torch
import numpy as np
from torchvision import transforms
from torchvision.datasets import ImageFolder
import torchvision
import torchvision.models as models
import pytorch_lightning as pl
import torch.nn as nn
from torchmetrics import Accuracy
import torch.optim as optim
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint
from collections import Counter
import torch.nn.functional as F

# Dataset

In [3]:
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise each of the three channels with the mean/std
# constants below (presumably the ImageNet statistics that the pretrained
# torchvision backbone expects -- confirm).
#
# The pipeline is built through the fully-qualified `torchvision.transforms`
# module on purpose: this assignment rebinds the name `transforms` (imported
# as that module at the top of the notebook) to the Compose object, so using
# the bare name on the right-hand side made the cell raise AttributeError on
# every re-run. Later cells still receive the pipeline under `transforms`.
transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize((224, 224)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [4]:
# Build both image datasets with the same preprocessing pipeline. Each root
# folder holds one sub-directory per class, which ImageFolder maps to an
# integer label.
train_ds, test_ds = (
    ImageFolder(root_dir, transform=transforms)
    for root_dir in ('./training/training/', './validation/validation/')
)

In [5]:
# Per-class sample counts for each split, printed one Counter per line to
# check how balanced the classes are.
train_cnt, test_cnt = (Counter(split.targets) for split in (train_ds, test_ds))
print(train_cnt, test_cnt, sep='\n')

Counter({3: 122, 7: 114, 5: 113, 1: 111, 2: 110, 6: 106, 8: 106, 0: 105, 4: 105, 9: 105})
Counter({3: 30, 1: 28, 5: 28, 7: 28, 2: 27, 8: 27, 0: 26, 4: 26, 6: 26, 9: 26})


# Dataloader

In [6]:
# Mini-batch iterators over the two datasets. Only the training loader
# reshuffles its samples; both use batches of 64 and 4 worker processes.
train_dl = DataLoader(
    dataset=train_ds,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
test_dl = DataLoader(
    dataset=test_ds,
    batch_size=64,
    shuffle=False,
    num_workers=4,
)

# Model

In [7]:
class HWModel(pl.LightningModule):
    """Pretrained torchvision ResNet-50 with a new ``num_classes``-way head.

    The network's final fully-connected layer is replaced by a fresh
    ``nn.Linear`` and all parameters are optimised with plain SGD.

    Args:
        num_classes: Number of target classes (output size of the new head).
        lr: Learning rate passed to the SGD optimizer.
    """

    def __init__(self, num_classes, lr=2e-4):
        super().__init__()
        # Records the constructor arguments under ``self.hparams``.
        self.save_hyperparameters()
        self.lr = lr
        self.num_classes = num_classes

        # NOTE(review): newer torchvision releases deprecate ``pretrained=``
        # in favour of ``weights=``; left unchanged so the notebook keeps
        # working with whatever version is installed -- confirm and migrate.
        self.model = models.resnet50(pretrained=True)
        # Swap the classification head for one sized to this task.
        in_feat = self.model.fc.in_features
        self.model.fc = nn.Linear(in_feat, self.num_classes)

        self.loss_fn = nn.CrossEntropyLoss()

        # One metric object per stage. A single shared instance would
        # accumulate train, validation and test batches into the same
        # internal state. ``accuracy`` keeps its original attribute name and
        # now tracks training batches only.
        self.accuracy = Accuracy(task="multiclass", num_classes=self.num_classes)
        self.val_accuracy = Accuracy(task="multiclass", num_classes=self.num_classes)
        self.test_accuracy = Accuracy(task="multiclass", num_classes=self.num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) produced by the backbone."""
        return self.model(x)

    def configure_optimizers(self):
        """Create an SGD optimizer over all module parameters using ``self.lr``."""
        return torch.optim.SGD(self.parameters(), lr=self.lr)

    def training_step(self, batch, batch_idx):
        """Compute the loss for one training batch and log loss and accuracy.

        Returns:
            The cross-entropy loss tensor that Lightning backpropagates.
        """
        x, y = batch
        preds = self(x)
        loss = self.loss_fn(preds, y)
        # Calling the metric updates its state with this batch.
        self.accuracy(torch.argmax(preds, dim=1), y)

        # Log the tensor itself: ``loss.item()`` forced a host/device sync on
        # every step for no benefit.
        self.log('train_loss', loss, on_epoch=True, logger=True)
        # Logging the metric object lets Lightning compute the epoch value
        # from the accumulated state and reset it afterwards.
        self.log('train_acc', self.accuracy, on_epoch=True, logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Log loss and accuracy for one validation batch."""
        x, y = batch
        preds = self(x)

        loss = self.loss_fn(preds, y)
        self.val_accuracy(torch.argmax(preds, dim=1), y)

        self.log('val_loss', loss, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', self.val_accuracy, on_epoch=True, prog_bar=True, logger=True)

    def test_step(self, batch, batch_idx):
        """Log accuracy for one test batch."""
        x, y = batch
        preds = self(x)
        self.test_accuracy(torch.argmax(preds, dim=1), y)

        self.log('test_acc', self.test_accuracy, on_epoch=True, prog_bar=True)

# Train

In [8]:
from torch import Tensor
from typing import Type

In [12]:
# Seed the Python, NumPy and PyTorch RNGs (and dataloader workers) before the
# model is built, so the new head's initialisation and the shuffling order of
# the training loader are repeatable from run to run.
pl.seed_everything(42, workers=True)

model = HWModel(num_classes=10, lr=1e-3)
trainer_args = {
    "accelerator": "gpu",
    "max_epochs": 10,
}
trainer = pl.Trainer(**trainer_args, enable_progress_bar=True)
# `test_dl` (built from the ./validation folder) is passed as the validation
# loader, so the `val_*` metrics logged by the model are computed on it.
trainer.fit(model, train_dl, test_dl)