## signate 画像分類コンペ(2クラス)

In [17]:
# インポート
import glob
import random
import pickle

import tqdm
import os
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import timm
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import wandb
import yaml

### 初期処理

In [18]:
BASE_PATH = 'C:\\Users\\zigza\\GitFile\\signate\\package_analysis\\'
DATA_PATH = BASE_PATH + 'datasets\\'
TRAIN_PATH = DATA_PATH + 'train\\'
TEST_PATH = DATA_PATH + 'test\\'
OUT_PATH = BASE_PATH + 'out\\'
# C:\\Users\\zigza\\GitFile\\signate\\package_analysis\\train.csv
# C:\Users\zigza\GitFile\signate\package_analysis\datasets\train.csv

In [19]:
train_df = pd.read_csv(DATA_PATH + 'train.csv')
train_df.head()


Unnamed: 0,image_name,label
0,0000.png,0
1,0001.png,1
2,0002.png,1
3,0003.png,1
4,0004.png,0


In [20]:
# seedの固定
def fix_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

SEED = 0
fix_seed(SEED)

### Datamodule

In [21]:
class MyDataset(Dataset):

    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform
        self.img_path = TRAIN_PATH

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        # 画像を読みこんで、指定の方法でtransform
        img_name = os.path.join(self.img_path, self.file_list.iloc[index,0])
        img = Image.open(img_name)
        img_transformed = self.transform(img)
        label = int(self.file_list.iloc[index,1])

        return img_transformed, label

In [22]:
class CreateDataModule(pl.LightningDataModule):

    def __init__(self, train_df, val_df, test_df, img_size=224,
                 mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                 batch_size=16):
        super().__init__()
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df
        self.batch_size = batch_size

        # train時、val/test時の前処理をそれぞれ定義
        self.train_transforms = transforms.Compose([
            transforms.RandomResizedCrop(img_size, scale=(0.5, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])

        self.val_test_transforms = transforms.Compose([
            transforms.Resize(img_size),
            transforms.CenterCrop(img_size),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])

    # データのダウンロードなどを行う場合は定義、今回は不要
    def prepare_data(self):
        pass

    # Trainer.fit()ではtrain/valのDatasetを、Trainer.test()ではtestのDatasetを生成
    def setup(self, stage=None):
        if stage == 'fit' or stage is None:
            self.train_dataset = MyDataset(self.train_df, self.train_transforms)
            self.val_dataset = MyDataset(self.val_df, self.val_test_transforms)

        if stage == 'test' or stage is None:
            self.test_dataset = MyDataset(self.test_df, self.val_test_transforms)

    # こちらもTrainer.fit()ではtrain/valのDataLoaderを、Trainer.test()ではtestのDataLoaderを生成
    # trainはshuffleあり、val/testはshuffleなし
    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size)


In [23]:
# seedを固定
fix_seed(SEED)

#train_df内のデータを7:2:1の割合でval_df,test_dfに分割
train_df, val_df = train_test_split(train_df, test_size=0.3, shuffle=True, random_state=SEED)
val_df, test_df = train_test_split(val_df, test_size=0.33, shuffle=True, random_state=SEED)

# インスタンスを作成
data_module = CreateDataModule(train_df,val_df,test_df)

### Classifier

In [24]:
class ImageClassifier(pl.LightningModule):
    def __init__(self, model_name, n_classes, lr=0.0001, criterion=torch.nn.CrossEntropyLoss()):
        super().__init__()
        self.save_hyperparameters()

        # timmで学習済みモデルをダウンロードし、classifier部分を付替え
        # n_classesにはラベルの件数を渡す（今回はアリとハチの2つなので2）
        self.model = timm.create_model(model_name, pretrained=True)
        self.model.classifier = nn.Linear(self.model.classifier.in_features, n_classes)

        self.lr = lr
        self.criterion = criterion
        self.outputs = []
        
        # net属性としてmodelをエイリアス化
        self.net = self.model

    # 順伝搬
    def forward(self, imgs, labels=None):
        preds = self.model(imgs)
        loss = 0
        if labels is not None:
            loss = self.criterion(preds, labels)
        return loss, preds

    # trainのミニバッチに対して行う処理
    def training_step(self, batch, batch_idx):
        imgs, labels = batch
        loss, preds = self.forward(imgs=imgs, labels=labels)
        self.log(f"train_loss", loss, on_step=True, on_epoch=True,prog_bar=True, logger=True)
        return {'loss': loss, 'batch_preds': preds.detach(), 'batch_labels': labels.detach()}

    # validation、testでもtrain_stepと同じ処理を行う
    def validation_step(self, batch, batch_idx):
        result = self.training_step(batch, batch_idx)
        self.outputs.append(result)
        self.log(f"val_loss", result['loss'], logger=True)
        return result

    def test_step(self, batch, batch_idx):
        result = self.training_step(batch, batch_idx)
        self.log(f"test_loss", result['loss'], logger=True)
        return result

    # epoch終了時にvalidationのlossとaccuracyを記録
    def on_validation_epoch_end(self):

        # loss計算
        epoch_preds = torch.cat([x['batch_preds'] for x in self.outputs])
        epoch_labels = torch.cat([x['batch_labels'] for x in self.outputs])
        epoch_loss = self.criterion(epoch_preds, epoch_labels)
        self.log(f"val_loss", epoch_loss, logger=True)

        # accuracy計算
        num_correct = (epoch_preds.argmax(dim=1) == epoch_labels).sum().item()
        epoch_accuracy = num_correct / len(epoch_labels)
        self.log(f"val_accuracy", epoch_accuracy, logger=True)
        self.outputs = []


    def on_test_epoch_end(self):
        ret = self.validation_epoch_end(self.outputs, "test")
        self.outputs = []
        return ret

    def configure_optimizers(self):
        optimizer = optim.AdamW(lr=self.lr, params=self.model.parameters())
        scheduler = {'scheduler': optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.2)}
        return [optimizer], [scheduler]



In [25]:
def train_model():
    #EarlyStoppingの設定
    # 3epochで'val_loss'が0.05以上減少しなければ学習をストップ
    early_stop_callback = EarlyStopping(
        monitor='val_loss', min_delta=0.05, patience=3, mode='min')
    
    
    wandb.init(project="sweep_package_analysis")
    config=wandb.config
    wandb_logger = WandbLogger()
    model = ImageClassifier(model_name="efficientnet_b0", n_classes=2)

    wandb_logger.watch(model.net)
    
    trainer = pl.Trainer(
        #accelerator='gpu',
        devices=1,
        max_epochs=30, 
        logger=wandb_logger,
        callbacks=[early_stop_callback],
        log_every_n_steps=10
        )
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    trainer.fit(model, data_module)

### Train/Validation

In [26]:
with open('config_sweep.yaml', 'r') as file:
    sweep_config = yaml.safe_load(file)
print(sweep_config)

sweep_id=wandb.sweep(sweep_config, project="test_sweep")
wandb.agent(sweep_id=sweep_id, function=train_model, count=10)

{'method': 'random', 'metric': {'name': 'acc', 'goal': 'maximize'}, 'parameters': {'lr': {'values': [0.0001, 1e-05]}, 'batch_size': {'values': [256, 128, 64, 32]}}}
Create sweep with ID: cyiaxv1v
Sweep URL: https://wandb.ai/masa1357/test_sweep/sweeps/cyiaxv1v


[34m[1mwandb[0m: Agent Starting Run: wxf62hz9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	lr: 1e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  rank_zero_warn(
  rank_zero_warn(
[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | EfficientNet     | 4.0 M 
1 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params
16.040    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

VBox(children=(Label(value='0.027 MB of 0.027 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train_loss_epoch,▇█▆▆▆▅▅▃▅▂▅▂▅▁▅▁
train_loss_step,▆▅▆▄▆▄▅▄▆▄▄▃▃▄▂▃▅▄▄▃▃▁▃▅▁▁█▁▃▃▁▃▃▅▄▂▃▃▃▃
trainer/global_step,▁▁▁▁▁▂▁▁▁▁▃▂▂▂▂▄▂▂▂▂▅▂▂▂▂▆▂▂▂▆▇▃▃▃▇█▃▃▃█
val_accuracy,▁▂▃▅▆█▇█
val_loss,█▅▄▃▂▁▁▁

0,1
epoch,7.0
train_loss_epoch,0.11767
train_loss_step,0.35961
trainer/global_step,767.0
val_accuracy,0.84439
val_loss,0.39993


[34m[1mwandb[0m: Agent Starting Run: xtw6ad4w with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	lr: 1e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  rank_zero_warn(
  rank_zero_warn(
[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type             | Params
-----------------------------------------------
0 | model     | EfficientNet     | 4.0 M 
1 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params
16.040    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]