In [1]:
import os
import numpy as np
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from tqdm import tqdm
from sklearn.model_selection import train_test_split

from PIL import Image
from torchvision import datasets, models, transforms
from sklearn.metrics import accuracy_score, f1_score

In [2]:
# Root folder of the training split; every other input path is derived from it.
DATA_DIR = '/workspace/Competition/SLEEP/EEG/data/train/'

# Despite the "_dir" suffix, this is the path of the label CSV file itself.
label_dir = os.path.join(DATA_DIR, 'train1_labels.csv')

# Image folder: <DATA_DIR>/trace1/eeg1.
train_dir = os.path.join(os.path.join(DATA_DIR, 'trace1'), 'eeg1')

## Device

In [3]:
# Restrict CUDA to the device with index 1; this is set before the
# availability check below so that the check sees the restricted device list.
os.environ.update({'CUDA_VISIBLE_DEVICES': '1'})

# Use the GPU when CUDA reports one, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Dataset

In [4]:
class TraceDataset(Dataset):
    """Image dataset pairing ``<fname>.png`` files with encoded state labels.

    Args:
        datapath: Directory that contains the PNG files.
        labeldf: DataFrame with a ``fname`` column (file name without the
            ``.png`` extension) and a ``state`` column whose values are
            ``'N'``, ``'R'`` or ``'W'``.

    Each item is a tuple ``(image, label)`` where ``image`` is the RGB
    conversion of the PNG passed through ``transforms.ToTensor()`` and
    ``label`` is the integer code of the state (N=0, R=1, W=2).

    Raises:
        KeyError: from ``__getitem__`` if a state is not one of N / R / W.
    """

    def __init__(self, datapath, labeldf):
        self.df = labeldf
        self.label_encoding = {'N': 0, 'R': 1, 'W': 2}
        self.data_path = datapath
        self.file_names = self.df['fname']
        self.labels = self.df['state']
        self.transforms = transforms.Compose([
            transforms.ToTensor(),
        ])

    def __len__(self):
        return len(self.file_names)

    def __getitem__(self, index):
        # Positional lookup: `index` comes from a sampler as 0..len-1, which
        # only coincides with the DataFrame's labels when the frame has a
        # default RangeIndex. `.iloc` makes the dataset independent of that.
        file_name = self.file_names.iloc[index]
        state = self.labels.iloc[index]

        image_path = os.path.join(self.data_path, file_name + '.png')
        # `convert` returns a new, fully loaded image, so the file handle can
        # be released as soon as the conversion is done.
        with Image.open(image_path) as handle:
            image = handle.convert('RGB')
        image = self.transforms(image)

        return image, self.label_encoding[state]



## Model

In [5]:
class EffNet(nn.Module):
    """EfficientNet-B4 (created through ``timm``) with a new 3-way classifier.

    The backbone is created with ``pretrained=True`` and all of its
    parameters are frozen; only the replacement ``classifier`` head
    (1792 -> 512 -> 256 -> 3) is left trainable.
    """

    def __init__(self):
        # nn.Module must be initialised before any submodule is assigned,
        # otherwise the `self.model = ...` assignment below raises.
        super().__init__()
        # NOTE(review): `timm` is not imported anywhere in the visible
        # notebook; `import timm` has to be added to the import cell before
        # this class can be instantiated.
        self.model = timm.create_model('efficientnet_b4', pretrained=True)
        self.model.classifier = nn.Sequential(
            nn.Linear(in_features=1792, out_features=512),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 3),
        )
        # Freeze everything, then re-enable gradients for the new head only.
        for param in self.model.parameters():
            param.requires_grad = False
        for param in self.model.classifier.parameters():
            param.requires_grad = True

    def forward(self, x):
        """Return the 3-class logits produced by the wrapped model for ``x``."""
        return self.model(x)

In [6]:
class ResNet(nn.Module):
    """ResNet-18 (``pretrained=True``) with a 512 -> 256 -> 128 -> 3 head.

    Unlike ``ResNet_frozen`` no parameter is frozen here, so the whole
    network is fine-tuned.
    """

    def __init__(self):
        # Initialise this class's own nn.Module base (the original named
        # ResNet_frozen here, which fails for a ResNet instance).
        super().__init__()
        self.model = models.resnet18(pretrained=True)
        self.model.fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 3),
        )

    def forward(self, x):
        """Return the 3-class logits for ``x``."""
        # The replacement head lives at `self.model.fc` and is already applied
        # by the wrapped model; there is no separate `self.fc` to call.
        return self.model(x)

In [7]:
class ResNet_frozen(nn.Module):
    """ResNet-18 (``pretrained=True``) used as a fixed feature extractor.

    All backbone parameters have ``requires_grad`` switched off; only the
    replacement ``fc`` head (512 -> 256 -> 128 -> 3) remains trainable.
    """

    def __init__(self):
        super().__init__()
        backbone = models.resnet18(pretrained=True)

        # Freeze every parameter the backbone currently owns.
        for weight in backbone.parameters():
            weight.requires_grad = False

        # Freshly created layers have requires_grad=True by default, so
        # attaching the head after freezing leaves exactly the head trainable.
        head_layers = [
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 3),
        ]
        backbone.fc = nn.Sequential(*head_layers)

        self.model = backbone

    def forward(self, x):
        """Return the 3-class logits produced by the wrapped model for ``x``."""
        return self.model(x)

## Dataloader

In [8]:
# DataLoader arguments shared by the loaders built further down.
SHUFFLE = True       # passed as `shuffle`
DROP_LAST = False    # passed as `drop_last`
PIN_MEMORY = True    # passed as `pin_memory`
NUM_WORKERS = 1      # passed as `num_workers`
BATCH_SIZE = 16      # passed as `batch_size`

In [9]:
# Load the label table and hold out 20 % of the rows for validation.
train = pd.read_csv(label_dir)
traindf, valdf = train_test_split(
    train,
    test_size=0.2,
    random_state=42,          # fixed seed: the same split on every run
    stratify=train['state'],  # keep the class proportions equal in both parts
)
# Give both parts a fresh 0..n-1 index.
traindf = traindf.reset_index(drop=True)
valdf = valdf.reset_index(drop=True)

In [10]:
def _make_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader using the notebook-wide loader settings."""
    return DataLoader(dataset=dataset,
                      batch_size=BATCH_SIZE,
                      num_workers=NUM_WORKERS,
                      shuffle=shuffle,
                      pin_memory=PIN_MEMORY,
                      drop_last=DROP_LAST)


train_dataset = TraceDataset(datapath=train_dir, labeldf=traindf)
val_dataset = TraceDataset(datapath=train_dir, labeldf=valdf)

train_loader = _make_loader(train_dataset, shuffle=SHUFFLE)
# Validation only aggregates metrics over the whole set, so the order of the
# samples is irrelevant; keep it fixed instead of reshuffling every epoch.
val_loader = _make_loader(val_dataset, shuffle=False)

In [11]:
# Optimisation hyper-parameters.
EPOCHS = 1             # number of passes over the training loader
LEARNING_RATE = 5e-5   # step size handed to the optimizer

In [12]:
# Build the frozen-backbone classifier and move it to the selected device;
# `.to()` on a module returns the module itself, so the calls can be chained.
model = ResNet_frozen().to(DEVICE)
# Bare last expression: shows the module structure as the cell output.
model

ResNet_frozen(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track

In [13]:
# Adam over the model's parameters, with the learning rate from the config cell.
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
# Cross-entropy between the model's 3-class logits and the integer labels.
criterion = nn.CrossEntropyLoss()

## Train

In [14]:
# Directory into which the training loop writes its checkpoint (`best.pt`).
RECORDER_DIR = '/workspace/Competition/SLEEP/EEG/classifier1/results'

In [15]:
def _run_epoch(net, loader, loss_fn, device, optimizer=None):
    """Run one full pass over ``loader`` and return its aggregate metrics.

    Args:
        net: Model to run. It is put in train mode when ``optimizer`` is
            given and in eval mode otherwise.
        loader: Iterable of ``(x, y)`` batches.
        loss_fn: Callable ``loss_fn(logits, y)`` returning a scalar loss.
        device: Device the batches are moved to.
        optimizer: When given, a backward pass and an optimizer step are
            performed for every batch; when ``None`` gradients are disabled.

    Returns:
        ``(mean_loss, accuracy, macro_f1)`` where ``mean_loss`` is the
        average of the per-batch losses.
    """
    training = optimizer is not None
    net.train(training)

    total_loss = 0.0
    targets, preds = [], []
    with torch.set_grad_enabled(training):
        # Wrapping the loader itself (not `enumerate(loader)`) lets tqdm see
        # its length and display a real progress bar.
        for x, y in tqdm(loader):
            x, y = x.to(device), y.to(device)
            y_pred = net(x)
            loss = loss_fn(y_pred, y)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item()
            preds.extend(y_pred.argmax(dim=1).cpu().tolist())
            targets.extend(y.cpu().tolist())

    # Divide by the batch count reported by the loader rather than by a
    # leftover loop variable; guard against a zero-length loader.
    mean_loss = total_loss / max(len(loader), 1)
    accuracy = accuracy_score(targets, preds)
    macro_f1 = f1_score(targets, preds, average='macro')
    return mean_loss, accuracy, macro_f1


# `np.Inf` no longer exists in NumPy 2.x; the builtin float works everywhere.
best_loss = float('inf')
# Make sure the checkpoint directory exists before the first save.
os.makedirs(RECORDER_DIR, exist_ok=True)

for epoch in range(EPOCHS):
    train_mean_loss, train_accuracy, train_f1score = _run_epoch(
        model, train_loader, criterion, DEVICE, optimizer=optimizer)
    val_mean_loss, val_accuracy, val_f1score = _run_epoch(
        model, val_loader, criterion, DEVICE)

    msg1 = f"Epoch {epoch}/{EPOCHS} - Train loss: {train_mean_loss}; Train Accuracy: {train_accuracy}; Train F1: {train_f1score}"
    msg2 = f"Valid loss: {val_mean_loss}; Val Accuracy: {val_accuracy}; Val F1: {val_f1score}"
    print(msg1)
    print(msg2)

    # Keep only the checkpoint with the lowest validation loss seen so far.
    if val_mean_loss < best_loss:
        best_loss = val_mean_loss
        check_point = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        torch.save(check_point, os.path.join(RECORDER_DIR, 'best.pt'))


2292it [08:29,  4.50it/s]
573it [02:11,  4.35it/s]


Epoch 0/1 - Train loss: 0.3879741866850208; Train Accuracy: 0.8605405995144969; Train F1: 0.7197908376925892
Valid loss: 0.1992095664962736; Val Accuracy: 0.9418503163866463; Val F1: 0.9035563540020632
