In [None]:
import pandas as pd
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms , models
from PIL import Image
import torch.nn as nn
import torch.nn.functional as f
import torch.optim as optim
import os
from tqdm import tqdm

## 데이터 불러오기

In [None]:
# Read train.csv into a DataFrame; the EDA cells below use it as `train`.
train_csv_path = './input/data/train/train.csv'
train = pd.read_csv(train_csv_path)

## EDA

In [None]:
# Number of rows for each distinct value of the `gender` column.
train.gender.value_counts()

In [None]:
# Number of rows for each distinct value of the `age` column.
train.age.value_counts()

In [None]:
# Lift pandas' row-display limit so the full per-gender age table is shown.
# This is a global option and stays in effect for every later cell.
pd.set_option('display.max_rows', None)

# Group rows by gender, then count how often each age occurs within a group.
gender_group = train.groupby(train['gender'])
gender_group['age'].value_counts()

In [None]:
# Bucket `age` into the three bands that the `label` function also uses:
# under 30, 30 to 59, and 60 or older.
condition = [
    train.age < 30,
    (train.age >= 30) & (train.age < 60),
    train.age >= 60,
]
choice = ['~29', '30~59', '60~']

# np.select falls back to `default` for rows matching no condition (e.g. a
# missing age). Its implicit default is the integer 0, which recent NumPy
# releases refuse to combine with string choices (TypeError), so pass an
# explicit string default instead.
train['age2'] = np.select(condition, choice, default='unknown')

# Preview only the first rows: `display.max_rows` was set to None earlier, so a
# bare `train` would render the entire table.
train.head()

In [None]:
# Number of rows in each age band.
train['age2'].value_counts()

In [None]:
# Count age bands within each gender. Group the current `train` frame here
# rather than reusing `gender_group`, which was created before the `age2`
# column existed; regrouping keeps this cell independent of that stale object.
train.groupby('gender')['age2'].value_counts()

## dataset / dataloader

#### label 함수

In [None]:
def label(gender,age,mask):
    """Map a (gender, age, mask) triple to an integer class id in 0..17.

    The id is the sum of three independent offsets:

    * mask:   'incorrect_mask' -> 6, 'normal' -> 12, anything else -> 0
    * gender: 'male' -> 0, anything else -> 3
    * age:    under 30 -> 0, 60 or older -> 2, otherwise -> 1

    Args:
        gender: gender string; only the exact value 'male' is treated as male.
        age: numeric age.
        mask: mask-state string (the dataset passes the image file stem).

    Returns:
        The integer class id.
    """
    # Offset contributed by the mask state.
    if mask == 'incorrect_mask':
        mask_offset = 6
    elif mask == 'normal':
        mask_offset = 12
    else:
        mask_offset = 0

    # Offset contributed by the gender.
    gender_offset = 0 if gender == 'male' else 3

    # Offset contributed by the age band.
    if age < 30:
        age_offset = 0
    elif age >= 60:
        age_offset = 2
    else:
        age_offset = 1

    return mask_offset + gender_offset + age_offset

#### dataset

In [None]:
class mydataset(Dataset):
    """Image dataset built from one sub-folder per person.

    Each sub-folder name is split on '_' into four fields
    (id, gender, race, age); each file stem inside the folder is used as the
    mask state. The class id of every image comes from
    ``label(gender, age, mask)``.

    Args:
        root: directory holding the per-person folders. Defaults to
            ``<cwd>/input/data/train/images``.
        transform: callable applied to the RGB PIL image in ``__getitem__``.
            Defaults to ``transforms.ToTensor()`` only.

    NOTE(review): the default transform does no mean/std normalisation, which
    torchvision's pretrained weights are documented to expect. Consider passing
    a transform that includes ``transforms.Normalize``.
    """

    def __init__(self, root=None, transform=None):
        if root is None:
            root = os.path.join(os.getcwd(), 'input/data/train/images')
        if transform is None:
            transform = transforms.Compose([transforms.ToTensor()])
        # Build the transform once instead of on every __getitem__ call.
        self.transform = transform

        imagelist = []
        labellist = []
        # os.listdir order is arbitrary; sorting keeps sample indices (and
        # therefore any seeded split) stable between runs and machines.
        for folder in sorted(os.listdir(root)):
            if folder.startswith('.'):
                continue  # skip hidden entries
            _person_id, gender, _race, age = folder.split('_')
            age = int(age)
            folder_path = os.path.join(root, folder)
            for image in sorted(os.listdir(folder_path)):
                if image.startswith('.'):
                    continue  # skip hidden files
                # splitext tolerates extra dots in the file name.
                mask, _ext = os.path.splitext(image)
                imagelist.append(os.path.join(folder_path, image))
                labellist.append(label(gender, age, mask))
        self.imagelist = imagelist
        self.labellist = labellist

    def __len__(self):
        """Return the number of images found."""
        return len(self.imagelist)

    def __getitem__(self, idx):
        """Return ``(transformed_image, class_id)`` for sample ``idx``."""
        # The context manager closes the file handle; convert('RGB') gives
        # every sample three channels so samples can be batched together.
        with Image.open(self.imagelist[idx]) as img:
            image = img.convert('RGB')
        return self.transform(image), self.labellist[idx]

In [None]:
dataset = mydataset()

# Derive the 80/20 split sizes from the real dataset length; random_split
# raises if the sizes do not sum to len(dataset). The validation part takes
# the remainder so rounding can never lose a sample.
n_total = len(dataset)
n_train = int(n_total * 0.8)
n_val = n_total - n_train

# A seeded generator makes the train/validation membership reproducible.
split = torch.utils.data.random_split(
    dataset,
    [n_train, n_val],
    generator=torch.Generator().manual_seed(42),
)
trainset, valset = split[0], split[1]

#### dataloader

In [None]:
# Settings shared by both loaders: 256 samples per batch, loaded in the main
# process (num_workers=0).
loader_kwargs = dict(batch_size=256, num_workers=0)

# Training batches are reshuffled every epoch; validation order is fixed.
train_loader = DataLoader(trainset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(valset, shuffle=False, **loader_kwargs)

## model

In [None]:
class model(nn.Module):
    """ResNet-18 with a frozen pretrained backbone and a new linear head.

    Every parameter of the pretrained network is frozen
    (``requires_grad=False``). The final ``fc`` layer is then replaced by a
    fresh ``nn.Linear``, so the head is the only trainable part.

    Args:
        num_classes: number of output logits (default 18, matching the ids
            produced by ``label``).
    """

    def __init__(self, num_classes=18):
        super().__init__()
        self.model = models.resnet18(pretrained=True)
        for param in self.model.parameters():
            param.requires_grad_(False)
        # Read the head's input width from the backbone rather than
        # hard-coding 512.
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, dataset):
        """Return the class logits for a batch of images.

        Args:
            dataset: input batch tensor, passed straight to the ResNet.
        """
        # Return the output directly. Storing it on `self` would keep the last
        # batch's activations (and autograd graph) alive between steps.
        return self.model(dataset)

In [None]:
# The name `model` refers to the class before this cell runs and to the
# instance afterwards. Resolve the class either way so the cell can be re-run
# without calling the instance by mistake.
model_cls = model if isinstance(model, type) else type(model)
model = model_cls()

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

#### loss , optimizer

In [None]:
# Cross-entropy loss over the class logits.
criterion = nn.CrossEntropyLoss()

# Plain SGD with learning rate 0.01 over every parameter of the model.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

## train / validation

#### train

In [None]:
# NOTE(review): the original cell left `epochs` and `batch_size` blank, which
# is a syntax error. 10 epochs is a placeholder to tune; the batch size is read
# from the loader so the two cannot drift apart.
epochs = 10
batch_size = train_loader.batch_size
acc = 0  # best training accuracy (%) seen so far

# Make sure the checkpoint directory exists before the first save.
save_dir = os.path.join(os.getcwd(), 'save')
os.makedirs(save_dir, exist_ok=True)

for epoch in range(epochs):
    # Re-enter training mode every epoch in case another cell called eval().
    model.train()
    running_loss = 0.0
    correct = 0
    for data in tqdm(train_loader, desc=f'epoch {epoch + 1}/{epochs}'):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        preds = torch.argmax(outputs, dim=-1)
        correct += (preds == labels).sum().item()
        # .item() yields a Python float, so no tensors are accumulated.
        running_loss += loss.item()

    # Mean loss per batch, and accuracy over the whole training split.
    n_batches = len(train_loader)
    epoch_acc = correct / len(trainset) * 100
    print('[%d, %d] loss: %.3f' %
          (epoch + 1, n_batches, running_loss / n_batches))
    print(epoch_acc)

    # Save a checkpoint whenever training accuracy improves.
    if acc < epoch_acc:
        acc = epoch_acc
        torch.save(model.state_dict(), os.path.join(save_dir, f'epoch{epoch}.pth'))

print('Finished Training')

#### validation

In [None]:
# NOTE(review): the original cell left `path` blank (a syntax error). The
# training loop only saves when accuracy improves, so the highest-numbered
# `epoch<N>.pth` in ./save is the best checkpoint of a run. Stale files from an
# earlier run would also be picked up; point `path` elsewhere if needed.
save_dir = os.path.join(os.getcwd(), 'save')
prefix, suffix = 'epoch', '.pth'
checkpoints = [
    name for name in os.listdir(save_dir)
    if name.startswith(prefix) and name.endswith(suffix)
    and name[len(prefix):-len(suffix)].isdigit()
]
if not checkpoints:
    raise FileNotFoundError(f'no epoch<N>.pth checkpoint found in {save_dir}')
path = os.path.join(
    save_dir,
    max(checkpoints, key=lambda name: int(name[len(prefix):-len(suffix)])),
)

# `model` is already an instance here, so load the weights into it instead of
# calling it as if it were the class. map_location lets a checkpoint saved on
# GPU load on whatever `device` is in use.
model.load_state_dict(torch.load(path, map_location=device))
model.eval()

# Start from zero so the training cell's `correct` does not leak in.
correct = 0
with torch.no_grad():
    for val_batch in tqdm(val_loader):
        inputs, labels = val_batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        preds = torch.argmax(outputs, dim=-1)
        correct += (preds == labels).sum().item()

# Validation accuracy in percent.
print(correct / len(valset) * 100)