<a href="https://colab.research.google.com/github/park-geun-hyeong/practice_pytorch/blob/main/cifar10/cifar10_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Unzip file & Read Data Path

In [1]:
import os
import zipfile
import glob
import tqdm

In [2]:
# Project folder (under /content/drive/MyDrive); the dataset archive, the
# extracted `cifar10` directory and the saved checkpoints are stored here.
path = '/content/drive/MyDrive/PyTorch/Multi-classification/cifar10_Conv/'

# Make the project folder the working directory so that relative paths used
# in later cells resolve inside it.
os.chdir(path)

In [3]:
# Show what is currently stored in the project folder.
os.listdir(path)

['cifar10_Conv.ipynb',
 'cifar10_modified.zip',
 'cifar10',
 'cifar10_model2.pth',
 'model_201716175.pth']

In [None]:
# Extract the dataset archive into ./cifar10.
# The context manager closes the archive even if extraction raises, and the
# handle is no longer called `zip`, so the built-in zip() is not shadowed in
# later cells.
with zipfile.ZipFile('cifar10_modified.zip') as archive:
    archive.extractall('cifar10')

In [None]:
# Entries of the training directory. The position of an entry in this list is
# used as the integer class label in a later cell, so the order returned by
# glob defines the label mapping.
# NOTE(review): glob does not promise a sorted order - confirm the test
# listing yields the same order before relying on the labels.
train_list = glob.glob(path+'cifar10/train/*')

In [None]:
# Flatten the per-directory PNG listings into one list of image paths,
# visiting the directories in the order they appear in train_list.
train = [
    png_path
    for class_dir in train_list
    for png_path in glob.glob(class_dir + '/*.png')
]

len(train)

50000

In [None]:
# Label every training image with the index of its directory in train_list.
# Each directory contributes as many labels as it has PNG files, so the
# result lines up element-by-element with `train`.
target = []

for class_idx, class_dir in enumerate(train_list):
    n_images = len(glob.glob(class_dir + '/*.png'))
    target.extend([class_idx] * n_images)

print(len(target))

50000


In [None]:
# Entries of the small test directory, used here as the validation split.
# As with the training listing, the position of an entry in this list becomes
# its integer class label.
# NOTE(review): the labels only agree with the training labels if both glob
# calls return the class directories in the same order - confirm.
test_list = glob.glob(path+'cifar10/test_small/*')

In [None]:
# Flatten the per-directory PNG listings of the validation split into one
# list of image paths, following the order of test_list.
val_data = [
    png_path
    for class_dir in test_list
    for png_path in glob.glob(class_dir + '/*.png')
]

len(val_data)

1000

In [None]:
# Label every validation image with the index of its directory in test_list,
# so that val_target lines up element-by-element with val_data.
val_target = []

for class_idx, class_dir in enumerate(test_list):
    n_images = len(glob.glob(class_dir + '/*.png'))
    val_target.extend([class_idx] * n_images)

print(len(val_target))

1000


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

import cv2
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

In [5]:
# Use the GPU when one is actually usable, otherwise fall back to the CPU.
# is_available has to be *called*: the bare function object is always truthy,
# which would select 'cuda' even on a CPU-only machine.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

## Make DataSet & DataLoader

In [None]:
from torch.utils.data import Dataset, DataLoader

In [None]:
class train_cifar10(Dataset):
    """Training dataset that reads image files from disk on demand.

    Args:
        train: sequence of image file paths.
        target: sequence of integer class labels aligned with ``train``.
    """

    def __init__(self, train, target):
        self.train = train
        self.target = target

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.train)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Returns:
            tuple: ``(image, label)`` where ``image`` is a float32 tensor in
            channel-first (C, H, W) layout holding the pixel values exactly as
            decoded by OpenCV (no scaling; channel order is whatever
            ``cv2.imread`` yields), and ``label`` is a 0-dim int64 tensor.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        path = self.train[idx]
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; fail here with the offending path rather than with
            # an obscure transpose error further down.
            raise FileNotFoundError(f"cv2.imread could not read image: {path}")

        # (H, W, C) -> (C, H, W), converted to float32 in a contiguous buffer.
        chw = np.ascontiguousarray(np.transpose(img, (2, 0, 1)), dtype=np.float32)
        label = np.array(self.target[idx]).astype(np.int64)

        return torch.from_numpy(chw), torch.tensor(label)

In [None]:
class val_cifar10(Dataset):
    """Validation dataset that reads image files from disk on demand.

    Args:
        val_data: sequence of image file paths.
        val_target: sequence of integer class labels aligned with ``val_data``.
    """

    def __init__(self, val_data, val_target):
        self.val_data = val_data
        self.val_target = val_target

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.val_data)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Returns:
            tuple: ``(image, label)`` where ``image`` is a float32 tensor in
            channel-first (C, H, W) layout holding the pixel values exactly as
            decoded by OpenCV (no scaling; channel order is whatever
            ``cv2.imread`` yields), and ``label`` is a 0-dim int64 tensor.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        path = self.val_data[idx]
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; fail here with the offending path rather than with
            # an obscure transpose error further down.
            raise FileNotFoundError(f"cv2.imread could not read image: {path}")

        # (H, W, C) -> (C, H, W), converted to float32 in a contiguous buffer.
        chw = np.ascontiguousarray(np.transpose(img, (2, 0, 1)), dtype=np.float32)
        label = np.array(self.val_target[idx]).astype(np.int64)

        return torch.from_numpy(chw), torch.tensor(label)

In [None]:
# Wrap the path/label lists in their Dataset classes.
train_dataset = train_cifar10(train=train, target=target)
val_dataset = val_cifar10(val_data=val_data, val_target=val_target)

# Training batches (1000 samples) are shuffled; validation batches
# (100 samples) keep the dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=1000, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=100, shuffle=False)

In [None]:
# Pull a single batch from the training loader so its shapes can be inspected.
for first_batch in train_loader:
    input, output = first_batch
    break

In [None]:
# Sanity check: shapes of the image batch and the label batch.
input.shape, output.shape

(torch.Size([1000, 3, 28, 28]), torch.Size([1000]))

## Model

In [None]:
def _conv_bn_relu(in_channels, out_channels):
    """Return [3x3 Conv2d (stride 1, padding 1), BatchNorm2d, ReLU] as a flat list."""
    return [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
    ]


# Convolutional feature extractor: six conv/BN/ReLU stages (3 -> 16 -> 16 ->
# 32 -> 32 -> 64 -> 64 channels) with a 2x2 max-pool after the fourth and the
# sixth stage. The padded 3x3 convolutions keep the spatial size, so only the
# two pools shrink it (28 -> 14 -> 7 for the 28x28 inputs of this dataset).
_feature_layers = (
    _conv_bn_relu(3, 16)
    + _conv_bn_relu(16, 16)
    + _conv_bn_relu(16, 32)
    + _conv_bn_relu(32, 32)
    + [nn.MaxPool2d(kernel_size=2, stride=2)]
    + _conv_bn_relu(32, 64)
    + _conv_bn_relu(64, 64)
    + [nn.MaxPool2d(kernel_size=2, stride=2)]
)

# Fully connected classifier head ending in 10 class logits.
_classifier_layers = [
    nn.Flatten(),
    nn.Linear(64 * 7 * 7, 1024),
    nn.ReLU(),
    nn.Linear(1024, 256),
    nn.ReLU(),
    nn.Linear(256, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
]

# Everything lives in one flat Sequential, so module indices are unchanged.
model = nn.Sequential(*_feature_layers, *_classifier_layers)

In [None]:
# Cross-entropy loss applied to the model's raw outputs.
loss_fn = nn.CrossEntropyLoss()

# Adam over all model parameters, starting at a learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Halve the learning rate when the value passed to lr_sc.step() stops
# decreasing (mode='min') for longer than `patience` epochs.
lr_sc = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
    verbose=True,
)

from tqdm import tqdm_notebook
from sklearn.metrics import accuracy_score

In [None]:
import gc
# Run a full garbage-collection pass before training starts; the number shown
# as cell output is the return value of gc.collect().
gc.collect()

44

## Training

In [None]:
# tqdm.tqdm_notebook is deprecated (it emits a warning asking for
# tqdm.notebook.tqdm), so the progress bar is imported from tqdm.notebook.
from tqdm.notebook import tqdm as notebook_tqdm

model = model.to(device)
best_score = -1  # best validation accuracy seen so far

for epoch in notebook_tqdm(range(20)):
    # ---- training pass -------------------------------------------------
    train_loss = []
    model.train()

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        logits = model(images)
        loss = loss_fn(logits, labels)

        loss.backward()
        optimizer.step()

        train_loss.append(loss.item())

    # ---- validation pass -----------------------------------------------
    val_loss = []
    val_pred = []
    val_true = []

    model.eval()
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            val_loss.append(loss_fn(logits, labels).item())

            # Predicted class = index of the largest logit.
            val_pred.append(logits.argmax(dim=1).cpu().numpy())
            val_true.append(labels.cpu().numpy())

    val_pred = np.concatenate(val_pred)
    val_true = np.concatenate(val_true)

    mean_train_loss = np.mean(train_loss)
    mean_val_loss = np.mean(val_loss)

    # The plateau scheduler monitors the mean validation loss.
    lr_sc.step(mean_val_loss)

    # accuracy_score expects (y_true, y_pred).
    acc = accuracy_score(val_true, val_pred)

    print(f"epoch:{epoch}, train_loss:{mean_train_loss:.4f}, val_loss:{mean_val_loss:.4f}, acc:{acc:.4f}")

    # Keep only the checkpoint with the best validation accuracy. The whole
    # model object is saved (not just a state_dict) because the loading cell
    # restores it with a plain torch.load.
    if acc > best_score:
        best_score = acc
        torch.save(model, path+'cifar10_model2.pth')

        

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))

epoch:0, train_loss:1.6694, val_loss:1.4473, acc:0.4770
epoch:1, train_loss:1.2126, val_loss:1.1536, acc:0.5640
epoch:2, train_loss:0.9796, val_loss:0.9555, acc:0.6530
epoch:3, train_loss:0.8209, val_loss:0.9498, acc:0.6570
epoch:4, train_loss:0.7176, val_loss:0.8145, acc:0.6930
epoch:5, train_loss:0.6438, val_loss:0.8030, acc:0.7100
epoch:6, train_loss:0.5735, val_loss:0.9197, acc:0.6900
epoch:7, train_loss:0.5120, val_loss:0.7004, acc:0.7420
epoch:8, train_loss:0.4604, val_loss:0.7587, acc:0.7590
epoch:9, train_loss:0.3996, val_loss:0.7022, acc:0.7600
epoch:10, train_loss:0.3411, val_loss:0.7626, acc:0.7650
Epoch    12: reducing learning rate of group 0 to 5.0000e-04.
epoch:11, train_loss:0.3014, val_loss:0.8236, acc:0.7490
epoch:12, train_loss:0.1988, val_loss:0.6894, acc:0.7790
epoch:13, train_loss:0.1438, val_loss:0.7711, acc:0.7840
epoch:14, train_loss:0.1118, val_loss:0.8686, acc:0.7810
epoch:15, train_loss:0.0890, val_loss:0.8922, acc:0.7700
Epoch    17: reducing learning rate 

## Load Model & Evaluate Test Accuracy

In [None]:
# Restore the checkpoint written by the training loop. The file contains the
# whole pickled model object, so only load checkpoints you created yourself:
# unpickling an untrusted file can execute arbitrary code.
# NOTE(review): PyTorch versions whose torch.load defaults to
# weights_only=True reject whole-model pickles - confirm against the installed
# version and pass weights_only=False there if needed.
# map_location puts the weights on the device the test tensors are moved to.
model2 = torch.load(path+'cifar10_model2.pth', map_location=device)
# Make inference mode explicit instead of relying on the flag stored in the
# pickle.
model2 = model2.eval()

In [7]:
# Directories of the small test split. The absolute `path` prefix is used (as
# for the other directory listings in this notebook) so the cell does not
# depend on the current working directory.
# The position of a folder in this list is its integer class label, mirroring
# how the training labels were assigned.
folder_list = glob.glob(path + "cifar10/test_small/*")

# Collect image paths and labels from the SAME glob result so the two lists
# cannot drift apart.
test_imgpath_list = []
test_target_list = []
for class_idx, folder in enumerate(folder_list):
    png_paths = glob.glob(folder + "/*.png")
    test_imgpath_list += png_paths
    test_target_list += [class_idx] * len(png_paths)

if not test_imgpath_list:
    raise FileNotFoundError("no test images found under " + path + "cifar10/test_small/")

img_list = []
for img_path in test_imgpath_list:
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread returns None for unreadable files instead of raising.
        raise FileNotFoundError(f"cv2.imread could not read image: {img_path}")
    img_list.append(img)

# Stack to (N, H, W, C), then move channels first: (N, C, H, W).
img_test_np = np.array(img_list)
img_test_np = np.transpose(img_test_np, (0, 3, 1, 2))

target_test_np = np.array(test_target_list)

In [8]:
# Sanity check: shapes of the stacked test images and of their labels.
img_test_np.shape, target_test_np.shape

((1000, 3, 28, 28), (1000,))

In [9]:
# Whole test set as one float32 batch on the compute device.
X_test_tensor = torch.tensor(img_test_np).float().to(device)

# Inference only: eval() makes BatchNorm use its stored running statistics,
# and no_grad() avoids building an autograd graph for the whole batch.
model2.eval()
with torch.no_grad():
    output = model2(X_test_tensor)

# Predicted class = index of the largest logit.
pred = torch.argmax(output, dim=1)
pred_np = pred.cpu().numpy()

# Fraction of predictions that match the ground-truth labels.
accu = np.mean(pred_np == target_test_np)

print(accu)

0.784
