### 재활용품 분류

<br>

[재활용품 분류 및 선별 데이터](https://www.aihub.or.kr/aihubdata/data/view.do?currMenu=115&topMenu=100&aihubDataSe=realm&dataSetSn=71362)
<br>[재활용품 분류](https://aifactory.space/task/2637/overview)

<br>

In [1]:
from torchvision import transforms
import torch
import sys
import tqdm
import numpy as np
import random
import os
from PIL import Image
import pandas as pd
import torchvision.models as models
import torch.nn as nn
import glob
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [2]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and configures cuDNN for
    deterministic execution.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune (and therefore vary) the convolution
    # algorithm between runs, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

seed_everything(123)  # Fix the seed for reproducibility

In [3]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using PyTorch version:', torch.__version__, ' Device:', DEVICE)

Using PyTorch version: 2.1.0+cu121  Device: cuda


In [32]:
# Zip archive holding the training images.
x_train_path = 'data/train.zip'
# CSV index of the training set; the training dataset reads its `path` and
# `label` columns.
y_train_path = 'data/train_y.csv'

In [33]:
# Extraction directory = archive path without its extension ('data/train').
# splitext does not depend on the extension being exactly four characters long.
directory_to_extract_to = os.path.splitext(x_train_path)[0]

In [6]:
"""
if not os.path.exists(directory_to_extract_to):
  !unzip {x_train_path} -d {directory_to_extract_to}
"""

!unzip {'data/train.zip'} -d {'data/train'}

Archive:  data/train.zip
replace data/train/100528@5_01002_220809_P1_T1.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [34]:
IMAGE_SIZE = 224

# Preprocessing: resize to a fixed IMAGE_SIZE x IMAGE_SIZE square, then convert
# the PIL image to a tensor.
data_transform = transforms.Compose(
    [
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
    ]
)

# Class names; the position of a name in this list is its integer class id.
# It would be better to sort the class label names alphabetically
class_labels = ["battery", "can", "glass", "light", "paper"]
# Lookup from the stringified class id ("0".."4") to the class name.
class_labels_map = {str(position): name for position, name in enumerate(class_labels)}

In [35]:
BATCH_SIZE = 128

class CustomDataSet(torch.utils.data.Dataset):
    """Labelled image dataset driven by a CSV index file.

    The CSV is expected to provide a ``path`` column (image file name relative
    to the image directory) and a ``label`` column (a class name contained in
    ``class_list``).
    """

    def __init__(self, csv_file, class_list, transform=None, root_dir=None):
        """
        Args:
            csv_file: Path of the CSV index file.
            class_list: Ordered class names; a name's position is its class id.
            transform: Optional callable applied to each loaded PIL image.
            root_dir: Directory containing the images. When omitted, the
                notebook-level ``directory_to_extract_to`` is used.
        """
        self.df = pd.read_csv(csv_file)
        self.transform = transform
        self.class_list = class_list
        self.root_dir = root_dir

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return self.df.shape[0]

    def __getitem__(self, index):
        """Return ``(image, class_id)`` for the row at position ``index``.

        Raises:
            ValueError: If the row's label is not contained in ``class_list``.
        """
        # Fall back to the notebook global at access time, as the original did.
        root = self.root_dir if self.root_dir is not None else directory_to_extract_to
        image_path = os.path.join(root, self.df['path'].iloc[index])
        # Force three channels: grayscale / RGBA / palette files would otherwise
        # yield tensors with a different channel count, which cannot be batched
        # together or fed to the RGB network. The context manager closes the
        # file handle once the pixel data has been copied by convert().
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        label = self.class_list.index(self.df['label'].iloc[index])

        if self.transform:
            image = self.transform(image)
        return image, label

# Dataset over the training CSV, using the shared preprocessing pipeline.
train_data_object = CustomDataSet(y_train_path, class_labels, transform=data_transform)

# Serve the training set in shuffled mini-batches of BATCH_SIZE samples.
train_loader = torch.utils.data.DataLoader(
    train_data_object, batch_size=BATCH_SIZE, shuffle=True
)

In [36]:
# ImageNet-pretrained ResNet-34. `pretrained=True` is deprecated in current
# torchvision; the explicit weights enum selects the same checkpoint.
model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
# Replace the ImageNet classifier head with one output per recycling class.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(class_labels))
model = model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()



In [None]:
def train(model, train_loader, optimizer, epoch, log_interval):
    """Run one training epoch, print its metrics and checkpoint the model.

    Uses the notebook-level ``DEVICE`` and ``criterion``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(image, label)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Epoch number, used in the log line and the checkpoint name.
        log_interval: Currently unused; kept so existing calls keep working.

    Returns:
        Tuple ``(mean_loss, accuracy)`` over all samples seen in the epoch.
        For an empty loader this is ``(nan, 0.0)``.
    """
    model.train()
    correct = 0
    total = 0
    loss_sum = 0.0
    for image, label in tqdm.tqdm(train_loader):
        image = image.to(DEVICE)
        label = label.to(DEVICE)

        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        batch_size = label.size(0)
        predicted = output.argmax(dim=1)
        correct += (predicted == label).sum().item()
        total += batch_size
        # Weight by batch size so a smaller final batch does not skew the mean.
        loss_sum += loss.item() * batch_size

    # Report the epoch mean instead of whatever the last batch happened to be,
    # and stay well-defined when the loader yields no batches.
    mean_loss = loss_sum / total if total else float('nan')
    accuracy = round(correct / total, 4) if total else 0.0

    print("Train Epoch: {}\tTrain Loss: {:.6f}\tTrain Accuracy: {:.6f}".format(
        epoch, mean_loss, accuracy))

    # The whole module is pickled (not just its state_dict) because the
    # evaluation cell restores it with a plain torch.load().
    torch.save(model, 'model_' + str(epoch) + '.pth')
    return mean_loss, accuracy

EPOCHS = 3
for epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer, epoch, log_interval = 10)

In [38]:
x_test_path = 'data/test.zip'
y_pred_save_path = 'y_pred.csv'

directory_to_extract_to_test = x_test_path[:-4]

if not os.path.exists(directory_to_extract_to_test):
  !unzip {x_test_path} -d {directory_to_extract_to_test}


data_transform = transforms.Compose([transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
                                             transforms.ToTensor()])

# Define and map the class label
# It would be better to sort the class label names alphabetically
class_labels = ["battery","can","glass","light","paper"]
class_labels_map = {}
for indx, label in enumerate(class_labels):
  class_labels_map[str(indx)] = label

In [39]:
class CustomDataSet(torch.utils.data.Dataset):
    """Unlabelled image dataset over the ``*.jpg`` files of one directory.

    NOTE: this redefines (and from here on shadows) the training dataset class
    of the same name, with a different constructor signature.
    """

    def __init__(self, class_list, transform=None, root_dir=None):
        """
        Args:
            class_list: Ordered class names; stored but not used by this class.
            transform: Optional callable applied to each loaded PIL image.
            root_dir: Directory to scan for ``*.jpg`` files. When omitted, the
                notebook-level ``directory_to_extract_to_test`` is used.
        """
        if root_dir is None:
            root_dir = directory_to_extract_to_test
        # glob returns files in arbitrary order; sort so that sample indices
        # (and therefore the prediction file) are reproducible across runs.
        self.df = sorted(glob.glob(os.path.join(root_dir, "*.jpg")))
        self.transform = transform
        self.class_list = class_list

    def __len__(self):
        """Return the number of image files found."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, file_name)`` for the file at position ``index``."""
        name = self.df[index]
        # Force three channels so every sample has the same tensor shape; the
        # context manager closes the file once convert() has copied the pixels.
        with Image.open(name) as raw_image:
            image = raw_image.convert('RGB')
        # File name without the directory part, independent of its length.
        path = os.path.basename(name)

        if self.transform:
            image = self.transform(image)
        return image, path

test_data_object = CustomDataSet(class_labels, data_transform)

BATCH_SIZE = 128

# Serve the test images in mini-batches. Shuffling brings nothing at inference
# time and only makes the order of the written predictions vary between runs.
test_loader = torch.utils.data.DataLoader(
        test_data_object,
        batch_size=BATCH_SIZE,
        shuffle=False
    )

In [40]:
# Same architecture as in training. `pretrained=True` is deprecated in current
# torchvision; the explicit weights enum selects the same checkpoint.
# NOTE: `model` is replaced by the checkpoint loaded in the evaluation cell.
model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
# Replace the ImageNet classifier head with one output per recycling class.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(class_labels))
model = model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [42]:
def evaluate(model, test_loader, csv_path='y_pred.csv', npy_path='y_pred.npy'):
    """Predict a class for every test image and write the results to disk.

    Uses the notebook-level ``DEVICE`` and ``class_labels``.

    Args:
        model: Trained network; it is switched to evaluation mode.
        test_loader: Iterable yielding ``(image_batch, file_names)`` pairs.
        csv_path: Destination of the CSV with ``path`` and ``pred`` columns,
            where ``pred`` is the predicted class name.
        npy_path: Destination of the NumPy array of predicted class ids, in
            the same order as the CSV rows.

    Returns:
        The DataFrame that was written to ``csv_path``.
    """
    model.eval()
    paths = []
    predictions = []
    with torch.no_grad():
        for image, name in tqdm.tqdm(test_loader):
            image = image.to(DEVICE)
            output = model(image)
            # One arg-max per row, so each image gets its own prediction
            # (previously the first image's prediction was reused for the
            # whole batch).
            predictions.extend(output.argmax(dim=1).cpu().tolist())
            paths.extend(name)

    # Build the frame once instead of concatenating one row per image.
    df = pd.DataFrame({
        "path": paths,
        "pred": [class_labels[class_id] for class_id in predictions],
    })
    df.to_csv(csv_path, index=False)
    np.save(npy_path, np.array(predictions, dtype=np.int64))
    return df

# NOTE(review): this is the checkpoint written after the first epoch; confirm
# that it, and not a later epoch, is the one intended for submission.
model_path = 'model_1.pth'
# The checkpoint is a fully pickled module (written with torch.save(model, ...)),
# so it must be unpickled with weights_only=False -- only load checkpoints you
# trust. map_location lets a checkpoint saved on GPU open on a CPU-only machine.
model = torch.load(model_path, map_location=DEVICE, weights_only=False)
output = evaluate(model, test_loader)

  0%|          | 0/10 [00:00<?, ?it/s]

100%|██████████| 10/10 [00:21<00:00,  2.11s/it]
