# Libaray

In [28]:
import os ,time

import glob
import random
from PIL import Image, ImageFile
import ttach as tta

import torch
import torch.nn as nn
from torchvision import transforms
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, grad_scaler
from torch.optim.lr_scheduler import _LRScheduler
import torch_optimizer as optim
from efficientnet_pytorch import EfficientNet

import numpy as np
import pandas as pd, cv2
from tqdm import tqdm
from easydict import EasyDict
from sklearn.preprocessing import LabelEncoder #Label Encoder로 숫자로 변경함
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from ptflops import get_model_complexity_info

from collections import Counter
import argparse
import neptune.new as neptune
import timm

#안하면 오류남
import warnings
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
warnings.filterwarnings(action='ignore')

# device setting

In [29]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"device setting : {device}")

device setting : cuda


## Args Setting

In [30]:
args = EasyDict({'encoder_name':'efficientnet_b2',
                 'drop_path_rate':0.2,
                 'num_classes':88,
                 'use_b7' : False,
                 'bad' : False
                })

In [31]:
train_df = pd.read_csv('../Data/train.csv')
test_df = pd.read_csv('../Data/test.csv')
bad_df = pd.read_csv('../Data/train_df_bad.csv')

In [32]:
display(bad_df)

Unnamed: 0,index,file_name,class,state,label
0,0,10000.png,transistor,good,transistor-good
1,1,10001.png,capsule,good,capsule-good
2,2,10002.png,transistor,good,transistor-good
3,3,10003.png,wood,good,wood-good
4,4,10004.png,bottle,good,bottle-good
...,...,...,...,...,...
2995,2995,12995.png,screw,good,screw-good
2996,2996,12996.png,wood,good,wood-good
2997,2997,12997.png,zipper,good,zipper-good
2998,2998,12998.png,tile,oil,tile-bad


In [33]:
display(train_df)

Unnamed: 0,index,file_name,class,state,label
0,0,10000.png,transistor,good,transistor-good
1,1,10001.png,capsule,good,capsule-good
2,2,10002.png,transistor,good,transistor-good
3,3,10003.png,wood,good,wood-good
4,4,10004.png,bottle,good,bottle-good
...,...,...,...,...,...
2995,2995,12995.png,screw,good,screw-good
2996,2996,12996.png,wood,good,wood-good
2997,2997,12997.png,zipper,good,zipper-good
2998,2998,12998.png,tile,oil,tile-oil


In [34]:
display(test_df)

Unnamed: 0,index,file_name,class,state,label
0,0,13000.png,,,
1,1,13001.png,,,
2,2,13002.png,,,
3,3,13003.png,,,
4,4,13004.png,,,
...,...,...,...,...,...
995,995,13995.png,,,
996,996,13996.png,,,
997,997,13997.png,,,
998,998,13998.png,,,


In [35]:
le=LabelEncoder()
df_train_label = le.fit_transform(train_df['label'])
train_df['label'] = df_train_label
print(df_train_label)

le_bad = LabelEncoder()
df_train_bad_label = le_bad.fit_transform(bad_df['label'])
print(df_train_bad_label)

[72 15 72 ... 84 65 61]
[25  5 25 ... 29 20 20]


# train_augmentation_transform

In [36]:
def get_train_augmentation(img_size, ver):
    if ver==1:
        transform = transforms.Compose([
            transforms.ToTensor(), 
            transforms.Resize((img_size, img_size)),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
                ])      
    return transform

In [37]:
transform = get_train_augmentation(img_size = 512, ver = 1)

# Network

In [38]:
class Network(nn.Module):
    def __init__(self, args):
        super().__init__()
        self.encoder = timm.create_model(args.encoder_name, pretrained=False, drop_path_rate = args.drop_path_rate,
                                            num_classes = args.num_classes)
        
    def forward(self, x):
        x = self.encoder(x)
        return x

# Test Data Loader

In [39]:
class Test_Dataset(Dataset):
    def __init__(self, df, transform=None):
        self.file_name = df['file_name'].values
        self.transform = transform

        print(f"테스트 데이터셋 생성 완료,,\n 데이터셋 사이즈 : {len(self.file_name)}")

    def __getitem__(self, index):        
        image = np.array(Image.open(f'../Data/test/{self.file_name[index]}').convert('RGB'))

        if self.transform is not None:
            image = self.transform(Image.fromarray(image))

        return image

    def __len__(self):
        return len(self.file_name)

In [40]:
test_dataset = Test_Dataset(test_df, transform)
test_load = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)

테스트 데이터셋 생성 완료,,
 데이터셋 사이즈 : 1000


In [41]:
def predict(args, test_load, model_path):
    _transforms = tta.Compose([
        tta.Rotate90(angles=[60, 150, 240, 330]),
    ])
    model = Network(args).to(device)
    model.load_state_dict(torch.load(model_path)['state_dict'])
    model = tta.ClassificationTTAWrapper(model, _transforms).to(device)
    model.eval()

    output = []
    pred = []
    with torch.no_grad():
        with autocast():
            for batch in tqdm(test_load):
                images = torch.tensor(batch, dtype = torch.float32, device = device).clone().detach()
                preds = model(images)
                pred.extend(preds)
                output.extend(torch.tensor(torch.argmax(preds, dim=1), dtype=torch.int32).cpu().numpy())
                
    if args.bad == False:
        labels = le.inverse_transform(output)
    else:
        labels = le_bad.inverse_transform(output)
    
    return labels, output, pred

In [42]:
#---------------------------------------------------------------------#
model_path = "./result/001/best_model.pth"
args = EasyDict({'encoder_name':'efficientnet_b7',
                 'drop_path_rate':0.2,
                 'num_classes':88,
                 'use_b7' : False,
                 'bad' : False
                })
predicts1,output1, preds1 = predict(args, test_load, model_path)
#---------------------------------------------------------------------#
model_path = "./result/002/best_model.pth"
args = EasyDict({'encoder_name':'efficientnet_b6',
                 'drop_path_rate':0.2,
                 'num_classes':88,
                 'use_b7' : False,
                 'bad' : False
                })
predicts2,output2,preds2 = predict(args, test_load, model_path)
#---------------------------------------------------------------------#
model_path = "./result/003/best_model.pth"
predicts3,output3, preds3 = predict(args, test_load, model_path)
#---------------------------------------------------------------------#
model_path = "./result/004/best_model.pth"
args = EasyDict({'encoder_name':'efficientnet_b7',
                 'drop_path_rate':0.2,
                 'num_classes':88,
                 'use_b7' : True,
                 'bad' : False
                })
predicts4,output4,preds4 = predict(args, test_load, model_path)

#---------------------------------------------------------------------#
model_path = "./result/009/best_model.pth"
args = EasyDict({'encoder_name':'efficientnet_b6',
                 'drop_path_rate':0.2,
                 'num_classes':30,
                 'bad' : True
                })

predicts5,output5,preds5 = predict(args, test_load, model_path)
#---------------------------------------------------------------------#

100%|███████████████████████████████████████████| 63/63 [00:38<00:00,  1.65it/s]
100%|███████████████████████████████████████████| 63/63 [00:29<00:00,  2.12it/s]
100%|███████████████████████████████████████████| 63/63 [00:30<00:00,  2.10it/s]
100%|███████████████████████████████████████████| 63/63 [00:39<00:00,  1.60it/s]
100%|███████████████████████████████████████████| 63/63 [00:29<00:00,  2.11it/s]


In [43]:
softmax = []
softmax_second = []

for i in range(len(preds1)):
    softvoting = (preds1[i] + preds2[i] + preds3[i] + preds4[i])/4
    softmax.append(torch.tensor(torch.argmax(softvoting).cpu().numpy()))
    softmax_second.append(torch.tensor(torch.topk(softvoting,2).indices[1]).cpu().numpy())
    
predict_softmax = le.inverse_transform(softmax)
predict_softmax_second = le.inverse_transform(softmax_second)

In [44]:
print(predict_softmax[:5])
print(predict_softmax_second[:5])

['pill-good' 'leather-good' 'hazelnut-good' 'zipper-good' 'screw-good']
['pill-crack' 'leather-cut' 'hazelnut-cut' 'zipper-split_teeth'
 'screw-manipulated_front']


In [45]:
fix_good = predicts2.copy()
for i in range(len(predicts2)):
    if "good" in predicts2[i] and "good" not in predicts5[i]:
        print(f"{predicts2[i], predicts5[i]}")
        fix_good[i] = predict_softmax_second[i]
        print(f"so transfrom : {fix_good[i]}")
        print("---------------------------")
        
        

('cable-good', 'cable-bad')
so transfrom : cable-poke_insulation
---------------------------
('leather-good', 'zipper-bad')
so transfrom : zipper-good
---------------------------
('metal_nut-good', 'metal_nut-bad')
so transfrom : metal_nut-flip
---------------------------
('cable-good', 'cable-bad')
so transfrom : cable-poke_insulation
---------------------------
('cable-good', 'cable-bad')
so transfrom : cable-poke_insulation
---------------------------
('metal_nut-good', 'metal_nut-bad')
so transfrom : metal_nut-bent
---------------------------
('metal_nut-good', 'metal_nut-bad')
so transfrom : metal_nut-flip
---------------------------
('leather-good', 'zipper-bad')
so transfrom : zipper-split_teeth
---------------------------
('metal_nut-good', 'metal_nut-bad')
so transfrom : metal_nut-bent
---------------------------
('cable-good', 'cable-bad')
so transfrom : cable-combined
---------------------------
('cable-good', 'cable-bad')
so transfrom : cable-poke_insulation
---------------

In [46]:
class_ = []
states_ = []

for i in fix_good:
    a,b = i.split("-")
    class_.append(a)
    states_.append(b)

test_df["label"] = fix_good
test_df["class"] = class_
test_df["state"] = states_

test_df.to_csv('/home/twogudack/Czion_anomaly/Data/submission/submission.csv', index=False)

In [47]:
from sklearn.metrics import accuracy_score, f1_score

a = pd.read_excel("/home/twogudack/Czion_anomaly/Data/123.xlsx")
bad = pd.read_csv("../Data/bad123.csv")


submit_labels = predicts1
answer_labels = a["label"]
print(f1_score(answer_labels,submit_labels,average='macro'))


submit_labels = predicts2
print(f1_score(answer_labels,submit_labels,average='macro'))

submit_labels = predicts3
print(f1_score(answer_labels,submit_labels,average='macro'))

submit_labels = predicts4
print(f1_score(answer_labels,submit_labels,average='macro'))

submit_labels = predict_softmax
print(f"softmax : {f1_score(answer_labels,submit_labels,average='macro')}")

submit_labels = fix_good
print(f"fix good bad : {f1_score(answer_labels,submit_labels,average='macro')}")


answer_labels = bad["label"]
submit_labels = predicts5

print(f1_score(answer_labels,submit_labels,average='macro'))

0.14628760243259611
0.20159623501412852
0.16981406843157873
0.1634554556022279
softmax : 0.181495139700781
fix good bad : 0.18962886431214981
0.40809920419172346


In [48]:
submit_labels = predicts2
answer_labels = a["label"]

In [49]:
train_df["class"].unique()

array(['transistor', 'capsule', 'wood', 'bottle', 'screw', 'cable',
       'carpet', 'hazelnut', 'pill', 'metal_nut', 'zipper', 'leather',
       'toothbrush', 'tile', 'grid'], dtype=object)

In [50]:
transistor = []
transistor_as = []

capsule = []
capsule_as = []

wood = []
wood_as = []

bottle = []
bottle_as = []

screw = []
screw_as = []

cable = []
cable_as = []

carpet = []
carpet_as = []

hazelnut = []
hazelnut_as = []

pill = []
pill_as = []

metal_nut = []
metal_nut_as = []

zipper = []
zipper_as = []

leather = []
leather_as = []


toothbrush = []
toothbrush_as = []

tile = []
tile_as = []

grid = []
grid_as = []

class_check = [transistor,capsule,wood,bottle,screw,cable,carpet,hazelnut,pill,metal_nut,zipper,leather,toothbrush,tile,grid]
class_check_as = [transistor_as,capsule_as,wood_as,bottle_as,screw_as,cable_as,carpet_as,hazelnut_as,pill_as,metal_nut_as,zipper_as,leather_as,toothbrush_as,tile_as,grid_as]

In [51]:
submit_labels = predicts2
answer_labels = a["label"]


for i in range(len(submit_labels)):
    for j in range(len(class_check)):
        if train_df["class"].unique()[j] in submit_labels[i] :
            class_check[j].append(submit_labels[i])
            class_check_as[j].append(answer_labels[i])
        

In [52]:
for i in range(len(class_check)):
    print(train_df["class"].unique()[i])
    print(f1_score(class_check[i],class_check_as[i],average='macro'))
    if i == 0:
        for j in range(len(class_check[i])):
            print(f"{class_check[i][j]} ->>> {class_check_as[i][j]}")

transistor
0.1488372093023256
transistor-good ->>> transistor-cut_lead
transistor-good ->>> transistor-good
transistor-good ->>> transistor-good
transistor-good ->>> transistor-good
transistor-good ->>> transistor-good
transistor-good ->>> transistor-cut_lead
transistor-misplaced ->>> transistor-good
transistor-misplaced ->>> transistor-good
transistor-good ->>> transistor-good
transistor-misplaced ->>> transistor-good
transistor-misplaced ->>> transistor-good
transistor-good ->>> transistor-good
transistor-good ->>> transistor-good
transistor-good ->>> transistor-good
transistor-misplaced ->>> transistor-good
transistor-good ->>> transistor-good
transistor-good ->>> transistor-good
transistor-misplaced ->>> transistor-good
transistor-good ->>> transistor-good
transistor-good ->>> transistor-good
transistor-good ->>> transistor-good
transistor-good ->>> transistor-good
transistor-good ->>> transistor-good
transistor-misplaced ->>> transistor-good
transistor-misplaced ->>> transistor-go