In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision import transforms

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.pipeline import Pipeline
from sklearn.metrics import matthews_corrcoef, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score

import os
import numpy as np
import re
import cv2
from glob import glob
import random
from PIL import Image
import time
import copy

In [2]:
# Root folder of the dataset: the CSV index files, the image sub-folders and the
# cached .npy arrays are all resolved relative to it.
Path = './C2_TrainDev_Toy'
# True: predict on the dev images and write a TSV file.
# False: evaluate on the labelled dev set and print the metrics.
SAVE_TSV = False
# Per-defect hyper-parameters for the SVM pipeline, keyed by defect index:
# (feature-selection percentile, LinearSVC C).
# NOTE(review): the trailing comments look like previously tried values; they
# disagree with the current tuples for defects 1 and 3 -- confirm which is intended.
defect_P_C_map = {
    0: (50, 1.0), # 50 1
    1: (10, 1),  # 10 0.1
    2: (20, 0.1), # 20 0.1
    3: (100, 1),  # 10 0.1
    4: (20, 1.0)  # 20 1
}
# GPU auto-selection is disabled; the device is pinned to the CPU.
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = "cpu"

In [3]:
def load_mango_csv(csv_path):
    """Parse one annotation CSV into image paths, bounding boxes and labels.

    Every row holds an image file name followed by zero or more groups of five
    fields: four numeric box values and one label string.  A trailing run of
    commas at the end of a row is dropped before the row is split.

    Args:
        csv_path: path of the CSV file.  The part of its base name before the
            first dot, capitalised, names the image sub-folder under ``Path``
            (``train.csv`` -> ``Train``).

    Returns:
        Tuple ``(path, box, label)`` of three parallel lists with one entry
        per row: the image path, the list of 4-value boxes (floats) and the
        list of label strings.

    Raises:
        ValueError: if the annotation fields of a row are not a multiple of
            five, or a box value is not numeric.
    """
    path = []
    box = []
    label = []
    # basename also copes with OS-specific separators in csv_path.
    subdir = os.path.basename(csv_path).split('.')[0].capitalize()
    with open(csv_path, 'r', encoding='utf8') as f:
        for line in f:
            clean_line = re.sub(',+\n', '', line).replace('\n', '').replace('\ufeff', '').split(',')
            if not clean_line[0].strip():
                # Blank row: there is no image to load, and keeping it would
                # register a path that points at the folder itself.
                continue
            curr_img_path = f'{Path}/{subdir}/{clean_line[0]}'
            curr_info = np.array(clean_line[1:]).reshape(-1, 5)
            # float32 keeps pixel coordinates exact; float16 cannot represent
            # every integer above 2048 and would silently round large boxes.
            curr_box = curr_info[:, :-1].astype('float32').tolist()
            curr_label = curr_info[:, -1].tolist()
            path.append(curr_img_path)
            box.append(curr_box)
            label.append(curr_label)

    return path, box, label

In [4]:
def load_data():
    """Return the train/dev arrays, served from the .npy cache when it is complete.

    If all four cache files exist under ``Path`` they are loaded directly.
    Otherwise both splits are rebuilt with ``load_image`` and written to the
    cache before being returned.

    Returns:
        Tuple ``(X_train_total, X_dev_total, label_train_total, label_dev_total)``.
    """
    cache_files = (
        f'{Path}/X_train.npy',
        f'{Path}/y_train.npy',
        f'{Path}/X_dev.npy',
        f'{Path}/y_dev.npy',
    )
    cache_complete = all(os.path.isfile(cache_file) for cache_file in cache_files)

    if not cache_complete:
        # Cache miss: rebuild both splits, then persist them for the next run
        # (np.save appends the ".npy" suffix itself).
        X_train_total, label_train_total = load_image(dataset='train')
        X_dev_total, label_dev_total = load_image(dataset='dev')
        np.save(f'{Path}/X_train', X_train_total)
        np.save(f'{Path}/y_train', label_train_total)
        np.save(f'{Path}/X_dev', X_dev_total)
        np.save(f'{Path}/y_dev', label_dev_total)
        return X_train_total, X_dev_total, label_train_total, label_dev_total

    X_train_total = np.load(f'{Path}/X_train.npy')
    X_dev_total = np.load(f'{Path}/X_dev.npy')
    label_train_total = np.load(f'{Path}/y_train.npy')
    label_dev_total = np.load(f'{Path}/y_dev.npy')
    return X_train_total, X_dev_total, label_train_total, label_dev_total

In [5]:
def load_dev_image_TSV():
    """Load every readable dev image and return its CNN features and file name.

    Reads ``{Path}/dev.csv``, takes the first field of each row as the image
    file name, loads that image from ``{Path}/Dev``, applies the ``'dev'``
    preprocessing and finally runs ``extract_features`` on all of them.
    Images that cannot be read are reported and skipped.

    Returns:
        Tuple ``(_X, img_name)``: the array returned by ``extract_features``
        and a parallel array holding the file name of each kept image.
    """
    X = []
    img_name = []
    csv_path = f'{Path}/dev.csv'
    with open(csv_path, 'r', encoding='utf8') as f:
        for line in f:
            clean_line = re.sub(',+\n', '', line).replace('\n', '').replace('\ufeff', '').split(',')
            curr_img_path = f'{Path}/Dev/{clean_line[0]}'
            bgr = cv2.imread(curr_img_path)
            if bgr is None:
                # cv2.imread reports a missing/unreadable file by returning
                # None; test for it instead of catching every exception.
                print(f'skip unreadable image: {curr_img_path}')
                continue
            img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            # The networks need resized, normalised tensors with a batch axis,
            # not raw images of arbitrary size.
            X.append(data_preprocess('dev', img))
            img_name.append(clean_line[0])

    _X = extract_features(X)
    img_name = np.array(img_name)

    return _X, img_name

In [6]:
def load_image(dataset):
    """Load and preprocess every image of a split together with its defect labels.

    Args:
        dataset: split name (``'train'`` or ``'dev'``).  It selects both the
            index file ``{Path}/{dataset}.csv`` and the preprocessing pipeline
            of the same name in ``data_preprocess``.

    Returns:
        Tuple ``(X, y)`` of numpy arrays.  ``X`` stacks the preprocessed
        images (with the batch axis of each one squeezed out); ``y`` has one
        5-element 0/1 row per image, one column per defect type.  Images that
        cannot be read or processed, or that carry an unknown label, are
        reported and left out of BOTH arrays.
    """
    defect_map = {
        '不良-乳汁吸附': 0,
        '不良-機械傷害': 1,
        '不良-炭疽病': 2,
        '不良-著色不佳': 3,
        '不良-黑斑病': 4
    }

    path, box, label = load_mango_csv(csv_path=f'{Path}/{dataset}.csv')
    X = []
    y_label = []
    for i in range(len(path)):
        if i % 100 == 0:
            print(i)
        try:
            bgr = cv2.imread(path[i])
            if bgr is None:
                # cv2.imread returns None instead of raising on a bad file.
                raise IOError(f'cannot read image {path[i]}')
            img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            _X = data_preprocess(dataset, img).detach().numpy().squeeze()
            # Build the label vector BEFORE touching X / y_label, so that an
            # unknown label cannot leave the two lists out of step.
            defect = [0] * len(defect_map)
            for defect_name in label[i]:
                defect[defect_map[defect_name]] = 1
        except Exception as exc:
            # Best effort: skip the sample, but say which one and why.
            print("except", path[i], repr(exc))
            continue
        X.append(_X)
        y_label.append(defect)

    return np.array(X), np.array(y_label)

    # X = extract_features(X)
    # return X, np.array(y_label)

In [7]:
def extract_features(X):
    """Describe each image by the concatenated outputs of VGG16 and AlexNet.

    Both networks are created with ``pretrained=True`` and run in eval mode
    with gradient tracking disabled.

    Args:
        X: sequence of preprocessed images.  Each item may be a numpy array or
            a torch tensor, either with a leading batch axis of size one or
            without it (a 3-D item gets the batch axis added).
            Assumes items are already resized/normalised the way the networks
            expect (see ``data_preprocess``) -- this function does not check.

    Returns:
        numpy array with one row per item: the first output row of VGG16
        followed by the first output row of AlexNet.
    """
    model_1 = models.vgg16(pretrained=True)
    model_1.eval()
    model_2 = models.alexnet(pretrained=True)
    model_2.eval()

    features = []
    with torch.no_grad():
        for sample in X:
            # torch modules reject numpy input; as_tensor converts arrays and
            # leaves float32 tensors untouched.
            sample = torch.as_tensor(sample, dtype=torch.float32)
            if sample.dim() == 3:
                # Single image without batch axis -> add it.
                sample = sample.unsqueeze(0)
            feature_1 = model_1(sample)
            feature_2 = model_2(sample)
            one_features = torch.hstack((feature_1[0], feature_2[0]))
            features.append(one_features.numpy())

    return np.array(features)

In [8]:
class MyDataset(Dataset):
    """Map-style dataset pairing two index-aligned containers."""

    def __init__(self, x, y):
        # Both containers are stored as given; nothing is copied or checked.
        self.x, self.y = x, y

    def __len__(self):
        """Number of samples, taken from ``x``."""
        n_samples = len(self.x)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(x, y)`` pair stored at ``idx``."""
        sample = self.x[idx]
        target = self.y[idx]
        return sample, target

    
def fine_tune(X_train, y_train, X_val, y_val, num_epochs=10, lr=0.00005, batch_size=16):
    """Fine-tune the last layer of a pretrained VGG16 as a 2-class classifier.

    All pretrained weights are frozen; only a freshly created final linear
    layer with two outputs is trained, using Adam and cross-entropy loss.

    Args:
        X_train, y_train: training inputs and integer class labels (0/1).
        X_val, y_val: validation inputs and labels.
        num_epochs: number of training epochs (default 10).
        lr: Adam learning rate (default 0.00005).
        batch_size: batch size of both loaders (default 16).

    Returns:
        The fine-tuned model, so that the caller can reuse it.
    """
    # data
    dataloaders_dict = {
        "train": DataLoader(MyDataset(X_train, y_train), batch_size=batch_size, shuffle=True, num_workers=2),
        # The validation metrics are sums over all samples, so shuffling buys
        # nothing; keep the order fixed.
        "val": DataLoader(MyDataset(X_val, y_val), batch_size=batch_size, shuffle=False, num_workers=2)
    }

    # model
    vgg16 = models.vgg16(pretrained=True)
    for param in vgg16.parameters():
        param.requires_grad = False

    # A newly created layer has requires_grad=True, which makes it the only
    # trainable part of the network.
    in_features = vgg16.classifier[6].in_features
    vgg16.classifier[6] = torch.nn.Linear(in_features, 2)

    params_to_update = []
    for name, param in vgg16.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            print("\t", name)

    optimizer_ft = torch.optim.Adam(params_to_update, lr=lr)
    criterion = torch.nn.CrossEntropyLoss()

    vgg16.to(device)

    train_model(vgg16, dataloaders_dict, criterion, optimizer_ft, num_epochs)

    return vgg16

In [9]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs):
    """Train and validate a binary classifier, printing metrics after each phase.

    Every epoch runs a ``'train'`` phase (weights are updated) followed by a
    ``'val'`` phase (no gradients).  After each phase the mean loss, the
    accuracy and the F1 score of class 1 are printed.

    Args:
        model: torch module returning one score per class for each sample.
        dataloaders: dict with the keys ``'train'`` and ``'val'``; each loader
            yields ``(inputs, labels)`` batches and exposes ``.dataset``.
        criterion: loss function, called as ``criterion(outputs, labels)``.
        optimizer: optimizer holding the trainable parameters of ``model``.
        num_epochs: number of epochs to run.

    Returns:
        The same ``model`` object after training.
    """
    # Batches are moved to the device the model already lives on, so this
    # function does not depend on a module-level device setting.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            n_samples = len(dataloaders[phase].dataset)
            running_loss = 0.0
            running_corrects = 0
            # Confusion counts for the positive class (label 1), kept as
            # plain Python ints.
            TP = 0
            FN = 0
            FP = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics (the loss is a batch mean, so weight it by the
                # batch size before summing)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()
                TP += ((preds == 1) & (labels == 1)).sum().item()
                FN += ((preds == 0) & (labels == 1)).sum().item()
                FP += ((preds == 1) & (labels == 0)).sum().item()

            # An empty dataset reports zeros instead of dividing by zero.
            epoch_loss = running_loss / n_samples if n_samples else 0.0
            epoch_acc = running_corrects / n_samples if n_samples else 0.0
            if TP == 0:
                # Precision and recall are both 0 (or undefined) without a
                # single true positive.
                F1 = 0.0
            else:
                p = TP / (TP + FP)
                r = TP / (TP + FN)
                F1 = 2 * r * p / (r + p)

            print('{} Loss: {:.4f} Acc: {:.4f} F1 score: {:.4f}'.format(phase, epoch_loss, epoch_acc, F1))

    return model


In [10]:
def data_preprocess(dataset, X):
    """Turn one image array into a normalised 224x224 tensor with a batch axis.

    Args:
        dataset: ``"train"`` (resize, random horizontal flip, random rotation
            of up to 15 degrees, tensor conversion, normalisation) or
            ``"dev"`` (resize, tensor conversion, normalisation).
        X: image as an array accepted by ``PIL.Image.fromarray``.

    Returns:
        The transformed image tensor with a leading axis of size one.

    Raises:
        KeyError: if ``dataset`` is neither ``"train"`` nor ``"dev"``.
    """
    pil_image = Image.fromarray(X)

    # Steps shared by both pipelines.
    to_target_size = transforms.Resize((224,224))
    normalise = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    pipelines = {
        "train": transforms.Compose([
            to_target_size,
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(degrees=15),
            transforms.ToTensor(),
            normalise,
        ]),
        "dev": transforms.Compose([
            to_target_size,
            transforms.ToTensor(),
            normalise,
        ]),
    }

    transformed = pipelines[dataset](pil_image)
    return transformed.unsqueeze(0)

In [11]:
def get_defect_balance_data(dataset, defect, X_total, label_total):
    """Build the binary sub-problem "has this defect or not" for one defect.

    For ``dataset == 'train'`` the larger class is randomly undersampled
    (without replacement, via ``np.random.choice``) to the size of the
    smaller one.  Any other ``dataset`` value keeps every sample.

    Args:
        dataset: ``'train'`` to balance the classes, anything else to keep all.
        defect: column index of the defect inside each label row.
        X_total: array of samples, indexable by a list of row numbers.
        label_total: per-sample label rows; ``label_total[i][defect] == 1``
            marks sample ``i`` as positive.

    Returns:
        Tuple ``(X, y)``: the selected samples with all positives first and
        the negatives after them, and the matching int64 vector of 1s and 0s.
    """
    positives = [row_no for row_no, flags in enumerate(label_total) if flags[defect] == 1]
    negatives = np.delete(np.arange(len(label_total)), positives)
    n_pos, n_neg = len(positives), len(negatives)

    if dataset != 'train':
        # Evaluation data: keep the natural class distribution.
        targets = np.concatenate((np.ones(shape=(n_pos, )), np.zeros(shape=(n_neg, ))))
    elif n_pos < n_neg:
        # Fewer positives: thin out the negatives.
        negatives = np.random.choice(negatives, n_pos, replace=False)
        targets = np.concatenate((np.ones(shape=(n_pos, )), np.zeros(shape=(n_pos, ))))
    else:
        # Fewer (or equally many) negatives: thin out the positives.
        positives = list(np.random.choice(positives, n_neg, replace=False))
        targets = np.concatenate((np.ones(shape=(n_neg, )), np.zeros(shape=(n_neg, ))))

    selected_rows = positives + list(negatives)  # positives first, then negatives
    return X_total[selected_rows], targets.astype('int64')

In [12]:
def VGG16_ANOVA_SVM(SAVE_TSV, X_train, y_train, X_dev, y_dev, anova_percentile, complexity):
    """Fit a scale -> select -> linear-SVM pipeline and predict on the dev data.

    The pipeline min-max scales the features, keeps the top
    ``anova_percentile`` percent ranked by the chi2 score, and trains a
    class-balanced ``LinearSVC`` with ``C=complexity``.

    Args:
        SAVE_TSV: when truthy, return the raw dev predictions and skip the
            evaluation (``y_dev`` is then not used).
        X_train, y_train: training features and binary labels.
        X_dev, y_dev: dev features and binary labels.
        anova_percentile: percentage of features kept by the selector.
        complexity: regularisation parameter ``C`` of the SVM.

    Returns:
        The predicted dev labels if ``SAVE_TSV`` is truthy, otherwise the
        tuple ``(precision, recall)`` measured on the dev data.
    """
    print('train linear svm model...')
    pipeline_steps = [
        ('scaler', MinMaxScaler()),
        ('anova', SelectPercentile(chi2, percentile=anova_percentile)),
        ('svc', LinearSVC(random_state=42, C=complexity, class_weight='balanced')),
    ]
    clf = Pipeline(pipeline_steps)
    clf.fit(X_train, y_train)
    pred_y_dev = clf.predict(X_dev)

    if SAVE_TSV:
        # Prediction-only mode: there are no labels to score against.
        return pred_y_dev

    print('evaluating dev data...')
    scores = {
        'cm': confusion_matrix(y_dev, pred_y_dev),
        'acc': accuracy_score(y_dev, pred_y_dev),
        'f1': f1_score(y_dev, pred_y_dev),
        'p': precision_score(y_dev, pred_y_dev),
        'r': recall_score(y_dev, pred_y_dev),
    }

    print(scores['cm'], scores['acc'], scores['f1'], scores['p'], scores['r'])
    print('---------------------------------------------------------')

    return scores['p'], scores['r']


In [13]:
# if __name__ == '__main__':

# Load and preprocess both splits once; the per-defect loop in the next cell
# subsamples from these arrays.
X_total_train, y_total_train = load_image(dataset='train')
X_total_dev, y_total_dev = load_image(dataset='dev')


0
100
200
300
400
0


In [14]:

# One independent binary fine-tuning run per defect type: the training subset
# is class-balanced by random undersampling, the dev subset keeps every sample.
for defect in range(len(defect_P_C_map)):
    X_train, y_train = get_defect_balance_data('train', defect, X_total_train, y_total_train)
    X_dev, y_dev = get_defect_balance_data('dev', defect, X_total_dev, y_total_dev)

    fine_tune(X_train, y_train, X_dev, y_dev)


	 classifier.6.weight
	 classifier.6.bias
Epoch 0/9
----------
train Loss: 0.7625 Acc: 0.4500 F1 sorce: 0.4762
val Loss: 0.5820 Acc: 0.7300 F1 sorce: 0.0000
Epoch 1/9
----------
train Loss: 0.7616 Acc: 0.4950 F1 sorce: 0.4925
val Loss: 0.5897 Acc: 0.7600 F1 sorce: 0.2500
Epoch 2/9
----------
train Loss: 0.7022 Acc: 0.5600 F1 sorce: 0.6071
val Loss: 0.6028 Acc: 0.7400 F1 sorce: 0.2778
Epoch 3/9
----------
train Loss: 0.6987 Acc: 0.5000 F1 sorce: 0.5283
val Loss: 0.5870 Acc: 0.7600 F1 sorce: 0.2000
Epoch 4/9
----------
train Loss: 0.6909 Acc: 0.5800 F1 sorce: 0.5758
val Loss: 0.5858 Acc: 0.7400 F1 sorce: 0.1333
Epoch 5/9
----------
train Loss: 0.6875 Acc: 0.5700 F1 sorce: 0.5743
val Loss: 0.5879 Acc: 0.7200 F1 sorce: 0.1250
Epoch 6/9
----------
train Loss: 0.7018 Acc: 0.5350 F1 sorce: 0.5507
val Loss: 0.5939 Acc: 0.7300 F1 sorce: 0.1818
Epoch 7/9
----------
train Loss: 0.6658 Acc: 0.6050 F1 sorce: 0.6146
val Loss: 0.5918 Acc: 0.7200 F1 sorce: 0.1250
Epoch 8/9
----------
train Loss: 0.697

In [None]:
'''
    Load all images after feature extraction(X) and all defects(label)
    Both:
        X_train_total: (len(train), 2000) -> 2000 features
        label_train_total: (len(train), 5) -> 5 defects ex:[0,0,0,0,0]
        X_dev_total: (len(dev), 2000)
    TSV:
        img_name: (len(dev), 1) -> 1 image's name ex:01389.jpg
    without TSV:
        label_dev_total: (len(dev), 5)
'''
# NOTE(review): load_image currently returns the preprocessed image arrays (its
# extract_features call is commented out), whereas load_dev_image_TSV returns
# the output of extract_features and the description above speaks of 2000
# features.  Confirm that the train and dev inputs of the SVM cell really share
# one feature layout before running it.
if SAVE_TSV:
    X_train_total, label_train_total = load_image(dataset='train')
    X_dev_total, img_name = load_dev_image_TSV()
    # Column vector, so prediction columns can later be stacked next to the names.
    img_name = np.expand_dims(img_name, axis=1)
else:
    # Served from the .npy cache under Path when all four files exist.
    X_train_total, X_dev_total, label_train_total, label_dev_total = load_data()
    
print(X_train_total.shape)
print(X_dev_total.shape)

In [None]:
'''
    Get each defect data, balance train data and predict
    TSV:
        get predicts change to `True` or `False`
    without TSV:
        calculate precision and recall
'''
precision = 0
recall = 0
# Prediction columns are collected separately, so re-running this cell does not
# keep appending more columns to img_name.
prediction_columns = []
for defect in range(len(defect_P_C_map)):
    anova_percentile, complexity = defect_P_C_map[defect]
    X_train, y_train = get_defect_balance_data('train', defect, X_train_total, label_train_total)

    if SAVE_TSV:
        # There are no dev labels in TSV mode and the classifier returns the
        # predictions before it would look at them, so None is passed.
        preds = VGG16_ANOVA_SVM(SAVE_TSV, X_train, y_train, X_dev_total, None, anova_percentile=anova_percentile, complexity=complexity)
        _preds = np.where(np.asarray(preds) == 1, "True", "False")
        prediction_columns.append(np.expand_dims(_preds, axis=1))
    else:
        X_dev, y_dev = get_defect_balance_data('dev', defect, X_dev_total, label_dev_total)
        p, r = VGG16_ANOVA_SVM(SAVE_TSV, X_train, y_train, X_dev, y_dev, anova_percentile=anova_percentile, complexity=complexity)
        precision += p
        recall += r


'''
    TSV:
        save tsv file
    without TSV:
        calculate f1 score
'''
if SAVE_TSV:
    # One row per image: file name followed by one True/False column per defect.
    results = np.hstack([img_name] + prediction_columns)
    np.savetxt("E24066022_predict.tsv", results, delimiter="\t", fmt='%s')
else:
    # Macro average over however many defects are configured.
    n_defects = len(defect_P_C_map)
    precision_ma = precision / n_defects
    recall_ma = recall / n_defects
    denominator = precision_ma + recall_ma
    # With zero precision and recall the F1 score is defined as 0 here instead
    # of dividing by zero.
    F1_ma = 2 * precision_ma * recall_ma / denominator if denominator else 0.0
    print(F1_ma)