In [1]:
## model modules for total detection model

"""
*module 1: part_symptom_classifying.h5

    * x_data: part image features: bboxes for each plant image -> extract features
    * y_data: part_status(Y: yes symptom, N: no symptom)
    * model pulpose: regardless which is in a certain part of plant, distinguish the status of input par

*module 2: part_disease_class_classifying.h5

    * x_data: same as module 1 but only use 'Y' labeled parts
    * y_data: disease_class data
    * module purpose: regardless which is in a certain part, distinguish the disease class of 'Y' labeled parts

* module 1, 2 integrated structure(after modules trained)(ex. blight)
    *input data: image[bbox1, bbox2,...] -> 이 리스트를 순회하면서 개별 바운딩 박스에 대해 모듈 추론 적용후 결과 병합)
        *module 1 prediction(input: image[])
            *prediction: image[N, Y, N, Y, ...]
            *classifying: 
                if all image[i] == 'N' --> {normal(output directly and start next loop)}
                else                   --> abnormal(go to module 2)

                *module 2 prediction(input: abnormal([N, Y, N, Y])
                    output_list = []
                    *classifying:
                        if abnormal[] == 'N' --> output list.append() 
                        else                 --> output_list.append(predicted_class)

    final_output:
        *(normal): imagename = {bbox: [[bbox1], [bbox2],...], status: [N, N, ...], class: normal}
        *(abnormal): imagename = {bbox: [[bbox1], [bbox2], ...], status: [N, Y, N, Y], class: blight}

"""

"\n*module 1: part_symptom_classifying.h5\n\n    * x_data: part image features: bboxes for each plant image -> extract features\n    * y_data: part_status(Y: yes symptom, N: no symptom)\n    * model pulpose: regardless which is in a certain part of plant, distinguish the status of input par\n\n*module 2: part_disease_class_classifying.h5\n\n    * x_data: same as module 1 but only use 'Y' labeled parts\n    * y_data: disease_class data\n    * module purpose: regardless which is in a certain part, distinguish the disease class of 'Y' labeled parts\n\n* module 1, 2 integrated structure(after modules trained)\n    *input data: image[bbox1, bbox2,...]\n    *module 1 prediction(input: image[])\n        *prediction: image[N, Y, N, Y, ...]\n        *classifying: \n            if all image[i] == 'N' --> {normal(prediction finished)}\n            else                   --> abnormal(go to module 2)\n\n            *module 2 prediction(input: abnormal([N, Y, N, Y])\n                output_list = []

In [1]:
# import lists

import pandas as pd
import numpy as np
import os
import glob
import torch
from torchvision import models, transforms
from PIL import Image
import cv2

In [4]:
# label data load and define X, Y data

test_data = pd.read_csv('./data_preprocessed/label/test_label.csv', encoding='utf-8')

dir_path = './data_preprocessed/label/'
tr_label_dict = {}
train_num = 20

csv_files = glob.glob(os.path.join(dir_path, 'train_label*.csv'))
csv_files.sort()

for file in csv_files:
     df = pd.read_csv(file, encoding='utf-8')
     tr_label_dict[f'train_{train_num}'] = df
     train_num += 10

     if train_num > 100:
          break

In [3]:
test_data = pd.read_csv('./data_preprocessed/label/test_label.csv', encoding='utf-8')
train_20 = pd.read_csv('./data_preprocessed/label/train_label_20.csv', encoding='utf-8')

In [5]:
# part image extraction and image preprocess for CNN model

class PartImageExtractor:
    def __init__(self):
        self.transform = transforms.Compose([
            transforms.Resize(299),
            transforms.CenterCrop(299),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        self.model = models.inception_v3(pretrained=True)
        self.model.eval()

        if torch.cuda.is_available():
            self.model = self.model.cuda()

    def extract_features(self, image, bbox):
        x, y, w, h = bbox
        image = Image.open(image)
        image = image.crop((x, y, x+w, y+h))
        image = self.transform(image)
        image = image.unsqueeze(0)

        if torch.cuda.is_available():
            image = image.cuda()

        with torch.no_grad():
            features = self.model(image)

        return features.cpu().numpy()


In [6]:
extractor = PartImageExtractor()
features_list = []

for index, row in train_20.iterrows():
    folder_path = row['folder_path']
    file_name = row['file_name']
    bbox = (row['bbox_x'], row['bbox_y'], row['bbox_width'], row['bbox_height'])

    image_path = os.path.join(folder_path, file_name)
    features = extractor.extract_features(image_path, bbox)
    features_list.append(features)

features_array = np.array(features_list)
print(features_array.shape)



ZeroDivisionError: division by zero

In [13]:
# 직전에 해결한 문제: bbox y or height 값이 음수로 표기된 데이터 문제
    # 해당 컬럼 데이터에서 음수가 발견되는지 확인 -> 라벨 전처리 과정에서 bbox_y, bbox_height 값이 음수인 행 제외 처리함

# division zero 원인 검토 1: bbox width or height가 0 또는 음수값이 있는지 확인
    # 확인결과: 없음

zero_or_under = train_20[(train_20['bbox_width'] <= 0) | (train_20['bbox_height'] <= 0)]
zero_or_under

Unnamed: 0,file_id,folder_path,file_name,image_width,image_height,class,cause_method,ei_value,pl_value,el_value,pi_value,bbox,bbox_x,bbox_y,bbox_width,bbox_height,category_id,disease_status,name,object_status


In [16]:
# division zero 원인 검토 2: "x + width" or "y + height"가 실제 이미지의 너비, 높이를 초과하는지 확인
    # 확인결과: 전체 데이터 중 32개 행에서 초과 문제 발생 -> 이 행들을 삭제하는 내용을 추가하여 라벨 전처리를 다시 수행해야 함.
    # 추가 확인: "x + width" or "y + height가 실제 이미지의 너비, 높이와 동일한 경우도 식별됨(약 860개 행)
        # 대상 이미지에 해당 바운딩 박스를 그려보았을 때 단일 부위가 아닌 여러 부위들이 포함됨
        # 결론: 해당 데이터들도 이상치로 간주하고 제외해야 함.

size_check = train_20[['image_width', 'image_height', 'bbox_x', 'bbox_y', 'bbox_width', 'bbox_height']].copy()

size_check['x_width'] = size_check['bbox_x'] + size_check['bbox_width']
size_check['y_height'] = size_check['bbox_y'] + size_check['bbox_height']

In [18]:
over_check = size_check[(size_check['image_width'] < size_check['x_width']) | (size_check['image_height'] < size_check['y_height'])]

In [20]:
print(len(over_check))

32


In [21]:
train_100 = pd.read_csv('./data_preprocessed/label/train_label_20.csv', encoding='utf-8')

In [22]:
image_width = train_100['image_width']
image_height = train_100['image_height']
x = train_100['bbox_x']
y = train_100['bbox_y']
width = train_100['bbox_width']
height = train_100['bbox_height']

total_check = train_100[(image_width < (x+width)) | (image_height < (y+height))]

In [23]:
print(len(total_check))

32


In [32]:
train_100_test = train_100.copy()

width_same = train_100_test[image_width == (x+width)]
height_same = train_100_test[image_height == (y+height)]
width_height_same = train_100_test[(image_width == (x+width)) & (image_height == (y+height))]

print(len(width_same))
print(len(height_same))
print(len(width_height_same))
print(len(train_100_test) - len(width_height_same))
print(len(train_100_test))
print(width_same.head(1))

783
81
0
242560
242560
             file_id                           folder_path  \
506  tr_normal_47649  ./data_raw/train_image/train_normal/   

                          file_name  image_width  image_height   class  \
506  normal_55_008_221130091804.jpg          914          1585  normal   

    cause_method  ei_value  pl_value  el_value  pi_value  \
506       normal      0.07       6.2       3.2      11.0   

                                bbox  bbox_x  bbox_y  bbox_width  bbox_height  \
506  [606.36, 577.43, 307.64, 177.8]  606.36  577.43      307.64        177.8   

     category_id disease_status  name object_status  
506            3              N  leaf        normal  


In [None]:
# model build

In [None]:
# model train

In [None]:
# model validation and save