In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

from tqdm import tqdm
from glob import glob
import os
import json 
import timm

import torch
from torch import nn
from torchvision import models
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, StratifiedKFold
import albumentations as A

# Label

In [3]:
# Code tables below follow the variable-description CSV file.

# Crop code -> crop name.
crop = {
    '1': '딸기',
    '2': '토마토',
    '3': '파프리카',
    '4': '오이',
    '5': '고추',
    '6': '시설포도',
}

# Crop code -> {disease code -> disease name}.
disease = {
    '1': {
        'a1': '딸기잿빛곰팡이병',
        'a2': '딸기흰가루병',
        'b1': '냉해피해',
        'b6': '다량원소결핍 (N)',
        'b7': '다량원소결핍 (P)',
        'b8': '다량원소결핍 (K)',
    },
    '2': {
        'a5': '토마토흰가루병',
        'a6': '토마토잿빛곰팡이병',
        'b2': '열과',
        'b3': '칼슘결핍',
        'b6': '다량원소결핍 (N)',
        'b7': '다량원소결핍 (P)',
        'b8': '다량원소결핍 (K)',
    },
    '3': {
        'a9': '파프리카흰가루병',
        'a10': '파프리카잘록병',
        'b3': '칼슘결핍',
        'b6': '다량원소결핍 (N)',
        'b7': '다량원소결핍 (P)',
        'b8': '다량원소결핍 (K)',
    },
    '4': {
        'a3': '오이노균병',
        'a4': '오이흰가루병',
        'b1': '냉해피해',
        'b6': '다량원소결핍 (N)',
        'b7': '다량원소결핍 (P)',
        'b8': '다량원소결핍 (K)',
    },
    '5': {
        'a7': '고추탄저병',
        'a8': '고추흰가루병',
        'b3': '칼슘결핍',
        'b6': '다량원소결핍 (N)',
        'b7': '다량원소결핍 (P)',
        'b8': '다량원소결핍 (K)',
    },
    '6': {
        'a11': '시설포도탄저병',
        'a12': '시설포도노균병',
        'b4': '일소피해',
        'b5': '축과병',
    },
}

# Risk code -> progression stage (early / middle / late).
risk = {
    '1': '초기',
    '2': '중기',
    '3': '말기',
}

# Environment-sensor CSV columns used as features:
# internal temperature, humidity and dew point (mean / max / min each).
csv_features = [
    '내부 온도 1 평균',
    '내부 온도 1 최고',
    '내부 온도 1 최저',
    '내부 습도 1 평균',
    '내부 습도 1 최고',
    '내부 습도 1 최저',
    '내부 이슬점 평균',
    '내부 이슬점 최고',
    '내부 이슬점 최저',
]

def label_preprocessing(path):
    """Build integer class ids for the labels listed in a CSV file.

    Reads the ``label`` column of the CSV at ``path``, reduces every value to
    its first three ``_``-separated fields (``crop_disease_risk``) and hands
    out consecutive ids, starting at 0, in the order the reduced labels first
    appear among the sorted raw labels.

    Args:
        path: Location of a CSV file that has a ``label`` column.

    Returns:
        tuple: ``(label_encoder, label_decoder)``. ``label_encoder`` maps a
        label string to its class id; ``label_decoder`` is the inverse.

    Raises:
        IndexError: If a label has fewer than three ``_``-separated fields.

    Example:
        train_label_encoder, train_label_decoder = label_preprocessing(path)
    """
    labels = pd.read_csv(path)

    label_encoder = {}
    # Only distinct labels matter, so iterate over the unique values.
    for label in sorted(labels['label'].unique()):
        fields = label.split('_')
        key = f'{fields[0]}_{fields[1]}_{fields[2]}'  # crop_disease_risk
        # A membership test (rather than comparing with the previous label)
        # needs no sentinel start value and cannot assign one key twice.
        if key not in label_encoder:
            label_encoder[key] = len(label_encoder)

    label_decoder = {class_id: key for key, class_id in label_encoder.items()}
    return label_encoder, label_decoder

In [None]:
def label_preprocessing(path):
    """Build integer class ids for the labels listed in a CSV file.

    Reads the ``label`` column of the CSV at ``path`` and assigns consecutive
    ids, starting at 0, to the distinct label strings in sorted order. Label
    strings are used verbatim as dictionary keys.

    Args:
        path: Location of a CSV file that has a ``label`` column.

    Returns:
        tuple: ``(label_encoder, label_decoder)``. ``label_encoder`` maps a
        label string to its class id; ``label_decoder`` is the inverse.

    Example:
        train_label_encoder, train_label_decoder = label_preprocessing(path)
    """
    labels = pd.read_csv(path)

    label_encoder = {}
    for label in sorted(labels['label']):
        if label in label_encoder:
            # Already has an id; repeated rows of the same label are skipped.
            continue
        label_encoder[label] = len(label_encoder)

    label_decoder = {class_id: label for label, class_id in label_encoder.items()}
    return label_encoder, label_decoder

# CSV feature - min, max value 

In [4]:
def csv_feature_dict(path, csv_features, show_progress=True):
    """Compute the global minimum and maximum of each CSV feature.

    Scans every file matching ``<path>/*/*.csv``, keeps only the columns in
    ``csv_features``, treats ``'-'`` cells as missing, drops rows with any
    missing value and tracks the per-feature extrema over all files.

    Args:
        path: Root directory; CSV files are looked up one directory below it.
        csv_features: Column names to extract from every CSV file.
        show_progress: Wrap the file loop in ``tqdm`` when true (default).

    Returns:
        dict: ``{feature_name: [min_value, max_value]}`` for each entry of
        ``csv_features``.

    Raises:
        ValueError: If no CSV file yields at least one complete row.
    """
    csv_files = sorted(glob(os.path.join(path, '*/*.csv')))
    file_iter = tqdm(csv_files) if show_progress else csv_files

    max_arr, min_arr = None, None

    # Track the running per-feature maximum and minimum. Every file, including
    # the first one, goes through the same cleaning steps.
    for csv_file in file_iter:
        frame = pd.read_csv(csv_file)[csv_features]
        frame = frame.replace('-', np.nan).dropna()
        if len(frame) == 0:
            continue
        frame = frame.astype(float)
        file_max, file_min = frame.max().to_numpy(), frame.min().to_numpy()
        if max_arr is None:
            max_arr, min_arr = file_max, file_min
        else:
            max_arr = np.maximum(max_arr, file_max)
            min_arr = np.minimum(min_arr, file_min)

    if max_arr is None:
        raise ValueError(f'No usable CSV rows found under {path!r}')

    # Return the extrema as {feature: [min, max]}.
    return {feature: [min_arr[i], max_arr[i]] for i, feature in enumerate(csv_features)}

# Custom Dataset 

In [None]:
class CustomDataset(Dataset):
    """Image dataset that reads one sample per directory.

    Every entry of ``files`` is a directory path whose last component is the
    sample name. The image is read from ``<dir>/<name>.jpg`` and, in
    ``'train'`` mode, the label is built from ``<dir>/<name>.json``.

    Args:
        files: Sequence of sample directory paths (either ``/`` or ``\\``
            separators are accepted).
        transforms: Callable invoked as ``transforms(image=img)`` whose result
            has an ``"image"`` entry. The result is transposed with
            ``(2, 0, 1)``, so it is assumed to be an H x W x C array
            (e.g. an albumentations pipeline) - confirm against the caller.
        mode: ``'train'`` returns image and encoded label; any other value
            returns the image only.
        label_encoder: Mapping from ``'crop_disease_risk'`` label strings to
            integer class ids. When omitted, a module-level
            ``train_label_encoder`` is used if one exists.

    Raises:
        ValueError: In ``'train'`` mode when no label encoder is available.
    """

    def __init__(self, files, transforms, mode='train', label_encoder=None):
        self.mode = mode
        self.files = files
        if label_encoder is None:
            # Backward compatibility: earlier code relied on a global mapping.
            label_encoder = globals().get('train_label_encoder')
        if label_encoder is None and mode == 'train':
            raise ValueError(
                "label_encoder is required in 'train' mode; pass it explicitly "
                "or define a module-level train_label_encoder"
            )
        self.label_encoder = label_encoder
        self.transforms = transforms

    def __len__(self):
        return len(self.files)

    @staticmethod
    def _sample_name(sample_dir):
        """Return the last path component, accepting '/' and '\\' separators."""
        return sample_dir.replace('\\', '/').rstrip('/').split('/')[-1]

    def __getitem__(self, i):
        sample_dir = self.files[i]
        sample_name = self._sample_name(sample_dir)

        # image
        image_path = f'{sample_dir}/{sample_name}.jpg'
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transforms(image=img)["image"]
        img = img.transpose(2, 0, 1)  # channels first

        # Scale pixel values by 1/255 after conversion to float32.
        sample = {'img': torch.tensor(img, dtype=torch.float32) / 255.0}

        if self.mode == 'train':
            json_path = f'{sample_dir}/{sample_name}.json'
            with open(json_path, 'r') as f:
                annotations = json.load(f)['annotations']

            crop_code = annotations['crop']
            disease_code = annotations['disease']
            risk_code = annotations['risk']
            label = f'{crop_code}_{disease_code}_{risk_code}'
            sample['label'] = torch.tensor(self.label_encoder[label], dtype=torch.long)

        return sample