In [1]:
cd /workspace/mnt2/data/tomato_except3/crop/0/

/workspace/mnt2/data/tomato_except3/crop/0


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
import os
import gc
import random
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from time import time
from tqdm.auto import tqdm
from PIL import Image
from sklearn.metrics import recall_score, f1_score, roc_auc_score, roc_curve
from sklearn.model_selection import ParameterGrid
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torch import optim, nn
from efficientnet_pytorch import EfficientNet

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Preprocessing shared by both pipelines: target size and per-channel
# normalisation statistics.
_INPUT_SIZE = (224, 224)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: random augmentations first, then the same
# resize -> tensor -> normalise steps used by the plain pipeline.
_augmentation_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),  # random rotation within +/-15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomPerspective(distortion_scale=0.5, p=0.5),  # random perspective warp
    transforms.GaussianBlur(3, sigma=(0.1, 2.0)),  # Gaussian blur, 3x3 kernel
]
transform_with_augmentation = transforms.Compose(
    _augmentation_steps
    + [
        transforms.Resize(_INPUT_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
)

# Plain pipeline without augmentation: resize, convert to tensor, normalise.
transform = transforms.Compose(
    [
        transforms.Resize(_INPUT_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
)

In [4]:
# Hyperparameter grid: every combination of these values is one experiment.
param_grid = dict(
    batch_size=[32, 64, 128],
    learning_rate=[1e-3, 1e-4, 1e-5],
    optimizer=['Adam', 'AdamW'],
    alpha=[0.25],  # FocalLoss alpha
    gamma=[2.0],   # FocalLoss gamma
)

# Checkpoints are written to the current working directory.
save_directory = os.getcwd()
print(save_directory)

/workspace/mnt2/data/tomato_except3/crop/0


In [5]:
def seed_everything(seed):
    """Seed every random number generator this notebook uses.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED``, and configures cuDNN for deterministic
    behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker processes).
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also cover every other visible GPU, not just the current device.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, which
    # defeats the deterministic setting above; it must be off for reproducibility.
    torch.backends.cudnn.benchmark = False


seed_everything(4885)  # fix the seed for the whole notebook

In [6]:
# Custom dataset class
class CustomImageDataset(Dataset):
    """Image dataset that derives each label from a marker in the file path.

    All ``.jpg``/``.jpeg``/``.png`` files (case-insensitive) found under
    ``root_dir`` are collected recursively. A sample's label is the index of
    the first class whose ``"<index>_<name>"`` marker (e.g. ``"2_GrayMold"``)
    occurs anywhere in the file's path.

    Args:
        root_dir: Directory that is walked recursively for image files.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = ['healthy', 'PowderyMildew', 'GrayMold', 'YellowLeafCurlVirus', 'LeafMold', 'LateBlight']
        valid_extensions = ('.jpg', '.jpeg', '.png')
        # os.walk yields entries in filesystem order, which differs between
        # machines; sorting keeps sample indices stable so a fixed seed
        # produces the same batches everywhere.
        self.img_paths = sorted(
            os.path.join(dirpath, filename)
            for dirpath, _dirnames, filenames in os.walk(root_dir)
            for filename in filenames
            if filename.lower().endswith(valid_extensions)
        )

    def __len__(self):
        """Return the number of image files found under ``root_dir``."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        img_path = self.img_paths[idx]
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into the converted image.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        label = self.get_label(img_path)
        return image, label

    def get_label(self, img_path):
        """Return the class index whose folder marker appears in ``img_path``.

        Markers are tested in class order, so the lowest matching index wins.

        Raises:
            ValueError: If no class marker occurs in ``img_path``.
        """
        for class_index, class_name in enumerate(self.classes):
            if f"{class_index}_{class_name}" in img_path:
                return class_index
        # Fail loudly instead of returning None, which would only surface
        # later as an obscure error when batches are collated.
        raise ValueError(f"Cannot determine class label from path: {img_path!r}")

In [7]:
# Load the training and validation data.
# One sub-folder per class; the folder name carries the class marker that
# CustomImageDataset.get_label looks for in each file path.
_class_folders = [
    "0_healthy",
    "1_PowderyMildew",
    "2_GrayMold",
    "3_YellowLeafCurlVirus",
    "4_LeafMold",
    "5_LateBlight",
]
train_dirs = [f"./TrainingDataSet/{folder}/" for folder in _class_folders]
val_dirs = [f"./ValidationDataSet/{folder}/" for folder in _class_folders]

# Training images go through the augmenting pipeline; validation images only
# get the plain resize/normalise pipeline.
# (Alternative without augmentation: pass transform=transform for training too.)
train_datasets = []
for train_dir in train_dirs:
    train_datasets.append(CustomImageDataset(train_dir, transform=transform_with_augmentation))

val_datasets = []
for val_dir in val_dirs:
    val_datasets.append(CustomImageDataset(val_dir, transform=transform))

# Merge the per-class datasets into a single dataset per split.
train_dataset = torch.utils.data.ConcatDataset(train_datasets)
val_dataset = torch.utils.data.ConcatDataset(val_datasets)

In [8]:
# from collections import Counter

# # 레이블을 저장할 리스트를 초기화합니다.
# labels = []

# # 모든 이미지/레이블 쌍에 대해 루프를 실행합니다.
# for _, label in train_dataset:
#     class_name = train_dataset.datasets[0].classes[label]
#     labels.append(class_name)

# # Counter 객체를 사용해 각 클래스의 발생 횟수를 계산합니다.
# label_counts = Counter(labels)

# # 결과를 출력합니다.
# for disease, count in label_counts.items():
#     print(f"{disease}: {count}개")

In [9]:
from collections import Counter

# Collect the class name of every validation sample.
# Labels are derived from the file paths alone: iterating `val_dataset`
# itself would open, decode and transform every image just to read its label.
# Walking the sub-datasets in order visits the samples in the same order as
# the concatenated dataset does.
labels = [
    subset.classes[subset.get_label(img_path)]
    for subset in val_dataset.datasets
    for img_path in subset.img_paths
]

# Count how many samples each class has.
label_counts = Counter(labels)

# Print the per-class counts.
for disease, count in label_counts.items():
    print(f"{disease}: {count}개")

healthy: 2235개
PowderyMildew: 1412개
GrayMold: 1101개
YellowLeafCurlVirus: 135개
LeafMold: 102개
LateBlight: 26개


In [10]:
class FocalLoss(nn.Module):
    """Focal loss for multi-class classification.

    Each sample's cross-entropy ``ce`` is weighted by
    ``alpha * (1 - exp(-ce)) ** gamma`` and the weighted values are averaged
    over the batch.

    Args:
        alpha: Scalar factor applied to every sample's loss.
        gamma: Exponent of the ``(1 - exp(-ce))`` modulating term.
    """

    def __init__(self, alpha=1., gamma=2.):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, inputs, targets):
        """Return the batch-mean focal loss of logits ``inputs`` against ``targets``."""
        per_sample_ce = F.cross_entropy(inputs, targets, reduction='none')
        # exp(-ce) is the probability the model assigned to the target class.
        target_prob = torch.exp(-per_sample_ce)
        modulation = (1 - target_prob) ** self.gamma
        weighted = self.alpha * modulation * per_sample_ce
        return weighted.mean()

In [11]:
def get_completed_configs(save_directory):
    """Recover hyperparameter configs from ``best_*.pth`` checkpoint names.

    Expected file name layout:
    ``best_lr<float>_batch<int>_opt<name>_alpha<float>_gamma<float>.pth``.

    Files that do not start with ``best_`` and end with ``.pth``, whose
    numeric fields cannot be parsed, or that lack any of the five fields are
    skipped: such an entry could never equal a real grid configuration.

    Note: the presence of a ``best_`` checkpoint only shows that the
    configuration produced at least one checkpoint, not that its training ran
    to the end.

    Args:
        save_directory: Directory to scan (not recursive).

    Returns:
        A list of dicts with keys ``learning_rate``, ``batch_size``,
        ``optimizer``, ``alpha`` and ``gamma``, one per parseable checkpoint.
    """
    prefix, suffix = "best_", ".pth"
    # (token that starts the field, config key, converter for the remainder)
    field_parsers = (
        ("lr", "learning_rate", float),
        ("batch", "batch_size", int),
        ("opt", "optimizer", str),
        ("alpha", "alpha", float),
        ("gamma", "gamma", float),
    )

    completed_configs = []
    for filename in os.listdir(save_directory):
        if not (filename.startswith(prefix) and filename.endswith(suffix)):
            continue
        # Strip exactly one leading prefix and one trailing suffix.
        stem = filename[len(prefix):-len(suffix)]
        config = {}
        try:
            for part in stem.split("_"):
                for token, key, convert in field_parsers:
                    if part.startswith(token):
                        config[key] = convert(part[len(token):])
                        break
        except ValueError:
            # Unparseable number: not one of our checkpoints, ignore the file.
            continue
        if len(config) == len(field_parsers):
            completed_configs.append(config)
    return completed_configs

In [14]:
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# EfficientNet-B0 with pretrained weights and a 6-class head, moved to `device`.
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=6).to(device)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 20.4M/20.4M [00:02<00:00, 8.83MB/s]


Loaded pretrained weights for efficientnet-b0


In [15]:
model

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d((0, 1, 0, 1))
  )
  (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d((1, 1, 1, 1))
      )
      (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False
    

In [12]:
grid = ParameterGrid(param_grid)
# Empty frame declaring the result columns.
# NOTE(review): nothing in this cell fills it, and 'TimeTaken(Validation)' is
# declared here but never measured or logged below.
result_df = pd.DataFrame(columns=['Epoch', 'LR', 'Batch_Size', 'alpha', 'gamma',
                                  'Optimizer', 'Train_Loss', 'val_loss', 'ValidationRecall',
                                  'TimeTaken(Training)', 'TimeTaken(Validation)', 'Model Save'])

results_csv = './experiment_results_b0.csv'
# Write the header only when the log is new or empty, so that re-running this
# cell to resume the search does not inject a second header into the CSV.
write_header = not os.path.exists(results_csv) or os.path.getsize(results_csv) == 0
num_epochs = 100   # maximum number of epochs per configuration
max_patience = 30  # epochs without validation-loss improvement before stopping

# The device does not depend on the configuration, so pick it once.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Configurations that already have a "best_" checkpoint in save_directory.
# NOTE(review): a "best_" checkpoint is written at the first improving epoch,
# so a configuration that was interrupted mid-training is also skipped here.
completed_configs = get_completed_configs(save_directory)

for params in grid:
    # Skip configurations that were already trained.
    current_config = {
        'learning_rate': params['learning_rate'],
        'batch_size': params['batch_size'],
        'optimizer': params['optimizer'],
        'alpha': params['alpha'],
        'gamma': params['gamma']
    }
    if current_config in completed_configs:
        print(f"Skipping completed config: {current_config}")
        continue

    # Fresh pretrained model for every configuration.
    model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=6)
    model = model.to(device)

    best_loss = float('inf')  # lowest validation loss seen so far
    patience = 0              # epochs since the last improvement
    criterion = FocalLoss(alpha=params['alpha'], gamma=params['gamma'])

    # Data loaders sized for this configuration's batch size.
    train_loader = DataLoader(train_dataset, batch_size=params['batch_size'], shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=params['batch_size'], shuffle=False, num_workers=4)

    # Optimizer selection.
    if params['optimizer'] == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])
    elif params['optimizer'] == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=params['learning_rate'])
    elif params['optimizer'] == 'AdamW':
        optimizer = optim.AdamW(model.parameters(), lr=params['learning_rate'])
    else:
        # Without this branch an unknown name would either raise NameError or,
        # worse, silently reuse the previous configuration's optimizer.
        raise ValueError(f"Unsupported optimizer: {params['optimizer']!r}")

    # Checkpoint names encode the hyperparameters; get_completed_configs
    # parses exactly this layout.
    config_tag = (f"lr{params['learning_rate']}_batch{params['batch_size']}_opt{params['optimizer']}"
                  f"_alpha{params['alpha']}_gamma{params['gamma']}.pth")
    last_model_path = os.path.join(save_directory, f"last_{config_tag}")
    best_model_path = os.path.join(save_directory, f"best_{config_tag}")

    for epoch in range(num_epochs):
        # ---- training ----
        start_time_train = time()
        train_loss_accum = 0.0
        num_train_batches = 0
        model.train()
        for images, labels in tqdm(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss_accum += loss.item()
            num_train_batches += 1

        # Mean of the per-batch losses.
        average_train_loss = train_loss_accum / num_train_batches
        time_taken_train = time() - start_time_train

        # ---- validation ----
        model.eval()
        all_labels = []
        all_predictions = []
        val_loss = 0.0
        num_batches = 0

        with torch.no_grad():
            for images, labels in tqdm(val_loader):
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                num_batches += 1

                _, predicted = torch.max(outputs, 1)
                all_labels.extend(labels.cpu().numpy())
                all_predictions.extend(predicted.cpu().numpy())

        val_loss /= num_batches  # mean of the per-batch validation losses
        current_recall = recall_score(all_labels, all_predictions, average='macro')
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {val_loss}, Val_Recall: {current_recall * 100}%')
        model_saved = "No"

        # Always keep the most recent weights.
        torch.save(model.state_dict(), last_model_path)

        # Keep a separate checkpoint for the lowest validation loss.
        stop_early = False
        if val_loss < best_loss:
            best_loss = val_loss
            model_saved = "Yes"
            torch.save(model.state_dict(), best_model_path)
            patience = 0
            print("Model saved based on lowest validation loss.")
        else:
            patience += 1
            stop_early = patience >= max_patience

        # Append this epoch's metrics to the CSV log. This happens before the
        # early-stopping break so that the final epoch is logged as well.
        new_row = pd.DataFrame({
            'Epoch': [epoch + 1],
            'LR': [params['learning_rate']],
            'Batch_Size': [params['batch_size']],
            'alpha': [params['alpha']],
            'gamma': [params['gamma']],
            'Optimizer': [params['optimizer']],
            'Train_Loss': [average_train_loss],
            'val_loss': [val_loss],
            'ValidationRecall': [current_recall],
            'TimeTaken(Training)': [time_taken_train],
            'Model Save': [model_saved]
        })
        with open(results_csv, 'a') as f:
            new_row.to_csv(f, header=write_header, index=False)
        write_header = False

        if stop_early:
            print("Early stopping")
            break

    # Release this configuration's objects before the next one starts. The
    # optimizer keeps references to the model's parameters, so deleting only
    # the model would leave them alive.
    del model, optimizer, criterion, train_loader, val_loader
    gc.collect()              # drop unreachable objects (including reference cycles) first
    torch.cuda.empty_cache()  # then hand PyTorch's now-unused cached GPU memory back

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
 83%|██████████████████████████████████████████████████████████████████████████████████▊                 | 16.9M/20.4M [00:02<00:00, 7.68MB/s]


KeyboardInterrupt: 