In [2]:
import pandas as pd
import numpy as np
import os
import glob
import torch
from torchvision import models, transforms
from torchvision.models import Inception_V3_Weights
from PIL import Image
import cv2
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [None]:
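# If loss-function errors keep occurring even though the x / y data preparation looks correct,
# the helper function I wrote for data preparation may itself be the problem.
# To check this, rebuild the x / y datasets and dataloaders without that helper and try training again.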

In [3]:
# PartDiseaseClassifier model build

class PartDiseaseClassifier(nn.Module):
    """Small fully connected classifier over a flat feature vector.

    Pipeline: Linear(input_size -> 512) -> ReLU -> Dropout(p=0.5) -> Linear(512 -> num_classes).
    The forward pass returns the raw output of the last linear layer (no softmax is applied
    here); the notebook pairs it with ``nn.CrossEntropyLoss``.

    Args:
        input_size: Number of features per input sample.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        # Attribute names and creation order are kept stable so that state-dict keys,
        # the printed module repr, and seeded initialisation stay the same.
        self.layer1 = nn.Linear(input_size, 512)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.layer2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Map a batch ``x`` of shape (..., input_size) to scores of shape (..., num_classes)."""
        hidden = self.dropout(self.relu(self.layer1(x)))
        return self.layer2(hidden)

In [4]:
# --- training side: label table, working copy, and per-image feature archive ---
# (file names suggest this is the "10" training subset -- TODO confirm what the suffix means)
train_10 = pd.read_csv('./data_preprocessed/label/train_10_cleaned.csv', encoding='utf-8')
df_train = train_10.copy()
train_features_npz = np.load('./data_preprocessed/part_image_features/tr_10_cleaned.npz')

# --- test side: label table, working copy, and per-image feature archive ---
test_data = pd.read_csv('./data_preprocessed/label/test_cleaned.csv', encoding='utf-8')
df_test = test_data.copy()
test_features_npz = np.load('./data_preprocessed/part_image_features/vl_cleaned.npz')

In [21]:
# --- training side: label table, working copy, and per-image feature archive ---
# (file names suggest this is the "50" training subset -- TODO confirm what the suffix means)
train_50 = pd.read_csv('./data_preprocessed/label/train_50_cleaned.csv', encoding='utf-8')
df_train_50 = train_50.copy()
train_features_50_npz = np.load('./data_preprocessed/part_image_features/tr_50_cleaned.npz')

# --- test side: reloaded so this cell can run independently of the earlier loading cell ---
test_data = pd.read_csv('./data_preprocessed/label/test_cleaned.csv', encoding='utf-8')
df_test = test_data.copy()
test_features_npz = np.load('./data_preprocessed/part_image_features/vl_cleaned.npz')

In [23]:
# Add an empty 'features' placeholder column (filled in by a later cell), then keep only
# the rows flagged disease_status == 'Y' with the four columns needed downstream.
# Note: the placeholder column is added to df_train / df_test themselves, as before.
df_train['features'] = None
train_is_diseased = df_train['disease_status'] == 'Y'
train_data = df_train.loc[
    train_is_diseased, ['class', 'tr_id', 'disease_status', 'features']
].reset_index(drop=True)

df_test['features'] = None
test_is_diseased = df_test['disease_status'] == 'Y'
test_set = df_test.loc[
    test_is_diseased, ['class', 'vl_id', 'disease_status', 'features']
].reset_index(drop=True)

In [24]:
# Attach the stored feature array to every row whose id exists in the matching archive.
# The key lists are kept under their original names; the sets give O(1) membership checks
# (testing `x in list` for every row made this step quadratic in the number of samples).
key_list_tr = list(train_features_npz.keys())
key_list_vl = list(test_features_npz.keys())
key_set_tr = set(key_list_tr)
key_set_vl = set(key_list_vl)

# Iterate over the id column only; `sample_id` avoids shadowing the builtin `id`.
for idx, sample_id in train_data['tr_id'].items():
    if sample_id in key_set_tr:
        train_data.at[idx, 'features'] = train_features_npz[sample_id]

for idx, sample_id in test_set['vl_id'].items():
    if sample_id in key_set_vl:
        test_set.at[idx, 'features'] = test_features_npz[sample_id]

# Rows whose id is absent from the archive keep features == None (unchanged behaviour).

In [26]:

# Integer encoding of the 'class' column, defined once and shared by both splits.
# Any class name not listed here (including missing values) falls into OTHER_LABEL,
# which mirrors the final `else` branch of the original if/elif chain.
CLASS_TO_LABEL = {'blight': 0, 'wilt': 1, 'scorch': 2}
OTHER_LABEL = 3

# Column-wise mapping replaces the two row-by-row iterrows() loops; the resulting
# column is a plain integer column instead of an object column of Python ints.
train_data['class_label'] = [
    CLASS_TO_LABEL.get(class_name, OTHER_LABEL) for class_name in train_data['class']
]
test_set['class_label'] = [
    CLASS_TO_LABEL.get(class_name, OTHER_LABEL) for class_name in test_set['class']
]

In [27]:
# Convert the per-row feature arrays and labels into dense tensors.
# The 'features' column is an object column holding one NumPy array per row, so the arrays
# are stacked into a single ndarray first (same approach as status_y_for_model) instead of
# handing the object Series to torch.tensor directly. Labels are cast to int64 explicitly
# so the conversion does not depend on the column's dtype.
x_train = torch.tensor(np.stack(train_data['features'].tolist()), dtype=torch.float32)
y_train = torch.tensor(train_data['class_label'].to_numpy(dtype='int64'), dtype=torch.long)
x_test = torch.tensor(np.stack(test_set['features'].tolist()), dtype=torch.float32)
y_test = torch.tensor(test_set['class_label'].to_numpy(dtype='int64'), dtype=torch.long)

In [28]:
# Sanity check: print feature-tensor shapes first, then label-tensor shapes.
for checked_tensor in (x_train, x_test, y_train, y_test):
    print(checked_tensor.shape)

torch.Size([4466, 1, 1000])
torch.Size([1800, 1, 1000])
torch.Size([4466])
torch.Size([1800])


In [29]:
# Drop the singleton axis 1 so each sample becomes a flat feature vector
# (the shapes printed above were (N, 1, 1000)).
x_train = x_train.squeeze(1)
x_test = x_test.squeeze(1)
print(x_train.shape, x_test.shape, sep='\n')

torch.Size([4466, 1000])
torch.Size([1800, 1000])


In [30]:
# Pair features with labels, then wrap each split in a DataLoader.
# Only the training loader shuffles; the validation loader keeps the stored order.
train_dataset, val_dataset = TensorDataset(x_train, y_train), TensorDataset(x_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [15]:
def status_y_for_model(df, features_npz, id_col, batch_size, shuffle_option):
    """Build a DataLoader from the rows of ``df`` whose ``disease_status`` is ``'Y'``.

    For every selected row the feature array is looked up in ``features_npz`` by the
    value of ``id_col``, and the ``class`` column is encoded as an integer label
    (blight -> 0, wilt -> 1, scorch -> 2, anything else -> 3).

    Args:
        df: DataFrame with at least the columns ``'class'``, ``'disease_status'`` and
            ``id_col``. It is not modified.
        features_npz: Mapping-like object exposing ``.keys()`` and ``[key]`` lookup
            (e.g. the result of ``np.load`` on an ``.npz`` file, or a dict) that maps
            ids to feature arrays.
        id_col: Name of the column holding the ids used as keys in ``features_npz``.
        batch_size: Batch size passed to the DataLoader.
        shuffle_option: ``shuffle`` flag passed to the DataLoader.

    Returns:
        Tuple ``(dataloader, df_copy, x_data)`` where ``df_copy`` is the filtered,
        re-indexed frame with added ``'features'`` and ``'class_label'`` columns and
        ``x_data`` is the float32 feature tensor with axis 1 squeezed out.

    Raises:
        ValueError: If no row has ``disease_status == 'Y'``, or if any selected row's
            id is missing from ``features_npz`` (previously this surfaced later as an
            obscure array/tensor conversion error).
    """
    class_to_label = {'blight': 0, 'wilt': 1, 'scorch': 2}
    other_label = 3

    df_copy = df[df['disease_status'] == 'Y'].reset_index(drop=True)
    if len(df_copy) == 0:
        raise ValueError("no rows with disease_status == 'Y'; cannot build a dataset")

    # Set membership is O(1); the original tested against a list for every row.
    available_keys = set(features_npz.keys())
    # Object array (initialised to None) so each cell can hold a whole feature array.
    features_col = np.empty(len(df_copy), dtype=object)
    missing_ids = []
    for pos, sample_id in enumerate(df_copy[id_col]):
        if sample_id in available_keys:
            features_col[pos] = features_npz[sample_id]
        else:
            missing_ids.append(sample_id)
    df_copy['features'] = features_col

    if missing_ids:
        preview = ', '.join(repr(m) for m in missing_ids[:5])
        raise ValueError(
            f"{len(missing_ids)} id(s) from column {id_col!r} have no entry in "
            f"features_npz (first few: {preview})"
        )

    # Unknown / missing class names fall back to other_label, like the original else-branch.
    labels = np.array(
        [class_to_label.get(class_name, other_label) for class_name in df_copy['class']],
        dtype=np.int64,
    )
    df_copy['class_label'] = labels

    features_array = np.stack(list(features_col))
    x_data = torch.tensor(features_array, dtype=torch.float32)
    y_data = torch.tensor(labels, dtype=torch.long)
    # Assumes each stored feature array has a leading singleton axis, e.g. (1, D),
    # so the stacked tensor is (N, 1, D) -> (N, D) -- TODO confirm for other archives.
    x_data = torch.squeeze(x_data, 1)

    dataset = TensorDataset(x_data, y_data)
    dataset_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle_option)

    return dataset_dataloader, df_copy, x_data


In [16]:
# Build the training loader (shuffled) from the `tr_10` features and the validation
# loader (fixed order) from the `vl` features.
# Note: df_train / df_test are rebound to the filtered frames returned by the helper.
train_loader, df_train, x_train = status_y_for_model(
    df=df_train,
    features_npz=train_features_npz,
    id_col='tr_id',
    batch_size=32,
    shuffle_option=True,
)
val_loader, df_test, x_test = status_y_for_model(
    df=df_test,
    features_npz=test_features_npz,
    id_col='vl_id',
    batch_size=32,
    shuffle_option=False,
)

In [22]:
# Same preparation as above, but the training loader is built from the `tr_50` features.
# Note: this rebinds train_loader / val_loader, so later cells train on this subset.
train_loader, df_train_50, x_train_50 = status_y_for_model(
    df=df_train_50,
    features_npz=train_features_50_npz,
    id_col='tr_id',
    batch_size=32,
    shuffle_option=True,
)
val_loader, df_test, x_test = status_y_for_model(
    df=df_test,
    features_npz=test_features_npz,
    id_col='vl_id',
    batch_size=32,
    shuffle_option=False,
)

In [17]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Classifier sized to the feature width of x_train, with 4 output classes.
n_features = x_train.shape[1]
model = PartDiseaseClassifier(input_size=n_features, num_classes=4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Last expression of the cell: moves the model to `device` and displays its layout.
model.to(device)

PartDiseaseClassifier(
  (layer1): Linear(in_features=1000, out_features=512, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
  (layer2): Linear(in_features=512, out_features=4, bias=True)
)

In [23]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Fresh classifier (new weights, new optimizer state) sized to the feature width of x_train_50.
n_features = x_train_50.shape[1]
model = PartDiseaseClassifier(input_size=n_features, num_classes=4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Last expression of the cell: moves the model to `device` and displays its layout.
model.to(device)

PartDiseaseClassifier(
  (layer1): Linear(in_features=1000, out_features=512, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
  (layer2): Linear(in_features=512, out_features=4, bias=True)
)

In [24]:
epochs = 30

for epoch in range(epochs):
    # ---------------- training pass ----------------
    model.train()  # enables dropout
    running_loss, correct_preds, total_preds = 0.0, 0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Bookkeeping uses the outputs computed before the optimizer step.
        running_loss += loss.item()
        predicted = outputs.max(dim=1).indices
        correct_preds += (predicted == labels).sum().item()
        total_preds += labels.shape[0]

    # Loss is averaged over batches; accuracy over individual samples.
    train_accuracy = correct_preds / total_preds
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader)}, Accuracy: {train_accuracy}")

    # ---------------- validation pass ----------------
    model.eval()  # disables dropout
    val_loss, correct_preds, total_preds = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device, dtype=torch.long)
            outputs = model(inputs)

            val_loss += criterion(outputs, labels).item()
            predicted = outputs.max(dim=1).indices
            correct_preds += (predicted == labels).sum().item()
            total_preds += labels.shape[0]

    val_accuracy = correct_preds / total_preds
    print(f"Validation Loss: {val_loss/len(val_loader)}, Validation Accuracy: {val_accuracy}")

Epoch 1/30, Loss: 0.6923336114628902, Accuracy: 0.6922029281579843
Validation Loss: 0.5534694317662925, Validation Accuracy: 0.7527777777777778
Epoch 2/30, Loss: 0.576758209072167, Accuracy: 0.7430711610486891
Validation Loss: 0.5294938045635558, Validation Accuracy: 0.7577777777777778
Epoch 3/30, Loss: 0.5312818874785583, Accuracy: 0.7641130405175349
Validation Loss: 0.5034604754887129, Validation Accuracy: 0.7716666666666666
Epoch 4/30, Loss: 0.49819582752672414, Accuracy: 0.7814776983316309
Validation Loss: 0.5047111715141096, Validation Accuracy: 0.7722222222222223
Epoch 5/30, Loss: 0.4648050108116956, Accuracy: 0.7995914198161389
Validation Loss: 0.5127169531688356, Validation Accuracy: 0.7727777777777778
Epoch 6/30, Loss: 0.44898226285811865, Accuracy: 0.8042900919305414
Validation Loss: 0.5274534949608016, Validation Accuracy: 0.77
Epoch 7/30, Loss: 0.4282626730099742, Accuracy: 0.8179094313925774
Validation Loss: 0.48368827135939346, Validation Accuracy: 0.7944444444444444
Epoc