In [7]:
import os
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image, UnidentifiedImageError
from torch.utils.data.dataloader import default_collate

In [9]:
# CustomDataset 클래스 정의
class CustomDataset(Dataset):
    def __init__(self, root_dir, mode='Training', transform=None):
        self.root_dir = root_dir
        self.mode = mode
        self.transform = transform
        self.images = []
        self.labels = []

        data_folder = mode  # 'Training', 'Validation', 또는 'Test'
        data_path = os.path.join(root_dir, data_folder)
        self._load_data(data_path)

    def _load_data(self, data_path):
        image_folder_path = os.path.join(data_path, "원천데이터", "05.상추")
        label_folder_path = os.path.join(data_path, "라벨링데이터", "05.상추")

        image_folders = os.listdir(image_folder_path)

        for image_folder in image_folders:
            image_path = os.path.join(image_folder_path, image_folder)
            label_path = os.path.join(label_folder_path, image_folder)

            for image_filename in os.listdir(image_path):
                image_file_path = os.path.join(image_path, image_filename)
                label_file_path = os.path.join(label_path, image_filename.replace('.jpeg', '.json'))

                if os.path.exists(label_file_path):
                    with open(label_file_path, 'r') as f:
                        label_info = json.load(f)
                        disease = label_info['annotations']['disease']
                        if disease == 9: #균핵병
                            label = 1
                        elif disease == 10: #노균병
                            label = 2
                        else:  #정상
                            label = 0
                else: #라벨 정보가 없으면 정상으로 처리
                    label = 0

                self.images.append(image_file_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = self.images[idx]
        label = self.labels[idx]

        try:
            image = Image.open(image_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
        except (IOError, UnidentifiedImageError) as e:
            print(f"Warning: Could not read image {image_path}. Skipping. Error: {e}")
            return None, None

        return image, label


In [12]:
# DataLoader에서 None 타입 데이터를 걸러내기 위한 함수
def my_collate_fn(batch):
    batch = list(filter(lambda x: x[0] is not None, batch))
    return default_collate(batch)

# 데이터 변환 정의
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 데이터셋 및 DataLoader 인스턴스 생성
root_dir = '/Volumes/T7/상추 질병 진단/realdata'
train_dataset = CustomDataset(root_dir=root_dir, mode='Training', transform=transform)
val_dataset = CustomDataset(root_dir=root_dir, mode='Validation', transform=transform)
test_dataset = CustomDataset(root_dir=root_dir, mode='Test', transform=transform)  # 테스트 데이터셋 추가

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=my_collate_fn)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, collate_fn=my_collate_fn)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, collate_fn=my_collate_fn)  # 테스트 DataLoader 추가


In [13]:
# 모델, 손실 함수, 최적화 알고리즘 설정
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# 훈련 루프
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

    # 검증 루프 #print(f'Validation Accuracy: {accuracy}%')
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    

Epoch 1/10, Loss: 0.26185990289546723
Epoch 2/10, Loss: 0.010090648662298918
Epoch 3/10, Loss: 0.004057515293446391
Epoch 4/10, Loss: 0.002347494405152839
Epoch 5/10, Loss: 0.0014592068375434814
Epoch 6/10, Loss: 0.0011028347301383524
Epoch 7/10, Loss: 0.0008445631741102092
Epoch 8/10, Loss: 0.0006179280415296468
Epoch 9/10, Loss: 0.0005145735028849612
Epoch 10/10, Loss: 0.00043456342753542715


In [10]:
# 라벨별 데이터 수를 계산하는 함수
def count_labels(dataset):
    label_counts = {'정상': 0, '균핵병': 1, '노균병': 2}
    for _, label in dataset:
        if label == 0:
            label_counts['정상'] += 1
        elif label == 1:
            label_counts['균핵병'] += 1
        elif label == 2:
            label_counts['노균병'] += 1
    return label_counts

# Training 데이터셋에 대한 라벨별 데이터 수 출력
label_counts = count_labels(train_dataset)
for label, count in label_counts.items():
    print(f"{label}: {count}개")


KeyboardInterrupt: 

수정 후 

In [4]:
# 라벨별 데이터 수를 계산하는 함수
def count_labels(dataset):
    label_counts = {'정상': 0, '균핵병': 1, '노균병': 2}
    for _, label in dataset:
        if label == 0:
            label_counts['정상'] += 1
        elif label == 1:
            label_counts['균핵병'] += 1
        elif label == 2:
            label_counts['노균병'] += 1
    return label_counts

# Training 데이터셋에 대한 라벨별 데이터 수 출력
label_counts = count_labels(train_dataset)
for label, count in label_counts.items():
    print(f"{label}: {count}개")


정상: 225개
균핵병: 226개
노균병: 227개


In [14]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader, default_collate
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim

class CustomDataset(Dataset):
    def __init__(self, root_dir, split='Training', transform=None):
        self.root_dir = os.path.join(root_dir, split)
        self.transform = transform
        self.labels = {'정상': 0, '균핵병': 1, '노균병': 2}
        self.data = []
        self._load_dataset()

    def _load_dataset(self):
        for label_name, label in self.labels.items():
            path = os.path.join(self.root_dir, '원천데이터', '05.상추', f'05.상추_{label}.{label_name}')
            images = os.listdir(path)
            for image in images:
                self.data.append((os.path.join(path, image), label))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label


In [15]:
# DataLoader에서 None 타입 데이터를 걸러내기 위한 함수
def my_collate_fn(batch):
    batch = list(filter(lambda x: x[0] is not None, batch))
    return default_collate(batch)

# 데이터 변환 정의
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 데이터셋 및 DataLoader 인스턴스 생성
root_dir = '/Users/leeyeonju/Desktop/smart-plant-ai/smart_plant_ai/realdata'
train_dataset = CustomDataset(root_dir=root_dir, split='Training', transform=transform)
val_dataset = CustomDataset(root_dir=root_dir, split='Validation', transform=transform)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=my_collate_fn)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, collate_fn=my_collate_fn)


In [16]:
# 모델, 손실 함수, 최적화 알고리즘 설정
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)  # 전이학습 - 가중치 그대로 사용
num_ftrs = model.fc.in_features  # 모델의 마지막 fully connected층의 입력 특성 수 가져옴
model.fc = nn.Linear(num_ftrs, 3)  # 원래 모델의 마지막 fc레이어를 새로운 레이어로 대체 - num_ftrs개의 입력 특성을 받아 3개의 출력을 가정
criterion = nn.CrossEntropyLoss()  # 다중 클래스 분류에 적합한 손실함수
optimizer = optim.Adam(model.parameters(), lr=1e-4)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# 훈련 및 검증 루프
num_epochs = 10
best_cost = 1e+10
for epoch in range(num_epochs):
    print("================")
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()  # 모델을 학습 모드로 설정
        else:
            model.eval()   # 모델을 평가 모드로 설정

        running_loss = 0.0
        corrects = 0
        sample_size = 0

        # 데이터 로더 설정
        dataloader = train_loader if phase == 'train' else val_loader

        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            # forward
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * images.size(0)
            corrects += torch.sum(preds == labels.data)
            sample_size += images.size(0)

        epoch_loss = running_loss / sample_size
        epoch_acc = corrects.double() / sample_size

        print(f"Epoch: {epoch+1}/{num_epochs}, Phase: {phase}, Loss: {epoch_loss}, ACC: {epoch_acc}")

        # 모델 저장
        if phase == 'val' and epoch_loss < best_cost:
            print("Model saved")
            best_cost = epoch_loss
            torch.save(model.state_dict(), 'model.pt')

Epoch: 1/10, Phase: train, Loss: 0.7718769501756739, ACC: 0.7051851851851851
Epoch: 1/10, Phase: val, Loss: 0.5412274956703186, ACC: 0.8333333333333334
Model saved
Epoch: 2/10, Phase: train, Loss: 0.2516471983326806, ACC: 0.9362962962962963
Epoch: 2/10, Phase: val, Loss: 0.3869178838200039, ACC: 0.8555555555555555
Model saved
Epoch: 3/10, Phase: train, Loss: 0.07735049975139123, ACC: 0.9896296296296296
Epoch: 3/10, Phase: val, Loss: 0.4657071338759528, ACC: 0.8444444444444444
Epoch: 4/10, Phase: train, Loss: 0.0499773649043507, ACC: 0.9940740740740741
Epoch: 4/10, Phase: val, Loss: 0.5377604391839769, ACC: 0.8444444444444444
Epoch: 5/10, Phase: train, Loss: 0.038128319460277754, ACC: 0.9881481481481481
Epoch: 5/10, Phase: val, Loss: 0.3816556341118283, ACC: 0.8333333333333334
Model saved
Epoch: 6/10, Phase: train, Loss: 0.04473652953350985, ACC: 0.9881481481481481
Epoch: 6/10, Phase: val, Loss: 0.4366233189900716, ACC: 0.8444444444444444
Epoch: 7/10, Phase: train, Loss: 0.0420444883816

In [24]:
import torch
from torchvision import transforms
from PIL import Image

# 이미지를 불러오고 전처리하기 위한 transform 설정
transform = transforms.Compose([
    transforms.Resize((254, 254)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def predict_image(image_path):
    # 이미지 불러오기 및 변환
    image = Image.open(image_path).convert('RGB')
    image = transform(image)
    image = image.unsqueeze(0)  # 배치 차원 추가

    # 모델로 예측
    model.eval()  # 모델을 평가 모드로 설정
    with torch.no_grad():
        outputs = model(image.to(device))
        _, preds = torch.max(outputs, 1)

    # 예측된 라벨 반환
    return preds.item()

# 모델 가중치 불러오기 (앞서 저장한 'model.pt')
model.load_state_dict(torch.load('model.pt', map_location=device))

# 예측할 이미지의 경로 설정
image_path = '/Users/leeyeonju/Desktop/smart-plant-ai/smart_plant_ai/KakaoTalk_Photo_2024-05-14-02-06-39.jpeg'

# 이미지에 대한 예측 라벨 출력
predicted_label = predict_image(image_path)
print(f'Predicted label: {predicted_label}')


Predicted label: 0


In [18]:
#클래스별 샘플의 개수 출력 
from collections import Counter
import torch

# 데이터셋에서 각 클래스(0, 1, 2)의 개수를 세기 위한 함수
def count_class_samples(loader):
    class_counts = Counter()
    for _, labels in loader:
        class_counts.update(labels.tolist())
    return class_counts

# 훈련 데이터셋에서 각 클래스의 개수를 세기
train_class_counts = count_class_samples(train_loader)

# 클래스별 개수 출력
for class_label, count in sorted(train_class_counts.items()):
    print(f"Class {class_label}: {count} samples")

# 검증 데이터셋에서도 같은 방식으로 클래스의 개수를 셀 수 있습니다.
val_class_counts = count_class_samples(val_loader)

# 클래스별 개수 출력
for class_label, count in sorted(val_class_counts.items()):
    print(f"Class {class_label}: {count} samples")


Class 0: 225 samples
Class 1: 225 samples
Class 2: 225 samples
Class 0: 30 samples
Class 1: 30 samples
Class 2: 30 samples
