In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
#@title DataLoad [gdown]
import gdown, zipfile, os

file_id = '1OvQwfbLsAYN4GPENpLGkW87fpeBqTr9Z'
output = 'file.zip'

gdown.download(f'https://drive.google.com/uc?id={file_id}', output, quiet=False)

output_dir = 'cloud'
os.makedirs(output_dir, exist_ok=True)

with zipfile.ZipFile(output, 'r') as z:
  z.extractall(output_dir)

Downloading...
From (original): https://drive.google.com/uc?id=1OvQwfbLsAYN4GPENpLGkW87fpeBqTr9Z
From (redirected): https://drive.google.com/uc?id=1OvQwfbLsAYN4GPENpLGkW87fpeBqTr9Z&confirm=t&uuid=b0e8877b-78b7-4366-bdad-55d4fbebbfe6
To: /content/file.zip
100%|██████████| 97.7M/97.7M [00:02<00:00, 38.9MB/s]


In [9]:
def load_image(file_path, target_size=(224, 224)):
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    img = cv2.imread(file_path)  # 이미지를 읽음
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # BGR -> RGB 변환
    img = cv2.resize(img, target_size)  # 모델 입력 크기로 조정
    img = img.astype('float32') / 255.0  # 0-1로 정규화
    return img

# 이미지 경로를 받아 데이터셋 생성
def create_dataset(image_paths, labels, target_size=(224, 224)):
    images = [load_image(path, target_size) for path in image_paths]
    images = np.array(images)
    labels = np.array(labels, dtype='int32')
    return images, labels

# 파일 경로 리스트와 레이블 리스트 사용
train_images, train_labels = create_dataset(train_image_paths, train_labels)

SyntaxError: invalid syntax (<ipython-input-9-ddd8c0be4d0b>, line 19)

In [3]:
#@title DataLoad
from sklearn.model_selection import train_test_split
import numpy as np
def load_data(base_dir='cloud/data/train'):
  images = []
  labels = []
  class_names = []

  for class_name in os.listdir(base_dir):
    class_name_path = os.path.join(base_dir, class_name)

    # 디렉토리인 경우만
    if os.path.isdir(class_name_path):
      for image_name in os.listdir(class_name_path):
        image_path = os.path.join(class_name_path, image_name)
        images.append(image_path)
        labels.append(class_name)

  return np.array(images), np.array(labels)

images, labels = load_data()
print(images.shape, labels.shape)
print(np.unique(labels))

(2323,) (2323,)
['Ac' 'As' 'Cb' 'Cc' 'Ci' 'Cs' 'Ct' 'Cu' 'Ns' 'Sc' 'St']


In [99]:
rm -rf "file.zip"

In [4]:
from tensorflow.keras.utils import Sequence
import cv2
import matplotlib.pyplot as plt


class cloudAugmentation(Sequence):
  def __init__(self, images, labels, batch_size=32, augmentor=None,
               target_size=(224, 224), save_dir=None):
    self.images = images
    self.labels = labels
    self.batch_size = batch_size
    self.augmentor = augmentor
    self.target_size = target_size
    self.save_dir = save_dir

  def __len__(self):
    return int(np.ceil(self.images.shape[0] / self.batch_size))

  def __getitem__(self, index):
    start = index * self.batch_size
    stop = (index + 1) * self.batch_size
    batch_images_path = self.images[start:stop]
    batch_labels = self.labels[start:stop]

    batch_images = []

    for i, (image_path, label) in enumerate(zip(batch_images_path, batch_labels)):
      image = cv2.imread(image_path)

      # 데이터 증강
      if self.augmentor is not None:
        image = self.augmentor(image=image)['image']

      # 리사이즈
      image = cv2.resize(image, self.target_size)

      # 저장
      if self.save_dir is not None:
        os.makedirs(self.save_dir, exist_ok=True)

        label_dir = os.path.join(self.save_dir, label)
        os.makedirs(label_dir, exist_ok=True)

        aug_image_name = f'aug_batch{index}_{i}.jpg'
        aug_image_path = os.path.join(label_dir, aug_image_name)
        # image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imwrite(aug_image_path, image)

        batch_images.append(image)

    return np.array(batch_images), batch_labels

In [5]:
import albumentations as A

augmentor = A.Compose([
    A.ShiftScaleRotate(shift_limit=0.1, p=1),
    A.HorizontalFlip(p=1),
    A.VerticalFlip(p=0.3),

])

save_dir = 'cloud/data_aug'

cloud_aug = cloudAugmentation(images, labels, augmentor=augmentor, save_dir=save_dir)

  check_for_updates()


In [6]:
from sklearn.model_selection  import train_test_split

train_images,val_images, train_labels, val_labels=train_test_split(images, labels, test_size=0.2, random_state=42)

In [8]:
ls cloud/data/train/St/

St-N001.jpg  St-N030.jpg  St-N058.jpg  St-N089.jpg  St-N116.jpg  St-N146.jpg  St-N174.jpg
St-N002.jpg  St-N031.jpg  St-N059.jpg  St-N090.jpg  St-N117.jpg  St-N147.jpg  St-N175.jpg
St-N003.jpg  St-N032.jpg  St-N060.jpg  St-N092.jpg  St-N119.jpg  St-N148.jpg  St-N176.jpg
St-N005.jpg  St-N033.jpg  St-N061.jpg  St-N093.jpg  St-N120.jpg  St-N149.jpg  St-N178.jpg
St-N006.jpg  St-N034.jpg  St-N063.jpg  St-N094.jpg  St-N121.jpg  St-N150.jpg  St-N179.jpg
St-N007.jpg  St-N036.jpg  St-N064.jpg  St-N095.jpg  St-N122.jpg  St-N151.jpg  St-N180.jpg
St-N008.jpg  St-N037.jpg  St-N066.jpg  St-N096.jpg  St-N123.jpg  St-N152.jpg  St-N181.jpg
St-N009.jpg  St-N038.jpg  St-N067.jpg  St-N097.jpg  St-N125.jpg  St-N153.jpg  St-N182.jpg
St-N010.jpg  St-N039.jpg  St-N068.jpg  St-N098.jpg  St-N126.jpg  St-N154.jpg  St-N183.jpg
St-N011.jpg  St-N041.jpg  St-N069.jpg  St-N099.jpg  St-N127.jpg  St-N155.jpg  St-N184.jpg
St-N013.jpg  St-N042.jpg  St-N070.jpg  St-N100.jpg  St-N128.jpg  St-N156.jpg  St-N185.jpg
St-N014.jp

In [7]:
train_images = train_images.astype('float32') / 255.0
val_images = val_images.astype('float32') / 255.0

# 레이블 데이터 타입 변환
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)
val_labels = label_encoder.transform(val_labels)

train_labels = np.array(train_labels, dtype='int32')
val_labels = np.array(val_labels, dtype='int32')

ValueError: could not convert string to float: 'cloud/data/train/St/St-N172.jpg'

In [84]:
# 분류층을 제외하고 모델 로드
# - input_shape=(224, 224, 3) (기본값)
# - include_top=True (기본값) 분류층 포함여부
# - weights='imagenet' (기본값) imagenet 1000개클래스를 학습한 가중치
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model

base_model = VGG16(input_shape=(64, 64, 3), include_top=False, weights='imagenet')
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(11, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

model.summary()

In [89]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer='adam',
              metrics=["accuracy"])

In [92]:
model.fit(train_images, train_labels,
              epochs=30,
              batch_size=32,
              validation_data=(val_images, val_labels))

ValueError: Invalid dtype: str992