In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout,Flatten
from tensorflow.keras.models import Model

In [None]:
#@title DataLoad [gdown]
import gdown, zipfile, os

# Google Drive id of the archive to fetch and the local file name it is saved under.
file_id = '1OvQwfbLsAYN4GPENpLGkW87fpeBqTr9Z'
output = 'file.zip'

# Download the archive; quiet=False keeps gdown's progress output visible.
gdown.download(f'https://drive.google.com/uc?id={file_id}', output, quiet=False)

# Extraction target. exist_ok=True lets this cell be re-run without failing
# when the directory is already there.
output_dir = 'cloud'
os.makedirs(output_dir, exist_ok=True)

# Unpack every member of the archive into output_dir.
with zipfile.ZipFile(output, 'r') as z:
  z.extractall(output_dir)

Downloading...
From (original): https://drive.google.com/uc?id=1OvQwfbLsAYN4GPENpLGkW87fpeBqTr9Z
From (redirected): https://drive.google.com/uc?id=1OvQwfbLsAYN4GPENpLGkW87fpeBqTr9Z&confirm=t&uuid=2b787302-fefa-45ea-a8da-f8630acea71f
To: /content/file.zip
100%|██████████| 97.7M/97.7M [00:00<00:00, 169MB/s]


In [None]:
#@title DataLoad (cloud/data/train)
import numpy as np
def load_data(base_dir='cloud/data/train'):
  """Collect image paths and class labels from a class-per-folder directory tree.

  Every sub-directory of ``base_dir`` is treated as one class: each entry inside
  it is recorded as an image path labelled with the sub-directory's name.
  Entries directly under ``base_dir`` that are not directories are ignored.

  Args:
    base_dir: Root directory whose immediate sub-directories are class folders.

  Returns:
    Tuple ``(images_path, labels)`` of equal-length 1-D NumPy arrays, ordered by
    class name and then by file name.

  Raises:
    FileNotFoundError: If ``base_dir`` does not exist (raised by ``os.listdir``).
  """
  images_path = []
  labels = []

  # os.listdir returns entries in arbitrary, filesystem-dependent order. Sorting
  # makes the result deterministic, so a seeded split of these arrays is
  # reproducible across machines and runs.
  for class_name in sorted(os.listdir(base_dir)):
    class_name_path = os.path.join(base_dir, class_name)

    # Only directories are class folders.
    if not os.path.isdir(class_name_path):
      continue

    for image_name in sorted(os.listdir(class_name_path)):
      images_path.append(os.path.join(class_name_path, image_name))
      labels.append(class_name)

  return np.array(images_path), np.array(labels)

# Index the images under load_data's default directory, then print the shapes
# of both arrays and the distinct class names as a sanity check.
images_path, labels = load_data()
print(images_path.shape, labels.shape)
print(np.unique(labels))

(2323,) (2323,)
['Ac' 'As' 'Cb' 'Cc' 'Ci' 'Cs' 'Ct' 'Cu' 'Ns' 'Sc' 'St']


In [None]:
#@title train_validation data split
from sklearn.model_selection  import train_test_split

# Hold out 20% of all images as the test set. stratify keeps the class
# proportions the same in both parts, so no class ends up under-represented
# (or missing) in a subset of this multi-class dataset.
train_images_path, test_images_path, train_labels, test_labels = train_test_split(
    images_path, labels, test_size=0.2, random_state=42, stratify=labels)

# Split 30% of the remaining images off as the validation set, again stratified.
train_images_path, val_images_path, train_labels, val_labels = train_test_split(
    train_images_path, train_labels, test_size=0.3, random_state=42,
    stratify=train_labels)

# Last expression of the cell: displays the shapes of the resulting arrays.
train_images_path.shape, val_images_path.shape, train_labels.shape, val_labels.shape

ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [None]:
#@title Data_Augmentation (sequence class 정의)

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import Sequence
from sklearn.utils import shuffle
import cv2
import matplotlib.pyplot as plt
import albumentations as A

BATCH_SIZE = 32
IMAGE_SIZE = 224

# Sequence used to generate (and optionally persist) augmented images.
class CloudAugSequence(Sequence):
  """Batch-wise image augmentation that can write the augmented copies to disk.

  Each source image is read with OpenCV, augmented ``n_aug`` times and resized
  to ``image_size`` x ``image_size``. When ``save_dir`` is set, every augmented
  copy is written to ``save_dir/<label>/<stem>_aug<k><ext>`` (a class-per-folder
  layout). The batch returned by ``__getitem__`` holds one augmented sample per
  source image, in the same order as the returned labels.

  Args:
    images_path: Indexable sequence (list or 1-D array) of image file paths.
    labels: Labels aligned with ``images_path``, or ``None`` for unlabeled data.
    batch_size: Number of source images per batch.
    augmentor: Callable used as ``augmentor(image=img)['image']`` (the
      albumentations calling convention); ``None`` disables augmentation.
    image_size: Side length of the square output images.
    preprocess_function: Optional callable applied to each RGB image that is
      placed in the returned batch.
    save_dir: Optional directory in which the augmented images are stored.
    n_aug: Number of augmented copies produced per source image.
  """

  def __init__(self, images_path, labels, batch_size=BATCH_SIZE, augmentor=None,
               image_size=IMAGE_SIZE, preprocess_function=None, save_dir=None,
               n_aug=10):
    # Let the Keras base class initialise its own state.
    super().__init__()
    self.images_path = images_path
    self.labels = labels
    self.batch_size = batch_size
    self.augmentor = augmentor
    self.image_size = image_size
    self.preprocess_function = preprocess_function
    self.save_dir = save_dir
    self.n_aug = n_aug

  def __len__(self):
    """Return the number of batches, counting a final partial batch."""
    # Count paths rather than labels so the sequence also works with labels=None.
    return int(np.ceil(len(self.images_path) / self.batch_size))

  def _save_augmented(self, image, image_path, label, aug_index):
    """Write one augmented BGR image below ``save_dir`` and fail loudly on error."""
    if label is None:
      target_dir = self.save_dir
    else:
      target_dir = os.path.join(self.save_dir, str(label))
    os.makedirs(target_dir, exist_ok=True)

    stem, ext = os.path.splitext(os.path.basename(image_path))
    target = os.path.join(target_dir, f'{stem}_aug{aug_index}{ext or ".jpg"}')
    # cv2.imwrite reports failure through its return value instead of raising.
    if not cv2.imwrite(target, image):
      raise OSError(f'Could not write augmented image: {target}')

  def __getitem__(self, index):
    """Build batch ``index``.

    Returns:
      ``(batch_images, batch_labels)`` when labels were supplied, otherwise
      only ``batch_images``; ``batch_images`` is a float32 array of shape
      ``(n, image_size, image_size, 3)``.

    Raises:
      OSError: If an image cannot be read or an augmented copy cannot be written.
    """
    start = index * self.batch_size
    stop = (index + 1) * self.batch_size
    batch_images_path = self.images_path[start:stop]
    batch_labels = self.labels[start:stop] if self.labels is not None else None

    batch_images = np.zeros(
        (len(batch_images_path), self.image_size, self.image_size, 3),
        dtype=np.float32)

    # Without an augmentor every repeat would be identical, so do a single pass.
    repeats = max(1, self.n_aug) if self.augmentor is not None else 1

    # `row` is the image's position inside the batch. Indexing the batch with
    # the augmentation counter instead would overwrite rows 0..n_aug-1 for
    # every image and overflow batches smaller than n_aug.
    for row, image_path in enumerate(batch_images_path):
      source = cv2.imread(image_path)
      if source is None:
        raise OSError(f'Could not read image: {image_path}')
      label = None if batch_labels is None else batch_labels[row]

      for aug_index in range(repeats):
        image = source
        if self.augmentor is not None:
          # Pass a copy so an augmentor that edits its input in place cannot
          # compound changes across repeats.
          image = self.augmentor(image=source.copy())['image']
        image = cv2.resize(image, (self.image_size, self.image_size))

        if self.save_dir is not None:
          # Saved while still in OpenCV's BGR order, which cv2.imwrite expects.
          self._save_augmented(image, image_path, label, aug_index)

      # The last augmented copy represents this image in the returned batch.
      # cv2.imread yields BGR, while Keras preprocessing functions such as
      # vgg16.preprocess_input expect RGB input.
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
      if self.preprocess_function is not None:
        image = self.preprocess_function(image)
      batch_images[row] = image

    return (batch_images, batch_labels) if self.labels is not None else batch_images

In [None]:
#@title image augment 생성
import albumentations as A
from tensorflow.keras.applications.vgg16 import preprocess_input

# Augmentation pipeline. Each transform fires with its own probability p, so
# the shift/scale/rotate and the horizontal flip (p=1) are always applied,
# while the vertical flip and the 45-degree rotation are applied only sometimes.
augmentor = A.Compose([
    A.ShiftScaleRotate(shift_limit=0.1, p=1),
    A.HorizontalFlip(p=1),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
      rotate_limit=45,
      p=0.5),
])

# Directory meant to receive the augmented images.
save_dir='cloud/cloud_aug'

# Apply augmentation to the training data only.
cloud_seq = CloudAugSequence(train_images_path, train_labels,preprocess_function=preprocess_input, augmentor=augmentor, save_dir=save_dir)

# A Sequence builds a batch only when it is indexed, so walk over every batch
# once to actually run the augmentation.
# NOTE(review): this loop is meant to be what saves the images - confirm that
# CloudAugSequence really writes to save_dir.
for i in range(len(cloud_seq)):
  batch_images, batch_labels = cloud_seq[i]
  print(batch_images.shape, batch_labels.shape)

In [None]:
#@title DataLoad (cloud/cloud_aug/train)
import numpy as np
def load_aug_data(base_dir='cloud/cloud_aug'):
  """Collect augmented image paths and class labels from a class-per-folder tree.

  Every sub-directory of ``base_dir`` is treated as one class: each entry inside
  it is recorded as an image path labelled with the sub-directory's name.
  Entries directly under ``base_dir`` that are not directories are ignored.

  Args:
    base_dir: Root directory whose immediate sub-directories are class folders.

  Returns:
    Tuple ``(images_path, labels)`` of equal-length 1-D NumPy arrays, ordered by
    class name and then by file name.

  Raises:
    FileNotFoundError: If ``base_dir`` does not exist (raised by ``os.listdir``).
  """
  images_path = []
  labels = []

  # Sort both directory listings: os.listdir order is arbitrary, and a fixed
  # order keeps the training data (and therefore training runs) reproducible.
  for class_name in sorted(os.listdir(base_dir)):
    class_name_path = os.path.join(base_dir, class_name)

    # Only directories are class folders.
    if not os.path.isdir(class_name_path):
      continue

    for image_name in sorted(os.listdir(class_name_path)):
      images_path.append(os.path.join(class_name_path, image_name))
      labels.append(class_name)

  return np.array(images_path), np.array(labels)

# Replace the training set with the augmented images stored on disk.
train_images_path, train_labels = load_aug_data()

# Report the arrays that were just loaded. Printing images_path / labels here
# would show the earlier, un-augmented index and hide an empty augmentation folder.
print(train_images_path.shape, train_labels.shape)
print(np.unique(train_labels))

(0,) (0,)
[]


In [None]:
#@title label에 대해 label_encoding
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the training labels only; every other split reuses the
# same class-name -> integer mapping.
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)
train_labels = np.array(train_labels, dtype='int32')
print(np.unique(train_labels))
print(train_labels.dtype)

val_labels = label_encoder.transform(val_labels)
val_labels = np.array(val_labels, dtype='int32')
print(np.unique(val_labels))
print(val_labels.dtype)

# The test labels need the same encoding: the model is trained with
# sparse_categorical_crossentropy, which requires integer class ids, so
# evaluating on the raw class-name strings would fail.
test_labels = label_encoder.transform(test_labels)
test_labels = np.array(test_labels, dtype='int32')
print(np.unique(test_labels))
print(test_labels.dtype)

[ 0  1  2  3  4  5  6  7  8  9 10]
int32
[ 0  1  2  3  4  5  6  7  8  9 10]
int32


In [None]:
#@title Data_Augmentation (sequence class 정의)

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import Sequence
from sklearn.utils import shuffle
import cv2
import matplotlib.pyplot as plt
import albumentations as A

BATCH_SIZE = 32
IMAGE_SIZE = 224

# Sequence that reads images from disk for training / evaluation.
class CloudSequence(Sequence):
  """Keras ``Sequence`` that loads, resizes and preprocesses images per batch.

  Args:
    images_path: Indexable sequence (list or 1-D array) of image file paths.
    labels: Labels aligned with ``images_path``, or ``None`` for unlabeled data.
    batch_size: Number of images per batch.
    image_size: Side length of the square output images.
    preprocess_function: Optional callable applied to each RGB image.
  """

  def __init__(self, images_path, labels, batch_size=BATCH_SIZE,
               image_size=IMAGE_SIZE, preprocess_function=None):
    # Let the Keras base class initialise its own state.
    super().__init__()
    self.images_path = images_path
    self.labels = labels
    self.batch_size = batch_size
    self.image_size = image_size
    self.preprocess_function = preprocess_function

  def __len__(self):
    """Return the number of batches, counting a final partial batch."""
    # Count paths rather than labels so the sequence also works with labels=None.
    return int(np.ceil(len(self.images_path) / self.batch_size))

  def __getitem__(self, index):
    """Build batch ``index``.

    Returns:
      ``(batch_images, batch_labels)`` when labels were supplied, otherwise
      only ``batch_images``; ``batch_images`` is a float32 array of shape
      ``(n, image_size, image_size, 3)``.

    Raises:
      OSError: If an image file cannot be read.
    """
    start = index * self.batch_size
    stop = (index + 1) * self.batch_size
    batch_images_path = self.images_path[start:stop]

    batch_images = np.zeros(
        (len(batch_images_path), self.image_size, self.image_size, 3),
        dtype=np.float32)

    # enumerate supplies the row each image belongs to. Relying on a stray
    # module-level `i` would write every image into the same row and leave the
    # rest of the batch as zeros.
    for row, image_path in enumerate(batch_images_path):
      image = cv2.imread(image_path)
      if image is None:
        raise OSError(f'Could not read image: {image_path}')

      image = cv2.resize(image, (self.image_size, self.image_size))
      # cv2.imread yields BGR, while Keras preprocessing functions such as
      # vgg16.preprocess_input expect RGB input.
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
      # Scaling / normalisation.
      if self.preprocess_function is not None:
        image = self.preprocess_function(image)
      batch_images[row] = image

    if self.labels is None:
      return batch_images
    return batch_images, self.labels[start:stop]

In [None]:
 #@title VGG16 model transfer learning
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout,Flatten
from tensorflow.keras.models import Model

# The input size must match the IMAGE_SIZE x IMAGE_SIZE batches produced by the
# data sequences; a hard-coded 64x64 input does not fit 224x224 images.
base_model = VGG16(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3), include_top=False, weights='imagenet')

# Transfer learning: freeze the pretrained convolutional base so that only the
# new classification head is trained and the ImageNet features are not wiped
# out by large early gradients. Unfreeze later (with a small learning rate) to
# fine-tune.
base_model.trainable = False

# Classification head on top of the VGG16 feature maps.
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(11, activation='softmax')(x)  # 11 target classes (labels 0-10)

model = Model(inputs=base_model.input, outputs=output)

model.summary()

In [None]:
#@title model compile
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Callbacks: halve the learning rate after 3 epochs without improvement, and
# stop after 6, restoring the best weights seen so far.
reduce_lr_on_plateau_cb = ReduceLROnPlateau(
    factor=0.5,
    patience=3,
    verbose=1,
)
early_stopping_cb = EarlyStopping(
    restore_best_weights=True,
    patience=6,
    verbose=1,
)

# Integer class ids as targets -> sparse categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
from tensorflow.keras.applications.vgg16 import preprocess_input
# One CloudSequence per split; all three share the batch size and the VGG16
# preprocessing function.
tr_seq = CloudSequence(
    images_path=train_images_path,
    labels=train_labels,
    batch_size=BATCH_SIZE,
    preprocess_function=preprocess_input,
)
val_seq = CloudSequence(
    images_path=val_images_path,
    labels=val_labels,
    batch_size=BATCH_SIZE,
    preprocess_function=preprocess_input,
)
test_seq = CloudSequence(
    images_path=test_images_path,
    labels=test_labels,
    batch_size=BATCH_SIZE,
    preprocess_function=preprocess_input,
)

In [None]:
# tr_seq / val_seq are Sequence objects that already yield complete batches, so
# batch_size must not be passed to fit(): Keras documents it as invalid for
# generator-style inputs, and some versions raise a ValueError for it.
history = model.fit(
    tr_seq,
    epochs=30,
    validation_data=val_seq,
    callbacks=[early_stopping_cb, reduce_lr_on_plateau_cb],
)

Epoch 1/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 583ms/step - accuracy: 0.0987 - loss: 3.0352 - val_accuracy: 0.0964 - val_loss: 2.4037 - learning_rate: 0.0010
Epoch 2/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 59ms/step - accuracy: 0.1363 - loss: 2.4231 - val_accuracy: 0.0964 - val_loss: 2.4158 - learning_rate: 0.0010
Epoch 3/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 62ms/step - accuracy: 0.1168 - loss: 2.4003 - val_accuracy: 0.1024 - val_loss: 2.4021 - learning_rate: 0.0010
Epoch 4/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 65ms/step - accuracy: 0.1014 - loss: 2.3933 - val_accuracy: 0.0964 - val_loss: 2.4031 - learning_rate: 0.0010
Epoch 5/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 68ms/step - accuracy: 0.1192 - loss: 2.3789 - val_accuracy: 0.1024 - val_loss: 2.3940 - learning_rate: 0.0010
Epoch 6/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0

In [None]:
# Evaluate the trained model on the test sequence; evaluate() returns the loss
# together with the metric values configured in model.compile.
history_test=model.evaluate(test_seq)
# Last expression of the cell, so the notebook displays the result.
history_test

In [None]:
import shutil
from google.colab import files

# Folder to compress and the name of the resulting ZIP file.
folder_name = 'cloud/cloud_aug'  # folder to archive
zip_file_name = 'cloud_aug.zip'  # archive file name

# Compress the folder into a ZIP archive. make_archive appends the ".zip"
# extension to base_name itself, so pass zip_file_name without its extension.
# Using folder_name as base_name would write cloud/cloud_aug.zip instead and
# leave zip_file_name pointing at a file that does not exist.
shutil.make_archive(os.path.splitext(zip_file_name)[0], 'zip', folder_name)

FileNotFoundError: Cannot find file: cloud_aug.zip