In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
#@title DataLoad [gdown] - cloud_binary
import gdown, zipfile, os
"https://drive.google.com/file/d/1738nG4c9ZbnAQkuBNxfrCBJcBn2CyQb5/view?usp=sharing"

file_id = '1738nG4c9ZbnAQkuBNxfrCBJcBn2CyQb5'
output = 'file.zip'

gdown.download(f'https://drive.google.com/uc?id={file_id}', output, quiet=False)

output_dir = 'cloud_binary'
os.makedirs(output_dir, exist_ok=True)

with zipfile.ZipFile(output, 'r') as z:
  z.extractall(output_dir)

Downloading...
From (original): https://drive.google.com/uc?id=1738nG4c9ZbnAQkuBNxfrCBJcBn2CyQb5
From (redirected): https://drive.google.com/uc?id=1738nG4c9ZbnAQkuBNxfrCBJcBn2CyQb5&confirm=t&uuid=496e501d-afc2-453f-aa9b-b73e9566a150
To: /content/file.zip
100%|██████████| 92.4M/92.4M [00:03<00:00, 27.2MB/s]


In [3]:
#@title DataLoad (cloud/data/train)
from sklearn.model_selection import train_test_split
def load_data(base_dir='cloud_binary/train'):
  images_path = []
  labels = []

  class_names = []

  for class_name in os.listdir(base_dir):
    class_name_path = os.path.join(base_dir, class_name)


    # 디렉토리인 경우만
    if os.path.isdir(class_name_path):
      for image_name in os.listdir(class_name_path):
        image_path = os.path.join(class_name_path, image_name)
        images_path.append(image_path)
        labels.append(class_name)

  return np.array(images_path), np.array(labels)

images_path, labels = load_data()
print(images_path.shape,labels.shape)
print(np.unique(labels))

(1000,) (1000,)
['Cloud' 'RainCloud']


In [4]:
#@title DataLoad (cloud/data/train)
from sklearn.model_selection import train_test_split
def load_data(base_dir='cloud_binary/test'):
  images_path = []
  labels = []

  class_names = []

  for class_name in os.listdir(base_dir):
    class_name_path = os.path.join(base_dir, class_name)


    # 디렉토리인 경우만
    if os.path.isdir(class_name_path):
      for image_name in os.listdir(class_name_path):
        image_path = os.path.join(class_name_path, image_name)
        images_path.append(image_path)
        labels.append(class_name)

  return np.array(images_path), np.array(labels)

test_images_path, test_labels = load_data()
print(test_images_path.shape, test_labels.shape)
print(np.unique(test_labels))

(200,) (200,)
['Cloud' 'RainCloud']


In [5]:
#@title train data split
from sklearn.model_selection  import train_test_split

train_images_path, val_images_path, train_labels, val_labels = train_test_split(images_path, labels, test_size=0.2, random_state=42)
train_images_path.shape, val_images_path.shape, train_labels.shape, val_labels.shape

((800,), (200,), (800,), (200,))

In [6]:
#@title label에 대해 label_encoding
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)
train_labels = np.array(train_labels, dtype='int32')

val_labels = label_encoder.transform(val_labels)
val_labels = np.array(val_labels, dtype='int32')

test_labels=label_encoder.transform(test_labels)
test_labels = np.array(test_labels, dtype='int32')

In [7]:
#@title image_path to image_pixel

from tensorflow.keras.utils import Sequence
import cv2 # 이거 나중에 tensorflow로 바꿔

BATCH_SIZE = 32
IMAGE_SIZE = 224

class CloudSequence(Sequence):
  def __init__(self, images_path, labels, batch_size=BATCH_SIZE,
               image_size=IMAGE_SIZE, preprocess_function=None):
    self.images_path = images_path
    self.labels = labels
    self.batch_size = batch_size
    self.image_size = image_size
    self.preprocess_function = preprocess_function

  def __len__(self):
    return int(np.ceil(len(self.labels) / self.batch_size))

  def __getitem__(self, index):
    start = index * self.batch_size
    stop = (index + 1) * self.batch_size
    batch_images_path = self.images_path[start:stop]
    batch_labels = self.labels[start:stop]

    batch_images = np.zeros((batch_images_path.shape[0], self.image_size, self.image_size, 3), dtype=np.float32)

    for i,(image_path, label) in enumerate(zip(batch_images_path, batch_labels)):
        image = cv2.imread(image_path)
        image = cv2.resize(image, (self.image_size, self.image_size))
        # 스케일링
        if self.preprocess_function is not None:
          image = self.preprocess_function(image)
        batch_images[i]=image

    return (batch_images, batch_labels) if self.labels is not None else batch_images

In [8]:
from tensorflow.keras.applications.efficientnet import preprocess_input
tr_seq=CloudSequence(train_images_path, train_labels,preprocess_function=preprocess_input)
val_seq=CloudSequence(val_images_path, val_labels,preprocess_function=preprocess_input)
test_seq=CloudSequence(test_images_path, test_labels,preprocess_function=preprocess_input)

In [9]:
first_batch = next(iter(tr_seq))
first_batch[0].shape, first_batch[1].shape

((32, 224, 224, 3), (32,))

In [10]:
first_batch_val = next(iter(val_seq))
first_batch_val[0].shape, first_batch_val[1].shape

((32, 224, 224, 3), (32,))

In [11]:
#@title EfficientNet model 전이학습
from sklearn.model_selection import StratifiedShuffleSplit
from skimage.transform import resize
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
import tensorflow as tf
import keras
from keras.models import Sequential, load_model
from keras.layers import Dropout, Dense, GlobalAveragePooling2D
from keras.optimizers import Adam
from tensorflow.keras.applications import EfficientNetB0


height = 224
width = 224
channels = 3
input_shape = (height, width, channels)
efnb0 = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)

model = Sequential()
model.add(efnb0)
model.add(GlobalAveragePooling2D())
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))

model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [12]:
#@title model_compile
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

model.compile(loss="binary_crossentropy",
              optimizer='adam',
              metrics=["accuracy"])


early_stopping_cb = EarlyStopping(patience=6, verbose=1, restore_best_weights=True)
reduce_lr_on_plateau_cb = ReduceLROnPlateau(patience=3, factor=0.5, verbose=1)

In [13]:
history = model.fit(
    tr_seq,
    epochs=30,
    validation_data=val_seq,
    callbacks=[early_stopping_cb, reduce_lr_on_plateau_cb]
)

Epoch 1/30


  self._warn_if_super_not_called()


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m306s[0m 10s/step - accuracy: 0.8527 - loss: 0.2681 - val_accuracy: 0.5450 - val_loss: 3.0787 - learning_rate: 0.0010
Epoch 2/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m250s[0m 10s/step - accuracy: 0.9897 - loss: 0.0474 - val_accuracy: 0.9000 - val_loss: 0.3169 - learning_rate: 0.0010
Epoch 3/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m265s[0m 10s/step - accuracy: 0.9918 - loss: 0.0169 - val_accuracy: 0.7550 - val_loss: 2.1560 - learning_rate: 0.0010
Epoch 4/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 10s/step - accuracy: 0.9994 - loss: 0.0050 - val_accuracy: 0.9250 - val_loss: 0.4357 - learning_rate: 0.0010
Epoch 5/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9s/step - accuracy: 0.9957 - loss: 0.0092
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 10s/step

### **model save**
- `model.save('model_name.h5')`
- `model.save("model_name.keras")`
  - 모델 저장 시 `.keras` 확장자를 사용하는 것은 파일 방식을 명확히 하고, Keras 모델임을 나타내기 위해 자주 사용됨
  - 실제로 **.keras 확장자는 HDF5나 SavedModel 포맷의 저장 방식과 동일한 구조**

In [14]:
model.save('best_cloud_binary_EfficientNet.keras')