# ⛅️ Project 02: 날씨 데이터 분류
---
## 주제: 날씨 데이터 분류 (사전 훈련 모델 또는 미세 조정(Sequence), 저용량)
---
### 목표 (Target)
- **흐림**
- **비**
- **맑음⛅️**
- **해돋이**


### 목차
1. 이미지 불러오기.
2. 데이터 프레임으로 file 경로 및 target 분류.
3. 데이터셋 미세 조정을 통한 데이터 증강 및 메모리 효율성 확보.
4. 데이터 훈련.
5. 데이터 검증.
6. 결론.

## 1. 이미지 불러오기

In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Settings used while indexing the images in this cell.
IMAGE_SIZE = 128
BATCH_SIZE = 64
root = './datasets/p_Multi-class Weather Dataset/'

# Rescale pixel values by 1/255 when images are loaded through this generator.
idg = ImageDataGenerator(rescale=1./255)

# Index every image under `root`; here the generator is used mainly for its
# class_indices / classes / filepaths attributes.
generator = idg.flow_from_directory(
    root,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)
print(generator.class_indices)

Found 1125 images belonging to 4 classes.
{'Cloudy': 0, 'Rain': 1, 'Shine': 2, 'Sunrise': 3}


In [4]:
# Invert class_indices (class name -> index) into an index -> class name lookup.
target_name = {
    class_index: class_label
    for class_label, class_index in generator.class_indices.items()
}
target_name

{0: 'Cloudy', 1: 'Rain', 2: 'Shine', 3: 'Sunrise'}

In [5]:
# Class name of every indexed image, in the same order as generator.classes.
target_names = [target_name[target] for target in generator.classes]

In [7]:
import pandas as pd

# One row per image: its path, its class name, and its integer class index.
w_df = pd.DataFrame(
    {
        'file_paths': generator.filepaths,
        'target_names': target_names,
        'targets': generator.classes,
    }
)
w_df

Unnamed: 0,file_paths,target_names,targets
0,./datasets/p_Multi-class Weather Dataset/Cloud...,Cloudy,0
1,./datasets/p_Multi-class Weather Dataset/Cloud...,Cloudy,0
2,./datasets/p_Multi-class Weather Dataset/Cloud...,Cloudy,0
3,./datasets/p_Multi-class Weather Dataset/Cloud...,Cloudy,0
4,./datasets/p_Multi-class Weather Dataset/Cloud...,Cloudy,0
...,...,...,...
1120,./datasets/p_Multi-class Weather Dataset/Sunri...,Sunrise,3
1121,./datasets/p_Multi-class Weather Dataset/Sunri...,Sunrise,3
1122,./datasets/p_Multi-class Weather Dataset/Sunri...,Sunrise,3
1123,./datasets/p_Multi-class Weather Dataset/Sunri...,Sunrise,3


In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the images as the test set.
# Stratify on w_df's own targets so both splits keep the class proportions;
# the previous `animal_df` is not defined anywhere in this notebook.
train_images, test_images, train_targets, test_targets = train_test_split(
    w_df.file_paths,
    w_df.targets,
    stratify=w_df.targets,
    test_size=0.2,
    random_state=124,
)

# Per-class counts of each split, to confirm the stratification.
print(train_targets.value_counts())
print(test_targets.value_counts())

targets
3    286
0    240
2    202
1    172
Name: count, dtype: int64
targets
3    71
0    60
2    51
1    43
Name: count, dtype: int64


In [14]:
# Split the training portion once more: 20% of it becomes the validation set,
# again stratified on the class index.
(
    train_images,
    validation_images,
    train_targets,
    validation_targets,
) = train_test_split(
    train_images,
    train_targets,
    test_size=0.2,
    random_state=124,
    stratify=train_targets,
)

# Per-class counts for train / validation / test, in that order.
for split_targets in (train_targets, validation_targets, test_targets):
    print(split_targets.value_counts())

targets
3    229
0    192
2    162
1    137
Name: count, dtype: int64
targets
3    57
0    48
2    40
1    35
Name: count, dtype: int64
targets
3    71
0    60
2    51
1    43
Name: count, dtype: int64


## 2. 데이터 프레임 분리

In [16]:
# The split Series carry w_df's index *labels*, so select rows by label (.loc).
# Positional .iloc only gave the same rows because w_df has a default RangeIndex.
train_df = w_df.loc[train_images.index].reset_index(drop=True)
validation_df = w_df.loc[validation_images.index].reset_index(drop=True)
test_df = w_df.loc[test_images.index].reset_index(drop=True)

# Row/column counts of each split.
print(train_df.shape)
print(validation_df.shape)
print(test_df.shape)

(720, 3)
(180, 3)
(225, 3)


## 3. 이미지의 개수가 적기 때문에 데이터셋 미세 조정 및 Albumentations 증강 적용
- (ShiftScaleRotate, HorizontalFlip, RandomBrightnessContrast)

In [None]:
import numpy as np
from tensorflow.keras.utils import Sequence
from sklearn.utils import shuffle
import cv2

IMAGE_SIZE = 256
BATCH_SIZE = 64


class Dataset(Sequence):
    """Keras ``Sequence`` that loads images from disk one batch at a time.

    Each image is read with OpenCV, converted from BGR to RGB, resized to
    ``IMAGE_SIZE`` x ``IMAGE_SIZE``, optionally augmented, and optionally run
    through a preprocessing function before being placed in the batch.

    Args:
        file_paths: Sliceable collection of image paths.
        targets: Labels aligned one-to-one with ``file_paths``.
        batch_size: Number of samples per batch.
        aug: Optional callable invoked as ``aug(image=image)`` whose result
            is indexed with ``['image']``.
        preprocess: Optional callable applied to each image after ``aug``.
        shuffle: If true, paths and targets are shuffled together once at
            construction and again whenever ``on_epoch_end`` is called.
    """

    def __init__(self, file_paths, targets, batch_size=BATCH_SIZE, aug=None, preprocess=None, shuffle=False):
        # Let the Keras base class initialise its own state first.
        super().__init__()

        self.file_paths = file_paths
        self.targets = targets
        self.batch_size = batch_size
        self.aug = aug
        self.preprocess = preprocess
        self.shuffle = shuffle

        if self.shuffle:
            self.on_epoch_end()

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.targets) / self.batch_size))

    def __getitem__(self, index):
        """Load and return batch ``index`` as ``(images, targets)``.

        Raises:
            FileNotFoundError: If OpenCV cannot read one of the image files.
        """
        start = index * self.batch_size
        stop = start + self.batch_size
        file_paths_batch = self.file_paths[start:stop]
        targets_batch = self.targets[start:stop]

        results_batch = np.zeros((len(file_paths_batch), IMAGE_SIZE, IMAGE_SIZE, 3))

        for i, file_path in enumerate(file_paths_batch):
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread signals failure by returning None; fail with the
                # offending path instead of an opaque cvtColor error.
                raise FileNotFoundError(f'Could not read image: {file_path}')

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))

            if self.aug is not None:
                image = self.aug(image=image)['image']

            if self.preprocess is not None:
                image = self.preprocess(image)

            results_batch[i] = image

        return results_batch, targets_batch

    def on_epoch_end(self):
        """Reshuffle paths and targets together when shuffling is enabled."""
        if self.shuffle:
            # `shuffle` here is the module-level sklearn.utils.shuffle, which
            # keeps the two collections aligned.
            self.file_paths, self.targets = shuffle(self.file_paths, self.targets)

In [None]:
import albumentations as A
from tensorflow.keras.applications.xception import preprocess_input as xception_preprocess_input


IMAGE_SIZE = 256
BATCH_SIZE = 64


# Image paths and one-hot encoded class indices for each split.
train_file_paths = train_df['file_paths'].values
train_targets = pd.get_dummies(train_df['targets']).values

validation_file_paths = validation_df['file_paths'].values
validation_targets = pd.get_dummies(validation_df['targets']).values

test_file_paths = test_df['file_paths'].values
test_targets = pd.get_dummies(test_df['targets']).values


# Augmentation pipeline: the whole Compose is applied with p=0.7 and each
# transform inside it with p=0.5.
aug = A.Compose([
    A.ShiftScaleRotate(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.2, p=0.5)
], p=0.7)


# Training data: augmented and shuffled.
train_dataset = Dataset(train_file_paths,
                        train_targets,
                        batch_size=BATCH_SIZE,
                        aug=aug,
                        preprocess=xception_preprocess_input,
                        shuffle=True)

# Validation and test data are not augmented, so their metrics are measured on
# the unmodified images and are repeatable between runs.
validation_dataset = Dataset(validation_file_paths,
                             validation_targets,
                             batch_size=BATCH_SIZE,
                             preprocess=xception_preprocess_input)

test_dataset = Dataset(test_file_paths,
                       test_targets,
                       batch_size=BATCH_SIZE,
                       preprocess=xception_preprocess_input)

In [None]:
from tensorflow.keras.applications import VGG16, ResNet50V2, Xception
from tensorflow.keras.layers import Input, Dense, Conv2D, Dropout, Flatten, Activation, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.models import Model

def create_model(model_name='vgg16', verbose=False):
    """Build a 4-class classifier on top of an ImageNet-pretrained backbone.

    The backbone is created without its top layers and followed by global
    average pooling, a 50-unit ReLU layer, and a 4-unit softmax layer named
    ``'output'``. For every backbone except ``'vgg16'``, dropout (rate 0.5)
    is inserted before and after the 50-unit layer.

    Args:
        model_name: ``'vgg16'``, ``'resnet50'`` (built with ``ResNet50V2``),
            or ``'xception'``.
        verbose: If true, print the model summary.

    Returns:
        The assembled, uncompiled Keras ``Model``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    supported = ('vgg16', 'resnet50', 'xception')
    if model_name not in supported:
        # Fail early instead of hitting an unbound `model` further down.
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected one of {supported}"
        )

    input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    if model_name == 'vgg16':
        model = VGG16(input_tensor=input_tensor, include_top=False, weights='imagenet')
    elif model_name == 'resnet50':
        model = ResNet50V2(input_tensor=input_tensor, include_top=False, weights='imagenet')
    else:
        model = Xception(input_tensor=input_tensor, include_top=False, weights='imagenet')

    x = model.output

    x = GlobalAveragePooling2D()(x)
    if model_name != 'vgg16':
        x = Dropout(rate=0.5)(x)
    x = Dense(50, activation='relu')(x)
    if model_name != 'vgg16':
        x = Dropout(rate=0.5)(x)
    output = Dense(4, activation='softmax', name='output')(x)

    model = Model(inputs=input_tensor, outputs=output)

    if verbose:
        model.summary()

    return model
    

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

# Save weights only, after every epoch (save_best_only=False); the file name
# records the epoch, val_loss and acc.
mcp_cb = ModelCheckpoint(
    filepath="./callback_files/project02/weights.{epoch:03d}-{val_loss:.4f}-{acc:.4f}.weights.h5",
    save_weights_only=True,
    save_best_only=False,
    monitor='val_loss',
    mode='min',
)

# Multiply the learning rate by 0.1 once val_loss has not improved for 2 epochs.
rlr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    patience=2,
    factor=0.1,
)

# Stop training once val_loss has not improved for 4 epochs.
ely_cb = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=4,
)

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy

# Build the Xception-based classifier (printing its summary) and compile it
# with Adam, categorical cross-entropy, and the 'acc' metric.
model = create_model(verbose=True, model_name='xception')
model.compile(
    optimizer=Adam(),
    loss=CategoricalCrossentropy(),
    metrics=['acc'],
)

In [None]:
N_EPOCHS = 10

# The Dataset objects already yield fixed-size batches, so batch_size is not
# passed to fit(); Keras documents that it must not be specified for
# generator / Sequence style inputs.
history = model.fit(
    train_dataset,
    epochs=N_EPOCHS,
    validation_data=validation_dataset,
    callbacks=[mcp_cb, rlr_cb, ely_cb],
)