# 전이학습

In [1]:
# @title gdown
# https://drive.google.com/file/d/1z-HEvwpoTDbef_EowtGp7mUINlDRLFw7/view?usp=sharing
import gdown, zipfile, os

file_id = '1z-HEvwpoTDbef_EowtGp7mUINlDRLFw7'
zip_path = 'SUV_kor.zip'
# Named `extract_dir` instead of `dir` so the builtin dir() is not shadowed
# for the rest of the notebook session.
extract_dir = 'SUV-classification2'

# Only hit the network when the archive is not on disk yet, so that
# re-running the notebook does not download the dataset again.
if not os.path.exists(zip_path):
    gdown.download(f'https://drive.google.com/uc?id={file_id}', zip_path, quiet=False)

os.makedirs(extract_dir, exist_ok=True)  # create the target folder if it is missing

with zipfile.ZipFile(zip_path, 'r') as z:
    z.extractall(extract_dir)

ModuleNotFoundError: No module named 'gdown'

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from sklearn.model_selection import train_test_split

def sample(x, n=1000, random_state=None):
  """Return ``n`` randomly selected rows of ``x``.

  Args:
    x: Object exposing a pandas-style ``.sample`` method (DataFrame/Series).
    n: Number of rows to draw. Defaults to 1000, the value that used to be
      hard-coded.
    random_state: Optional seed forwarded to ``x.sample`` for reproducible
      draws. ``None`` keeps the draw unseeded.

  Returns:
    Whatever ``x.sample`` returns for the requested ``n``.
  """
  return x.sample(n, random_state=random_state)

def load_data(base_dir='SUV-classification2'):
  """Collect image paths and car-model labels from the Hyundai and Kia folders.

  Scans ``<base_dir>/SUV/Hyundai`` and ``<base_dir>/SUV/Kia`` for JPEG files.
  The label is the second token of the file name after splitting on ``_``,
  ``-`` or ``$`` (e.g. ``SUV_Tucson-153.jpg`` -> ``Tucson``).

  Args:
    base_dir: Directory that contains the extracted ``SUV`` folder.

  Returns:
    pandas.DataFrame with one row per image and the columns ``path`` and
    ``label``.

  Raises:
    FileNotFoundError: Propagated from ``os.listdir`` when a brand folder
      does not exist.
  """
  images = []
  labels = []

  base = os.path.join(base_dir, 'SUV')

  for category in ['Hyundai', 'Kia']:
    category_path = os.path.join(base, category)  # e.g. SUV-classification2/SUV/Hyundai

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any seeded sampling downstream) stable across runs.
    for filename in sorted(os.listdir(category_path)):
      # Check the real extension: a substring test would also accept names
      # such as "x.jpg.txt". The comparison is case-insensitive (".JPG").
      if not filename.lower().endswith('.jpg'):
        continue

      texts = filename.replace('_', '$').replace('-', '$').split('$')
      if len(texts) < 2:
        # No separator in the name, so there is no label token to read.
        continue

      images.append(os.path.join(category_path, filename))
      labels.append(texts[1])

  return pd.DataFrame({
    'path': images,
    'label': labels
  })


In [3]:
#@title Sample 1,000 images per car model and split into train/test
SAMPLES_PER_CLASS = 1000
SEED = 0  # shared seed so the sampled subset is reproducible across runs

df_temp = load_data()

# Keep only the car models that have more than SAMPLES_PER_CLASS images
# (strict '>' as before) and draw exactly SAMPLES_PER_CLASS rows from each.
# The pieces are concatenated once instead of growing a DataFrame in a loop.
sampled_parts = [
  group.sample(SAMPLES_PER_CLASS, random_state=SEED)
  for _, group in df_temp.groupby('label', sort=False)
  if len(group) > SAMPLES_PER_CLASS
]
data_set = pd.concat(sampled_parts)

print(data_set['label'].value_counts())
print(data_set.head())

# Train/test split. `stratify` keeps the per-model proportions identical in
# both splits, so every class is represented in the test set.
tr_images, test_images, tr_labels, test_labels = train_test_split(
  data_set['path'], data_set['label'],
  test_size=0.2, random_state=0, stratify=data_set['label'])

tr_images.shape, tr_labels.shape, test_images.shape, test_labels.shape  # these are image paths, not pixels


label
Tucson      1000
Santafe     1000
Palisade    1000
Veracruz    1000
Kona        1000
Seltos      1000
Carens      1000
Sorento     1000
Sportage    1000
Mohave      1000
Soul        1000
Niro        1000
Name: count, dtype: int64
                                                    path   label
12282  SUV-classification2/SUV/Hyundai/SUV_Tucson-153...  Tucson
16202  SUV-classification2/SUV/Hyundai/SUV_Tucson-103...  Tucson
6249   SUV-classification2/SUV/Hyundai/SUV_Tucson-547...  Tucson
4869   SUV-classification2/SUV/Hyundai/SUV_Tucson-449...  Tucson
17918  SUV-classification2/SUV/Hyundai/SUV_Tucson-650...  Tucson


((9600,), (9600,), (2400,), (2400,))

In [None]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Image loading and preprocessing helper
def preprocess_image(image_path, image_size):
    """Load one image file as a square array scaled by 1/255.

    Args:
        image_path: Path handed to ``load_img``.
        image_size: Edge length; the image is loaded with
            ``target_size=(image_size, image_size)``.

    Returns:
        The ``img_to_array`` result divided by 255.0.
    """
    target = (image_size, image_size)
    pixels = img_to_array(load_img(image_path, target_size=target))
    return pixels / 255.0  # normalization

# Decode every image path into pixels, one in-memory array per split.
# The right-hand tuple still holds the path collections when it is evaluated;
# the names are rebound to the pixel arrays afterwards.
IMAGE_SIZE = 299
tr_images, test_images = (
    np.array([preprocess_image(image_path, IMAGE_SIZE) for image_path in split_paths])
    for split_paths in (tr_images, test_images)
)

# Label encoding: the mapping is fitted on the training labels and then
# applied to both splits.
label_encoder = LabelEncoder().fit(tr_labels)
tr_labels = label_encoder.transform(tr_labels)
test_labels = label_encoder.transform(test_labels)


## Xception 모델 로드

In [None]:
# Xception must be fed 299x299 inputs.

from tensorflow.keras import applications
# Instantiate Xception with its default arguments and print its layer summary.
pretrained_model = applications.Xception()
pretrained_model.summary()

## Car10Sequence

In [None]:
from tensorflow.keras.utils import Sequence

from sklearn.utils import shuffle
import numpy as np
import cv2

BATCH_SIZE = 64
# 299, not 229: matches the size the images are loaded at earlier in the
# notebook and the 299x299 input size noted for Xception.
IMAGE_SIZE = 299

class Car10Sequence(Sequence):
  """Keras ``Sequence`` that serves in-memory images (and labels) in batches.

  Each batch is resized to ``image_size`` x ``image_size`` with ``cv2.resize``
  and optionally passed through ``preprocess_function``.

  Args:
    images: Image pixel data as an ndarray (not file paths).
    labels: Labels aligned with ``images``, or ``None`` (e.g. for predict),
      in which case batches contain images only.
    batch_size: Number of samples per batch.
    image_size: Edge length every image is resized to.
    augmentor: Stored but currently NOT applied; the augmentation step in
      ``__getitem__`` is disabled.
    shuffle: When true, the data is shuffled on construction and at the end
      of every epoch.
    preprocess_function: Optional per-image scaling function.
  """

  def __init__(self, images, labels,
               batch_size=BATCH_SIZE, image_size=IMAGE_SIZE, augmentor=None,
               shuffle=False, preprocess_function=None):
    # Run the base-class initializer; newer Keras versions warn when a
    # Sequence subclass skips it. TODO confirm against the installed version.
    super().__init__()
    self.images = images  # pixel data (ndarray), not paths
    self.labels = labels  # present for fit/evaluate, None for predict
    self.batch_size = batch_size
    self.image_size = image_size
    self.augmentor = augmentor
    self.shuffle = shuffle
    self.preprocess_function = preprocess_function  # scaling function
    self.on_epoch_end()  # initial shuffle, if enabled

  def __len__(self):
    """Return the number of batches; the last one may be smaller."""
    return int(np.ceil(len(self.images) / self.batch_size))

  def __getitem__(self, index):
    """Build batch ``index``.

    Returns:
      ``(batch_images, batch_labels)`` when labels are available, otherwise
      only ``batch_images``. ``batch_images`` is a float32 array of shape
      ``(n, image_size, image_size, 3)``.
    """
    # Select the images/labels that belong to this batch
    start = index * self.batch_size
    end = start + self.batch_size
    this_batch_images = self.images[start:end]

    batch_images = np.zeros(
        (this_batch_images.shape[0], self.image_size, self.image_size, 3),
        dtype=np.float32)

    for i in range(this_batch_images.shape[0]):
      image = this_batch_images[i]
      # NOTE(review): augmentation is intentionally left disabled, as in the
      # original code; `self.augmentor` is not called here.
      # Resize
      image = cv2.resize(image, (self.image_size, self.image_size))
      # Scaling
      if self.preprocess_function is not None:
        image = self.preprocess_function(image)

      batch_images[i] = image

    if self.labels is None:
      return batch_images
    return batch_images, self.labels[start:end]

  def on_epoch_end(self):
    """Reshuffle images (and labels, kept aligned) when shuffling is enabled."""
    if not self.shuffle:
      return
    if self.labels is None:
      # Without labels there is only one array to permute; avoid handing
      # None to sklearn's shuffle and unpacking two results.
      self.images = shuffle(self.images)
    else:
      self.images, self.labels = shuffle(self.images, self.labels)



## 분류를 위한 전이학습

In [None]:
# Notes on the `weights` argument
#
# weights=None
#   Description:
#     The model weights are initialized randomly. Use it when the model should
#     learn from scratch on the new data.
#   When to use:
#     - The dataset comes from a domain completely different from ImageNet
#       (e.g. medical or satellite imagery).
#     - Transfer learning is not possible, or the existing weights are judged
#       not to help.
#     - The model should be trained from start to finish.
from tensorflow.keras import layers
from tensorflow.keras import models

# One output unit per class actually present in the data. The sampled dataset
# contains 12 car models, so a hard-coded 10 would not cover every label.
NUM_CLASSES = len(label_encoder.classes_)

base_model = applications.Xception(
    input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
    include_top=False,
    weights='imagenet'
)
# Feed the base model's output into the new classification head.
x = base_model.output
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(500, activation='relu', kernel_initializer='he_normal')(x)
x = layers.Dropout(0.3)(x)
output = layers.Dense(NUM_CLASSES, activation='softmax')(x)
model = models.Model(base_model.input, output)
model.summary()

In [None]:
# Train images/labels first, then test images/labels (test_images was missing before).
print(tr_images.shape, tr_labels.shape, test_images.shape, test_labels.shape)

## 모델 컴파일부터 시각화까지

In [None]:
print(model.input_shape)  # (None, height, width, channels) expected by the model
print(test_images.shape)  # check that the trailing dimensions match the model input


In [None]:
# Compile the model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

model.compile(
    # The labels are integer class ids produced by LabelEncoder, not one-hot
    # vectors, so the sparse variant of the loss is required.
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

early_stopping_cb = EarlyStopping(patience=10, verbose=1, restore_best_weights=True)
reduce_lr_on_plateau_cb = ReduceLROnPlateau(patience=5, factor=0.5, verbose=1)

# Batch generators: they resize every image to the size the model was built
# with. Only the training data is shuffled.
tr_seq = Car10Sequence(tr_images, tr_labels, shuffle=True)
test_seq = Car10Sequence(test_images, test_labels, shuffle=False)

# Train (the training data must be passed explicitly as the first argument)
history = model.fit(
    tr_seq,
    epochs=30,
    validation_data=test_seq,
    callbacks=[early_stopping_cb, reduce_lr_on_plateau_cb])

# Plot the training history
pd.DataFrame(history.history).plot()
plt.show()


In [None]:
# Evaluation
# Build the test batches here so this cell does not rely on a `test_seq`
# variable created elsewhere; no shuffling is needed for evaluation.
test_seq = Car10Sequence(test_images, test_labels, shuffle=False)
loss, acc = model.evaluate(test_seq)
print(f'loss:{loss:.4f}, acc:{acc:.4f}')