<a href="https://colab.research.google.com/github/park-hoyeon/park-hoyeon.github.io/blob/master/skt_7_01_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from tensorflow.keras.datasets import fashion_mnist
# Load the Fashion-MNIST train/test splits and print each array's shape.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Preview images: show 9 randomly chosen training samples with their class names.
# class_names is indexed by the integer label stored in y_train.
class_names = ['T-shirt/top', 'Trouser', 'Pullover',
               'Dress', 'Coat', 'Sandal',
               'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
# Random indices into X_train; no seed is set here, so the preview changes per run.
samples = np.random.randint(len(X_train), size=9)
plt.figure(figsize = (8, 6))
for i, idx in enumerate(samples):
  plt.subplot(3, 3, i+1)  # 3x3 grid; subplot positions are 1-based
  plt.xticks([])
  plt.yticks([])
  plt.imshow(X_train[idx], cmap = 'gray')
  plt.title(class_names[y_train[idx]])
plt.show()

In [None]:
# Split a validation set off the training data: 30% is held out and
# random_state is fixed so the split is the same on every run.
# NOTE: X_train / y_train are rebound, so re-running this cell splits the
# already-reduced training set again.
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size = 0.3, random_state = 42)
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)

In [None]:
# Normalize the image data: cast to float32 and divide by 255
# (presumably maps 0-255 pixel values into [0, 1]; the printed max/min confirms it).
# NOTE: the arrays are rebound, so running this cell twice divides twice.
import numpy as np
X_train = X_train.astype('float32') / 255.
X_val = X_val.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.
print(np.max(X_train), np.min(X_train))

In [None]:
# Check the shape of each data split
print('X_train : ', X_train.shape)
print('X_val : ', X_val.shape)
print('X_test : ', X_test.shape)

In [None]:
# Add a channel axis: indexing with [..., tf.newaxis] appends a size-1 trailing
# dimension, matching the (28, 28, 1) input shape declared by the CNN below.
import tensorflow as tf
X_train = X_train[..., tf.newaxis]
X_val = X_val[..., tf.newaxis]
X_test = X_test[..., tf.newaxis]
print('X_train : ', X_train.shape)
print('X_val : ', X_val.shape)
print('X_test : ', X_test.shape)

In [None]:
# 모델 만들기
from tensorflow import keras
from tensorflow.keras import layers
def build_model():
  """Build the baseline CNN for 28x28 single-channel images.

  Three Conv2D (3x3, 'same' padding, ReLU) + 2x2 max-pooling stages with
  16, 32 and 64 filters, followed by Flatten, Dense(64, ReLU) and a 10-way
  softmax output layer.

  Returns:
    An uncompiled keras.Sequential model.
  """
  conv_opts = dict(kernel_size=3, strides=(1, 1), padding='same',
                   activation='relu')
  pool_opts = dict(pool_size=(2, 2), strides=2)
  return keras.Sequential([
      layers.Conv2D(filters=16, input_shape=(28, 28, 1), **conv_opts),
      # The 2D layers act on the height/width axes; the channel axis is kept,
      # so the same layer types are also used for 3-channel images later on.
      layers.MaxPool2D(**pool_opts),
      layers.Conv2D(filters=32, **conv_opts),
      layers.MaxPool2D(**pool_opts),
      layers.Conv2D(filters=64, **conv_opts),
      layers.MaxPool2D(**pool_opts),
      layers.Flatten(),
      layers.Dense(64, activation='relu'),
      layers.Dense(10, activation='softmax'),
  ])

model = build_model()
model.summary()

In [None]:
# Compile the model. Training a multi-class classifier on one-hot targets uses
# the 'categorical_crossentropy' loss.
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer labels here so this cell does not depend on a
# cell that appears later in the notebook; otherwise Restart & Run All fails
# with a NameError on y_train_oh / y_val_oh.
y_train_oh = to_categorical(y_train, num_classes=10)
y_val_oh = to_categorical(y_val, num_classes=10)

adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['acc'])
EPOCHS = 30
BATCH_SIZE = 64
# `history` keeps the per-epoch loss/accuracy used by the learning-curve plot.
history = model.fit(X_train, y_train_oh,
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    validation_data=(X_val, y_val_oh),
                    verbose=1)

In [None]:
# One-hot encode the integer target labels.
# With no num_classes argument, to_categorical infers the class count from the
# largest label present in each array.
from tensorflow.keras.utils import to_categorical

y_train_oh = to_categorical(y_train)
y_val_oh = to_categorical(y_val)
y_test_oh = to_categorical(y_test)

print('y_train_oh : ', y_train_oh.shape)
print('y_val_oh : ', y_val_oh.shape)
print('y_test_oh : ', y_test_oh.shape)

In [None]:
# 학습 곡선
import matplotlib.pyplot as plt
import pandas as pd
def plot_history(history):
  """Plot training/validation loss and accuracy curves side by side.

  Args:
    history: the object returned by model.fit. It must expose `.epoch` and a
      `.history` dict containing 'loss', 'val_loss', 'acc' and 'val_acc'.
  """
  curves = pd.DataFrame(history.history)
  curves['epoch'] = history.epoch
  # (y-axis label, train column, train legend, validation column, validation legend)
  panels = (
      ('Loss', 'loss', 'Train Loss', 'val_loss', 'Val Loss'),
      ('Accuracy', 'acc', 'Train Accuracy', 'val_acc', 'Val Accuracy'),
  )
  plt.figure(figsize=(16,8))
  for position, (ylabel, train_col, train_lbl, val_col, val_lbl) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.plot(curves['epoch'], curves[train_col], label=train_lbl)
    plt.plot(curves['epoch'], curves[val_col], label=val_lbl)
    plt.legend()
  plt.show()

plot_history(history)

왼쪽 그래프는 좋지 않은 결과.

# 드롭아웃(Dropout)

In [None]:
# 드롭아웃 레이어를 추가한 모델 구성
from tensorflow import keras
from tensorflow.keras import layers
def build_dropout_model():
  """Build the 28x28x1 CNN with a Dropout layer after every pooling stage.

  Stages use 16/32/64 filters with dropout rates 0.2/0.2/0.5, followed by
  Flatten, Dense(64, ReLU) and a 10-way softmax output.

  Returns:
    An uncompiled keras.Sequential model.
  """
  model = keras.Sequential()
  # (number of filters, dropout rate) for each convolution stage
  stages = ((16, 0.2), (32, 0.2), (64, 0.5))
  for stage_no, (n_filters, drop_rate) in enumerate(stages):
    # Only the very first layer declares the input shape.
    first_layer_args = {'input_shape': (28, 28, 1)} if stage_no == 0 else {}
    model.add(layers.Conv2D(filters=n_filters, kernel_size=3, strides=(1, 1),
                            padding='same', activation='relu',
                            **first_layer_args))
    model.add(layers.MaxPool2D(pool_size=(2, 2), strides=2))
    model.add(layers.Dropout(drop_rate))
  model.add(layers.Flatten())
  model.add(layers.Dense(64, activation='relu'))
  model.add(layers.Dense(10, activation='softmax'))
  return model

model = build_dropout_model()
model.summary()

In [None]:
# The dropout model built in the previous cell has not been compiled or trained
# yet, so `history` would still hold the curves of the earlier no-dropout model.
# Compile and fit it first so the plot (and the prediction cell that follows)
# reflect the dropout model.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])
history = model.fit(X_train, y_train_oh,
                    epochs=EPOCHS,
                    batch_size=BATCH_SIZE,
                    validation_data=(X_val, y_val_oh),
                    verbose=1)

# Learning curves
plot_history(history)

In [None]:
# Predict: argmax over axis 1 picks the highest-scoring class index for each
# test image from the model's 10-way softmax output.
y_pred = model.predict(X_test)
y_pred_argmax = np.argmax(y_pred, axis=1)
y_pred_argmax[:10]


In [None]:
# 평가 지표 계산
# A multi-line `from ... import` list must be wrapped in parentheses; the
# original trailing-comma line break was a SyntaxError.
from sklearn.metrics import (accuracy_score, precision_score,
                             recall_score, f1_score)
def print_score(y_test, y_pred):
  """Print accuracy and macro-averaged precision, recall and F1.

  Args:
    y_test: true class labels (integer labels, not one-hot vectors).
    y_pred: predicted class labels in the same form as y_test.
  """
  print('accuracy: %.3f' % (accuracy_score(y_test, y_pred)))
  # average='macro' computes each metric per class and takes the unweighted mean.
  print('precision: %.3f' % (precision_score(y_test, y_pred, average='macro')))
  print('recall_score: %.3f' % (recall_score(y_test, y_pred, average='macro')))
  print('f1_score: %.3f' % (f1_score(y_test, y_pred, average='macro')))

In [None]:
from tensorflow.keras.datasets import fashion_mnist
# Reload the raw Fashion-MNIST splits; this rebinds X_train / y_train /
# X_test / y_test, so the preprocessing cells below must be run again.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Preview images: 9 random training samples in a 3x3 grid, titled with the
# class name looked up from the integer label.
class_names = ['T-shirt/top', 'Trouser', 'Pullover',
               'Dress', 'Coat', 'Sandal',
               'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
samples = np.random.randint(len(X_train), size=9)
plt.figure(figsize = (8, 6))
for i, idx in enumerate(samples):
  plt.subplot(3, 3, i+1)  # subplot positions are 1-based
  plt.xticks([])
  plt.yticks([])
  plt.imshow(X_train[idx], cmap = 'gray')
  plt.title(class_names[y_train[idx]])
plt.show()

In [None]:
# Split a validation set off the training data (30% held out, fixed seed).
# NOTE: re-running this cell splits the already-reduced training set again.
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size = 0.3, random_state = 42)
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)

In [None]:
# Normalize the image data: cast to float32 and divide by 255.
# NOTE: running this cell twice divides the values twice.
import numpy as np
X_train = X_train.astype('float32') / 255.
X_val = X_val.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.
print(np.max(X_train), np.min(X_train))

In [None]:
# Add a trailing size-1 channel axis so the arrays match the (28, 28, 1)
# input shape declared by the model.
import tensorflow as tf
X_train = X_train[..., tf.newaxis]
X_val = X_val[..., tf.newaxis]
X_test = X_test[..., tf.newaxis]
print('X_train : ', X_train.shape)
print('X_val : ', X_val.shape)
print('X_test : ', X_test.shape)

In [None]:
# One-hot encode the integer target labels for use with
# the 'categorical_crossentropy' loss.
from tensorflow.keras.utils import to_categorical

y_train_oh = to_categorical(y_train)
y_val_oh = to_categorical(y_val)
y_test_oh = to_categorical(y_test)

print('y_train_oh : ', y_train_oh.shape)
print('y_val_oh : ', y_val_oh.shape)
print('y_test_oh : ', y_test_oh.shape)

In [None]:
# 드롭아웃 레이어를 추가한 모델 구성
from tensorflow import keras
from tensorflow.keras import layers
def build_dropout_model():
  """Build the Fashion-MNIST CNN regularized with Dropout.

  Each of the three Conv2D + MaxPool2D stages (16, 32, 64 filters) is followed
  by Dropout (0.2, 0.2, 0.5). The classifier head is Flatten, Dense(64, ReLU)
  and a 10-way softmax layer.

  Returns:
    An uncompiled keras.Sequential model.
  """
  conv_opts = dict(kernel_size=3, strides=(1, 1), padding='same',
                   activation='relu')
  pool_opts = dict(pool_size=(2, 2), strides=2)
  return keras.Sequential([
      layers.Conv2D(filters=16, input_shape=(28, 28, 1), **conv_opts),
      layers.MaxPool2D(**pool_opts),
      layers.Dropout(0.2),
      layers.Conv2D(filters=32, **conv_opts),
      layers.MaxPool2D(**pool_opts),
      layers.Dropout(0.2),
      layers.Conv2D(filters=64, **conv_opts),
      layers.MaxPool2D(**pool_opts),
      layers.Dropout(0.5),
      layers.Flatten(),
      layers.Dense(64, activation='relu'),
      layers.Dense(10, activation='softmax'),
  ])

model = build_dropout_model()
model.summary()

In [None]:
# Compile and train.
# The dropout model must already have been built (the build_dropout_model()
# cell) before this cell is run.
model.compile(optimizer='adam',
        loss = 'categorical_crossentropy',
        metrics=['acc'])

EPOCHS = 30
BATCH_SIZE = 64
# `history` keeps the per-epoch metrics used by the learning-curve plot.
history = model.fit(X_train, y_train_oh,
           epochs = EPOCHS,
           batch_size = BATCH_SIZE,
           validation_data = (X_val, y_val_oh),
           verbose = 1)

In [None]:
# Learning curves of the run that was just trained
plot_history(history)

In [None]:
# 학습 곡선
import matplotlib.pyplot as plt
import pandas as pd
def plot_history(history):
  """Draw loss (left) and accuracy (right) curves for train and validation.

  Args:
    history: the object returned by model.fit. It must expose `.epoch` and a
      `.history` dict containing 'loss', 'val_loss', 'acc' and 'val_acc'.
  """
  curves = pd.DataFrame(history.history)
  curves['epoch'] = history.epoch
  # (y-axis label, train column, train legend, validation column, validation legend)
  panels = (
      ('Loss', 'loss', 'Train Loss', 'val_loss', 'Val Loss'),
      ('Accuracy', 'acc', 'Train Accuracy', 'val_acc', 'Val Accuracy'),
  )
  plt.figure(figsize=(16,8))
  for position, (ylabel, train_col, train_lbl, val_col, val_lbl) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.plot(curves['epoch'], curves[train_col], label=train_lbl)
    plt.plot(curves['epoch'], curves[val_col], label=val_lbl)
    plt.legend()
  plt.show()

plot_history(history)

In [None]:
# Predict: take the argmax over axis 1 of the softmax output to get one
# predicted class index per test image, then show the first ten.
y_pred = model.predict(X_test)
y_pred_argmax = np.argmax(y_pred, axis=1)
y_pred_argmax[:10]

In [None]:
# 평가지표계산
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
def print_score(y_test, y_pred):
  """Print accuracy plus macro-averaged precision, recall and F1.

  Args:
    y_test: true class labels (integer labels, not one-hot vectors).
    y_pred: predicted class labels in the same form as y_test.
  """
  print('accuracy: %.3f' % accuracy_score(y_test, y_pred))
  # The remaining metrics share the same call shape, so drive them from a table.
  macro_metrics = (
      ('precision: %.3f', precision_score),
      ('recall_score: %.3f', recall_score),
      ('f1_score: %.3f', f1_score),
  )
  for template, metric_fn in macro_metrics:
    print(template % metric_fn(y_test, y_pred, average='macro'))

print_score(y_test, y_pred_argmax)

# 컬러 이미지의 분류



In [None]:
from tensorflow.keras.datasets import cifar10
# Load the CIFAR-10 train/test splits and print each array's shape.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

In [None]:
# Display the data: 10 random training images in a 2x5 grid.
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
samples = np.random.randint(len(X_train), size=10)
plt.figure(figsize=(12, 5))
for i, idx in enumerate(samples):
  plt.subplot(2, 5, i+1, xticks=[], yticks=[])
  # Each label is itself indexable here, hence the extra [0] to get the class id.
  plt.title((class_names[y_train[idx][0]]))
  plt.imshow(X_train[idx])
plt.show()

In [None]:
# Normalization (min-max scaling): cast to float32 and divide by 255.
# NOTE: running this cell twice divides the values twice.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

In [None]:
# Split off the validation data: 30% of the training set, fixed random_state.
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(
           X_train, y_train, test_size = 0.3,  random_state = 42)
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

In [None]:
# Change the label shape: flatten the label arrays from 2-D to 1-D.
y_train = y_train.reshape(-1)
y_val = y_val.reshape(-1)
y_test = y_test.reshape(-1)
print(y_train.shape)
print(y_val.shape)
print(y_test.shape)

In [None]:
# One-hot encoding with an explicit depth of 10 classes.
# tf.one_hot returns tensors; the next cell converts them to NumPy arrays.
import tensorflow as tf
y_train_oh = tf.one_hot(y_train, depth=10)
y_val_oh = tf.one_hot(y_val, depth=10)
y_test_oh = tf.one_hot(y_test, depth=10)
print(y_train_oh.shape)
y_train_oh[:5]

In [None]:
# Convert the one-hot tensors to NumPy arrays.
# NOTE: not re-runnable as-is; after the first run the names hold arrays,
# which have no .numpy() method.
y_train_oh = y_train_oh.numpy()
y_val_oh = y_val_oh.numpy()
y_test_oh = y_test_oh.numpy()
print(y_train_oh.shape)
print(y_val_oh.shape)
print(y_test_oh.shape)

In [None]:
# 모델 만들기
from tensorflow.keras import layers
def build_model():
  """Build the CIFAR-10 CNN for 32x32 RGB images.

  Four Conv2D (3x3, 'same', ReLU) -> MaxPooling2D(2) -> Dropout(0.3) stages
  with 32, 64, 256 and 256 filters, then Flatten, Dense(256, ReLU) and a
  10-way softmax output.

  Returns:
    An uncompiled keras.Sequential model.
  """
  conv_opts = dict(kernel_size=3, padding='same', activation='relu')
  return keras.Sequential([
      layers.Conv2D(32, input_shape=(32, 32, 3), **conv_opts),
      layers.MaxPooling2D(2),
      layers.Dropout(0.3),
      layers.Conv2D(64, **conv_opts),
      layers.MaxPooling2D(2),
      layers.Dropout(0.3),
      layers.Conv2D(256, **conv_opts),
      layers.MaxPooling2D(2),
      layers.Dropout(0.3),
      layers.Conv2D(256, **conv_opts),
      layers.MaxPooling2D(2),
      layers.Dropout(0.3),
      layers.Flatten(),
      layers.Dense(256, activation='relu'),
      layers.Dense(10, activation='softmax'),
  ])

model = build_model()
model.summary()

In [None]:
# Train the model: Adam with learning rate 1e-4, categorical cross-entropy on
# the one-hot targets, accuracy reported under the key 'acc'.
adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=adam,
         loss = 'categorical_crossentropy',
        metrics=['acc'])
EPOCHS = 100
BATCH_SIZE = 256
history = model.fit(X_train, y_train_oh,
          epochs = EPOCHS,
          batch_size = BATCH_SIZE,
          validation_data = (X_val, y_val_oh))

In [None]:
# Learning curves (plot_history is defined in an earlier cell)
plot_history(history)

In [None]:
# Predict: argmax over axis 1 turns the softmax output into class indices.
y_pred = model.predict(X_test)
y_pred_argmax = np.argmax(y_pred, axis=1)

In [None]:
# 혼동행렬 시각화
from sklearn.metrics import confusion_matrix
import seaborn as sns
def plot_matrix(y_test, y_pred):
  """Show the confusion matrix of true vs. predicted labels as a heatmap.

  Args:
    y_test: true class labels.
    y_pred: predicted class labels.
  """
  plt.figure(figsize=(10, 8))
  counts = confusion_matrix(y_test, y_pred)
  # annot=True with fmt='d' writes the integer count inside every cell.
  heatmap_ax = sns.heatmap(counts, annot=True, fmt='d', cmap='Blues')
  heatmap_ax.set_xlabel('predicted label', fontsize=15)
  heatmap_ax.set_ylabel('true label', fontsize=15)
  plt.show()

plot_matrix(y_test, y_pred_argmax)

# 콜백함수 적용

In [None]:
import tensorflow as tf # Import tensorflow as tf
from tensorflow import keras # Import keras

# Build a fresh (untrained) model and compile it for the callback example.
model = build_model()
adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=adam,
         loss = 'categorical_crossentropy',
         metrics=['acc'])

In [None]:
from tensorflow.keras import callbacks
# Checkpoint callback configured to store weights only, keeping the best
# epoch as measured by the monitored validation loss.
checkpoint_path = 'temp/cifar_10.weights.h5'
checkpoint = callbacks.ModelCheckpoint(checkpoint_path,
               save_weights_only=True,
               save_best_only=True,
               monitor='val_loss')

In [None]:
# Train the model with the checkpoint callback attached.
EPOCHS = 100
BATCH_SIZE = 256
history = model.fit(X_train, y_train_oh,
           epochs = EPOCHS,
           batch_size = BATCH_SIZE,
           validation_data = (X_val, y_val_oh),
           callbacks=[checkpoint])

# 위의 오류 해결법


In [None]:
# Load the CIFAR-10 dataset again from scratch; this rebinds X_train / y_train /
# X_test / y_test, so the preprocessing cells below must be re-run.
from tensorflow.keras.datasets import cifar10
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

In [None]:
# Normalize image data: cast to float32 and divide by 255.
# NOTE: running this cell twice divides the values twice.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

In [None]:
# Split validation data: hold out 30% of the training set with a fixed seed.
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(
           X_train, y_train, test_size = 0.3,  random_state = 42)
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

In [None]:
# Reshape labels: flatten each label array to 1-D.
y_train = y_train.reshape(-1)
y_val = y_val.reshape(-1)
y_test = y_test.reshape(-1)
print(y_train.shape)
print(y_val.shape)
print(y_test.shape)

In [None]:
# One-hot encode labels. to_categorical is called on the label arrays directly,
# so no separate .numpy() conversion step is needed in this version.
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

y_train_oh = to_categorical(y_train)
y_val_oh = to_categorical(y_val)
y_test_oh = to_categorical(y_test)

print(y_train_oh.shape)
y_train_oh[:5]

In [None]:
# Build the model
from tensorflow import keras
from tensorflow.keras import layers

def build_model():
  """Build the CIFAR-10 CNN (32x32x3 input, 10-class softmax output).

  Stacks four Conv2D (3x3, 'same', ReLU) -> MaxPooling2D(2) -> Dropout(0.3)
  stages with 32, 64, 256 and 256 filters, then Flatten and Dense(256, ReLU).

  Returns:
    An uncompiled keras.Sequential model.
  """
  model = keras.Sequential()
  for stage_no, n_filters in enumerate((32, 64, 256, 256)):
    # Only the very first layer declares the input shape.
    first_layer_args = {'input_shape': (32, 32, 3)} if stage_no == 0 else {}
    model.add(layers.Conv2D(n_filters, 3, padding='same',
                            activation='relu', **first_layer_args))
    model.add(layers.MaxPooling2D(2))
    model.add(layers.Dropout(0.3))
  model.add(layers.Flatten())
  model.add(layers.Dense(256, activation='relu'))
  model.add(layers.Dense(10, activation='softmax'))
  return model

model = build_model()
model.summary()

In [None]:
# Define the ModelCheckpoint callback: weights only, best epoch only,
# judged by the monitored validation loss.
from tensorflow.keras import callbacks
checkpoint_path = 'temp/cifar_10.weights.h5'
checkpoint = callbacks.ModelCheckpoint(checkpoint_path,
               save_weights_only=True,
               save_best_only=True,
               monitor='val_loss')

In [None]:
# Compile and train the model with the checkpoint callback attached.
adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=adam,
         loss = 'categorical_crossentropy',
        metrics=['acc'])
EPOCHS = 100
BATCH_SIZE = 256
history = model.fit(X_train, y_train_oh,
          epochs = EPOCHS,
          batch_size = BATCH_SIZE,
          validation_data = (X_val, y_val_oh),
          callbacks=[checkpoint])

In [None]:
# Restore state: load the best weights saved to the checkpoint file back into the model.
model.load_weights(checkpoint_path)


# 배치 정규화
- 과대 적합을 방지하는 효과

In [None]:
# 모델 구성
def build_model_bach_normalization():
    """Build the CIFAR-10 CNN variant that uses batch normalization.

    Each of the four stages (32, 64, 256, 256 filters) is
    Conv2D (3x3, 'same', no activation) -> BatchNormalization -> ReLU ->
    MaxPooling2D(2). The head is Flatten, Dense(256, ReLU) and a 10-way softmax.

    Returns:
        An uncompiled keras.Sequential model.
    """
    model = keras.Sequential()

    # Per-stage Conv2D arguments; only the first stage declares the input shape.
    stage_specs = (
        dict(filters=32, input_shape=(32, 32, 3)),
        dict(filters=64),
        dict(filters=256),
        dict(filters=256),
    )
    for spec in stage_specs:
        model.add(layers.Conv2D(kernel_size=3, padding='same', **spec))
        # Normalization sits between the convolution and its activation.
        model.add(layers.BatchNormalization())
        model.add(layers.Activation('relu'))
        model.add(layers.MaxPooling2D(2))

    for head_layer in (layers.Flatten(),
                       layers.Dense(256, activation='relu'),
                       layers.Dense(10, activation='softmax')):
        model.add(head_layer)
    return model

In [None]:
# Instantiate the batch-normalization model and print its layer summary.
model = build_model_bach_normalization()
model.summary()

In [None]:
# Compile the model for training: Adam (learning rate 1e-4),
# categorical cross-entropy, accuracy reported under the key 'acc'.
adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=adam,
              loss = 'categorical_crossentropy',
              metrics=['acc'])

In [None]:
# Train the batch-normalization model (no callbacks are attached here).
EPOCHS = 100
BATCH_SIZE = 256
history = model.fit(X_train, y_train_oh,
                    epochs = EPOCHS,
                    batch_size = BATCH_SIZE,
                    validation_data = (X_val, y_val_oh))

In [None]:
# Evaluate the model on the test set; evaluate returns the loss followed by
# the compiled metric ('acc').
loss, acc = model.evaluate(X_test, y_test_oh)
print('loss : ', loss)
print('acc : ', acc)

# 데이터 증강
- 기존의 데이터를 변형해서 데이터의 양을 늘리는 것


# 전이학습 (Transfer Learning)
- 대용량의 데이터 세트에서 학습 완료된 모델(VGG16, ResNet50 등)을 이용해서 우리가 해결하려는 문제에 적용하기 위해 미세 조정(fine tuning)하는 것

In [None]:
from tensorflow.keras.datasets import cifar10
# Reload the raw CIFAR-10 splits for the transfer-learning section.
# NOTE(review): this rebinds X_train / y_train / X_test / y_test to freshly
# loaded arrays, so the scaling, validation split and one-hot encoding from
# earlier cells no longer apply to these names — confirm they are redone
# before training.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

In [None]:
# Prepare the pre-trained model: VGG16 with ImageNet weights and without its
# top classifier layers (include_top=False), sized for 32x32x3 inputs.
from tensorflow.keras.applications import VGG16
vgg16 = VGG16(weights = 'imagenet',
             input_shape = (32, 32, 3),
             include_top = False)
# Freeze the base so its weights are not updated during training.
vgg16.trainable=False

In [None]:
# Build the classification head on top of the frozen VGG16 base:
# Flatten -> Dense(256) -> Dropout -> Dense(64) -> Dropout -> 10-way softmax.
model = keras.Sequential()
model.add(vgg16)
model.add(keras.layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(keras.layers.Dense(10, activation = 'softmax'))

In [None]:
# Compile the transfer-learning model (Adam, learning rate 1e-3) and print its summary.
# The loss is 'categorical_crossentropy', so the targets must be one-hot encoded.
model.compile(optimizer = keras.optimizers.Adam(0.001),
       loss = 'categorical_crossentropy',
       metrics = ['acc'])
model.summary()

In [None]:
# Create the callback: learning-rate reduction keyed on validation accuracy,
# configured with patience=3, factor=0.8 and a floor of min_lr=1e-4.
from tensorflow.keras.callbacks import ReduceLROnPlateau
lrr = ReduceLROnPlateau(monitor='val_acc',
             patience=3,
             verbose=1,
             factor=0.8,
             min_lr=0.0001)

# 바운딩 박스 근사


In [None]:
# 바운딩 박스 정보 추출
import glob
import xml.etree.ElementTree as ET
import pandas as pd # Import pandas

def xml_to_csv(path):
  """Collect bounding-box annotations from every XML file in a directory.

  Each <object> element yields one row. The tag names read here (filename,
  size/width, size/height, object/name, object/bndbox/xmin...) look like the
  Pascal VOC layout — assumed from the tag names only.

  Args:
    path: directory containing the '*.xml' annotation files.

  Returns:
    pandas.DataFrame with columns filename, width, height, class, xmin, ymin,
    xmax, ymax. It is empty (but keeps the columns) when no file or object
    is found.

  Raises:
    AttributeError: if a file with objects lacks one of the expected tags.
    ValueError: if a numeric tag does not contain a number.
  """
  def _as_int(node):
    # int(float(...)) accepts both "12" and "12.0".
    return int(float(node.text))

  xml_list = []
  # Sort so the row order is deterministic instead of filesystem-dependent.
  for xml_file in sorted(glob.glob(path + '/*.xml')):
    root = ET.parse(xml_file).getroot()
    members = root.findall('object')
    if not members:
      continue  # nothing to record for files without annotated objects
    filename = root.find('filename').text
    size = root.find('size')
    # Look the size fields up by tag name rather than by child position so
    # the result does not depend on the order of <width>/<height> in the file.
    width = _as_int(size.find('width'))
    height = _as_int(size.find('height'))
    for member in members:
      bbx = member.find('bndbox')
      xml_list.append((filename, width, height,
                       member.find('name').text,
                       _as_int(bbx.find('xmin')),
                       _as_int(bbx.find('ymin')),
                       _as_int(bbx.find('xmax')),
                       _as_int(bbx.find('ymax'))))

  column_name = ['filename', 'width', 'height',
           'class', 'xmin', 'ymin', 'xmax', 'ymax']
  xml_df = pd.DataFrame(xml_list, columns=column_name)
  return xml_df

In [None]:
import os
import pandas as pd

# Define IMAGE_PATH - **Replace '/content/your_image_directory' with the actual path to your XML files**
# The value below is a placeholder; with no XML files found the CSV contains only the header row.
IMAGE_PATH = '/content/your_image_directory'

# Parse every annotation file and write the combined table next to them.
xml_df = xml_to_csv(IMAGE_PATH)
csv_path = os.path.join(IMAGE_PATH, 'labels_cats.csv')
# NOTE(review): index=None appears to be treated as falsy (no index column is
# written); index=False is the documented spelling — confirm.
xml_df.to_csv(csv_path, index=None)
print('csv path:', csv_path)

In [None]:
import os

# Original file name of the uploaded archive
original_path = "/content/(20')CCTV학습데이터셋(35,200장).egg"

# Rename it (presumably so the path is easier to pass to the shell command in
# the next cell — the original name contains a quote, parentheses and a comma).
new_path = "/content/cctv_dataset.egg"
# Only rename while the source still exists, so re-running this cell after a
# successful rename does not raise FileNotFoundError.
if os.path.exists(original_path):
    os.rename(original_path, new_path)
elif not os.path.exists(new_path):
    # Neither file is present: keep failing loudly, as the bare rename did.
    raise FileNotFoundError(original_path)


In [None]:
# 1. Try unzip first
# NOTE(review): the archive has an .egg extension; confirm that unzip can
# actually read this format before relying on the extracted folder.
!unzip -q /content/cctv_dataset.egg -d /content/cctv_data


In [None]:
import os

# Inspect the extracted folder structure: print every directory under
# /content/cctv_data followed by up to ten of its files.
for root, dirs, files in os.walk("/content/cctv_data"):
    print(f"📁 {root}")
    for f in files[:10]:  # show only the first 10
        print("  └─", f)
    print()
