In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
os.environ["OPENCV_IO_MAX_IMAGE_PIXELS"] = pow(2,40).__str__() 
import tensorflow as tf
import cv2
from glob import glob
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.models import load_model, save_model
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import load_img
from sklearn.metrics import f1_score
from PIL import Image
Image.MAX_IMAGE_PIXELS = None

In [None]:
# Load the competition's training metadata table; later cells read its
# "image_id", "label" and "is_tma" columns.
df = pd.read_csv("/kaggle/input/UBC-OCEAN/train.csv")
# Preview the first rows.
df.head()

In [None]:
# Rows with is_tma == True cannot be fetched and read as images here, so they
# are removed before the table is used.
tma_rows = df.index[df["is_tma"] == True]
df.drop(index=tma_rows, inplace=True)

In [None]:
# Summarise column dtypes and non-null counts of the filtered table.
df.info()

In [None]:
# Number of rows per label value (class balance check).
df["label"].value_counts()

In [None]:
# Bar chart of image counts: the overall total followed by one bar per label.
fig, ax = plt.subplots()

subtypes = ["HGSC", "EC", "CC", "LGSC", "MC"]
# One value_counts() pass replaces a separate DataFrame filter per class.
label_counts = df["label"].value_counts()

categories = ["FULL LABEL"] + subtypes
# .get(..., 0) keeps a class that is absent from the table at zero instead of raising.
counts = [len(df)] + [int(label_counts.get(name, 0)) for name in subtypes]

bar_colors = ['pink', 'red', 'blue', 'red', 'orange', 'gray']

ax.bar(categories, counts, color=bar_colors)

# The previous axis label, title and legend were left over from a "fruit supply"
# example and described colours rather than the data being plotted.
ax.set_xlabel('label')
ax.set_ylabel('number of images')
ax.set_title('Image count: total and per label')

plt.show()

In [None]:
# Bar chart of image counts per label (without the overall total).
fig, ax = plt.subplots()

subtypes = ["HGSC", "EC", "CC", "LGSC", "MC"]
# One value_counts() pass replaces a separate DataFrame filter per class;
# .get(..., 0) keeps a class that is absent from the table at zero.
label_counts = df["label"].value_counts()
counts = [int(label_counts.get(name, 0)) for name in subtypes]

bar_colors = ['red', 'blue', 'red', 'orange', 'gray']

ax.bar(subtypes, counts, color=bar_colors)

# The previous axis label, title and legend were left over from a "fruit supply"
# example and described colours rather than the data being plotted.
ax.set_xlabel('label')
ax.set_ylabel('number of images')
ax.set_title('Image count per label')

plt.show()


In [None]:
def label_extract(df, label_name = None):
    """Return the ``image_id`` values of the rows whose ``label`` equals ``label_name``.

    Args:
        df: DataFrame with ``label`` and ``image_id`` columns.
        label_name: Label value to select.

    Returns:
        The matching ``image_id`` values (``Series.values``), in row order.
        Empty when no row carries ``label_name``.
    """
    # Select with the boolean mask directly. Going through the index labels of
    # the matching rows (and back through .loc) also returns non-matching rows
    # whenever the index contains duplicate labels.
    matches = df["label"] == label_name
    return df.loc[matches, "image_id"].values

In [None]:
# def load_img(img_path = None):
#     img_list = []
#     for img_number in img_path:
#         img = cv2.imread(img_number)
#         img = cv2.resize(img, (640, 640))
#         img_list.append(img)
#     return np.array(img_list)

In [None]:
def load_img(img_path = None):
    """Read one image with OpenCV, resize it to 640x640 and scale it to [0, 1].

    Args:
        img_path: Either a single path string or an iterable of path strings
            (for example a ``glob`` result). Only the first path is read.

    Returns:
        A float32 array holding the resized image divided by 255. The channel
        order is whatever ``cv2.imread`` produced (BGR for colour images).

    Raises:
        FileNotFoundError: If no path is given or the file cannot be read.
            Previously an empty list made the function return ``None`` and an
            unreadable file failed inside ``cv2.resize``.
    """
    if img_path is None:
        first_path = None
    elif isinstance(img_path, str):
        first_path = img_path
    else:
        first_path = next(iter(img_path), None)

    if first_path is None:
        raise FileNotFoundError("load_img: no image path was given")

    img = cv2.imread(first_path)
    if img is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        raise FileNotFoundError(f"load_img: could not read image: {first_path}")

    img = cv2.resize(img, (640, 640))
    # float32 halves the memory of the default float64 result.
    return img.astype(np.float32) / 255.0

In [None]:
# One array of image ids per class, unpacked in the fixed class order
# HGSC, EC, CC, LGSC, MC.
HGSC_id, EC_id, CC_id, LGSC_id, MC_id = (
    np.array(label_extract(df, subtype))
    for subtype in ("HGSC", "EC", "CC", "LGSC", "MC")
)

In [None]:
# All image ids in one array, grouped by class in the order HGSC, EC, CC, LGSC, MC.
img_id = np.concatenate([HGSC_id, EC_id, CC_id, LGSC_id, MC_id])

In [None]:
# Up to the first five ids of each class, used for the preview grid.
show_img_id = np.concatenate(
    [class_ids[:5] for class_ids in (HGSC_id, EC_id, CC_id, LGSC_id, MC_id)]
)

In [None]:
# Load the thumbnail of every id in img_id, keeping the same order as img_id.
load_thum_img = []
for UBC_img_id in img_id:
    thum_img = glob(f"/kaggle/input/UBC-OCEAN/train_thumbnails/{UBC_img_id}_thumbnail.png")
    if not thum_img:
        # Stop at the missing file; otherwise a None placeholder could end up
        # in the list and only fail later when the list is turned into an array.
        raise FileNotFoundError(f"thumbnail not found for image_id {UBC_img_id}")
    load_thum_img.append(load_img(thum_img))

In [None]:
# Load the thumbnails of the preview ids, keeping the same order as show_img_id.
load_show_img = []
for UBC_img_id in show_img_id:
    thum_img = glob(f"/kaggle/input/UBC-OCEAN/train_thumbnails/{UBC_img_id}_thumbnail.png")
    if not thum_img:
        # Stop at the missing file; otherwise a None placeholder could end up
        # in the list and only fail later when the images are drawn.
        raise FileNotFoundError(f"thumbnail not found for image_id {UBC_img_id}")
    load_show_img.append(load_img(thum_img))

In [None]:
# The images were read with cv2.imread, which yields BGR channel order, while
# imshow interprets the last axis as RGB. Reverse the channel axis for display
# only; the stored array is not modified.
plt.imshow(load_thum_img[0][..., ::-1])
plt.show()

In [None]:
Image_name = ["HGSC", "EC", "CC", "LGSC", "MC"]
def show_image(row, col, Image_name, show_img):
    """Draw a ``row`` x ``col`` grid of images, one class name per grid row.

    Args:
        row: Number of grid rows.
        col: Number of grid columns.
        Image_name: Sequence of names; entry ``i`` is used in the titles of row ``i``.
        show_img: Sequence of images, consumed in row-major order.

    Grid cells beyond ``len(show_img)`` are left blank instead of raising
    IndexError.
    """
    # squeeze=False keeps `ax` two-dimensional even when row or col equals 1,
    # so a single row/column grid no longer breaks the indexing.
    fig, ax = plt.subplots(row, col, figsize=(20, 20), squeeze=False)
    for index, axis in enumerate(ax.flat):
        axis.axis("off")
        if index >= len(show_img):
            continue
        axis.imshow(show_img[index], cmap="gray")
        # ax.flat is row-major, so index // col is the grid row of this cell.
        axis.set_title(f"{Image_name[index // col]} Image{index}")
    plt.show()
# The images come from cv2.imread (BGR); reverse the channel axis so that
# matplotlib, which expects RGB, shows the true colours.
show_image(5, 5, Image_name, [img[..., ::-1] for img in load_show_img])

In [None]:
# num_rows, num_cols = 5, 5
# Image_name = ["HGSC", "EC", "CC", "LGSC", "MC"]
# index = 0

# fig, axes = plt.subplots(num_rows, num_cols, figsize=(20, 20))

# for i in range(num_rows):
#     for j in range(num_cols):
#         axes[i, j].imshow(load_show_img[j], cmap='gray')
#         axes[i, j].set_title(f'{Image_name[i]} Image{index + 1}')
#         axes[i, j].axis('off')
# plt.show()

In [None]:
def label_generater(image_id, number):
    """Return a 1-D array holding ``number`` once for every element of ``image_id``."""
    n_items = len(image_id)
    return np.full(shape=n_items, fill_value=number)
# Integer class labels: HGSC=0, EC=1, CC=2, LGSC=3, MC=4, one array per class
# with the same length as that class's id array.
HGSC_label, EC_label, CC_label, LGSC_label, MC_label = (
    label_generater(class_ids, class_index)
    for class_index, class_ids in enumerate((HGSC_id, EC_id, CC_id, LGSC_id, MC_id))
)

In [None]:
# All labels in one array, in the class order HGSC, EC, CC, LGSC, MC.
image_label = np.concatenate([HGSC_label, EC_label, CC_label, LGSC_label, MC_label])

In [None]:
# Hold out 10% of the images for validation.
# random_state makes the split reproducible across kernel restarts.
# stratify keeps every class represented in the small validation set in the
# same proportion as in the full data (it raises ValueError if a class has
# fewer than two samples).
X_train, X_valid, Y_train, Y_valid = train_test_split(
    np.array(load_thum_img),
    np.array(image_label),
    test_size=0.1,
    shuffle=True,
    random_state=42,
    stratify=image_label,
)

In [None]:
# Small CNN classifier: three 3x3 convolutions (two of them followed by 2x2
# max pooling), then a dense head ending in a 5-way softmax.
# NOTE(review): with 'same' padding and only two 2x2 poolings the tensor
# entering Flatten is 160 x 160 x 64 = 1,638,400 values, so the first
# Dense(256) alone holds about 419M weights. Consider more pooling or a
# global pooling layer before the dense head.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(640, 640, 3), padding="same"),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding="same"),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding="same"),
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(rate=0.2),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(rate=0.2),
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(5, activation='softmax'),
])

In [None]:
checkpoint_filepath = '/kaggle/working/checkpoint.h5'
# Checkpoint callback evaluated at the end of every epoch.
# save_best_only=True is what makes `monitor`/`mode` take effect: the file is
# overwritten only when val_loss improves. Without it the file is rewritten
# every epoch and ends up holding the last epoch, not the best one.
# The variable name `checkpoing` (sic) is kept because the training cell
# refers to it.
checkpoing = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_filepath,
    monitor = 'val_loss',
    mode = 'auto',
    save_best_only = True,
    save_freq='epoch'
)

In [None]:
# Stop training once val_loss has not improved for 10 consecutive epochs and
# roll the model back to the weights of its best epoch.
early_stopping_options = dict(
    monitor='val_loss',
    min_delta=0,
    patience=10,
    verbose=0,
    mode='auto',
    restore_best_weights=True,
)
earlystopping = tf.keras.callbacks.EarlyStopping(**early_stopping_options)

In [None]:
# Integer class labels (0-4) are used as targets, hence the sparse variant of
# categorical cross-entropy.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# The epoch count is only an upper bound; the early-stopping callback is
# expected to end training long before it is reached.
training_callbacks = [checkpoing, earlystopping]
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_valid, Y_valid),
    epochs=10000,
    callbacks=training_callbacks,
)

훈련을 돌리고 픽셀을 255로 나누는 정규화를 수행함. 만약 이미지나 모델의 성능이 크게 바뀌면 이게 원인일 것이고, 이럴 경우 `load_img` 함수에서 픽셀을 정규화하는 코드를 제거하면 됨.

----------------------------------------------------------------------------------------------------------

In [None]:
# Persist the trained model to an HDF5 file in the current working directory.
model.save('my_model.h5')

In [None]:
# Training (blue) vs. validation (red) accuracy per epoch. The legend is
# needed because the two curves are otherwise indistinguishable to a reader.
plt.plot(history.history["accuracy"], "b", label="train")
plt.plot(history.history["val_accuracy"], "r", label="validation")
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.legend()
plt.show()

In [None]:
# Training (blue) vs. validation (red) loss per epoch. The legend is needed
# because the two curves are otherwise indistinguishable to a reader.
plt.plot(history.history["loss"], "b", label="train")
plt.plot(history.history["val_loss"], "r", label="validation")
plt.xlabel("epochs")
plt.ylabel("loss")
plt.legend()
plt.show()

In [None]:
# Class probabilities for the validation images; the cell displays the index
# of the most probable class for each image.
pred = model.predict(X_valid)
pred.argmax(axis=1)

In [None]:
# Micro-averaged F1 on the validation split.
# NOTE(review): for single-label multiclass predictions micro-F1 equals plain
# accuracy; use average='macro' if per-class balance is what matters.
valid_probabilities = model.predict(X_valid)
pred = valid_probabilities.argmax(axis=1)
f1 = f1_score(y_true=Y_valid, y_pred=pred, average='micro')
print("F1 Score:", f1)

In [None]:
# Preprocess one test thumbnail the same way as the training images:
# read with OpenCV, resize to 640x640, scale to [0, 1], add a batch axis.
path = test_img = "/kaggle/input/UBC-OCEAN/test_thumbnails/41_thumbnail.png"
read_img = cv2.imread(path)
if read_img is None:
    # cv2.imread returns None for a missing or undecodable file; fail here
    # with the path instead of with an opaque error inside cv2.resize.
    raise FileNotFoundError(f"could not read test image: {path}")
re_img = cv2.resize(read_img, (640, 640))
re_img = re_img / 255.0
# Shape (1, 640, 640, 3): a batch containing the single image.
re_img = np.expand_dims(re_img, axis=0)

In [None]:
# Predict the single test image; `pre` is a one-element array holding the
# index of the most probable class.
pred = model.predict(re_img)
pre = pred.argmax(axis=1)

In [None]:
# Display the predicted class index for the test image.
pre

In [None]:
# Class names indexed by the integer label used for training
# (HGSC=0, EC=1, CC=2, LGSC=3, MC=4).
class_names = ["HGSC", "EC", "CC", "LGSC", "MC"]
# `pre` is a one-element array (argmax over a batch of one image). Read its
# scalar value explicitly instead of relying on the truthiness of array
# comparisons, and let an unexpected index raise instead of silently
# falling through to "MC".
categor = class_names[int(pre[0])]

In [None]:
# Build a one-row submission for the single test image predicted above.
# NOTE(review): the id is hard-coded to match the test thumbnail path, and the
# column names should be checked against the competition's sample submission.
id_values = 41
prediction_values = [categor]

data = dict(Id=id_values, Prediction=prediction_values)
submission_df = pd.DataFrame(data=data)

submission_df.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# Display the submission table that was written to submission.csv.
submission_df