In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!unzip -uq '/content/drive/MyDrive/data/딥러닝/235842_작물 병해 분류 AI 경진대회_data.zip' -d './'

In [3]:
import numpy as np
import pandas as pd
from PIL import Image 
from tensorflow.keras.preprocessing.image import img_to_array
import matplotlib.pyplot as plt


df = pd.read_csv('train.csv')

In [4]:
df.head()

Unnamed: 0,uid,img_path,disease,disease_code
0,10000,train_imgs/10000.jpg,시설포도노균병,1
1,10001,train_imgs/10001.jpg,시설포도노균병,1
2,10002,train_imgs/10002.jpg,시설포도노균병반응,2
3,10003,train_imgs/10003.jpg,축과병,4
4,10004,train_imgs/10004.jpg,시설포도노균병,1


In [None]:
# 데이터 자체의 불균형 여부
# train test 나눈 뒤에도 불균형여부 
# SMOTE를 활용 불균형 해소 여부
# 각각의 train test 를 통하여 모델 제작

## 데이터셋 자체 불균형 여부

In [5]:
df.disease_code.value_counts() 

# 0 - 6 까지 질병종류가 줄어들고 있다

0    106
1     46
2     30
3     29
4     17
5     12
6     10
Name: disease_code, dtype: int64

## xy 로 나눔


In [149]:
x= []
y =[]
for i, j in zip(df.img_path, df.disease_code):
  image = Image.open(i)
  image = image.resize((255,255)) #괄호 두개 있지말자
  image = img_to_array(image) / 255
  x.append(image)
  y.append(j)

In [148]:
np.shape(x)

(250, 255, 255, 3)

## 불균형 분할

In [150]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, 
                                                  test_size = 0.3)

In [151]:
np.unique(y_train, return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6]), array([78, 30, 16, 22, 15,  8,  6]))

In [152]:
np.unique(y_test, return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6]), array([28, 16, 14,  7,  2,  4,  4]))

In [153]:
np.shape(x_train), np.shape(y_train), np.shape(x_test), np.shape(y_test)

((175, 255, 255, 3), (175,), (75, 255, 255, 3), (75,))

In [154]:
np.shape(np.array(x_test))

(75, 255, 255, 3)

In [157]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten
from tensorflow.keras.optimizers import Adam

model = Sequential()
# (255, 255, 3)의 데이터를 입력으로 받습니다. 
model.add(Conv2D(filters = 32, kernel_size = 3, padding = 'same', activation = 'relu', input_shape = (255, 255, 3)))
model.add(Conv2D(filters = 32, kernel_size = 3, padding = 'same', activation = 'relu'))
model.add(MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same'))

model.add(Conv2D(filters = 64, kernel_size = 3, padding = 'same', activation = 'relu'))
model.add(Conv2D(filters = 64, kernel_size = 3, padding = 'same', activation = 'relu'))
model.add(MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same'))

model.add(Conv2D(filters = 128, kernel_size = 3, padding = 'same', activation = 'relu'))
model.add(Conv2D(filters = 128, kernel_size = 3, padding = 'same', activation = 'relu'))
model.add(MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same'))

model.add(Flatten())
model.add(Dense(256, activation = 'relu'))

model.add(Dense(7, activation = 'softmax'))

In [158]:
model.compile(optimizer = Adam(1e-4),
             loss = 'sparse_categorical_crossentropy', # 원핫 안해도 마지막 dense 2개로 할수 있다 
             metrics = ['acc'])

In [159]:
history = model.fit(np.array(x_train), np.array(y_train),
                    epochs = 5,
                    batch_size = 32,
                    validation_data = (np.array(x_test), np.array(y_test)))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## 균형 분할

In [160]:
from imblearn.over_sampling import BorderlineSMOTE

In [163]:
x_train = np.array(x_train)
x_test = np.array(x_test)

In [174]:
x_train_smoote = x_train.reshape((x_train.shape[0], x_train.shape[1] * x_train.shape[2] * x_train.shape[3])).astype(np.float32)
smote = BorderlineSMOTE()
x_train_smoote, y_train_sommte = smote.fit_resample(x_train_smoote, y_train)

x_train_smoote = x_train_smoote.reshape(-1,255,255,3) #다시 원상태로



In [183]:
np.shape(x_train), np.shape(y_train)

((175, 255, 255, 3), (175,))

In [182]:
np.shape(x_train_smoote), np.shape(y_train_sommte) #두개의 길이차이가 난다

((546, 255, 255, 3), (546,))

In [177]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten
from tensorflow.keras.optimizers import Adam

model = Sequential()
# (255, 255, 3)의 데이터를 입력으로 받습니다. 
model.add(Conv2D(filters = 32, kernel_size = 3, padding = 'same', activation = 'relu', input_shape = (255, 255, 3)))
model.add(Conv2D(filters = 32, kernel_size = 3, padding = 'same', activation = 'relu'))
model.add(MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same'))

model.add(Conv2D(filters = 64, kernel_size = 3, padding = 'same', activation = 'relu'))
model.add(Conv2D(filters = 64, kernel_size = 3, padding = 'same', activation = 'relu'))
model.add(MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same'))

model.add(Conv2D(filters = 128, kernel_size = 3, padding = 'same', activation = 'relu'))
model.add(Conv2D(filters = 128, kernel_size = 3, padding = 'same', activation = 'relu'))
model.add(MaxPool2D(pool_size = (2, 2), strides = 2, padding = 'same'))

model.add(Flatten())
model.add(Dense(256, activation = 'relu'))

model.add(Dense(7, activation = 'softmax'))
model.compile(optimizer = Adam(1e-4),
             loss = 'sparse_categorical_crossentropy', # 원핫 안해도 마지막 dense 2개로 할수 있다 
             metrics = ['acc'])

In [181]:
history = model.fit(np.array(x_train_smoote), np.array(y_train_sommte),
                    epochs = 5,
                    batch_size = 32,
                    validation_data = (np.array(x_test), np.array(y_test)))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
