In [1]:
import pandas as pd
from google.colab import drive
from zipfile import ZipFile
from matplotlib import pyplot as plt
import PIL.Image
from PIL import Image
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import random

import keras
from keras.layers import Dense, Dropout, Input, MaxPooling2D, ZeroPadding2D, Conv2D, Flatten
from keras.models import Sequential, Model
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam, SGD
from keras.preprocessing.image import img_to_array, load_img, ImageDataGenerator
from keras.utils import to_categorical
from tensorflow.keras import regularizers
from tensorflow.keras.layers import MaxPool2D, AveragePooling2D, GlobalAveragePooling2D
from keras.losses import sparse_categorical_crossentropy

In [2]:
# Mount Google Drive so the dataset archive and the cached pickles are reachable.
# `drive` is already imported in the imports cell at the top of the notebook,
# so it is not imported a second time here.
drive.mount('/content/drive')

# Project folder on Drive; later cells build file names by appending to `path`.
path = "/content/drive/MyDrive/Data2040_midterm_project/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
# Read-only handle on the dataset zip stored in the Drive project folder.
# NOTE(review): none of the cells visible here use `archive_train` (the image
# loading cell opens the zip itself) and the handle is never closed -- confirm
# whether a later cell still needs it.
archive_train = ZipFile(file=path + "cassava-leaf-disease-classification.zip", mode='r')

In [5]:
# Target (width, height) every image is resized to before being stored.
# NOTE(review): this cell previously carried a commented-out 800 x 600 setting,
# presumably the native image size -- confirm before restoring it.
nwidth, nheight = 96, 96

train_jpgs = []  # never filled; kept because the summary expression below reports its length
with ZipFile(path + "cassava-leaf-disease-classification.zip", 'r') as myzip:
    with myzip.open('train.csv') as csv_file:
        labels_raw = pd.read_csv(csv_file,
                                 header=0, sep=',', quotechar='"')

    # Size the buffers from the CSV instead of a hard-coded row count, so a
    # different train.csv can neither overrun the arrays nor leave all-zero
    # rows at the end.
    nsamples = labels_raw.shape[0]

    # np.array(<PIL image>) is laid out (height, width, channels) whereas
    # Image.resize() takes (width, height); allocating height-first keeps
    # non-square target sizes working.
    # NOTE(review): np.zeros defaults to float64; float32 would halve the
    # memory footprint but would change the dtype of the pickled cache.
    s = (nsamples, nheight, nwidth, 3)
    allImage = np.zeros(s)
    labels = np.zeros(nsamples)

    for i, (jpg, label) in enumerate(zip(labels_raw['image_id'], labels_raw['label'])):
        with myzip.open('train_images/' + jpg) as image_file:
            # Force three channels so grayscale / RGBA / palette files still fit the buffer.
            image = Image.open(image_file).convert('RGB').resize((nwidth, nheight))
            # After convert('RGB') pixels are uint8, so dividing by 255 already
            # yields values in [0, 1]; no extra clipping is required.
            allImage[i] = np.asarray(image) / 255.0
            labels[i] = label

len(train_jpgs), allImage.shape, labels.shape

(0, (21397, 96, 96, 3), (21397,))

In [6]:
# Cache the preprocessed arrays on Drive so a later session can reload them
# instead of decoding the zip again.
save_name = "/content/drive/MyDrive/Data2040_midterm_project/train96x96"

# `with` closes (and therefore flushes) each file deterministically; the bare
# open() calls used before left that to garbage collection.
# Pickle protocol 4 is the first protocol with support for very large objects.
with open(save_name + '.pickle', "wb") as images_file:
    pickle.dump(allImage, images_file, protocol=4)
with open(save_name + 'labels' + '.pickle', "wb") as labels_file:
    pickle.dump(labels, labels_file, protocol=4)

In [8]:
# Reload the cached image and label arrays from Drive.
# NOTE: pickle.load can execute arbitrary code; only point these paths at
# files this notebook wrote itself.
# Each file is opened in a `with` block so its handle is closed right after loading.
with open("/content/drive/MyDrive/Data2040_midterm_project/train96x96.pickle", "rb") as images_file:
    train = pickle.load(images_file)
with open("/content/drive/MyDrive/Data2040_midterm_project/train96x96labels.pickle", "rb") as labels_file:
    labels = pickle.load(labels_file)
print(train.shape)
print(labels.shape)

(21397, 96, 96, 3)
(21397,)


In [9]:
# Hold out 20% of the images for evaluation.
SPLIT_SEED = 2040
random.seed(SPLIT_SEED)
X = train
y = labels
print(X.shape)
print(y.shape)
# train_test_split does not draw from Python's `random` module, so random.seed()
# on its own leaves the split different on every run; random_state pins it.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(21397, 96, 96, 3)
(21397,)
(17117, 96, 96, 3)
(17117,)
(4280, 96, 96, 3)
(4280,)


In [10]:
# Constant-prediction baselines: for each label value found in train.csv,
# pretend the classifier outputs that one class for every image and report the
# resulting accuracy on both splits.
for c in pd.unique(labels_raw['label']):
    y_train_pred = np.full(X_train.shape[0], c)
    y_test_pred = np.full(X_test.shape[0], c)
    train_score = accuracy_score(y_train, y_train_pred)
    test_score = accuracy_score(y_test, y_test_pred)
    print('\n'.join([
        f'predict class {c}:',
        f'  train accuracy score: {train_score}',
        f'  test accuracy score: {test_score}',
    ]))

predict class 0:
  train accuracy score: 0.05100192790792779
  test accuracy score: 0.05
predict class 3:
  train accuracy score: 0.6142431500847111
  test accuracy score: 0.6177570093457944
predict class 1:
  train accuracy score: 0.10194543436349827
  test accuracy score: 0.10373831775700934
predict class 2:
  train accuracy score: 0.11275340304959981
  test accuracy score: 0.10654205607476636
predict class 4:
  train accuracy score: 0.12005608459426301
  test accuracy score: 0.12196261682242991


In [11]:
image_resize = 96  # side length, in pixels, of the square inputs fed to the network
# Number of distinct label values in train.csv; relies on `labels_raw` created
# by the image-loading cell.
n_classes = labels_raw['label'].nunique()


def build_model(input_size=None, num_classes=None):
  """Build the (uncompiled) baseline CNN classifier.

  Architecture: two Conv2D (3x3, 'same' padding, ReLU) + 2x2 max-pool stages
  with 32 then 64 filters, a flatten, a 200-unit ReLU dense layer and a
  softmax output layer.

  Args:
    input_size: height and width of the square 3-channel input. Defaults to
      the notebook-level `image_resize`.
    num_classes: number of units in the softmax output. Defaults to the
      notebook-level `n_classes`.

  Returns:
    An uncompiled Keras `Model` mapping (input_size, input_size, 3) inputs to
    `num_classes` softmax scores.
  """
  # Resolve defaults at call time so later changes to the globals are honoured.
  if input_size is None:
    input_size = image_resize
  if num_classes is None:
    num_classes = n_classes

  inputs = Input(shape=(input_size, input_size, 3))
  # No `input_shape=` on the first Conv2D: in the functional API the shape is
  # taken from `inputs`, so the extra argument was redundant.
  x = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(inputs)
  x = MaxPool2D(pool_size=(2, 2))(x)
  x = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(x)
  x = MaxPool2D(pool_size=(2, 2))(x)
  x = Flatten()(x)
  x = Dense(200, activation="relu", kernel_initializer="he_normal")(x)
  outputs = Dense(num_classes, activation='softmax')(x)
  return Model(inputs=inputs, outputs=outputs)

# Instantiate the CNN and configure training: Adam with a small learning rate,
# cross-entropy on integer class labels, accuracy reported as the metric.
model = build_model()

opt = Adam(learning_rate=5e-5)
model.compile(
    optimizer=opt,
    loss=sparse_categorical_crossentropy,
    metrics=['accuracy'],
)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 96, 96, 3)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 96, 96, 32)        896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 48, 48, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 48, 48, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 24, 24, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 36864)             0         
_________________________________________________________________
dense (Dense)                (None, 200)               737300

In [12]:
# Train for 50 epochs; the held-out split is passed as validation data so its
# metrics are evaluated alongside the training metrics. `history` keeps the
# object returned by fit().
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=50,
    shuffle=True,
    verbose=1,
    validation_data=(X_test, y_test),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
