In [6]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import shutil

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.python.framework.config import list_physical_devices, set_memory_growth

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# Walk the read-only Kaggle input tree and print the full path of every file in it.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


In [7]:
# Create the scratch folders that will hold the train / validation / test splits.
# os.makedirs(..., exist_ok=True) builds /kaggle/temp and the split folder in one
# call and does nothing when they already exist, so this cell can be re-run safely
# (plain os.mkdir raises FileExistsError on the second run). Absolute paths are
# used so the folders do not depend on the current working directory.
for split in ('train', 'valid', 'test'):
    os.makedirs(os.path.join('/kaggle/temp', split), exist_ok=True)

# Leave the working directory at /kaggle/working, exactly as the cell did before.
os.chdir('/kaggle/working')

In [3]:
# Root folder of the mushroom image dataset; the split step below lists its
# entries and treats each one as a genus folder.
data_path = (
    '/kaggle/input/'
    'mushrooms-classification-common-genuss-images/'
    'Mushrooms'
)

In [8]:
# Copy every genus folder of the dataset into train / validation / test folders
# under /kaggle/temp/<split>/<genus>: the first 70% of the (sorted) files go to
# train, the next 10% to valid and the remaining 20% to test.


def _copy_images(names, src_dir, dst_dir):
    """Copy each file named in `names` from `src_dir` into `dst_dir`.

    `dst_dir` is created when missing (and left alone when present), so an empty
    `names` list still produces the class folder and re-running is harmless.
    """
    os.makedirs(dst_dir, exist_ok=True)
    for name in names:
        shutil.copyfile(os.path.join(src_dir, name), os.path.join(dst_dir, name))


def _split_genus(genus, source_root, dest_root='/kaggle/temp', train_frac=.7, valid_frac=.8):
    """Split one genus folder into train / valid / test copies.

    Args:
        genus: name of the genus sub-folder inside `source_root`.
        source_root: folder that contains one sub-folder per genus.
        dest_root: folder that receives `train/`, `valid/` and `test/`.
        train_frac: cumulative fraction of files where the training slice ends.
        valid_frac: cumulative fraction of files where the validation slice ends;
            everything after it is the test slice.

    Returns:
        dict mapping split name ('train', 'valid', 'test') to the number of files copied.
    """
    src_dir = os.path.join(source_root, genus)
    # os.listdir returns entries in arbitrary order; sorting makes the split
    # reproducible, so the same image always lands in the same subset.
    images = sorted(
        name for name in os.listdir(src_dir)
        if os.path.isfile(os.path.join(src_dir, name))
    )
    train_end = int(len(images) * train_frac)
    valid_end = int(len(images) * valid_frac)
    subsets = {
        'train': images[:train_end],
        'valid': images[train_end:valid_end],
        'test': images[valid_end:],
    }
    for split_name, names in subsets.items():
        _copy_images(names, src_dir, os.path.join(dest_root, split_name, genus))
    return {split_name: len(names) for split_name, names in subsets.items()}


# Absolute paths are used throughout, so no os.chdir is needed and the notebook's
# working directory is left untouched. Per-file debug prints were dropped because
# they flood the cell output with two lines per image.
for genus in sorted(os.listdir(data_path)):
    # Skip stray files in the dataset root; only folders are genus classes.
    if not os.path.isdir(os.path.join(data_path, genus)):
        continue
    print('genus: ', genus)
    _split_genus(genus, data_path)

In [10]:
# from PIL import ImageFile
# ImageFile.LOAD_TRUNCATED_IMAGES = True

# import matplotlib.pyplot as plt
# import matplotlib.image as mpimg
# %matplotlib inline

# # Settings for displaying charts
# plt.rcParams['figure.figsize'] = 12, 8
# plt.rcParams.update({'font.size': 12})

# Ask TensorFlow which GPUs are visible to this session and report the count.
physical_devices = list_physical_devices('GPU')
print(f'Number of GPUs available: {len(physical_devices)}')

# Turn on on-demand memory growth for every visible GPU rather than only the
# first one: TensorFlow expects the setting to be identical across GPUs, so
# configuring a single device breaks on multi-GPU sessions. With no GPU the
# loop simply does nothing.
for gpu in physical_devices:
    set_memory_growth(gpu, True)

In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.efficientnet import preprocess_input

In [12]:
# Shared input settings: every generator below resizes images to
# image_dim x image_dim pixels and yields them batch_size at a time.
image_dim, batch_size = 320, 32

In [13]:
# Training images: EfficientNet preprocessing plus light augmentation
# (random horizontal flips and width/height shifts with range 0.1).
train_augmenter = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

# Stream shuffled, one-hot-labelled batches from the per-genus training folders.
training_images = train_augmenter.flow_from_directory(
    directory='/kaggle/temp/train',
    target_size=(image_dim, image_dim),
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
)

In [14]:
# Validation images: same preprocessing as training, but no augmentation.
valid_preprocessor = ImageDataGenerator(preprocessing_function=preprocess_input)

# Stream shuffled, one-hot-labelled batches from the per-genus validation folders.
valid_images = valid_preprocessor.flow_from_directory(
    directory='/kaggle/temp/valid',
    target_size=(image_dim, image_dim),
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
)

In [15]:
# Test images: same preprocessing as training, but no augmentation.
test_preprocessor = ImageDataGenerator(preprocessing_function=preprocess_input)

# Stream shuffled, one-hot-labelled batches from the per-genus test folders.
test_images = test_preprocessor.flow_from_directory(
    directory='/kaggle/temp/test',
    target_size=(image_dim, image_dim),
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
)

In [16]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Embedding, Flatten
# from tensorflow.keras.optimizers import SGD
# from tensorflow.keras.initializers import HeNormal

In [20]:
# Ask TensorFlow's C++ backend to log as little as possible.
# NOTE(review): TensorFlow modules are already imported in the first cell, so
# this variable may be set too late to have any effect — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf

# ImageNet-pretrained EfficientNetB7 without its classification top; global
# average pooling is applied to the final feature maps.
model = tf.keras.applications.efficientnet.EfficientNetB7(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(image_dim, image_dim, 3),
    input_tensor=None,
    # The next two describe the classification top, which include_top=False omits.
    classes=1000,
    classifier_activation='softmax',
)

In [23]:
# Stack a fresh softmax classification head on top of the pretrained backbone.
# The head width is taken from the number of class folders the training
# generator discovered instead of a hard-coded 9, so the output layer always
# matches the one-hot labels the generator yields.
num_classes = training_images.num_classes
new_model = tf.keras.models.Sequential(
    [
        model,
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ]
)

In [24]:
# Freeze the first layer of the wrapper (the pretrained backbone) so its weights
# are not updated during training, then compile for multi-class classification.
backbone = new_model.layers[0]
backbone.trainable = False

new_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [25]:
# Print the layer-by-layer summary of the wrapped model (backbone + dense head).
new_model.summary()

In [26]:
# Training callbacks; both watch the validation accuracy.
monitored_metric = 'val_accuracy'

# Stop after 10 epochs without improvement and restore the best weights seen.
# NOTE(review): the training call also uses epochs=10, so this patience looks
# too large for early stopping to ever trigger — confirm the intended value.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor=monitored_metric,
    patience=10,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.1 after 2 epochs without improvement, then
# wait 2 cooldown epochs before monitoring resumes.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor=monitored_metric,
    mode='max',
    factor=0.1,
    patience=2,
    cooldown=2,
    min_lr=0,
)

In [29]:
# Train the classification head on the augmented training batches, validating
# after each epoch; the returned History object is kept in network_history.
# NOTE(review): steps_per_epoch=79 and validation_steps=16 are hard-coded and
# may not correspond to one full pass over each generator — confirm, or derive
# them from len(training_images) / len(valid_images).
training_callbacks = [reduce_lr, early_stop]
network_history = new_model.fit(
    training_images,
    epochs=10,
    steps_per_epoch=79,
    validation_data=valid_images,
    validation_steps=16,
    callbacks=training_callbacks,
    workers=2,
    use_multiprocessing=True,
    verbose=2,
)

In [31]:
# Score the trained model on the held-out test generator.
# steps=len(test_images) makes exactly one pass over every test batch, so the
# reported metrics cover the whole test set once; the previous hard-coded
# steps=11 only matched the data for one particular test-set size.
loss, accuracy = new_model.evaluate(test_images,
                                    steps=len(test_images),
                                    verbose=2,
                                    use_multiprocessing=True,
                                    workers=2)
print(f'Model performance on test images:\nAccuracy = {accuracy}\nLoss = {loss}')