In [None]:
import os        
import numpy as np # linear algebra
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import PIL
import PIL.Image
from tensorflow import keras
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Where the labelled training images live (one sub-folder per class) and the
# (height, width) every image is resized to on load.
TRAIN_DIR = '../input/plant-seedlings-classification/train'
IMAGE_SIZE = (64, 64)
# The whole training set is pulled into memory as ONE batch so that later cells
# can index it as plain arrays. This must be >= the number of images on disk.
FULL_BATCH_SIZE = 4750

# Only rescale pixel values by 1/255; no augmentation is configured.
train_datagen = ImageDataGenerator(rescale=1./255)

train_seedlings = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=IMAGE_SIZE,
    batch_size=FULL_BATCH_SIZE,
    class_mode='categorical',   # one-hot label rows
    # NOTE(review): no validation_split is set on train_datagen, so this
    # presumably selects every image rather than a real subset -- confirm.
    subset='training',
    seed=50,
)

x_train, y_train = next(train_seedlings)

# A batch size smaller than the dataset would silently drop the remaining
# images from x_train / y_train; fail loudly instead.
assert len(x_train) == train_seedlings.samples, (
    "Loaded %d of %d training images; raise FULL_BATCH_SIZE"
    % (len(x_train), train_seedlings.samples)
)

In [None]:
# Number of label rows loaded into memory (size of the first axis).
y_train.shape[0]

In [None]:
# Peek at the first few label rows instead of echoing the whole label array.
y_train[:5]

In [None]:
# Inspect the container type of the image batch returned by next(train_seedlings).
type(x_train)

In [None]:
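# Disabled: separate validation generator, kept for reference only.
# NOTE: before re-enabling, set validation_split on the ImageDataGenerator
# (subset='validation' is only meaningful when a split is configured) and use
# the same target_size as the training generator, (64, 64), not (224, 224).
# validation_seedlings = train_datagen.flow_from_directory(
#     '../input/plant-seedlings-classification/train',
#     target_size=(224, 224),
#     batch_size=82,
#     class_mode='categorical',
#     subset='validation')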

In [None]:
# Preview the first nine training images in a 3x3 grid, each titled with its class.
images = x_train[:9]
labels = y_train[:9]

# Invert the generator's {class name: column index} mapping so the arg-max of a
# one-hot label row can be turned back into a class name.
index_to_species = {idx: name for name, idx in train_seedlings.class_indices.items()}

fig, axes = plt.subplots(3, 3, figsize=(2*3, 2*3))
for ax, image, label in zip(axes.flat, images, labels):
    # No cmap argument: the generator loads colour images by default, and
    # matplotlib ignores cmap for RGB(A) input anyway.
    ax.imshow(image)
    ax.set_title(index_to_species[int(label.argmax())], fontsize=8)
    ax.axis('off')
fig.tight_layout()
plt.show()

In [None]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dropout,Flatten,BatchNormalization,Dense


In [None]:
def get_model(input_shape=None, num_classes=12):
    """Build and compile the seedling-classification CNN.

    Architecture: four ``Conv2D(3x3, relu) -> MaxPooling2D(2, 2) -> Dropout``
    stages with 32, 64, 128 and 256 filters, followed by
    ``Flatten -> Dense(512, relu) -> BatchNormalization -> Dropout ->
    Dense(num_classes, softmax)``.

    Args:
        input_shape: Shape of a single input image. When ``None`` (default) it
            is read from ``train_seedlings.image_shape`` at call time, which
            matches the original zero-argument behaviour.
        num_classes: Number of output neurons; defaults to the 12 seedling
            classes.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy loss,
        the Adam optimizer and the ``'acc'`` metric.
    """
    if input_shape is None:
        input_shape = train_seedlings.image_shape

    model = Sequential()

    # Convolutional stages as (filters, dropout rate). Only the first layer
    # declares the input shape; later layers infer theirs from the previous one.
    conv_stages = [(32, 0.15), (64, 0.10), (128, 0.10), (256, 0.10)]
    for stage, (filters, drop_rate) in enumerate(conv_stages):
        if stage == 0:
            model.add(Conv2D(filters, (3, 3), activation='relu', input_shape=input_shape))
        else:
            model.add(Conv2D(filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D(2, 2))
        model.add(Dropout(rate=drop_rate))

    # Reshape the feature maps into a vector for the dense head
    model.add(Flatten())

    # Fully connected hidden layer
    model.add(Dense(512, activation='relu'))

    # Normalize activations, then regularise
    model.add(BatchNormalization())
    model.add(Dropout(rate=0.10))

    # One softmax output neuron per class
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['acc'])

    return model


In [None]:
# 5-fold cross-validation: train a fresh model on four folds and record its
# accuracy on the held-out fold.
cvscores = []   # held-out accuracy of each fold, as a fraction
f1scores = []   # NOTE(review): never filled in this cell -- confirm whether it is still needed

kf = KFold(n_splits=5, shuffle=True, random_state=2)
for kff, (train_index, test_index) in enumerate(kf.split(x_train), start=1):
    model = get_model()

    model.fit(x_train[train_index], y_train[train_index], epochs=20, batch_size=10, verbose=0)
    # score is [loss, acc] because the model is compiled with a single 'acc' metric.
    score = model.evaluate(x_train[test_index], y_train[test_index], verbose=1)
    print("Fold %s -- %s: %.2f%%" % (kff, model.metrics_names[1], score[1]*100))
    cvscores.append(score[1])

    # `del` only drops the Python reference; Keras keeps global state for every
    # model built in the process, so reset it to stop memory growing per fold.
    del model
    tf.keras.backend.clear_session()

In [None]:
# Summarise the cross-validation run. cvscores holds each fold's accuracy as a
# fraction (the 'acc' value from model.evaluate), so scale by 100 before
# printing it next to a percent sign, as the per-fold printout does.
print('\n-------- Overall results ----')
print("Accuracy %.2f%% (+/- %.2f%%)" % (np.mean(cvscores) * 100, np.std(cvscores) * 100))

In [None]:
# Number of training images held in memory (size of the first axis).
x_train.shape[0]

In [None]:
# Train the final model on the complete training set, using the same
# epoch count and batch size as the cross-validation folds.
model = get_model()
model.fit(
    x=x_train,
    y=y_train,
    batch_size=10,
    epochs=20,
    verbose=1,
)

In [None]:
# The test images are unlabelled and all sit in the single 'test' folder, which
# is passed to flow_from_directory as the only "class" so its files are found.
test_datagen = ImageDataGenerator(rescale=1./255)   # same 1/255 scaling as the training data

test_generator = test_datagen.flow_from_directory(
    directory='/kaggle/input/plant-seedlings-classification/',
    classes=['test'],
    target_size=(64, 64),   # same size as the training images
    batch_size=32,
    shuffle=False,          # keep batches in the order of test_generator.filenames
    class_mode=None,        # yield images only; there are no real labels to return
)

In [None]:
# Class names, in the column order of the model's softmax output.
species_list = ["Black-grass", "Charlock", "Cleavers", "Common Chickweed", "Common wheat", "Fat Hen",
                "Loose Silky-bent", "Maize", "Scentless Mayweed", "Shepherds Purse", "Small-flowered Cranesbill",
                "Sugar beet"]

# The hard-coded list is only valid if it matches the label columns used in
# training; otherwise every predicted index would map to the wrong species.
trained_order = sorted(train_seedlings.class_indices, key=train_seedlings.class_indices.get)
assert species_list == trained_order, (
    "species_list does not match the training class order: %r" % (trained_order,)
)

# `steps` counts batches, not images: len(test_generator) is the number of
# batches needed to see every test image exactly once.
preds = model.predict(test_generator, steps=len(test_generator))
assert preds.shape[0] == len(test_generator.filenames), (
    "Got %d predictions for %d test files" % (preds.shape[0], len(test_generator.filenames))
)

# Most probable class per image, mapped to its species name.
class_list = [species_list[class_idx] for class_idx in preds.argmax(axis=-1)]

# Generator filenames carry the 'test' folder prefix; the submission wants the
# bare file name. basename works regardless of the path separator.
submission = pd.DataFrame({
    'file': [os.path.basename(name) for name in test_generator.filenames],
    'species': class_list,
})

In [None]:
# Number of prediction rows returned by model.predict.
len(preds)

In [None]:
# Index of the highest-scoring class for the first prediction row.
np.argmax(preds[0])

In [None]:
# Write the submission table to disk without the DataFrame index column.
submission.to_csv(path_or_buf='submission1.csv', index=False)

In [None]:
# Persist the trained model to an HDF5 file in the working directory.
model.save(filepath='./output_model.h5')