In [None]:
# Modules
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp 

import os
import glob
from IPython.display import display,HTML
import gc

In [None]:
# Preliminary
CWD = os.getcwd()

# Directory every dataset file and folder below is resolved against.
DATA_ROOT = "/kaggle/input/cassava-leaf-disease-classification"


def root_path(path):
    """Return `path` joined onto the dataset root directory (DATA_ROOT)."""
    return os.path.join(DATA_ROOT, path)


# Short name -> full path of each dataset file or folder used in this notebook.
paths = {
    "train_tfrecords": root_path("train_tfrecords"),
    "train_images": root_path("train_images"),
    "train": root_path("train.csv"),
    "test_images": root_path("test_images"),
    "test_tfrecords": root_path("test_tfrecords"),
    "sample_submission": root_path("sample_submission.csv"),
    "labels": root_path("label_num_to_disease_map.json"),
}

(dict:) **paths**

In [None]:
# DATA
# NOTE(review): the original note read "more than 50% data > 3 in labels,
# labels ~= [0,1,2,3,4]" -- presumably it means a single label value accounts
# for over half of the rows; confirm against the histogram drawn below.
import json

cassava = pd.read_csv(paths["train"], usecols=["image_id", "label"])
test_df = pd.read_csv(paths["sample_submission"])

unique_labels = cassava.label.unique()

print(f"Shape: {cassava.shape}")
display(cassava.head(2))

# hist() draws the figure and returns the Axes it created; that return value
# is not something plt.show() should receive, so the two calls are separate.
cassava.hist()
plt.show()

# Bare file names (directory part stripped). glob returns entries in
# filesystem order, so sort them to keep positional look-ups such as
# train_images[2] stable from one run to the next.
train_images = sorted(
    os.path.basename(image_path)
    for image_path in glob.glob(os.path.join(paths["train_images"], "*.jpg"))
)

train_tfrecords = sorted(
    os.path.basename(record_path)
    for record_path in glob.glob(os.path.join(paths["train_tfrecords"], "*.tfrec"))
)

# JSON object keys are strings, so labels_dict must be indexed with str(label).
with open(paths["labels"], "r") as f:
    labels_dict = json.load(f)

In [None]:
def prepare_df(df=None, image_dir=None):
    """Swap the ``image_id`` column for an ``img_paths`` column of joined paths.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame holding an ``image_id`` column. When omitted, the notebook-level
        ``cassava`` frame is looked up at call time.
    image_dir : str, optional
        Directory each image id is joined onto. Defaults to
        ``paths["train_images"]``.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``img_paths`` appended and ``image_id`` dropped; the
        input frame is left untouched. A frame that has no ``image_id`` column
        is returned unchanged, so re-running the cell is harmless.
    """
    if df is None:
        df = cassava

    # Explicit guard instead of a blanket try/except: the only expected
    # "failure" is a frame that was already prepared on a previous run.
    if "image_id" not in df.columns:
        print("prepare_df: no 'image_id' column, returning the frame unchanged")
        return df

    if image_dir is None:
        image_dir = paths["train_images"]

    # A plain list is assigned by position; wrapping it in pd.Series would
    # align on a fresh 0..n-1 index and yield NaN for any other row index.
    img_paths = [os.path.join(image_dir, image_id) for image_id in df["image_id"]]
    return df.assign(img_paths=img_paths).drop(columns="image_id")

# Rebind the training frame to the result of prepare_df.
cassava = cassava.pipe(prepare_df)

(DataFrames:) cassava, test_df

(vectors:) train_images, train_tfrecords

In [None]:
# example image
# Imported from tensorflow.keras, the same namespace the model cell uses,
# rather than from the standalone keras package.
from tensorflow.keras.preprocessing.image import load_img, img_to_array

example_name = train_images[2]
img = load_img(os.path.join(paths["train_images"], example_name))
img_array = np.asarray(img)

print("Image Shapes:", img_array.shape)
plt.imshow(img)
plt.title(example_name)
plt.show()  # render the figure instead of ending the cell on an object repr


In [None]:
# Keras CNN model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def define_model(input_shape=(600, 800, 3), n_classes=5):
    """Build and compile a small convolutional image classifier.

    The network is three Conv2D + MaxPooling2D stages (32, 16 and 8 filters,
    2x2 kernels, "same" padding), followed by Flatten, a 16-unit ReLU dense
    layer, 20% dropout and a softmax output layer.

    Parameters
    ----------
    input_shape : tuple of int, optional
        Shape of one input image passed to the first layer. Defaults to
        ``(600, 800, 3)``.
    n_classes : int, optional
        Width of the softmax output layer. Defaults to ``5``.

    Returns
    -------
    Sequential
        Model compiled with the adam optimizer, categorical cross-entropy loss
        (targets must therefore be one-hot encoded) and the accuracy metric.
    """
    model = Sequential()

    # Conv2D applies no activation unless one is given; without it the
    # convolution stages are purely linear filters, so ReLU is set explicitly.
    model.add(Conv2D(32, kernel_size=2, padding="same", activation="relu",
                     input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(16, kernel_size=2, padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Conv2D(8, kernel_size=2, padding="same", activation="relu"))
    model.add(MaxPooling2D(pool_size=2))

    model.add(Flatten())
    model.add(Dense(16, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(n_classes, activation="softmax"))

    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])

    return model

model = define_model()
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
model.summary()

In [None]:
from PIL import Image

def get_features_target(df=None, _fraction=0.1, random_state=0):
    """Load images and labels from a frame into NumPy arrays.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame with an ``img_paths`` column (image file paths) and a ``label``
        column. When omitted, the notebook-level ``cassava`` frame is looked
        up at call time.
    _fraction : float or None, optional
        Fraction of rows to sample before loading. A falsy value (``None`` or
        ``0``) skips sampling and loads every row. Defaults to ``0.1``.
    random_state : int or None, optional
        Seed forwarded to ``DataFrame.sample``. Fixed by default so that a
        re-run loads the same subset; pass ``None`` for a fresh random draw.

    Returns
    -------
    X : numpy.ndarray
        One array entry per loaded image, each image converted to RGB.
    y : numpy.ndarray
        Labels as a column vector of shape ``(n_rows, 1)``.
    """
    if df is None:
        df = cassava

    if _fraction:
        df = df.sample(frac=_fraction, random_state=random_state)

    def _load_rgb(path):
        # The context manager closes the file handle as soon as the pixels
        # have been copied; convert("RGB") guarantees three channels.
        with Image.open(path) as im:
            return np.asarray(im.convert("RGB"))

    X = np.array([_load_rgb(path) for path in df["img_paths"]])
    y = df["label"].to_numpy().reshape((-1, 1))
    return X, y
    
# Load a 10% sample of the training rows, then report both array shapes.
X, y = get_features_target(_fraction=0.1)

for caption, array in (("X shape:", X), ("Y shape:", y)):
    print(caption, array.shape)

In [None]:
# FIT!
# to_categorical infers the one-hot width from max(y) + 1 when num_classes is
# omitted. On a subsample that happens to miss the highest label, the targets
# would come out narrower than the softmax layer, so pin the width to the
# model's own output size.
history = model.fit(
    X,
    to_categorical(y, num_classes=model.output_shape[-1]),
    batch_size=20,
    epochs=5,
)

In [None]:
from PIL import Image


# Show the image stored at row label 100 together with its disease name.
# imshow copies the pixel data, so the file can be closed right afterwards.
with Image.open(cassava.loc[100, "img_paths"]) as ex:
    plt.imshow(ex)

# labels_dict comes from JSON, so its keys are strings: convert the label
# with str() rather than calling the dunder method directly.
print("Esta hojita es:", labels_dict[str(cassava.loc[100, "label"])])
plt.show()

In [None]:
test_img_paths = [os.path.join(paths["test_images"], img) for img in test_df["image_id"]]

# Load each test image with its file handle closed right after the pixels are
# copied; convert("RGB") guarantees three channels per image.
test_arrays = []
for img_path in test_img_paths:
    with Image.open(img_path) as test_img:
        test_arrays.append(np.asarray(test_img.convert("RGB")))
x_test = np.array(test_arrays)

# reshape(-1, 1) yields one row per test image; the previous reshape(1, 1)
# raised as soon as the test frame held more than a single row.
y_test = test_df["label"].values.reshape(-1, 1)

print(f"{x_test.shape}", f"{y_test.shape}")

In [None]:
# preds holds one row of class probabilities per test image.
preds = model.predict(x_test)

# axis=1 picks the most probable class for each row. Without an axis,
# np.argmax returns a single index into the flattened array, which is wrong
# for anything but a one-image test set.
best_pred = np.argmax(preds, axis=1)

# Assign the array positionally; wrapping it in pd.Series would align on a
# 0..n-1 index instead of the frame's own row order.
test_df["label"] = best_pred
test_df.to_csv("submission.csv", index=False, header=True)