In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# Any results you write to the current directory are saved as output.

In [None]:
# --- Dataset location and split settings ---
DATASET_DIR = '../input/understanding_cloud_organization/'
TEST_SIZE = 0.3      # handed to train_test_split as test_size
RANDOM_STATE = 1024  # handed to train_test_split as random_state

# --- Visualization settings ---
NUM_TRAIN_SAMPLES = 5  # number of train samples drawn for visualization
NUM_VAL_SAMPLES = 5    # number of val samples drawn for visualization
# One matplotlib color code per class, looked up by the class's position (0..3).
COLORS = [
    'b',
    'g',
    'r',
    'm',
]

import pandas as pd
import os
import cv2
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection

from shutil import copyfile
from sklearn.model_selection import train_test_split
from tqdm import tqdm_notebook

# Load the annotation table (train.csv inside DATASET_DIR) into `df`.
train_csv_path = os.path.join(DATASET_DIR, 'train.csv')
df = pd.read_csv(train_csv_path)

In [None]:
# Render the annotation table that was read from train.csv.
df

In [None]:
# Image_Label is split on '_' and the first piece is kept as the image file name.
df['Image'] = df['Image_Label'].str.split('_').str[0]

# 1 when the row carries an encoded mask, 0 when EncodedPixels is missing.
# notna() recognises every missing-value marker (NaN, None, pd.NA), whereas the
# identity test `x is np.nan` only matches the np.nan singleton object itself.
df['HavingDefection'] = df['EncodedPixels'].notna().astype(np.int64)

In [None]:
# View the flat 0/1 flag column as rows of four values each.
np.asarray(df['HavingDefection']).reshape((-1, 4))

In [None]:
# Entries that differ from 1 plus entries that differ from 0. When every flag is
# exactly 0 or 1, each row is counted once and the total equals the row count.
flags = np.array(df['HavingDefection'])
sum(flags != 1) + sum(flags != 0)

In [None]:
# Shape of the flat flag vector (one entry per annotation row).
np.asarray(df['HavingDefection']).shape

In [None]:
# Take every 4th file name and fold the flags into rows of four, i.e. the table is
# treated as four consecutive rows per image.
# NOTE(review): relies on the CSV row order; confirm against the data.
image_col = np.array(df['Image'])
image_files = image_col[0::4]
all_labels = np.array(df['HavingDefection']).reshape((-1, 4))

# Per-column totals of the flag matrix: number of images flagged for each class.
num_img_fish, num_img_flower, num_img_gravel, num_img_sugar = (
    np.sum(all_labels[:, col]) for col in range(4)
)
print('Fish: {} images'.format(num_img_fish))
print('Flower: {} images'.format(num_img_flower))
print('Gravel: {} images'.format(num_img_gravel))
print('Sugar: {} images'.format(num_img_sugar))

In [None]:
# Split image file names and their 4-value label rows into train / validation parts.
X_train, X_val, y_train, y_val = train_test_split(
    image_files, all_labels, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Keep (file name, label row) pairs in a plain list. Wrapping such pairs in np.array()
# asks for a ragged array (a string next to a length-4 array), which recent NumPy
# releases refuse to build unless dtype=object is given.
train_pairs = list(zip(X_train, y_train))

# Seeded sampler so the same images are picked on every run of the notebook.
sample_rng = np.random.RandomState(RANDOM_STATE)
sample_idx = sample_rng.choice(len(train_pairs), NUM_TRAIN_SAMPLES, replace=False)
train_samples = [train_pairs[i] for i in sample_idx]

In [None]:
# EncodedPixels values of the rows whose Image_Label contains the third sampled file name.
# regex=False matches the name literally; by default str.contains interprets the pattern
# as a regular expression, where a '.' in a file name would match any character.
sample_name = train_samples[2][0]
df[df['Image_Label'].str.contains(sample_name, regex=False)]['EncodedPixels'].values

In [None]:
def rle2mask(mask_rle, shape=(2100, 1400)):
    '''
    Decode a run-length-encoded mask string into a binary image.

    mask_rle: run-length string formatted as "start length start length ...",
        with 1-based starts into the flattened image. None or NaN (a missing
        annotation) and an empty string all yield an all-zero mask.
    shape: (width, height) of the image. The flat buffer is reshaped to `shape`
        and then transposed, so the returned array has shape (height, width).
    Returns numpy uint8 array, 1 - mask, 0 - background.
    Raises ValueError if the string does not hold complete start/length pairs.
    '''
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # Missing annotation: nothing to draw, return the empty mask.
    if mask_rle is None or (isinstance(mask_rle, float) and np.isnan(mask_rle)):
        return img.reshape(shape).T

    tokens = np.asarray(mask_rle.split(), dtype=int)
    if tokens.size % 2:
        # Without this check an odd token count can broadcast silently
        # (e.g. two starts against one length) and paint the wrong pixels.
        raise ValueError(
            'RLE string must contain start/length pairs, got {} values'.format(tokens.size)
        )

    starts = tokens[0::2] - 1  # RLE starts are 1-based, numpy slices are 0-based
    ends = starts + tokens[1::2]
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T


def show_samples(samples):
    '''
    Plot each sample image with its annotated masks overlaid as translucent polygons.

    samples: iterable of (file_name, labels) pairs. file_name is read from
        DATASET_DIR/train_images; labels is a numpy array that is only used
        to build the figure title.
    Raises FileNotFoundError if an image cannot be read.
    '''
    for sample in samples:
        file_name, class_flags = sample[0], sample[1]

        img_path = os.path.join(DATASET_DIR, 'train_images', file_name)
        img = cv2.imread(img_path, 1)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError('Could not read image: {}'.format(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Get annotations: literal (non-regex) match of the file name in Image_Label.
        labels = df[df['Image_Label'].str.contains(file_name, regex=False)]['EncodedPixels']

        patches = []
        for idx, rle in enumerate(labels.values):
            # pd.notna covers every missing-value marker, not just the np.nan singleton.
            if pd.notna(rle):
                # rle2mask is called with its default shape.
                # NOTE(review): assumes every image matches that default size - confirm.
                mask = rle2mask(rle)
                # The mask is a transposed view; give OpenCV a contiguous copy.
                mask = np.ascontiguousarray(mask)
                # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
                # (image, contours, hierarchy) in 3.x; [-2] is the contour list in both.
                contours = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
                for contour in contours:
                    poly_patch = Polygon(
                        contour.reshape(-1, 2),
                        closed=True,
                        linewidth=2,
                        edgecolor=COLORS[idx],
                        facecolor=COLORS[idx],
                        fill=True,
                    )
                    patches.append(poly_patch)
        p = PatchCollection(patches, match_original=True, cmap=matplotlib.cm.jet, alpha=0.3)

        fig, ax = plt.subplots(figsize=(15, 10))
        ax.imshow(img/255)
        # np.str was only an alias of the builtin str and is gone from recent NumPy.
        ax.set_title('{} - ({})'.format(file_name, ', '.join(class_flags.astype(str))))
        ax.add_collection(p)
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        plt.show()
        # Release the figure so a long sample list does not accumulate open figures.
        plt.close(fig)
        

        


In [None]:
# Split image file names and their 4-value label rows into train / validation parts.
X_train, X_val, y_train, y_val = train_test_split(
    image_files, all_labels, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Plain list of (file name, label row) pairs: np.array() over such pairs is ragged
# (a string next to a length-4 array) and recent NumPy releases reject it unless
# dtype=object is given.
train_pairs = list(zip(X_train, y_train))

# Seeded sampler so the same images are visualized on every run.
sample_rng = np.random.RandomState(RANDOM_STATE)
sample_idx = sample_rng.choice(len(train_pairs), NUM_TRAIN_SAMPLES, replace=False)
train_samples = [train_pairs[i] for i in sample_idx]

show_samples(train_samples)

In [None]:
# A cell renders only its last expression, so a bare `image_files` line placed above
# `all_labels` is evaluated and thrown away. display() (provided by the IPython
# kernel) renders both arrays.
display(image_files, all_labels)

In [None]:
# Raw mask column; rows with no mask hold NaN (those rows get flag 0 in HavingDefection).
df['EncodedPixels']

In [None]:
# 0/1 flag per annotation row derived from EncodedPixels (1 = a mask is present).
df['HavingDefection']

In [None]:
# List the columns currently present on df.
df.columns


In [None]:
# Rebuild the train/validation split of file names and label rows; the fixed
# random_state makes the split repeatable.
(X_train, X_val,
 y_train, y_val) = train_test_split(
    image_files,
    all_labels,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

In [None]:
# Report the shape of each of the four split arrays.
for _caption, _part in (('X_train:', X_train), ('y_train:', y_train),
                        ('X_val:', X_val), ('y_val:', y_val)):
    print(_caption, _part.shape)

In [None]:
# Training part of the image file names produced by train_test_split.
X_train

In [None]:
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers
import pandas as pd
import numpy as np
from keras.models import Sequential

# CNN classifier over 256x256 RGB inputs: two convolution stages (32 then 64 filters),
# each followed by max-pooling and dropout, then a 512-unit dense layer.
# The output layer has 4 sigmoid units trained with binary cross-entropy, so each
# output is scored independently of the others.
model = Sequential([
    # Stage 1: 32 filters
    Conv2D(32, (3, 3), padding='same', input_shape=(256, 256, 3)),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Stage 2: 64 filters
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Classifier head
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(4, activation='sigmoid'),
])

# RMSprop is the documented optimizer class name; the lowercase `rmsprop` alias is not
# available in every Keras release.
# NOTE(review): newer Keras releases call the `lr` argument `learning_rate` - adjust
# if the installed version rejects `lr`.
model.compile(
    optimizer=optimizers.RMSprop(lr=0.0001, decay=1e-6),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Shape of the file-name array (every 4th entry of the Image column).
image_files.shape

In [None]:
# One column of file names plus one 0/1 column per class, taken from the
# matching column of all_labels.
data = {'Image': image_files}
for _col, _name in enumerate(('Fish', 'Flower', 'Gravel', 'Sugar')):
    data[_name] = all_labels[:, _col]

In [None]:
# Inspect the dict of file names and per-class flag columns.
data

In [None]:
# Turn the column dict into a table: one row per image, one column per dict key.
df_new = pd.DataFrame(data=data)

In [None]:
# List the columns of the per-image table.
df_new.columns

In [None]:
columns = ["Fish", "Flower", "Gravel", "Sugar"]

# Image folder derived from DATASET_DIR (the same way show_samples builds its image
# paths) so the dataset location is configured in a single place.
train_images_dir = os.path.join(DATASET_DIR, 'train_images')

# Both generators only rescale pixel values from [0, 255] to [0, 1].
datagen = ImageDataGenerator(rescale=1./255.)
test_datagen = ImageDataGenerator(rescale=1./255.)

# Options shared by the train and validation generators.
# NOTE(review): class_mode="other" is the legacy spelling; newer keras_preprocessing
# releases name this mode "raw" - confirm against the installed version.
flow_kwargs = dict(
    directory=train_images_dir,
    x_col="Image",
    y_col=columns,
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="other",
)

# NOTE(review): the split below is positional (rows 0-4435 train, rows 4436-5545
# validation) and does not use the X_train / X_val arrays from train_test_split.
train_generator = datagen.flow_from_dataframe(dataframe=df_new[:4436], **flow_kwargs)

valid_generator = test_datagen.flow_from_dataframe(dataframe=df_new[4436:5546], **flow_kwargs)

#test_generator=test_datagen.flow_from_dataframe(dataframe=df_new[4990:5546],directory="/kaggle/input/understanding_cloud_organization/train_images",x_col="Image",y_col=columns,batch_size=32,seed=42,shuffle=True,class_mode="other")


In [None]:
# Whole batches per epoch; floor division leaves out a trailing partial batch.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size
#STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

# Train for 10 epochs, evaluating on the validation generator after each one.
model.fit_generator(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=10,
)

In [None]:
# test_generator and STEP_SIZE_TEST exist elsewhere in the notebook only as
# commented-out code, so this cell builds them itself instead of failing with NameError.
# NOTE(review): rows 4990-5545 also fall inside the validation slice (4436-5545);
# confirm that reusing them as a "test" set is intended.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_new[4990:5546],
    directory=os.path.join(DATASET_DIR, 'train_images'),
    x_col="Image",
    y_col=columns,
    batch_size=32,
    shuffle=False,  # keep prediction rows in the same order as the dataframe rows
    class_mode="other",
)

# Round up so the final partial batch is predicted too.
STEP_SIZE_TEST = int(np.ceil(test_generator.n / test_generator.batch_size))

test_generator.reset()
pred = model.predict_generator(test_generator, steps=STEP_SIZE_TEST, verbose=1)

In [None]:
# Same preprocessing as for train.csv, applied to val.csv.
df1 = pd.read_csv(os.path.join(DATASET_DIR, 'val.csv'))
df1['Image'] = df1['Image_Label'].map(lambda x: x.split('_')[0])
# notna() treats every missing-value marker as "no mask"; `x is np.nan` only
# matches the np.nan singleton object.
df1['HavingDefection'] = df1['EncodedPixels'].notna().astype(np.int64)
image_col1 = np.array(df1['Image'])
image_files1 = image_col1[::4]
all_labels1 = np.array(df1['HavingDefection']).reshape(-1, 4)

num_img_fish1 = np.sum(all_labels1[:, 0])
num_img_flower1 = np.sum(all_labels1[:, 1])
num_img_gravel1 = np.sum(all_labels1[:, 2])
num_img_sugar1 = np.sum(all_labels1[:, 3])
# Report the counts computed from val.csv (the `...1` variables), not the
# counts computed earlier from train.csv.
print('Fish: {} images'.format(num_img_fish1))
print('Flower: {} images'.format(num_img_flower1))
print('Gravel: {} images'.format(num_img_gravel1))
print('Sugar: {} images'.format(num_img_sugar1))


In [None]:
# Plain list of (file name, label row) pairs: np.array() over such pairs is ragged
# (a string next to a length-4 array) and recent NumPy releases reject it unless
# dtype=object is given.
train_pairs = list(zip(X_train, y_train))

# Seeded sampler so the same images are inspected on every run.
sample_rng = np.random.RandomState(RANDOM_STATE)
sample_idx = sample_rng.choice(len(train_pairs), NUM_TRAIN_SAMPLES, replace=False)
train_samples = [train_pairs[i] for i in sample_idx]

for sample in train_samples:
    # regex=False: match the file name literally instead of as a regular expression.
    labels = df[df['Image_Label'].str.contains(sample[0], regex=False)]['EncodedPixels']
    print(labels)
    