# Import Modules

In [None]:
#import modules

# Data Manipulation
import numpy as np
import pandas as pd

# Data Viz
import matplotlib.pyplot as plt
import matplotlib.image as mpimg 
import seaborn as sns

# machine learning and deep learning
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import BatchNormalization
from keras.preprocessing import image

from PIL import Image

#Others
import os



# Sanity message: reaching this line means none of the imports above raised.
print("Modules Imported")

# First Look at the Data
## Let's first look at the labels.

In [None]:
# Read the competition's training table into a DataFrame and preview the
# first rows (the bare `head()` call is what the notebook cell displays).
train_csv_path = "/kaggle/input/hpa-single-cell-image-classification/train.csv"
df = pd.read_csv(train_csv_path)
df.head()

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# Each 'Label' cell is a '|'-separated string of class ids. Split the whole
# column in one vectorised call instead of indexing row by row with df.loc.
label_lists = df['Label'].astype(str).str.split('|').tolist()

# One indicator column per distinct class id, one row per picture.
mlb = MultiLabelBinarizer()
mlb_result = mlb.fit_transform(label_lists)

# NOTE(review): the class ids are strings here, so the indicator columns are
# expected to come out in lexicographic order ('0', '1', '10', ..., '2', ...),
# not numeric order - confirm before mapping a column index back to a class.
df_final = pd.concat(
    [df['ID'], pd.DataFrame(mlb_result, columns=list(mlb.classes_))],
    axis=1,
)

# Keep only the first 200 pictures and drop the ID column so that `y` is a
# plain (n_pictures, n_classes) indicator array.
y = df_final.head(200).drop(columns='ID').to_numpy()


This table lists each picture's ID together with the integer labels that give the organelle localization of the proteins in that picture. The structures corresponding to these integers are listed below:
0. Nucleoplasm
1. Nuclear membrane
2. Nucleoli
3. Nucleoli fibrillar center
4. Nuclear speckles
5. Nuclear bodies
6. Endoplasmic reticulum
7. Golgi apparatus
8. Intermediate filaments
9. Actin filaments
10. Microtubules
11. Mitotic spindle
12. Centrosome
13. Plasma membrane
14. Mitochondria
15. Aggresome
16. Cytosol
17. Vesicles and punctate cytosolic patterns
18. Negative


## Let's Look at the First Picture

In [None]:
# Stained structure -> colour suffix used in the PNG file names.
colors = {
    "microtubules": "red",
    "target": "green",
    "nucleus": "blue",
    "endoplasmic reticulum": "yellow",
}

# Common path prefix of the sample picture; only the colour suffix varies.
sample_prefix = (
    '/kaggle/input/hpa-single-cell-image-classification/train/'
    '5c27f04c-bb99-11e8-b2b9-ac1f6b6435d0_'
)

# The target (green) file is deliberately left out: the yellow file
# (endoplasmic reticulum) is placed in the green slot of the composite,
# as the plot title states.
img_r, img_g, img_b = (
    mpimg.imread(f"{sample_prefix}{suffix}.png")
    for suffix in ("red", "yellow", "blue")
)

# Stack the three planes depth-wise into one composite image.
img_rgb = np.dstack((img_r, img_g, img_b))

plt.figure(figsize=(10, 10))
plt.imshow(img_rgb)
plt.title("Show Only Cells Without Target (R:Microtubules, Blue:Nucleus, Green:Endoplasmic Reticulum)")

In [None]:

# Show the four files of the sample picture side by side, one subplot each.
fig, axs = plt.subplots(1, 4, figsize=(20,40))

# Stained structure -> colour suffix used in the PNG file names.
colors = {"microtubules":"red", "nucleus":"blue", "endoplasmic reticulum":"yellow", "target":"green"}

# Common path prefix of the sample picture; only the colour suffix varies.
sample_prefix = '/kaggle/input/hpa-single-cell-image-classification/train/' + '5c27f04c-bb99-11e8-b2b9-ac1f6b6435d0_'

# Pair each axis with one channel directly instead of keeping a manual counter.
for ax, (key, suffix) in zip(axs, colors.items()):
    # `img` stays bound to the last file read once the loop ends.
    img = mpimg.imread(sample_prefix + suffix + '.png')
    ax.imshow(img, cmap='gray')
    ax.set_title(key)


**print(img)**

In [None]:
# Dump the raw pixel array currently bound to `img` (presumably the last
# channel read by the plotting loop, if the cells were run in order).
print(img)

In [None]:
# Folder holding the training PNGs. Later code appends file names to this
# string directly, so the trailing slash is required.
image_directory = '/kaggle/input/hpa-single-cell-image-classification/train/'
# Disabled exploration snippet: walk the folder and print every image's size.
#for filename in os.listdir(image_directory):
    #print(os.path.join(image_directory, filename))
 #   img = Image.open(os.path.join(image_directory, filename))
 #   print(img.size)

In [None]:
# Build the input tensor: one (size, size, 4) array per picture, with the
# red / green / blue / yellow files as the four channels.
size = 200
nbpictures = 200  # not used by the loop; the number of label rows in `y` drives it
channel_suffixes = ('red', 'green', 'blue', 'yellow')

X_dataset = []
# Walk the IDs of exactly the rows that have a label in `y`, so X and y stay aligned.
for sample_id in tqdm(df["ID"].iloc[:y.shape[0]]):
    # load_img defaults to color_mode='rgb', which would turn every file into
    # three channels (12 after stacking). Loading as grayscale gives one plane
    # per file; target_size takes a (height, width) pair.
    planes = [
        image.img_to_array(
            image.load_img(
                image_directory + sample_id + '_' + suffix + '.png',
                color_mode='grayscale',
                target_size=(size, size),
            )
        )
        for suffix in channel_suffixes
    ]
    img = np.dstack(planes)
    # Scale to [0, 1]; assumes 8-bit pixel values - TODO confirm.
    img = img / 255
    X_dataset.append(img)

X = np.array(X_dataset)
    

In [None]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=20,
)

In [None]:
# Small CNN for multi-label classification: four conv blocks followed by a
# dense head with one independent sigmoid per class.
#
# Input and output sizes are taken from the training arrays instead of being
# hard-coded: the label matrix has one column per class found by the
# binarizer (the notebook lists 19 classes), so a fixed Dense(18) can
# disagree with it.
input_shape = X_train.shape[1:]
n_classes = y_train.shape[1]

model = Sequential()

# Block 1 (normalises before pooling, unlike the blocks below; kept as is).
model.add(Conv2D(16, kernel_size=(5,5), activation='relu', input_shape=input_shape))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))

# Block 2
model.add(Conv2D(32, kernel_size=(5,5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Dropout(0.2))

# Block 3
model.add(Conv2D(64, kernel_size=(5,5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Dropout(0.2))

# Block 4
model.add(Conv2D(64, kernel_size=(5,5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Dropout(0.2))

# Classification head.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
# Sigmoid rather than softmax: several classes can be active for one picture.
model.add(Dense(n_classes, activation='sigmoid'))

model.summary()


In [None]:
# Binary cross-entropy scores every sigmoid output independently, which is
# the multi-label setting used here.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs and evaluate on the held-out split after each epoch.
# The original passed an empty tuple as validation_data, which supplies no
# validation set at all.
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))