In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from PIL import Image
import matplotlib.pyplot as plt
import os
import re
from sklearn.model_selection import train_test_split
import tensorflow as tf
import cv2
from keras import applications
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from keras.models import Model
from keras.optimizers import Adam

In [None]:
# Load the training labels table; later cells read its `Id` and `Target` columns.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/human-protein-atlas-image-classification/train.csv',
)

In [None]:
# Number of samples per batch used when batching the tf.data pipelines.
BATCH_SIZE = 16
# Shape of one model input: height, width, and three stacked colour channels.
INPUT_SHAPE = (512, 512, 3)

In [None]:
# Directory prefix of the per-channel training images; sample ids are appended to it.
path_to_train = (
    '/kaggle/input/human-protein-atlas-image-classification/train/'
)

In [None]:
# Per-sample path prefix (directory + Id); the channel suffix such as
# '_red.png' is appended wherever the image files are actually opened.
df["complete_path"] = df["Id"].radd(path_to_train)

In [None]:
# Peek at the first rows to confirm the new complete_path column.
df.head(n=5)

In [None]:
import random

# Preview a 3x4 grid of randomly chosen samples, each shown as an RGB composite
# built from the sample's red, green and blue channel files.
fig, axes = plt.subplots(3, 4, figsize=(11, 11))
for ax in axes.flat:
    # randrange excludes its upper bound; randint(0, n) is inclusive and could
    # return n, which makes df.iloc raise IndexError.
    idx = random.randrange(df.shape[0])
    row = df.iloc[idx, :]
    path = row.complete_path
    channels = []
    for colour in ('red', 'green', 'blue'):
        # The context manager closes the file once its pixels have been copied.
        with Image.open(path + '_' + colour + '.png') as channel_img:
            channels.append(np.array(channel_img))
    # Stack the three 2-D channel arrays along a new last axis -> (H, W, 3).
    im = np.stack(channels, -1)
    ax.imshow(im)
    ax.set_title(row.Target)
    ax.set_xticks([])
    ax.set_yticks([])
fig.tight_layout()
# plt.show() renders on every backend; Figure.show() can warn on non-GUI
# (notebook inline) backends.
plt.show()

In [None]:
# First hold out 20% of all rows as the test set ...
train, test = train_test_split(df, random_state=42, test_size=0.2)

# ... then split 20% of the remaining rows off as the validation set.
train, val = train_test_split(train, random_state=42, test_size=0.2)

In [None]:
# Sanity check: report the shape of each split produced above.
print(f'Shape of train: {train.shape}')
print(f'Shape of test: {test.shape}')
print(f'Shape of val: {val.shape}')

In [None]:
def get_clean_data(df, num_classes=28):
    """Turn a labels frame into parallel arrays of path prefixes and multi-hot targets.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``complete_path`` column and a ``Target`` column whose
        values are strings of whitespace-separated integer class ids.
    num_classes : int, default 28
        Width of each multi-hot target vector.

    Returns
    -------
    paths : numpy.ndarray
        One entry per row of ``df``, taken from ``complete_path``.
    targets : numpy.ndarray
        Float array of shape ``(len(df), num_classes)`` holding 1 at every
        class id listed in the row's ``Target`` and 0 elsewhere.

    Raises
    ------
    IndexError
        If a class id does not fit inside ``num_classes``.
    ValueError
        If a ``Target`` token is not an integer.
    """
    # Pre-allocating keeps the result 2-D even when df has no rows.
    targets = np.zeros((len(df), num_classes))
    paths = []
    for row_idx, (path, target) in enumerate(zip(df["complete_path"], df["Target"])):
        class_ids = [int(token) for token in target.split()]
        # Index with a list so every listed class is set in one assignment.
        targets[row_idx, class_ids] = 1
        paths.append(path)
    return np.array(paths), targets

In [None]:
# Convert each split into its (paths, multi-hot targets) pair of arrays.
(train_path, train_target), (val_path, val_target), (test_path, test_target) = (
    get_clean_data(split_df) for split_df in (train, val, test)
)

In [None]:
# Sanity check: path and target arrays of a split should have the same length.
print(f'Train path shape: {train_path.shape}')
print(f'Train target shape: {train_target.shape}')
print(f'Val path shape: {val_path.shape}')
print(f'Val target shape: {val_target.shape}')
print(f'Test path shape: {test_path.shape}')
print(f'Test target shape: {test_target.shape}')

In [None]:
# One tf.data.Dataset per split, each yielding (path prefix, target vector) pairs.
train_data, val_data, test_data = (
    tf.data.Dataset.from_tensor_slices(arrays)
    for arrays in (
        (train_path, train_target),
        (val_path, val_target),
        (test_path, test_target),
    )
)

In [None]:
def load_data(path, target):
    """Read one sample's red/green/blue PNG files and combine them into an image.

    Args:
        path: string tensor holding the sample's path prefix; the suffixes
            '_red.png', '_green.png' and '_blue.png' are appended to it.
        target: label tensor, passed through untouched.

    Returns:
        (img, target) where img stacks the three single-channel images along
        axis 2 in red, green, blue order.
    """
    def read_channel(suffix):
        # Decode as a single channel, then drop that channel axis so the
        # result is a 2-D (height, width) tensor.
        encoded = tf.io.read_file(path + suffix)
        decoded = tf.image.decode_png(encoded, channels=1)
        return tf.squeeze(decoded, [2])

    red = read_channel('_red.png')
    blue = read_channel('_blue.png')
    green = read_channel('_green.png')
    img = tf.stack((red, green, blue), axis=2)
    return img, target

# Let tf.data pick the level of parallelism for map/prefetch.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Replace every (path, target) dataset by its (image, target) counterpart.
# Note: this rebinds the same names, so the cell is not safe to run twice in a row.
train_data, val_data, test_data = (
    split_ds.map(load_data, num_parallel_calls=AUTOTUNE)
    for split_ds in (train_data, val_data, test_data)
)

In [None]:
def image_augment(img, target):
    """Randomly perturb one image; the label is returned unchanged.

    Applied in order: random contrast (factor between 0.3 and 1.2), random
    vertical flip, random brightness shift (max_delta 0.5).

    Args:
        img: image tensor to augment.
        target: label tensor, passed through untouched.

    Returns:
        (augmented image, target).
    """
    contrasted = tf.image.random_contrast(img, lower=0.3, upper=1.2)
    flipped = tf.image.random_flip_up_down(contrasted)
    augmented = tf.image.random_brightness(flipped, max_delta=0.5)
    return augmented, target
    
# Augmentation is applied to the training split only; val/test stay untouched.
train_data = train_data.map(
    map_func=image_augment,
    num_parallel_calls=AUTOTUNE,
)

In [None]:
# Batch every split and prefetch so input loading overlaps with model execution.
train_data_batches, val_data_batches, test_data_batches = (
    split_ds.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
    for split_ds in (train_data, val_data, test_data)
)

In [None]:
# Backbone: ImageNet-pretrained ResNet50V2 without its classification head.
resnet_model = tf.keras.applications.ResNet50V2(include_top=False, weights='imagenet')

# Fine-tune every backbone layer together with the new head.
resnet_model.trainable = True

input_layer = Input(shape=INPUT_SHAPE)
# The input pipeline feeds decoded PNG pixel values that are never rescaled,
# while the pretrained ResNetV2 weights expect inputs scaled to [-1, 1].
# Doing the scaling inside the model keeps train/val/test inputs consistent.
x = tf.keras.applications.resnet_v2.preprocess_input(input_layer)
x = resnet_model(x)
# NOTE(review): flattening the backbone's spatial feature map makes the next
# Dense layer very large; global average pooling may be a cheaper head — confirm.
x = Flatten()(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
# One independent sigmoid per class: a sample may carry several labels at once.
output = Dense(28, activation='sigmoid')(x)
model = Model(input_layer, output)

model.summary()

In [None]:
# The model ends in independent per-class sigmoids trained against multi-hot
# targets, so binary cross-entropy is the matching loss (MSLE is a regression
# loss). The pretrained backbone is fully trainable, so a small learning rate
# is used to avoid wiping out the ImageNet weights in the first updates.
model.compile(optimizer=Adam(1e-4), loss='binary_crossentropy', metrics=['AUC'])

In [None]:
# Train for 10 epochs of 100 batches each, evaluating on val_data_batches per epoch.
history = model.fit(
    train_data_batches,
    epochs=10,
    steps_per_epoch=100,
    validation_data=val_data_batches,
)

In [None]:
# test_data_batches is already batched, so no batch_size is passed here:
# Keras documents that batch_size must not be given for dataset inputs.
results = model.evaluate(test_data_batches)
# The compiled metric is AUC, not accuracy, so label the output accordingly.
print("test loss, test AUC:", results)
