In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import matplotlib
import cv2

from sklearn.model_selection import train_test_split

import tensorflow as tf
from keras.utils import to_categorical, Sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.optimizers import RMSprop,Adam

In [None]:
# Root directory of the competition data; train.csv is read from here.
path = '/kaggle/input/hpa-single-cell-image-classification/'
#os.listdir(path)

In [None]:
# Training manifest; the 'ID' and 'Label' columns are consumed below.
data = pd.read_csv(path+'train.csv')
# Green-channel image path for every training ID. Built from the same `path`
# root that train.csv is read from, so the image locations do not depend on
# the kernel's current working directory.
ids = [path + 'train/' + fname + '_green.png' for fname in data['ID']]
# Number of target classes: length of each multi-hot label vector and the
# width of the model's output layer.
num_classes = 19
def get_labels(labels, n_classes=None):
    """
    Convert '|'-separated class-id strings into multi-hot target vectors.

    Args:
        labels: iterable of strings such as '0|5|16', one per sample.
        n_classes: length of each output vector. Defaults to the module-level
            `num_classes` when omitted.

    Returns:
        A list with one float32 numpy array of shape (n_classes,) per input
        string; position i is 1.0 if class i is listed and 0.0 otherwise.

    Raises:
        ValueError: if a class id is negative or >= n_classes.
    """
    if n_classes is None:
        n_classes = num_classes
    new_labels = []
    for label in labels:
        class_ids = [int(token) for token in label.split('|')]
        # Reject out-of-range ids explicitly: a negative id would otherwise
        # wrap around silently and mark the wrong class.
        if any(cid < 0 or cid >= n_classes for cid in class_ids):
            raise ValueError(f'class id out of range [0, {n_classes}) in label {label!r}')
        multi_hot = np.zeros(n_classes, dtype=np.float32)
        # Assignment (not accumulation) keeps the target strictly binary even
        # when an id is listed more than once.
        multi_hot[class_ids] = 1.0
        new_labels.append(multi_hot)
    return new_labels
# Multi-hot encode the 'Label' column: one vector per row of train.csv.
labels = get_labels(data['Label'])
# How many leading samples to use; applied by slicing ids/labels before the split.
num_images = 1000

In [None]:
# Show the raw '|'-separated label strings before encoding.
data['Label']

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the first `num_images` samples for validation.
# random_state is fixed so the split (and therefore the reported validation
# metrics) is identical on every Restart & Run All.
X_train, X_valid, y_train, y_valid = train_test_split(ids[:num_images],labels[:num_images],
                                                      test_size = 0.2,
                                                      random_state = 42)

In [None]:
# Peek at the first two training image paths and their label vectors.
X_train[:2],y_train[:2]

In [None]:
# Side length (in pixels) that every image is resized to.
IMG_SIZE = 64

def process_image(image_path, img_size = IMG_SIZE):
  """
  Load a PNG image from disk and return it as a float32 Tensor.

  Args:
    image_path: path to the image file (the pipeline feeds '*_green.png' files).
    img_size: side length of the square output image; defaults to IMG_SIZE.

  Returns:
    A float32 Tensor of shape (img_size, img_size, 3).
  """
  # Read the raw file bytes
  raw_bytes = tf.io.read_file(image_path)
  # Decode the PNG into a numerical Tensor, forcing 3 channels so the result
  # matches the 3-channel input the model is built for
  image = tf.image.decode_png(raw_bytes, channels = 3)
  # Convert the integer pixel values to float32 scaled into 0-1
  image = tf.image.convert_image_dtype(image, tf.float32)
  # Resize to (img_size, img_size)
  image = tf.image.resize(image,size = [img_size, img_size])
  return image

In [None]:
def get_image_label(image_path, label):
  """
  Pair a processed image with its label.

  Loads the image at `image_path` through `process_image` and returns the
  tuple (image, label); the label is passed through untouched.
  """
  return process_image(image_path), label

In [None]:
# Default number of samples per batch.
BATCH_SIZE = 64

def create_data_batches(X , y = None, batch_size = BATCH_SIZE, valid_data = False, test_data = False):
  """
  Build a batched tf.data pipeline from image paths (X) and optional labels (y).

  Three modes, selected by the flags:
    * test_data=True  -> images only (y is ignored), no shuffling.
    * valid_data=True -> (image, label) pairs, no shuffling.
    * neither         -> (image, label) pairs, shuffled over the whole set.
  `test_data` takes precedence when both flags are set.
  """
  # Unlabelled data: map paths straight to images.
  if test_data:
    print('Creating test data batches........')
    paths_only = tf.data.Dataset.from_tensor_slices((tf.constant(X)))
    return paths_only.map(process_image).batch(batch_size)

  # Labelled data: both remaining modes share the same pipeline except that
  # only the training mode shuffles.
  if valid_data:
    print('Creating valid data batches...........')
  else:
    print('Creating training data batches...............')

  pairs = tf.data.Dataset.from_tensor_slices((tf.constant(X),tf.constant(y)))
  if not valid_data:
    # Shuffle the (path, label) pairs before decoding the images.
    pairs = pairs.shuffle(buffer_size = len(X))
  return pairs.map(get_image_label).batch(batch_size)

In [None]:
# Training batches are shuffled (default mode); validation batches keep their order.
train_data = create_data_batches(X_train,y_train)
valid_data = create_data_batches(X_valid,y_valid, valid_data=True)

In [None]:
# Model input geometry. The side length is taken from IMG_SIZE (the size the
# image pipeline resizes to) so the two cannot drift apart.
img_size = IMG_SIZE
# Images are decoded with 3 colour channels.
img_channel = 3

In [None]:
# Multi-label AUC, reported under the name 'auc' (and 'val_auc' on validation data).
metrics = [tf.keras.metrics.AUC(name='auc', multi_label=True)]
# Optimizer step size, passed to Adam when the model is compiled.
learning_rate = 1e-3

In [None]:
from keras.layers import Conv2D,Dropout
from keras.layers import MaxPool2D

In [None]:
# Small CNN for multi-label classification: one convolution block followed by
# a dense head. The final layer has one sigmoid unit per class so each class
# gets an independent probability.
# All layers come from the same `keras.layers` import as Sequential/Dense
# (rather than mixing in `tf.keras.layers`), and the stray trailing commas that
# turned three statements into throwaway tuples are gone.
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', padding='same',
           input_shape=(img_size, img_size, img_channel)),
    MaxPool2D(2, 2),
    Dropout(0.25),
    Flatten(),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),
    Dense(num_classes, activation='sigmoid'),
])

In [None]:
# Inspect the model's output tensor.
model.output

In [None]:
# Binary cross-entropy treats each of the sigmoid outputs as an independent
# yes/no decision, which matches the multi-hot targets.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by current Keras optimizers.
model.compile(optimizer=Adam(learning_rate=learning_rate), loss="binary_crossentropy", metrics=metrics)

In [None]:
# Stop training once validation AUC has not improved for 3 epochs.
# The monitored name must be one the model actually reports: the only compiled
# metric is named 'auc', so its validation counterpart is 'val_auc' (there is
# no 'val_accuracy'). AUC is better when higher, hence mode='max'.
# The callback only takes effect when passed to model.fit(callbacks=[...]).
early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_auc', mode = 'max', patience=3)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Train for a single epoch on the training batches, evaluating on the
# validation batches.
model.fit(
    train_data,
    epochs=1,
    validation_data=valid_data,
)

In [None]:
def save_model(model,suffix = 'None'):
  """
  Save `model` into the current directory and return the path it was written to.

  The file name is '-' + suffix + '.h5', so suffix 'model-1' produces
  './-model-1.h5'. The path is printed before saving.
  """
  modeldir = './'
  # Assemble directory, separator, suffix and extension in one go.
  model_path = ''.join([modeldir, '-', suffix, '.h5'])
  print(f'Saving model to: {model_path}')
  model.save(model_path)
  return model_path

In [None]:
# Save the trained model; with this suffix the file is './-model-1.h5'.
save_model(model,suffix = 'model-1')

In [None]:
# Reload the model from disk. This path must match the one save_model wrote
# for suffix 'model-1'.
model_path = './-model-1.h5'
loaded_model = tf.keras.models.load_model(model_path)

In [None]:
# Collect the green-channel test images as FULL paths. The test pipeline hands
# each entry straight to tf.io.read_file, so bare file names would not resolve.
test_dir = os.path.join(path, 'test')
# Sorted so the order of test images (and of the prediction rows) is the same
# on every run; os.listdir order is arbitrary.
id_names = sorted(os.listdir(test_dir))
test_ids = [
    os.path.join(test_dir, each_id)
    for each_id in id_names
    if each_id.endswith('_green.png')  # suffix match, not substring match
]

In [None]:
# Display the collected test image entries.
test_ids

In [None]:
# Build the unlabelled test pipeline: images only, batched, not shuffled.
test_data = create_data_batches(test_ids, test_data= True)

In [None]:
# Show the dataset object.
test_data

In [None]:
# Run inference with the reloaded model. The predictions are only displayed as
# the cell output; they are not stored in a variable.
loaded_model.predict(test_data)