In [2]:
import os
# from os import environ
# environ["KERAS_BACKEND"] = "plaidml.keras.backend"
# import tifffile as tiff
# import re
from sklearn.utils import shuffle
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.utils import to_categorical
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, BatchNormalization, Dropout, Flatten
from keras.optimizers import Adam
# from tensorflowkeras.optimizers.schedules import ExponentialDecay
from keras.callbacks import EarlyStopping
from keras.callbacks import LearningRateScheduler

In [3]:
#input training labels (file and label)
train = pd.read_csv('train_labels.csv')
#putting labels as string for image generator
train['label'] = train['label'].astype(str)
#changing labels of files to match filesystem in google colab
train['id'] += '.tif'
# train['id'] = train['id'].apply(lambda x: 'train/' + x)
#creating random state to apply shuffling to data
train = shuffle(train, random_state = 42)


#split this into training and validation for model
train, valid = train_test_split(train, test_size = 0.25)

#sample_submission
sample = pd.read_csv('sample_submission.csv')

#creating test data frame to import data, creates id col with name of file
test = pd.DataFrame({'id':os.listdir('test')})

#setting batch size for entire project
batch_size = 512


In [4]:
#creating method to generate training and validation images
#creates validation set of 0.2 size of training data and scales pixels
generator_data = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.2
)


#getting image generator based upon labels in train df
train_generator = generator_data.flow_from_dataframe(
    dataframe = train,
    directory = 'train',
    x_col = 'id',
    y_col = 'label',
    shuffle = True,
    featurewise_std_normalization = True,
    class_mode = 'binary',
    batch_size = batch_size,
    random_state = 42,
    horizontal_flip = True,
    target_size = (96,96)
)

#getting image generator based upon lables in valid df
valid_generator = generator_data.flow_from_dataframe(
    dataframe = valid,
    directory = 'train',
    x_col = 'id',
    y_col = 'label', 
    shuffle = True,
    featurewise_std_normalization = True,
    class_mode = 'binary',
    batch_size = batch_size,
    random_state = 42,
    horizontal_flip = True,
    target_size = (96,96)
)

Found 165018 validated image filenames belonging to 2 classes.
Found 55007 validated image filenames belonging to 2 classes.


In [5]:
#beginning model

#building model with layers
model = Sequential([
    Conv2D(16, kernel_size = 3, activation = 'relu', input_shape = (96,96,3), strides = (2,2)),
    BatchNormalization(),
    Conv2D(16, kernel_size = 3, activation = 'relu', strides = (2,2)),
    BatchNormalization(),
    MaxPooling2D(pool_size = (2,2)),
    Conv2D(64, kernel_size = 3, activation = 'relu', strides = (2,2)),
    BatchNormalization(),
    Conv2D(64, kernel_size = 3, activation = 'relu', strides = (2,2)),
    BatchNormalization(),
    MaxPooling2D(pool_size = (2,2)),
    Flatten(),
    Dense(8, activation = 'relu'),
    Dense(1, activation = 'sigmoid')
]);

#sgd optimizer
from keras.optimizers import SGD
opt = SGD(learning_rate = 0.01,
  nesterov = True,
  momentum = 0.99
)

#learning rate scheduler for improved speed
def scheduler(epoch, lr):
    if epoch < 3:
        return lr
    else:
        return lr * tf.math.exp(-0.1)

#callback to run learning rate scheduler
callback = LearningRateScheduler(scheduler)

#model compiler
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

#fitting the model
model.fit(
    train_generator,
    validation_data = valid_generator,
    batch_size = batch_size,
    epochs = 10,
    callbacks = [callback],
)

Metal device set to: Apple M2


2023-02-13 13:57:03.957646: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-02-13 13:57:03.957665: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/10


2023-02-13 13:57:04.339657: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-13 13:57:04.578870: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-02-13 13:58:00.492976: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x28cfb7190>

In [None]:
data_generator_test = ImageDataGenerator(
    rescale = 1./255.,
)

#creating sequence of image generator
test_generator = data_generator_test.flow_from_dataframe(
    dataframe = test,
    directory ='test',
    x_col = 'id', 
    y_col = None,
    target_size = (96,96),         
    batch_size = 1,
    shuffle = False,
    class_mode = None
)


#prediction
predict = model.predict(
    x = test_generator
)

Found 57458 validated image filenames.
   54/57458 [..............................] - ETA: 1:49 

2023-02-13 14:08:58.143171: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




In [None]:
print(predict)
print(sample)
print(predict.shape)

In [None]:
# predict = np.transpose(predict)[0]
submission_df = pd.DataFrame()
submission_df['id'] = test['id'].apply(lambda x: x.split('.')[0])
submission_df['label'] = list(map(lambda x: 0 if x < 0.5 else 1, predict))
submission_df.head()