In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from os import listdir
from numpy import zeros
from numpy import asarray
from numpy import savez_compressed
from pandas import read_csv
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
df = pd.read_csv('emotics_annotations_clean_v4.csv')
df.head()

FileNotFoundError: ignored

In [None]:
# create a mapping of tags to integers given the loaded mapping file
def create_tag_mapping(mapping_csv):
	# create a set of all known tags
	labels = set()
	for i in range(len(mapping_csv)):
		# convert spaced separated tags into an array of tags
		tags = mapping_csv['categories'][i].split(',')
		# add tags to the set of known labels
		labels.update(tags)
	# convert set of labels to a list to list
	labels = list(labels)
	# order set alphabetically
	labels.sort()
	# dict that maps labels to integers, and the reverse
	labels_map = {labels[i]:i for i in range(len(labels))}
	inv_labels_map = {i:labels[i] for i in range(len(labels))}
	return labels_map, inv_labels_map

In [None]:
mapping, inv_mapping = create_tag_mapping(df)
print(len(mapping))
print(mapping)

In [None]:
# create a mapping of filename to tags
def create_file_mapping(mapping_csv):
	mapping = dict()
	for i in range(len(mapping_csv)):
		name, tags = mapping_csv['filename'][i], mapping_csv['categories'][i]
		mapping[name] = tags.split(',')
	return mapping

In [None]:
# create a one hot encoding for one list of tags
def one_hot_encode(tags, mapping):
	# create empty vector
	encoding = zeros(len(mapping), dtype='uint8')
	# mark 1 for each tag in the vector
	for tag in tags:
		encoding[mapping[tag]] = 1
	return encoding

In [2]:
# load all images into memory
def load_dataset(path, file_mapping, tag_mapping):
    photos, targets = list(), list()
    # enumerate files in the directory
    for f in listdir(path):
        f = path + f + '/images/'
        for filename in listdir(f):
            # load image
            photo = load_img(f + filename, target_size=(128,128))
            # convert to numpy array
            photo = img_to_array(photo, dtype='uint8')
            # get tags
            try:
                tags = file_mapping[filename]
            except:
                print(filename)
            # one hot encode tags
            target = one_hot_encode(tags, tag_mapping)
            # store
            photos.append(photo)
            targets.append(target)
    X = asarray(photos, dtype='uint8')
    y = asarray(targets, dtype='uint8')
    return X, y

In [None]:
# load the mapping file
filename = 'emotics_annotations_clean_v4.csv'
mapping_csv = pd.read_csv(filename)
# create a mapping of tags to integers
tag_mapping, _ = create_tag_mapping(mapping_csv)
# create a mapping of filenames to tag lists
file_mapping = create_file_mapping(mapping_csv)
# load the jpeg images
folder = 'raw_data/EMOTIC_DATASET/emotic/'
X, y = load_dataset(folder, file_mapping, tag_mapping)
print(X.shape, y.shape)
# save both arrays to one file in compressed format
savez_compressed('planet_data.npz', X, y)

In [None]:
# load prepared planet dataset
from numpy import load
data = load('drive/MyDrive/planet_data.npz')
X, y = data['arr_0'], data['arr_1']
print('Loaded: ', X.shape, y.shape)

Loaded:  (7749, 128, 128, 3) (7749, 26)


In [3]:
# load train and test dataset
def load_dataset():
	# load dataset
	data = load('drive/MyDrive/planet_data.npz')
	X, y = data['arr_0'], data['arr_1']
	# separate into train and test datasets
	trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.3, random_state=1)
	print(trainX.shape, trainY.shape, testX.shape, testY.shape)
	return trainX, trainY, testX, testY

In [4]:
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)

In [3]:
# test f-beta score
from numpy import load
from numpy import ones
from numpy import asarray
from sklearn.model_selection import train_test_split
from sklearn.metrics import fbeta_score

# load train and test dataset
def load_dataset():
	# load dataset
	data = load('drive/MyDrive/planet_data.npz')
	X, y = data['arr_0'], data['arr_1']
	# separate into train and test datasets
	trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.3, random_state=1)
	print(trainX.shape, trainY.shape, testX.shape, testY.shape)
	return trainX, trainY, testX, testY

# load dataset
trainX, trainY, testX, testY = load_dataset()
# make all one predictions
train_yhat = asarray([ones(trainY.shape[1]) for _ in range(trainY.shape[0])])
test_yhat = asarray([ones(testY.shape[1]) for _ in range(testY.shape[0])])
# evaluate predictions
train_score = fbeta_score(trainY, train_yhat, 2, average='samples')
test_score = fbeta_score(testY, test_yhat, 2, average='samples')
print('All Ones: train=%.3f, test=%.3f' % (train_score, test_score))

(5424, 128, 128, 3) (5424, 26) (2325, 128, 128, 3) (2325, 26)
All Ones: train=0.281, test=0.280


In [4]:
from keras import backend

# calculate fbeta score for multi-class/label classification
def fbeta(y_true, y_pred, beta=2):
	# clip predictions
	y_pred = backend.clip(y_pred, 0, 1)
	# calculate elements
	tp = backend.sum(backend.round(backend.clip(y_true * y_pred, 0, 1)), axis=1)
	fp = backend.sum(backend.round(backend.clip(y_pred - y_true, 0, 1)), axis=1)
	fn = backend.sum(backend.round(backend.clip(y_true - y_pred, 0, 1)), axis=1)
	# calculate precision
	p = tp / (tp + fp + backend.epsilon())
	# calculate recall
	r = tp / (tp + fn + backend.epsilon())
	# calculate fbeta, averaged across each class
	bb = beta ** 2
	fbeta_score = backend.mean((1 + bb) * (p * r) / (bb * p + r + backend.epsilon()))
	return fbeta_score

In [5]:
# load dataset
trainX, trainY, testX, testY = load_dataset()
# make all one predictions
train_yhat = asarray([ones(trainY.shape[1]) for _ in range(trainY.shape[0])])
test_yhat = asarray([ones(testY.shape[1]) for _ in range(testY.shape[0])])
# evaluate predictions with sklearn
train_score = fbeta_score(trainY, train_yhat, 2, average='samples')
test_score = fbeta_score(testY, test_yhat, 2, average='samples')
print('All Ones (sklearn): train=%.3f, test=%.3f' % (train_score, test_score))
# evaluate predictions with keras
train_score = fbeta(backend.variable(trainY), backend.variable(train_yhat))
test_score = fbeta(backend.variable(testY), backend.variable(test_yhat))
print('All Ones (keras): train=%.3f, test=%.3f' % (train_score, test_score))

(5424, 128, 128, 3) (5424, 26) (2325, 128, 128, 3) (2325, 26)
All Ones (sklearn): train=0.281, test=0.280
All Ones (keras): train=0.281, test=0.280


In [6]:
# baseline model for the planet dataset
import sys
from numpy import load
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from keras import backend
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.optimizers import SGD

In [None]:
# define cnn model
def define_model(in_shape=(128, 128, 3), out_shape=26):
  model = Sequential()
  model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=in_shape))
  model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(MaxPooling2D((2, 2)))
  model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(MaxPooling2D((2, 2)))
  model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(MaxPooling2D((2, 2)))
  model.add(Flatten())
  model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
  model.add(Dense(out_shape, activation='sigmoid'))
  # compile model
  opt = SGD(lr=0.01, momentum=0.9)
  model.compile(optimizer="adam", loss='binary_crossentropy', metrics=[fbeta])
  return model

In [7]:
# plot diagnostic learning curves
def summarize_diagnostics(history, name=""):
	# plot loss
	pyplot.subplot(211)
	pyplot.title('Cross Entropy Loss')
	pyplot.plot(history.history['loss'], color='blue', label='train')
	pyplot.plot(history.history['val_loss'], color='orange', label='test')
	# plot accuracy
	pyplot.subplot(212)
	pyplot.title('Fbeta')
	pyplot.plot(history.history['fbeta'], color='blue', label='train')
	pyplot.plot(history.history['val_fbeta'], color='orange', label='test')
	# save plot to file
	filename = sys.argv[0].split('/')[-1]
	pyplot.savefig(filename + '_' + name + '_plot.png')
	pyplot.close()

In [None]:
# run the test harness for evaluating a model
def run_test_harness():
  # load dataset
  trainX, trainY, testX, testY = load_dataset()
  # create data generator
  datagen = ImageDataGenerator(rescale=1.0/255.0,
                              featurewise_center=True,
                              featurewise_std_normalization=True,
                              rotation_range=20,
                              width_shift_range=0.2,
                              height_shift_range=0.2,
                              horizontal_flip=True,
                              validation_split=0.2)
  # prepare iterators
  train_it = datagen.flow(trainX, trainY, batch_size=128)
  test_it = datagen.flow(testX, testY, batch_size=128)
  # define model
  model = define_model()
  # fit model
  history = model.fit(train_it, steps_per_epoch=len(train_it),
    validation_data=test_it, validation_steps=len(test_it), epochs=20, batch_size=64)
  # evaluate model
  loss, fbeta = model.evaluate(test_it, steps=len(test_it), verbose=0)
  print('> loss=%.3f, fbeta=%.3f' % (loss, fbeta))
  # learning curves
  summarize_diagnostics(history)
  return model

In [None]:
model = run_test_harness()

KeyboardInterrupt: ignored

In [None]:
model.save("./score_3135.h5")

In [None]:
!zip -r score_3135.zip score_3135

  adding: score_3135/ (stored 0%)
  adding: score_3135/assets/ (stored 0%)
  adding: score_3135/keras_metadata.pb (deflated 93%)
  adding: score_3135/saved_model.pb (deflated 90%)
  adding: score_3135/variables/ (stored 0%)
  adding: score_3135/variables/variables.data-00000-of-00001 (deflated 25%)
  adding: score_3135/variables/variables.index (deflated 70%)


In [8]:
# define cnn model
def define_model(in_shape=(128, 128, 3), out_shape=26):
  model = Sequential()
  model.add(Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=in_shape))
  model.add(Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(MaxPooling2D((2, 2)))
  model.add(Dropout(0.2))
  model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(MaxPooling2D((2, 2)))
  model.add(Dropout(0.2))
  model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(MaxPooling2D((2, 2)))
  model.add(Dropout(0.2))
  model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
  model.add(MaxPooling2D((2, 2)))
  model.add(Dropout(0.2))
  model.add(Flatten())
  model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
  model.add(Dense(out_shape, activation='sigmoid'))
  # compile model
  opt = SGD(learning_rate=0.03, momentum=0.9)
  model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[fbeta])
  return model

In [16]:
# run the test harness for evaluating a model
def run_test_harness():
  # load dataset
  trainX, trainY, testX, testY = load_dataset()
  # create data generator
  datagen = ImageDataGenerator(rescale=1.0/255.0,
                              featurewise_center=True,
                              featurewise_std_normalization=True,
                              rotation_range=20,
                              width_shift_range=0.2,
                              height_shift_range=0.2,
                              horizontal_flip=True,
                              validation_split=0.2)
  # prepare iterators
  train_it = datagen.flow(trainX, trainY, batch_size=128)
  test_it = datagen.flow(testX, testY, batch_size=128)
  # define model
  model = define_model()
  # fit model
  history = model.fit(train_it, steps_per_epoch=len(train_it),
    validation_data=test_it, validation_steps=len(test_it), epochs=20,
    batch_size=64)
  # evaluate model
  loss, fbeta = model.evaluate(test_it, steps=len(test_it), verbose=0)
  print('> loss=%.3f, fbeta=%.3f' % (loss, fbeta))
  # learning curves
  summarize_diagnostics(history, str(int(fbeta*1000)))
  return model

In [17]:
model = run_test_harness()

(5424, 128, 128, 3) (5424, 26) (2325, 128, 128, 3) (2325, 26)




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
> loss=0.259, fbeta=0.010


In [18]:
model.save("./score_0087.h5")
model.save("./score_0087")

INFO:tensorflow:Assets written to: ./score_0087/assets


In [19]:
!zip -r score_0087.zip score_0087

  adding: score_0087/ (stored 0%)
  adding: score_0087/assets/ (stored 0%)
  adding: score_0087/keras_metadata.pb (deflated 94%)
  adding: score_0087/saved_model.pb (deflated 90%)
  adding: score_0087/variables/ (stored 0%)
  adding: score_0087/variables/variables.data-00000-of-00001 (deflated 20%)
  adding: score_0087/variables/variables.index (deflated 71%)


In [24]:
trainX.shape

(5424, 128, 128, 3)

In [9]:
# run the test harness for evaluating a model
def run_test_harness():
  # load dataset
  trainX, trainY, testX, testY = load_dataset()
  # create data generator
  datagen = ImageDataGenerator(rescale=1.0/255.0,
                              featurewise_center=True,
                              featurewise_std_normalization=True,
                              rotation_range=20,
                              width_shift_range=0.2,
                              height_shift_range=0.2,
                              horizontal_flip=True)
  # prepare iterators
  train_it = datagen.flow(trainX, trainY, batch_size=32)
  test_it = datagen.flow(testX, testY, batch_size=32)
  # define model
  model = define_model()
  # fit model
  history = model.fit(train_it, steps_per_epoch=165,
    validation_data=test_it, epochs=80,
    batch_size=32,shuffle=True)
  # evaluate model
  loss, fbeta = model.evaluate(test_it, steps=len(test_it), verbose=0)
  print('> loss=%.3f, fbeta=%.3f' % (loss, fbeta))
  # learning curves
  summarize_diagnostics(history, str(int(fbeta*1000)))
  return model

In [10]:
model = run_test_harness()

(5424, 128, 128, 3) (5424, 26) (2325, 128, 128, 3) (2325, 26)




Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
> loss=0.215, fbeta=0.325


In [11]:
model.save("./score_3249.h5")
model.save("./score_3249")

INFO:tensorflow:Assets written to: ./score_3249/assets


In [12]:
!zip -r score_3249.zip score_3249

  adding: score_3249/ (stored 0%)
  adding: score_3249/assets/ (stored 0%)
  adding: score_3249/keras_metadata.pb (deflated 94%)
  adding: score_3249/saved_model.pb (deflated 89%)
  adding: score_3249/variables/ (stored 0%)
  adding: score_3249/variables/variables.data-00000-of-00001 (deflated 18%)
  adding: score_3249/variables/variables.index (deflated 71%)
