In [6]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import time
import cv2
from sklearn.metrics import confusion_matrix, classification_report, f1_score
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Activation, Dropout, Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB3
from tqdm import tqdm
from google.colab import drive

# Mount Google Drive into the Colab runtime so the dataset folder below is reachable.
drive.mount('/content/drive')
# Dataset root handed to make_dataframes(); each sub-directory is treated as one class.
base_dir = '/content/drive/MyDrive/data'
# Passed as target_size to every generator and unpacked into the model's input_shape.
# NOTE(review): Keras reads these tuples as (height, width) - confirm 240x310 is the intended orientation.
img_size = (240, 310)
# Batch size shared by the train, validation and test generators.
batch_size = 20

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
def print_in_color(txt_msg, fore_tupple=(0,255,255), back_tupple=(100,100,100)):
    """Print ``txt_msg`` using 24-bit ANSI foreground/background colours.

    Args:
        txt_msg: text to print (must be a ``str``).
        fore_tupple: ``(r, g, b)`` foreground colour.
        back_tupple: ``(r, g, b)`` background colour.

    The message is followed by a newline, then the ANSI reset sequence is
    printed on its own line so later output returns to the default colours.
    """
    rf, gf, bf = fore_tupple
    rb, gb, bb = back_tupple
    # ESC[38;2;r;g;b selects the foreground colour, 48;2;r;g;b the background.
    mat = f'\33[38;2;{rf};{gf};{bf};48;2;{rb};{gb};{bb}m'
    # Plain concatenation instead of str.format(): the message is arbitrary text and
    # may legitimately contain '{' or '}', which format() would reject or rewrite.
    print(mat + txt_msg, flush=True)
    print('\33[0m', flush=True)

In [8]:
def make_dataframes(sdir):
    """Build stratified train/test/validation dataframes from a class-per-folder tree.

    Every sub-directory of ``sdir`` is treated as one class (the folder name is the
    label) and every regular file directly inside it as one sample.

    Args:
        sdir: path of the dataset root directory.

    Returns:
        ``(train_df, test_df, valid_df)`` - note the order. Each frame has the columns
        ``'filepaths'`` and ``'labels'``. 70% of the rows go to training; the remaining
        30% are split evenly between validation and test. Both splits are stratified
        on the label and use ``random_state=123``.

    Raises:
        ValueError: if no files are found under ``sdir``.
    """
    filepaths = []
    labels = []
    for klass in sorted(os.listdir(sdir)):
        classpath = os.path.join(sdir, klass)
        if not os.path.isdir(classpath):
            continue
        # Keep regular files only: nested folders (e.g. '.ipynb_checkpoints' created by
        # Colab/Jupyter) are not images and would otherwise be recorded as samples.
        with os.scandir(classpath) as entries:
            fnames = sorted(entry.name for entry in entries if entry.is_file())
        for f in tqdm(fnames, ncols=130, desc=f'{klass:25s}', unit='files', colour='blue'):
            filepaths.append(os.path.join(classpath, f))
            labels.append(klass)
    df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
    if df.empty:
        raise ValueError(f'No files found in the class folders under {sdir}')
    train_df, dummy_df = train_test_split(df, train_size=.7, shuffle=True, random_state=123, stratify=df['labels'])
    valid_df, test_df = train_test_split(dummy_df, train_size=.5, shuffle=True, random_state=123, stratify=dummy_df['labels'])
    print('train_df length: ', len(train_df), '  test_df length: ', len(test_df), '  valid_df length: ', len(valid_df))
    return train_df, test_df, valid_df

In [9]:
train_df, test_df, valid_df = make_dataframes(base_dir)

# No `rescale=1./255` here: the Keras EfficientNet models normalise their input
# internally and expect float pixel values in the [0, 255] range. Rescaling in the
# generator as well would feed the network inputs ~255x smaller than it was built for.
train_datagen = ImageDataGenerator()
valid_test_datagen = ImageDataGenerator()

# Options shared by all three generators; only the dataframe and `shuffle` differ.
flow_kwargs = dict(
    x_col='filepaths', y_col='labels', target_size=img_size,
    class_mode='categorical', color_mode='rgb', batch_size=batch_size
)

# Training batches are reshuffled; validation/test keep dataframe order so that
# predictions line up with `generator.labels`.
train_gen = train_datagen.flow_from_dataframe(train_df, shuffle=True, **flow_kwargs)

valid_gen = valid_test_datagen.flow_from_dataframe(valid_df, shuffle=False, **flow_kwargs)

test_gen = valid_test_datagen.flow_from_dataframe(test_df, shuffle=False, **flow_kwargs)

drive                    : 100%|[34m███████████████████████████████████████████████████████[0m| 1224/1224 [00:00<00:00, 687351.47files/s][0m
legglance-flick          : 100%|[34m███████████████████████████████████████████████████████[0m| 1120/1120 [00:00<00:00, 531104.63files/s][0m
pullshot                 : 100%|[34m███████████████████████████████████████████████████████[0m| 1260/1260 [00:00<00:00, 662590.65files/s][0m
sweep                    : 100%|[34m███████████████████████████████████████████████████████[0m| 1174/1174 [00:00<00:00, 646803.22files/s][0m


train_df length:  3344   test_df length:  717   valid_df length:  717
Found 3343 validated image filenames belonging to 4 classes.
Found 717 validated image filenames belonging to 4 classes.




Found 717 validated image filenames belonging to 4 classes.


In [10]:
def make_model(img_size, num_classes, lr=0.001):
    """Create and compile an EfficientNetB3-based classifier.

    Args:
        img_size: 2-tuple unpacked into the backbone's ``input_shape`` ahead of the
            3 colour channels.
        num_classes: number of units in the softmax output layer.
        lr: learning rate handed to the Adam optimizer.

    Returns:
        A compiled ``Model`` (categorical cross-entropy loss, accuracy metric).
    """
    # ImageNet-pretrained backbone without its classification top; pooling='max'
    # is requested so the backbone output can feed the dense head directly.
    backbone = EfficientNetB3(
        include_top=False,
        weights="imagenet",
        input_shape=(*img_size, 3),
        pooling='max',
    )
    backbone.trainable = True  # fine-tune every backbone layer

    # Classification head: batch-norm -> regularised dense -> dropout -> softmax.
    normalised = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(backbone.output)
    hidden_layer = Dense(
        256,
        kernel_regularizer=keras.regularizers.l2(0.016),
        activity_regularizer=keras.regularizers.l1(0.006),
        bias_regularizer=keras.regularizers.l1(0.006),
        activation='relu',
    )
    hidden = hidden_layer(normalised)
    regularised = Dropout(rate=.4, seed=123)(hidden)
    probabilities = Dense(num_classes, activation='softmax')(regularised)

    classifier = Model(inputs=backbone.input, outputs=probabilities)
    classifier.compile(
        Adam(learning_rate=lr),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [11]:
class LR_ASK(keras.callbacks.Callback):
    """Track the best validation loss, optionally "dwell", and optionally ask to continue.

    After every epoch the callback compares ``val_loss`` with the lowest value seen so
    far. On improvement it snapshots the model weights. Otherwise, when ``dwell`` is
    true, it multiplies the learning rate by ``factor`` and rolls the model back to the
    best snapshot. When the epoch number reaches ``ask_epoch`` the user is prompted to
    halt training or to run more epochs before being asked again. At the end of
    training the best weights are loaded back into the model.

    Args:
        model: the model being trained.
        epochs: total number of epochs requested (stored, not otherwise used here).
        ask_epoch: 1-based epoch after which the user is prompted.
        dwell: whether to reduce the learning rate and restore the best weights on a
            non-improving epoch.
        factor: multiplier applied to the learning rate when dwelling.
    """

    def __init__(self, model, epochs, ask_epoch, dwell=True, factor=.4):
        super(LR_ASK, self).__init__()
        # Use set_model() rather than `self.model = model`: on Keras versions where
        # Callback.model is a read-only property the direct assignment raises
        # AttributeError, while set_model() works on old and new versions alike.
        self.set_model(model)
        self.ask_epoch = ask_epoch
        self.epochs = epochs
        self.ask = True
        self.dwell = dwell
        self.factor = factor
        self.lowest_vloss = np.inf
        self.best_weights = self.model.get_weights()
        self.best_epoch = 1

    def _current_lr(self):
        """Return the optimizer's learning rate as a plain Python float."""
        # `learning_rate` is the supported attribute name; `lr` is only a legacy alias.
        lr = self.model.optimizer.learning_rate
        return float(lr.numpy()) if hasattr(lr, 'numpy') else float(lr)

    def on_epoch_end(self, epoch, logs=None):
        """Update the best-weights snapshot / learning rate and prompt when due."""
        # `logs` may be None and `val_loss` is absent when no validation data is used.
        vloss = (logs or {}).get('val_loss')
        if vloss is None:
            print_in_color('\nval_loss is not available, best-weight tracking skipped for this epoch', (255,255,0))
        elif vloss < self.lowest_vloss:
            self.lowest_vloss = vloss
            self.best_weights = self.model.get_weights()
            self.best_epoch = epoch + 1
            print_in_color(f'\nValidation loss of {vloss:.4f} is below lowest loss, saving weights from epoch {self.best_epoch}', (0,255,0))
        else:
            print_in_color(f'\nValidation loss of {vloss:.4f} is above lowest loss of {self.lowest_vloss:.4f}, keeping weights from epoch {self.best_epoch}', (255,255,0))
            if self.dwell:
                lr = self._current_lr()
                new_lr = lr * self.factor
                print_in_color(f'Learning rate adjusted from {lr:.6f} to {new_lr:.6f}, model weights set to best weights', (0,255,255))
                # Assigning to `learning_rate` updates the optimizer's variable in place.
                self.model.optimizer.learning_rate = new_lr
                self.model.set_weights(self.best_weights)

        if self.ask and epoch + 1 == self.ask_epoch:
            print_in_color('\nEnter H to end training or an integer for the number of additional epochs to run then ask again')
            # Re-prompt on invalid input instead of letting int() abort the training run.
            while True:
                ans = input().strip()
                if ans.lower() == 'h':
                    self.model.stop_training = True
                    break
                try:
                    self.ask_epoch += int(ans)
                    break
                except ValueError:
                    print_in_color('Invalid entry, enter H or an integer', (255,0,0))

    def on_train_end(self, logs=None):
        """Leave the model holding the weights of the best epoch."""
        self.model.set_weights(self.best_weights)

In [None]:
# Training schedule. ask_epoch is deliberately larger than epochs so that LR_ASK
# never pauses for user input during this run.
epochs = 20
ask_epoch = 30

# One softmax unit per class discovered by the training generator.
num_classes = len(train_gen.class_indices)
model = make_model(img_size, num_classes)

ask = LR_ASK(model, epochs, ask_epoch)
callbacks = [ask]

history = model.fit(
    train_gen,
    validation_data=valid_gen,
    validation_steps=None,
    epochs=epochs,
    initial_epoch=0,
    callbacks=callbacks,
    shuffle=False,
    verbose=1,
)

Epoch 1/20

In [None]:
def tr_plot(tr_data, start_epoch):
    """Draw training/validation loss and accuracy curves in two side-by-side panels.

    Args:
        tr_data: object whose ``history`` attribute is a mapping holding the
            per-epoch lists 'accuracy', 'loss', 'val_accuracy' and 'val_loss'.
        start_epoch: x-axis value given to the first recorded epoch.
    """
    hist = tr_data.history
    train_acc, train_loss = hist['accuracy'], hist['loss']
    val_acc, val_loss = hist['val_accuracy'], hist['val_loss']
    x_vals = list(range(start_epoch, start_epoch + len(train_acc)))

    _fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(20,8))

    # (axis, training curve, validation curve, training label, validation label, title, y label)
    panels = (
        (loss_ax, train_loss, val_loss,
         'Training loss', 'Validation loss', 'Training and Validation Loss', 'Loss'),
        (acc_ax, train_acc, val_acc,
         'Training Accuracy', 'Validation Accuracy', 'Training and Validation Accuracy', 'Accuracy'),
    )
    for axis, train_curve, val_curve, train_label, val_label, title, y_label in panels:
        axis.plot(x_vals, train_curve, 'r', label=train_label)
        axis.plot(x_vals, val_curve, 'g', label=val_label)
        axis.set_title(title)
        axis.set_xlabel('Epochs')
        axis.set_ylabel(y_label)
        axis.legend()

    plt.tight_layout()
    plt.show()

# Plot the curves recorded by model.fit(), numbering the x-axis from epoch 0.
tr_plot(history, 0)

In [None]:
def predictor(test_gen):
    """Evaluate the global ``model`` on ``test_gen`` and report the results.

    Prints the error count and accuracy, shows a confusion matrix (only when there
    are at most 30 classes) and prints a classification report.

    Args:
        test_gen: generator exposing ``labels`` (true class indices) and
            ``class_indices`` (class name -> index). It should be unshuffled so that
            predictions line up with ``labels``.

    Returns:
        ``(errors, tests, f1score)``: number of misclassified samples, number of
        predictions, and the weighted F1 score expressed as a percentage.
    """
    classes = list(test_gen.class_indices.keys())
    class_count = len(classes)
    # Explicit label ids keep the confusion matrix and the report at one row per known
    # class even when a class never occurs in y_true/y_pred; without them the tick
    # labels would be misaligned and classification_report would reject target_names.
    label_ids = np.arange(class_count)

    y_true = np.array(test_gen.labels)
    preds = model.predict(test_gen, verbose=1)
    # Most probable class per row, computed in one vectorised call.
    y_pred = np.argmax(preds, axis=1)

    tests = len(y_pred)
    errors = int(np.sum(y_pred != y_true))
    acc = (1 - errors / tests) * 100
    print_in_color(f'There were {errors} errors in {tests} tests for an accuracy of {acc:.2f}%', (0,255,255))

    f1score = f1_score(y_true, y_pred, average='weighted') * 100

    if class_count <= 30:
        cm = confusion_matrix(y_true, y_pred, labels=label_ids)
        plt.figure(figsize=(12, 8))
        sns.heatmap(cm, annot=True, vmin=0, fmt='g', cmap='Blues', cbar=False)
        # +.5 centres each tick label on its heatmap cell.
        plt.xticks(label_ids + .5, classes, rotation=90)
        plt.yticks(label_ids + .5, classes, rotation=0)
        plt.xlabel("Predicted")
        plt.ylabel("Actual")
        plt.title("Confusion Matrix")
        plt.show()

    clr = classification_report(y_true, y_pred, labels=label_ids, target_names=classes, digits=4)
    print("Classification Report:\n----------------------\n", clr)
    return errors, tests, f1score

In [None]:
# Evaluate on the held-out test generator; the weighted F1 score (in percent) is
# reused below as part of the saved file name.
errors, tests, f1score = predictor(test_gen)
# File name encodes the number of classes and the two img_size values.
name = f'CRICKET-{len(train_gen.class_indices)}-({img_size[0]}x{img_size[1]})'
# The .h5 extension selects Keras' HDF5 format - NOTE(review): confirm this legacy
# format is still the one wanted for the installed Keras version.
save_id = f'{name}-{f1score:.2f}.h5'
# Save into the root of the mounted Google Drive.
model_save_loc = os.path.join('/content/drive/MyDrive', save_id)
model.save(model_save_loc)
print_in_color(f'Model was saved as {model_save_loc}', (0,255,255))