In [None]:
# Mount Google Drive at /content/drive. The dataset pickles and the saved model
# used later in this notebook all live under /content/drive/MyDrive, so this
# cell must run first. Requires the google.colab package.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from keras.layers import Dense
from keras.utils import to_categorical
from sklearn.metrics import confusion_matrix, classification_report,accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Input, Dense, Add, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

##### Loading data

In [None]:
# Google Drive folder that holds the pickled datasets. Change this single
# value to point the notebook at a different location.
DATA_DIR = '/content/drive/MyDrive'

# Pickle files with the train/test features (X) and labels (y).
trainX_path = f'{DATA_DIR}/trainX.pkl'
trainy_path = f'{DATA_DIR}/trainy.pkl'
testX_path = f'{DATA_DIR}/testX.pkl'
testy_path = f'{DATA_DIR}/testy.pkl'

def load_data(trainX_path, trainy_path, testX_path, testy_path):
    '''
    Load the four pickled dataset parts from disk.

    Files are read in the order train X, train y, test X, test y and the
    unpickled objects are returned as a tuple in that same order:
    (X_train, y_train, X_test, y_test).

    Note: pickle.load can execute arbitrary code embedded in the file, so
    only point this at files from a trusted source.
    '''
    def _unpickle(path):
        # Read one pickle file and hand back whatever object it contains.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    ordered_paths = (trainX_path, trainy_path, testX_path, testy_path)
    return tuple(_unpickle(path) for path in ordered_paths)

# Read all four pickles into memory; the names mirror load_data's return order.
X_train, y_train, X_test, y_test = load_data(
    trainX_path=trainX_path,
    trainy_path=trainy_path,
    testX_path=testX_path,
    testy_path=testy_path,
)

In [None]:
# Print the number of samples in each loaded split, one per line, in the
# order: train X, train y, test X, test y.
for loaded_split in (X_train, y_train, X_test, y_test):
    print(len(loaded_split))

##### Labeling and data division

In [None]:
# Class names kept for this experiment. A name's position in the list is its
# integer class id, so the order here defines the model's output order.
valid_labels = ['Emilia', 'Joanna', 'Kacper', 'Karolina', 'Mirek', 'Sylwia', 'Łukasz', 'Mariusz', 'Ania']
label_to_index = {label: idx for idx, label in enumerate(valid_labels)}

# Keep only the samples whose label is one of valid_labels. Membership is
# checked against the dict (constant-time) rather than scanning the list.
filtered_train_indices = [i for i, label in enumerate(y_train) if label in label_to_index]
filtered_test_indices = [i for i, label in enumerate(y_test) if label in label_to_index]

filtered_trainX = np.asarray(X_train)[filtered_train_indices]
filtered_testX = np.asarray(X_test)[filtered_test_indices]

filtered_trainy = np.asarray(y_train)[filtered_train_indices]
filtered_testy = np.asarray(y_test)[filtered_test_indices]

# Encoder object carrying the same class order as valid_labels. It is NOT used
# for encoding: its classes_ are assigned by hand in a non-sorted order, whereas
# LabelEncoder.fit would store them sorted. The integer ids below therefore come
# from label_to_index, which is the single source of truth for the mapping.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.array(valid_labels)

# Map each kept label to its integer class id.
filtered_trainy_encoded = np.array([label_to_index[label] for label in filtered_trainy])
filtered_testy_encoded = np.array([label_to_index[label] for label in filtered_testy])

num_classes = len(valid_labels)
print(num_classes)

# One-hot targets, matching the categorical cross-entropy loss used for training.
filtered_trainy_one_hot = to_categorical(filtered_trainy_encoded, num_classes)
filtered_testy_one_hot = to_categorical(filtered_testy_encoded, num_classes)

# Hold out 20% of the filtered training data for validation; the fixed
# random_state makes the split repeatable.
# NOTE(review): the split is not stratified, so class proportions in the
# validation set may differ from the training set - consider stratify=.
trainX, valX, trainy, valy = train_test_split(filtered_trainX, filtered_trainy_one_hot, test_size=0.2, random_state=42)

##### Architecture and model training

In [None]:
def residual_block(x, units, dropout_rate=0.3):
    '''
    Fully connected residual block followed by a Dense + ReLU layer.

    Main path: Dense(units, relu) -> BatchNormalization -> Dropout ->
    Dense(units, linear). The block input is added to the main-path output,
    and the sum goes through BatchNormalization -> Dropout -> Dense(units, relu).

    Args:
        x: Keras tensor to transform; its last axis is treated as the feature axis.
        units: Width of every Dense layer in the block (and of the output).
        dropout_rate: Rate used by both Dropout layers.

    Returns:
        Keras tensor whose last dimension is `units`.
    '''
    shortcut = x
    # Add() requires both operands to have the same shape. When the incoming
    # width differs from `units`, project the shortcut with a linear Dense
    # layer; when it already matches, the input is reused untouched.
    if shortcut.shape[-1] != units:
        shortcut = Dense(units, activation=None)(shortcut)

    x = Dense(units, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    # No activation here: the non-linearity is applied after the skip connection.
    x = Dense(units, activation=None)(x)
    x = Add()([x, shortcut])
    x = BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(units, activation='relu')(x)
    return x

# Seed Python, NumPy and TensorFlow before any layer is created so weight
# initialisation, dropout masks and batch shuffling are repeatable across
# "Restart & Run All". Same seed value as the train/validation split.
set_random_seed(42)

# Input: one 512-dimensional feature vector per sample.
inputs = Input(shape=(512,))
x = Dense(512, activation='relu')(inputs)
x = BatchNormalization()(x)

# Three stacked residual blocks, all 512 units wide.
x = residual_block(x, 512)
x = residual_block(x, 512)
x = residual_block(x, 512)

# Classification head: narrow to 256 units, regularise, then softmax over classes.
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs, outputs)

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

epochs = 10
batch_size = 32

# `history` keeps the per-epoch loss/accuracy used by the plotting cell later on.
history = model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, validation_data=(valX, valy))

In [None]:
# Score the trained model on the filtered test split.
test_loss, test_accuracy = model.evaluate(filtered_testX, filtered_testy_one_hot)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

# Reduce the model outputs and the one-hot targets to integer class ids.
test_probabilities = model.predict(filtered_testX)
y_pred = np.argmax(test_probabilities, axis=1)
y_true = np.argmax(filtered_testy_one_hot, axis=1)

# Passing the full id range keeps the matrix num_classes x num_classes even
# when some class does not occur in the test data.
all_class_ids = np.arange(num_classes)
conf_matrix = confusion_matrix(y_true, y_pred, labels=all_class_ids)

plt.figure(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=valid_labels,
    yticklabels=valid_labels,
)
plt.title('Confusion Matrix')
plt.ylabel('True')
plt.xlabel('Predicted')
plt.show()

In [8]:
def plot_confusion_matrix(y_true, y_pred, labels):
    '''
    Draw the confusion matrix of integer class ids as an annotated heatmap.

    y_true / y_pred hold class ids in the range 0..len(labels)-1; `labels`
    supplies the tick names for both axes (rows = true, columns = predicted).
    '''
    class_ids = np.arange(len(labels))
    counts = confusion_matrix(y_true, y_pred, labels=class_ids)

    heatmap_options = dict(
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=labels,
        yticklabels=labels,
    )

    plt.figure(figsize=(10, 8))
    sns.heatmap(counts, **heatmap_options)
    plt.ylabel('True Class')
    plt.xlabel('Predicted Class')
    plt.show()

def print_metrics(y_true, y_pred, dataset_name):
    '''
    Print accuracy plus weighted precision, recall and F1 for one dataset.

    All four scores are computed first, then printed under a header naming
    `dataset_name`, each formatted to four decimal places.
    '''
    weighted = {'average': 'weighted'}
    scores = {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, **weighted),
        'Recall': recall_score(y_true, y_pred, **weighted),
        'F1 Score': f1_score(y_true, y_pred, **weighted),
    }

    print(f"\nMetrics for {dataset_name}:")
    for metric_name, value in scores.items():
        print(f"{metric_name}: {value:.4f}")

def plot_training_history(history):
    '''
    Show training vs. validation curves: loss on the left, accuracy on the right.

    `history` is the object returned by model.fit; its `.history` dict must
    contain 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    '''
    curves = history.history
    # (key in the history dict, human-readable name) for each panel, left to right.
    panels = (('loss', 'Loss'), ('accuracy', 'Accuracy'))

    plt.figure(figsize=(12, 5))

    for position, (key, pretty_name) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(curves[key], label=f'Training {pretty_name}')
        plt.plot(curves[f'val_{key}'], label=f'Validation {pretty_name}')
        plt.xlabel('Epochs')
        plt.ylabel(pretty_name)
        plt.title(f'Training and Validation {pretty_name}')
        plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Reduce model outputs and one-hot targets to integer class ids for every split.
y_train_pred = np.argmax(model.predict(trainX), axis=1)
y_train_true = np.argmax(trainy, axis=1)

y_val_pred = np.argmax(model.predict(valX), axis=1)
y_val_true = np.argmax(valy, axis=1)

y_test_pred = np.argmax(model.predict(filtered_testX), axis=1)
y_test_true = np.argmax(filtered_testy_one_hot, axis=1)

# Same reporting for each split: a heading, the confusion matrix, then the
# aggregate metrics.
evaluation_splits = (
    ('Training Set', y_train_true, y_train_pred),
    ('Validation Set', y_val_true, y_val_pred),
    ('Test Set', y_test_true, y_test_pred),
)
for split_name, split_true, split_pred in evaluation_splits:
    print(f'Confusion Matrix - {split_name}')
    plot_confusion_matrix(split_true, split_pred, valid_labels)
    print_metrics(split_true, split_pred, split_name)

# Per-class report for the test set. The explicit `labels` keeps the rows
# aligned with valid_labels; without it classification_report raises when a
# class occurs in neither the test targets nor the predictions, because the
# number of observed classes no longer matches len(target_names).
class_report = classification_report(
    y_test_true,
    y_test_pred,
    labels=np.arange(num_classes),
    target_names=valid_labels,
)
print("\nClassification Report for Test Set:\n", class_report)

In [None]:
# Plot the per-epoch loss and accuracy curves recorded by model.fit.
plot_training_history(history=history)

In [None]:
# Persist the trained model to Google Drive (requires the Drive mount from the
# first cell). NOTE(review): the ".h5" suffix presumably selects Keras' HDF5
# format - confirm this is the desired format for the installed Keras version.
model.save("/content/drive/MyDrive/model.h5")