#### Imported Libraries:

In [29]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import pandas as pd
import cv2
import os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import array_to_img, img_to_array, load_img, to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger
from scikeras.wrappers import KerasClassifier

import random

from sklearn.model_selection import train_test_split

from sklearn.model_selection import GridSearchCV

import types



##### constants:

In [30]:
img_height = 100
img_width = 100

#### Load Data

In [31]:
def resize_without_squeezing(image, target_size):
    h, w = image.shape[:2]
    aspect_ratio = w / h

    # Calculate new size while preserving aspect ratio
    if aspect_ratio > 1:
        new_w = target_size
        new_h = int(target_size / aspect_ratio)
    else:
        new_h = target_size
        new_w = int(target_size * aspect_ratio)

    # Resize the image
    resized_img = cv2.resize(image, (new_w, new_h))

    # Create a canvas with the target size and fill with padding color
    canvas = np.full((target_size, target_size, 3), (255,255,255), dtype=np.uint8)

    # Calculate the position to paste the resized image in the center
    y_offset = (target_size - new_h) // 2
    x_offset = (target_size - new_w) // 2

    # Paste the resized image onto the canvas
    canvas[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized_img

    return canvas

In [1]:
'''
img = cv2.imread("../data/dataset/salamba sirsasana/55-0.png")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
#img = cv2.resize(img, (100, 100))
plt.imshow(img)
plt.show()
'''

'\nimg = cv2.imread("../data/dataset/salamba sirsasana/55-0.png")\nimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB\n#img = cv2.resize(img, (100, 100))\nplt.imshow(img)\nplt.show()\n'

In [2]:
'''
img = cv2.imread("../data/dataset/salamba sirsasana/55-0.png")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
img = resize_without_squeezing(img,img_height)
plt.imshow(img)
plt.show()
'''

'\nimg = cv2.imread("../data/dataset/salamba sirsasana/55-0.png")\nimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB\nimg = resize_without_squeezing(img,img_height)\nplt.imshow(img)\nplt.show()\n'

In [32]:
def get_folders_in_directory(directory_path):
    # Get the list of all files and folders in the specified directory
    items = os.listdir(directory_path)
    # Filter out only the folders from the list
    folders = [item for item in items if os.path.isdir(os.path.join(directory_path, item))]
    return folders
    
def load_images_from_folder(folder):
    # Function to load and preprocess images from a folder
    images = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, filename))
        if img is not None:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
            img = resize_without_squeezing(img,img_height)
            #img = cv2.resize(img, (img_height, img_width))
            images.append(img)
    return images



base_dir = '../data/dataset'
labels_name = get_folders_in_directory(base_dir) # get the list of folder names
labels_dict = {label:i for i,label in enumerate(labels_name)} # assign an int for each folder name = asana

# Get all images and corresp labels
all_images=[]
all_labels=[]
for label in labels_name:
    new_images = load_images_from_folder(base_dir+'/'+label)
    all_images = all_images + new_images
    for i in range(len(new_images)):
        all_labels.append(labels_dict[label])

# Ensure that all_image_paths and all_labels are numpy arrays for easier manipulation
images = np.array(all_images)/255 # normalize images to the range [0-1]
labels = np.array(all_labels)

# Shuffle indices
indices = np.arange(len(images))
np.random.shuffle(indices)

# Use the shuffled indices to reorder X and y
images_shuffled = images[indices]
labels_shuffled = labels[indices]

images = images_shuffled
labels = labels_shuffled

print("images data shape: ", images.shape)
print("labels data shape: ", labels.shape)

images data shape:  (5992, 100, 100, 3)
labels data shape:  (5992,)


In [3]:
'''
img = images[700]
plt.imshow(img)
plt.show()
'''

'\nimg = images[700]\nplt.imshow(img)\nplt.show()\n'

#### Split Data into train, validation, and test sets

In [8]:
# SPLITTING 1
'''
# Split the dataset into training and temporary sets (combined validation and test)
train_data, temp_data, train_labels, temp_labels = train_test_split(
    images, labels, test_size=0.3, random_state=42
)

# Split the temporary set into validation and test sets
val_data, test_data, val_labels, test_labels = train_test_split(
    temp_data, temp_labels, test_size=0.5, random_state=42
)
'''

In [5]:
# SPLITTING 2
# Split the dataset into training and test sets
train_data, test_data, train_labels, test_labels = train_test_split(
    images, labels, test_size=0.2, random_state=42
)

In [6]:
from tensorflow.keras.utils import to_categorical

# For multi-class classification
train_labels_one_hot = to_categorical(train_labels, num_classes=107)
#val_labels_one_hot = to_categorical(val_labels, num_classes=107)
test_labels_one_hot = to_categorical(test_labels, num_classes=107)

#### Inicialize ImageDataGenerator

In [33]:
'''
# Create an ImageDataGenerator for data augmentation
def add_noise(img):
    #Add random noise to an image
    VARIABILITY = 50
    deviation = VARIABILITY*random.random()
    noise = np.random.normal(0, deviation, img.shape)
    new_img = img + noise
    np.clip(new_img, 0., 255.)
    return new_img
'''

# Initialising the ImageDataGenerator class.
# We will pass in the augmentation parameters in the constructor.
datagen = ImageDataGenerator(
        rotation_range = 30,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip = True
        #preprocessing_function=add_noise
)

### Create Model

In [34]:
def compile_model(model, optimizer, metrics):
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=metrics)
    return model

##### CNN

In [35]:
def create_CNN():
    model = keras.Sequential([
        layers.Conv2D(16, (3, 3), padding='same', activation='relu'),
        layers.MaxPooling2D(2, 2),
        layers.Dropout(0.2), #to avoid overfitting
        
        layers.Conv2D(32, (3, 3), padding='same',  activation='relu'),
        layers.MaxPooling2D(2, 2),
        layers.Dropout(0.2),
        
        layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
        layers.MaxPooling2D(2, 2),
        layers.Dropout(0.2),
        
        layers.Flatten(),
        #layers.Dense(512, activation='relu'), #prova anche 256 e 1024
        layers.Dropout(0.5),
        layers.Dense(107, activation='softmax') 
    ])
    
    return model

##### ResNet

In [11]:
class ResidualBlock(tf.keras.layers.Layer):
    def __init__(self, filters, kernel_size=3, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = layers.Conv2D(filters, kernel_size, strides=stride, padding='same', use_bias=False)
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.Activation('relu')
        self.dropout = layers.Dropout(0.25)
        self.conv2 = layers.Conv2D(filters, kernel_size, strides=1, padding='same', use_bias=False)
        self.bn2 = layers.BatchNormalization()
        self.downsample = None

        # Adding a shortcut/skip connection if dimensions change
        if stride != 1:
            self.downsample = tf.keras.Sequential([
                layers.Conv2D(filters, 1, strides=stride, use_bias=False),
                layers.BatchNormalization()
            ])

    def call(self, inputs, training=False):
        residual = inputs

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.relu(x)

        x = self.conv2(x)
        x = self.bn2(x, training=training)

        # Adding the skip connection if present
        if self.downsample is not None:
            residual = self.downsample(residual)

        x += residual
        x = self.relu(x)

        return x

class ResNet(tf.keras.Model):
    def __init__(self, num_classes):
        super(ResNet, self).__init__()
        self.conv1 = layers.Conv2D(64, 3, strides=2, padding='same', use_bias=False)
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.Activation('relu')
        self.pool1 = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')

        self.block1 = ResidualBlock(64)
        self.block2 = ResidualBlock(64)
        

        self.global_avg_pooling = layers.GlobalAveragePooling2D()
        self.fc = layers.Dense(num_classes, activation='softmax')

    
    def call(self, inputs, training=False):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self.relu(x)
        x = self.pool1(x)

        x = self.block1(x, training=training)
        x = self.block2(x, training=training)
        # Add more blocks as needed...

        x = self.global_avg_pooling(x)
        output = self.fc(x)

        return output





In [16]:
# Create an instance of the ResNet model
num_classes = 107  # Set the number of classes based on your task
resnet_model = ResNet(num_classes)

##### DenseNet121

In [36]:
def create_DN():
    ptrain_model = tf.keras.applications.DenseNet121(input_shape=(img_height,img_width,3),
                                                      include_top=False,
                                                      weights='imagenet',
                                                      pooling='avg')
    ptrain_model.trainable = False
    
    inputs = ptrain_model.input
    
    drop_layer = tf.keras.layers.Dropout(0.25)(ptrain_model.output)
    x_layer = tf.keras.layers.Dense(512, activation='relu')(drop_layer)
    x_layer1 = tf.keras.layers.Dense(128, activation='relu')(x_layer)
    drop_layer1 = tf.keras.layers.Dropout(0.20)(x_layer1)
    outputs = tf.keras.layers.Dense(107, activation='softmax')(drop_layer1)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
                  
    return model 

##### Other

- Popular CNN architectures include:

AlexNet <br>
VGGNet <br>
GoogLeNet (Inception) <br>
ResNet <br>
MobileNet

- Popular pre-trained models include: (Consider using pre-trained models like InceptionV3, ResNet50, or MobileNetV2 and fine-tuning them on your dataset.)

Inception <br>V3
ResNe <br>t50
MobileN <br>etV2
Xce <br>ption
Effici <br>entNet

### Compile Model

In [24]:
initial_learning_rate = 0.001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=10000, decay_rate=0.9, staircase=True
)

optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
# possible optimizer : rmsprop, adam, SGD

# Compile the model
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
#resnet_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

### Train Model

In [None]:
# Train the model with data augmentation
batch_size = 16 # number of training examples utilized in one iteration
num_epochs = 10 # number of iteration
es = EarlyStopping(monitor='val_accuracy', mode='max', min_delta=1, verbose=1, patience = 4)

# Use flow for the training set
train_datagen = datagen.flow(train_data, train_labels_one_hot, batch_size=batch_size)
# Use flow for the validation set
#val_datagen = datagen.flow(val_data, val_labels_one_hot, batch_size=batch_size)

# Train the model with data augmentation
history = model.fit(train_datagen, epochs=num_epochs, validation_split=0.3, callback=[es])


# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(test_data, test_labels_one_hot)
print(f'Test accuracy: {test_acc}')

plt.plot(history.history['accuracy'],label='train')
plt.plot(history.history['val_accuracy'], labek='test')
plt.show

In [31]:
#model.summary()

#### CNN

In [12]:
batch_size = 32
num_epochs = 30
# image_size = 100x100
# no noise added
# optimizer: adam
# without layers.Dense(512, activation='relu')

train_datagen = datagen.flow(train_data, train_labels_one_hot, batch_size=batch_size)
val_datagen = datagen.flow(val_data, val_labels_one_hot, batch_size=batch_size)
history = model.fit(train_datagen, epochs=num_epochs, validation_data=val_datagen)
test_loss, test_acc = model.evaluate(test_data, test_labels_one_hot)
print(f'Test accuracy: {test_acc}')

Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test accuracy: 0.2849999964237213


In [24]:
batch_size = 32
num_epochs = 30
# image_size = 100x100
# no noise added
# optimizer: rmsprop
# without layers.Dense(512, activation='relu')

train_datagen = datagen.flow(train_data, train_labels_one_hot, batch_size=batch_size)
val_datagen = datagen.flow(val_data, val_labels_one_hot, batch_size=batch_size)
history = model.fit(train_datagen, epochs=num_epochs, validation_data=val_datagen)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [25]:
test_loss, test_acc = model.evaluate(test_data, test_labels_one_hot)
print(f'Test accuracy: {test_acc}')

Test accuracy: 0.28333333134651184


#### RESNET

In [18]:
batch_size = 32
num_epochs = 30 
# image_size = 100x100
# no noise added
# optimizer: adam

train_datagen = datagen.flow(train_data, train_labels_one_hot, batch_size=batch_size)
val_datagen = datagen.flow(val_data, val_labels_one_hot, batch_size=batch_size)
history = resnet_model.fit(train_datagen, epochs=num_epochs, validation_data=val_datagen)
test_loss, test_acc = resnet_model.evaluate(test_data, test_labels_one_hot)
print(f'Test accuracy: {test_acc}')

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test accuracy: 0.15000000596046448


In [13]:
batch_size = 32
num_epochs = 30 
# image_size = 100x100
# no noise added
# optimizer: rmsprop

train_datagen = datagen.flow(train_data, train_labels_one_hot, batch_size=batch_size)
val_datagen = datagen.flow(val_data, val_labels_one_hot, batch_size=batch_size)
history = resnet_model.fit(train_datagen, epochs=num_epochs, validation_data=val_datagen)
test_loss, test_acc = resnet_model.evaluate(test_data, test_labels_one_hot)
print(f'Test accuracy: {test_acc}')

Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test accuracy: 0.07333333045244217


#### DenseSet

In [25]:
batch_size = 32
num_epochs = 30 
# image_size = 100x100
# no noise added
# optimizer: adam

train_datagen = datagen.flow(train_data, train_labels_one_hot, batch_size=batch_size)
val_datagen = datagen.flow(val_data, val_labels_one_hot, batch_size=batch_size)
history = model.fit(train_datagen, epochs=num_epochs, validation_data=val_datagen)
test_loss, test_acc = model.evaluate(test_data, test_labels_one_hot)
print(f'Test accuracy: {test_acc}')

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test accuracy: 0.351666659116745


### Compare Models and parameters

In [45]:
# SPLITTING 1
# Split the dataset into training and temporary sets (combined validation and test)
train_data, temp_data, train_labels, temp_labels = train_test_split(
    images, labels, test_size=0.3, random_state=42
)

# Split the temporary set into validation and test sets
val_data, test_data, val_labels, test_labels = train_test_split(
    temp_data, temp_labels, test_size=0.5, random_state=42
)
train_labels_one_hot = to_categorical(train_labels, num_classes=107)
val_labels_one_hot = to_categorical(val_labels, num_classes=107)
test_labels_one_hot = to_categorical(test_labels, num_classes=107)


'''
# SPLITTING 2
train_data, test_data, train_labels, test_labels = train_test_split(
    images, labels, test_size=0.2, random_state=42
)
train_labels_one_hot = to_categorical(train_labels, num_classes=107)
test_labels_one_hot = to_categorical(test_labels, num_classes=107)
'''

'\n# SPLITTING 2\ntrain_data, test_data, train_labels, test_labels = train_test_split(\n    images, labels, test_size=0.2, random_state=42\n)\ntrain_labels_one_hot = to_categorical(train_labels, num_classes=107)\ntest_labels_one_hot = to_categorical(test_labels, num_classes=107)\n'

In [46]:
def train_model(train_data, val_data, test_data, train_labels_one_hot, val_labels_one_hot, test_labels_one_hot, model, optimizer, batch_size, num_epochs, metrics=['accuracy']):

    # optimizer
    initial_learning_rate = 0.001
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate, decay_steps=10000, decay_rate=0.9, staircase=True)
    if optimizer=='adam':
        opt = keras.optimizers.Adam(learning_rate=lr_schedule)
    elif optimizer=='rmsprop':
        opt = keras.optimizers.RMSprop(learning_rate=lr_schedule)
    elif optimizer=='SGD':
        opt = keras.optimizers.SGD(learning_rate=lr_schedule)

    # compile model
    model = compile_model(model, opt, metrics)

    # train model
    early_stopping = EarlyStopping(monitor='val_accuracy', mode='max', min_delta=1, verbose=1, patience = 4)

    train_datagen = datagen.flow(train_data, train_labels_one_hot, batch_size=batch_size)
    val_datagen = datagen.flow(val_data, val_labels_one_hot, batch_size=batch_size)
    history = cnn_model.fit(train_datagen, epochs=num_epochs, validation_data=val_datagen, callbacks=[early_stopping])

    # Evaluate the model on the test set
    test_loss, test_acc = model.evaluate(test_data, test_labels_one_hot)
    print(f'Test accuracy: {test_acc}')

    plt.plot(history.history['accuracy'],label='train')
    plt.plot(history.history['val_accuracy'], label='test')
    plt.show

In [19]:
initial_learning_rate = 0.001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=10000, decay_rate=0.9, staircase=True
)

param_grid = {
    'epochs' : [30,50,70],
    'batch_size': [8,16,32],
    'optimizer': ['Adam','SGD','RMSprop']
}
early_stopping = EarlyStopping(monitor='val_accuracy', mode='max', min_delta=1, verbose=1, patience = 4)

cnn_model = KerasClassifier(create_CNN)
grid_cnn = GridSearchCV(estimator=cnn_model, param_grid=param_grid, scoring='accuracy')
grid_result = grid_cnn.fit(train_data, train_labels_one_hot , validation_split=0.3, callbacks=early_stopping)
best_params = grid_cnn.best_params
print(f"Best Hyperparameters for CNN are: {best_params}")


'''
dn_model = KerasClassifier(create_DN)
grid_dn = GridSearchCV(estimator= create_DN(), param_grid=param_grid, scoring=['accuracy'])
grid_result = grid_dn.fit(train_datagen, validation_split=0.3, callback=[es])
best_params = grid_dn.best_params
print(f"Best Hyperparameters for DenseNet are: {best_params}")
'''

ValueError: 
All the 135 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
135 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\lucia\miniconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\lucia\miniconda3\Lib\site-packages\scikeras\wrappers.py", line 1491, in fit
    super().fit(X=X, y=y, sample_weight=sample_weight, **kwargs)
  File "C:\Users\lucia\miniconda3\Lib\site-packages\scikeras\wrappers.py", line 760, in fit
    self._fit(
  File "C:\Users\lucia\miniconda3\Lib\site-packages\scikeras\wrappers.py", line 926, in _fit
    self._check_model_compatibility(y)
  File "C:\Users\lucia\miniconda3\Lib\site-packages\scikeras\wrappers.py", line 549, in _check_model_compatibility
    if self.n_outputs_expected_ != len(self.model_.outputs):
                                   ^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: object of type 'NoneType' has no len()


#### Other

In [None]:
def int_to_asana(n):
    lista=[]
    for el in n:
        l=[key for key, value in labels_dict.items() if value == el]
        lista.append(l[0])
    return lista


def compare(test data, test_labels_one_hot, model):
    lab_pred = model.predict(test_data)
    pred_labels = np.argmax(lab_pred, axis=1)

    test_labels = np.argmax(test_labels_one_hot, axis=1)

    #asana_pred = int_to_asana(labels_pred)
    #asana_test = int_to_asana(test_labels)
    #df = pd.DataFrame({'pred': asana_pred, 'true': asana_test}, index=range(len(asana_pred)))

#### Ways to improve the model:
- train set percentage
- data augmentation parameters
- num_layers and filters
- opt function and learning rate
- batch_size
- num_epochs + add early_stopping
- layer dense