<a href="https://www.kaggle.com/code/pes1ug22am164/image-part?scriptVersionId=171996997" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# uncomment on google colab, comment on kaggle.

In [1]:
!mkdir ~/.kaggle
from google.colab import files
files.upload()
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d shusrith/betterest-preproc

Saving kaggle.json to kaggle.json
Downloading betterest-preproc.zip to /content
 28% 10.0M/35.1M [00:00<00:00, 104MB/s]
100% 35.1M/35.1M [00:00<00:00, 202MB/s]


In [None]:
!unzip /content/betterest-preproc.zip

In [4]:
import os
import numpy as np
directory = '/content/train'

files = [file for file in os.listdir(directory) if file.endswith('.jpg')]
len(files)

10240

In [5]:
mildDem = [i for i in files if i.startswith("milDem")]
moderateDem = [i for i in files if "moderateDim" in i]
nonDem = [i for i in files if "nonDem" in i]
veryMild = [i for i in files if "veryMildDem" in i]
print(len(mildDem), len(moderateDem), len(nonDem), len(veryMild))

2560 2560 2560 2560


In [6]:
import pandas as pd
l = [[mildDem, "0"], [moderateDem, "1"], [nonDem, "2"], [veryMild, "3"]]
x = []
for i in l:
    for j in i[0]:
        path = os.path.join("/content/train", j)
        x.append([i[1], path])

df = pd.DataFrame(x, columns=["Class", "Path"])
x = df["Class"].value_counts()
x

Class
0    2560
1    2560
2    2560
3    2560
Name: count, dtype: int64

In [14]:
from sklearn.model_selection import train_test_split

train_images, test_images, train_labels, test_labels = train_test_split(df["Path"], df["Class"], test_size=0.2, random_state=42)

In [45]:
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator

train_df = pd.DataFrame({
    'filename': train_images,
    'class': train_labels
})
val_df = pd.DataFrame({
    'filename': test_images,
    'class': test_labels
})

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    rescale=1.0/255.0)

train_generator = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=None,
    x_col='filename',
    y_col='class',
    target_size=(130, 110),
    batch_size=32,
    class_mode='categorical'
)

validation_generator = datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=None,
    x_col='filename',
    y_col='class',
    target_size=(130, 110),
    batch_size=32,
    class_mode='categorical'
)

Found 8192 validated image filenames belonging to 4 classes.
Found 2048 validated image filenames belonging to 4 classes.


In [None]:
# s = x / len(files)
# class_weights = {i:j for i, j in zip(s.index, s.values)}

# class_weights
# Optional normalization (weights sum to 1)
# total_weight = sum(class_weights.values())
# class_weights = {class_label: weight / total_weight for class_label, weight in class_weights.items()}


In [16]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, BatchNormalization
from keras.callbacks import EarlyStopping, LearningRateScheduler
from keras.utils import to_categorical
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.model_selection import train_test_split
import numpy as np

def conv_block(filters, act='relu'):
    """Defining a Convolutional NN block for a Sequential CNN model. """

    block = Sequential()
    block.add(Conv2D(filters, 3, activation=act, padding='same'))
    block.add(Conv2D(filters, 3, activation=act, padding='same'))
    block.add(BatchNormalization())
    block.add(MaxPooling2D((2, 2)))

    return block

In [44]:
def dense_block(units, dropout_rate, act='relu'):
    """Defining a Dense NN block for a Sequential CNN model. """

    block = Sequential()
    block.add(Dense(units, activation=act))
    block.add(BatchNormalization())
    block.add(Dropout(dropout_rate))

    return block

In [18]:
import tensorflow as tf

class F1Score(tf.keras.metrics.Metric):
    def __init__(self, num_classes, **kwargs):
        super(F1Score, self).__init__(**kwargs)
        self.num_classes = num_classes
        self.precision = tf.keras.metrics.Precision()
        self.recall = tf.keras.metrics.Recall()

    def update_state(self, y_true, y_pred, sample_weight=None):
        self.precision.update_state(y_true, y_pred, sample_weight)
        self.recall.update_state(y_true, y_pred, sample_weight)

    def result(self):
        precision = self.precision.result()
        recall = self.recall.result()
        return 2 * ((precision * recall) / (precision + recall + tf.keras.backend.epsilon()))

    def reset_state(self):
        self.precision.reset_states()
        self.recall.reset_states()

METRICS = [tf.keras.metrics.CategoricalAccuracy(name='acc'),
           tf.keras.metrics.AUC(name='auc'),
           F1Score(num_classes=4)]

In [46]:
def construct_model(act='relu'):
    """Constructing a Sequential CNN architecture for performing the classification task. """

    model = Sequential([
        Input(shape=(130, 110, 3)),
#         conv_block(16),
        conv_block(32),
        conv_block(64),
        conv_block(128),
        Dropout(0.2),
        conv_block(256),
        Dropout(0.2),
        Flatten(),
        dense_block(512, 0.7),
        dense_block(256, 0.5),
        dense_block(64, 0.3),
        Dense(4, activation='softmax')
    ], name = "cnn_model")

    return model

In [47]:
from focal_loss import SparseCategoricalFocalLoss

model = construct_model()
model.compile(optimizer='adam',
              loss=SparseCategoricalFocalLoss(gamma=2),
              metrics=METRICS)

model.summary()



Model: "cnn_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_14 (Sequential)  (None, 65, 55, 32)        10272     
                                                                 
 sequential_15 (Sequential)  (None, 32, 27, 64)        55680     
                                                                 
 sequential_16 (Sequential)  (None, 16, 13, 128)       221952    
                                                                 
 dropout_10 (Dropout)        (None, 16, 13, 128)       0         
                                                                 
 sequential_17 (Sequential)  (None, 8, 6, 256)         886272    
                                                                 
 dropout_11 (Dropout)        (None, 8, 6, 256)         0         
                                                                 
 flatten_2 (Flatten)         (None, 12288)             0 

In [48]:
from keras.optimizers import Adam
def lr_schedule(epoch, learning_rate):
    if epoch < 20:
        return learning_rate
    else:
        if epoch % 2 == 0:
          return learning_rate * 0.95
        else:
          return learning_rate

lr_scheduler = LearningRateScheduler(lr_schedule, verbose=1)

initial_learning_rate = 0.001
optimizer = Adam(learning_rate=initial_learning_rate)

In [49]:
class MyCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if logs.get('val_acc') > 0.995:
            print("\nReached accuracy threshold! Terminating training.")
            self.model.stop_training = True

my_callback = MyCallback()

CALLBACKS = [my_callback, lr_scheduler]

In [None]:
EPOCHS = 50

history = model.fit(train_generator, validation_data=(validation_generator), callbacks=CALLBACKS, epochs=EPOCHS)


Epoch 1: LearningRateScheduler setting learning rate to 0.00021463872690219432.
Epoch 1/50


  m.reset_state()



Epoch 2: LearningRateScheduler setting learning rate to 0.00021463872690219432.
Epoch 2/50

Epoch 3: LearningRateScheduler setting learning rate to 0.00021463872690219432.
Epoch 3/50

Epoch 4: LearningRateScheduler setting learning rate to 0.00021463872690219432.
Epoch 4/50

Epoch 5: LearningRateScheduler setting learning rate to 0.00021463872690219432.
Epoch 5/50

Epoch 6: LearningRateScheduler setting learning rate to 0.00021463872690219432.
Epoch 6/50

Epoch 7: LearningRateScheduler setting learning rate to 0.00021463872690219432.
Epoch 7/50

Epoch 8: LearningRateScheduler setting learning rate to 0.00021463872690219432.
Epoch 8/50

Epoch 9: LearningRateScheduler setting learning rate to 0.00021463872690219432.
Epoch 9/50

Epoch 10: LearningRateScheduler setting learning rate to 0.00021463872690219432.
Epoch 10/50

Epoch 11: LearningRateScheduler setting learning rate to 0.00021463872690219432.
Epoch 11/50

Epoch 12: LearningRateScheduler setting learning rate to 0.0002146387269021

In [28]:
import os
import numpy as np
directory = '/content/test'

files = [file for file in os.listdir(directory) if file.endswith('.jpg')]
len(files)

1279

In [29]:
mildDem = [i for i in files if i.startswith("MildDem")]
moderateDem = [i for i in files if "ModerateDem" in i]
nonDem = [i for i in files if "NonDem" in i]
veryMild = [i for i in files if "VeryMildDem" in i]
print(len(mildDem), len(moderateDem), len(nonDem), len(veryMild))

179 12 640 448


In [31]:
import pandas as pd
l = [[mildDem, 0], [moderateDem, 1], [nonDem, 2], [veryMild, 3]]
x = []
for i in l:
    for j in i[0]:
        path = os.path.join("/content/test", j)
        x.append([i[1], path])

df = pd.DataFrame(x, columns=["Class", "Path"])
df["Class"].value_counts()


Class
2    640
3    448
0    179
1     12
Name: count, dtype: int64

In [39]:
from keras.preprocessing.image import load_img, img_to_array
img = [load_img(i, target_size=(130, 110)) for i in df["Path"]]
img = [img_to_array(im) for im in img]
img = [im / 255.0 for im in img]
img = np.stack(img, axis=0)


In [35]:
labels = df["Class"]
labels = to_categorical(labels, num_classes=4)
print(labels.shape)

(1279, 4)


In [36]:
l = []
for i in labels:
  for j in range(4):
    if i[j] == 1:
      l.append(j)


In [54]:
y_prob = model.predict(img)
y_prob = np.argmax(y_prob, axis=-1)



In [55]:
from sklearn.metrics import classification_report

report = classification_report(l, y_prob, output_dict=True)
for i in report:
  print(i, report[i])

0 {'precision': 0.5919540229885057, 'recall': 0.5754189944134078, 'f1-score': 0.58356940509915, 'support': 179}
1 {'precision': 1.0, 'recall': 0.25, 'f1-score': 0.4, 'support': 12}
2 {'precision': 0.8558951965065502, 'recall': 0.6125, 'f1-score': 0.7140255009107468, 'support': 640}
3 {'precision': 0.562111801242236, 'recall': 0.8080357142857143, 'f1-score': 0.663003663003663, 'support': 448}
accuracy 0.6724003127443315
macro avg {'precision': 0.7524902551843229, 'recall': 0.5614886771747806, 'f1-score': 0.59014964225339, 'support': 1279}
weighted avg {'precision': 0.7174032703953529, 'recall': 0.6724003127443315, 'f1-score': 0.674949871087777, 'support': 1279}


In [42]:
model.evaluate(img, labels)

 5/40 [==>...........................] - ETA: 1s - loss: 1.5557 - acc: 0.6687 - auc: 0.8514 - f1_score: 0.6708

  m.reset_state()




[1.349985957145691, 0.691946804523468, 0.8760294914245605, 0.6920062303543091]