In [None]:
import os
import cv2
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.utils import shuffle
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.metrics import accuracy_score, f1_score
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split, StratifiedKFold
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization

In [None]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print("Using GPU:", gpus)
else:
    print("No GPU detected. Using CPU.")

Num GPUs Available:  0
No GPU detected. Using CPU.


In [7]:
datasets = {"dataset_1": ".\AutismDataset"}

# function to extract file paths and labels
def create_dataframe(dataset_path):
    data = {"file_path": [], "label": []}  # dictionary to hold file paths and labels

    # handle differently based on folder type
    for sub_dir in ["train", "test", "valid"]:
        folder_path = os.path.join(dataset_path, sub_dir)
        if not os.path.exists(folder_path):
            print("WARNING: Directory not found:", folder_path)
            continue

        if sub_dir == "valid":
            # valid has labeled subdirectories
            for label_dir, label in zip(["Autistic", "Non_Autistic"], [1, 0]):
                label_folder_path = os.path.join(folder_path, label_dir)
                if os.path.exists(label_folder_path):
                    for img_file in os.listdir(label_folder_path):
                        data["file_path"].append(os.path.join(label_folder_path, img_file))
                        data["label"].append(label)
        else:
            # `train` and `test` have images directly so infer labels from filenames
            for img_file in os.listdir(folder_path):
                if "Non_Autistic" in img_file:
                    label = 1
                elif "Autistic" in img_file:
                    label = 0
                else:
                    continue  # skip unknown files
                data["file_path"].append(os.path.join(folder_path, img_file))
                data["label"].append(label)

    return pd.DataFrame(data)

# create DataFrame for dataset
merged_df = create_dataframe(datasets["dataset_1"])

# save the dataset as a CSV file
merged_df.to_csv("merged_dataset.csv", index = False)
print("Dataset saved with", len(merged_df), "entries.")

# split the dataset using train_test_split
train_df, temp_df = train_test_split(merged_df, test_size = 0.3, stratify = merged_df["label"], random_state = 42)
valid_df, test_df = train_test_split(temp_df, test_size = 0.5, stratify = temp_df["label"], random_state = 42)

print("Train size:", len(train_df), "| Valid size:", len(valid_df), "| Test size:", len(test_df))

# load the merged dataset
merged_df = pd.read_csv("merged_dataset.csv")

# shuffle the data
train_df = train_df.sample(frac = 1, random_state = 42).reset_index(drop = True)
valid_df = valid_df.sample(frac = 1, random_state = 42).reset_index(drop = True)
test_df = test_df.sample(frac = 1, random_state = 42).reset_index(drop = True)
train_df['label'] = train_df['label'].astype(str)
valid_df['label'] = valid_df['label'].astype(str)
test_df['label'] = test_df['label'].astype(str)

print("Train Dataset:")
print(train_df['label'].value_counts())

print("\nValidation Dataset:")
print(valid_df['label'].value_counts())

print("\nTest Dataset:")
print(test_df['label'].value_counts())

# save the shuffled datasets
train_df.to_csv("train.csv", index = False)
valid_df.to_csv("valid.csv", index = False)
test_df.to_csv("test.csv", index = False)

print("Shuffled datasets saved successfully!")

Dataset saved with 2940 entries.
Train size: 2058 | Valid size: 441 | Test size: 441
Train Dataset:
label
0    1029
1    1029
Name: count, dtype: int64

Validation Dataset:
label
1    221
0    220
Name: count, dtype: int64

Test Dataset:
label
0    221
1    220
Name: count, dtype: int64
Shuffled datasets saved successfully!


In [8]:
# define ImageDataGenerators with improved augmentation
train_datagen = ImageDataGenerator(
    rescale = 1.0 / 255.0,  # normalize pixel values to [0, 1]
    rotation_range = 5,  # rotate images by up to 5 degrees
    width_shift_range = 0.1,  # shift images horizontally by up to 10% of the width
    height_shift_range = 0.1,  # shift images vertically by up to 10% of the height
    horizontal_flip = True,  # flip images horizontally
    fill_mode = 'nearest'  # fill any missing pixels after transformations
)

valid_test_datagen = ImageDataGenerator(rescale = 1.0 / 255.0)  # no augmentation for validation & test

train_generator = train_datagen.flow_from_dataframe(dataframe = train_df, x_col = "file_path", y_col = "label", target_size = (224, 224), batch_size = 32, class_mode = "binary")

valid_generator = valid_test_datagen.flow_from_dataframe(dataframe = valid_df, x_col = "file_path", y_col = "label", target_size = (224, 224), batch_size = 32, class_mode = "binary")

test_generator = valid_test_datagen.flow_from_dataframe(dataframe = test_df, x_col = "file_path", y_col = "label", target_size = (224, 224), batch_size = 32, class_mode = "binary", shuffle = False)

# Print label distribution for each generator
print("\nTrain Generator Label Distribution:")
print(Counter(train_generator.labels))

print("\nValidation Generator Label Distribution:")
print(Counter(valid_generator.labels))

print("\nTest Generator Label Distribution:")
print(Counter(test_generator.labels))

Found 2058 validated image filenames belonging to 2 classes.
Found 441 validated image filenames belonging to 2 classes.
Found 441 validated image filenames belonging to 2 classes.

Train Generator Label Distribution:
Counter({0: 1029, 1: 1029})

Validation Generator Label Distribution:
Counter({1: 221, 0: 220})

Test Generator Label Distribution:
Counter({0: 221, 1: 220})


In [15]:
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    base_model = DenseNet121(weights = "imagenet", include_top = False, input_shape = (224, 224, 3))
    x = GlobalAveragePooling2D()(base_model.output)
    x = BatchNormalization()(x)  # helps with stability
    x = Dense(256, activation = "relu")(x)
    x = Dropout(0.6)(x)  # reduce overfitting
    output = Dense(1, activation = "sigmoid")(x)  # binary classification

    model = Model(inputs = base_model.input, outputs = output)

    model.compile(optimizer = Adam(learning_rate = 0.0001), loss = "binary_crossentropy", metrics = ["accuracy"])

checkpoint_path = 'model/densenet121_model.h5'
callbacks = [
    #EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor = "val_loss", factor = 0.5, patience = 5, min_lr = 0.0001),
    ModelCheckpoint(
    checkpoint_path,
    monitor = "val_loss",
    save_best_only = True,
    mode = "min",
    verbose = 1
)
]

history = model.fit(train_generator, validation_data = valid_generator, epochs = 30, callbacks = callbacks, verbose = 1)

model.load_weights(checkpoint_path)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
Epoch 1/30
Epoch 1: val_loss improved from inf to 0.58174, saving model to model\densenet121_model.h5


  saving_api.save_model(


Epoch 2/30
Epoch 2: val_loss improved from 0.58174 to 0.53742, saving model to model\densenet121_model.h5
Epoch 3/30
Epoch 3: val_loss improved from 0.53742 to 0.44736, saving model to model\densenet121_model.h5
Epoch 4/30
Epoch 4: val_loss improved from 0.44736 to 0.44003, saving model to model\densenet121_model.h5
Epoch 5/30
Epoch 5: val_loss did not improve from 0.44003
Epoch 6/30
Epoch 6: val_loss did not improve from 0.44003
Epoch 7/30
Epoch 7: val_loss did not improve from 0.44003
Epoch 8/30
Epoch 8: val_loss did not improve from 0.44003
Epoch 9/30
Epoch 9: val_loss did not improve from 0.44003
Epoch 10/30
Epoch 10: val_loss did not improve from 0.44003
Epoch 11/30
Epoch 11: val_loss did not improve from 0.44003
Epoch 12/30
Epoch 12: val_loss did not improve from 0.44003
Epoch 13/30
Epoch 13: val_loss did not improve from 0.44003
Epoch 14/30
Epoch 14: val_loss did not improve from 0.44003
Epoch 15/30
Epoch 15: val_loss did not improve from 0.44003
Epoch 16/30
Epoch 16: val_loss d

In [16]:
test_loss, test_acc = model.evaluate(test_generator)
print("Test Accuracy:", round(test_acc, 4))
print("Test Loss:", round(test_loss, 4))

Test Accuracy: 0.8027
Test Loss: 0.4806


In [None]:
def plot_training_history(history, save_path = 'model_training_plot.png'):
    plt.figure(figsize = (12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label = 'Train Accuracy')
    plt.plot(history.history['val_accuracy'], label = 'Validation Accuracy')
    plt.title('Model Accuracy per Epoch')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label = 'Train Loss')
    plt.plot(history.history['val_loss'], label = 'Validation Loss')
    plt.title('Model Loss per Epoch')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.savefig(save_path)
    plt.show()
    print("Plot saved successfully at:", save_path)

plot_training_history(history, save_path = 'model_training_plot.png')