In [None]:
## Importing the necessary libraries
# Standard library
import copy
import os
import random
import shutil
import warnings

# Silence warnings before the heavy third-party imports run.
warnings.filterwarnings("ignore")

# Third-party
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from skimage.feature import local_binary_pattern
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_recall_fscore_support,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from tensorflow.keras.applications import VGG16, VGG19
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms

In [None]:
# Randomly select non-cracked wall images and copy them into the shared
# "Non-cracked images" folder.
# NOTE(review): absolute, machine-specific paths - adjust before running elsewhere.
source_dir = '/Users/User 1/OneDrive/Desktop/Walls/Non-cracked'
destination_dir = '/Users/User 1/OneDrive/Desktop/Non-cracked images'
num_images = 4500  # how many wall images to sample from source_dir

def copy_random_images(source_dir, destination_dir, num_images, seed=None):
    """Copy a random sample of image files from one folder into another.

    Only files whose (case-insensitive) name ends in .png, .jpg, .jpeg, .gif
    or .bmp are considered. The destination folder is created if missing.

    Args:
        source_dir: folder to sample image files from.
        destination_dir: folder the sampled files are copied into.
        num_images: number of distinct files to copy.
        seed: optional seed for the sampling; with the same seed and the same
            folder contents the same files are chosen. ``None`` (default)
            gives a different sample on every call.

    Raises:
        ValueError: if ``source_dir`` holds fewer than ``num_images`` images.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    # Sort the listing so a fixed seed does not depend on directory order.
    image_files = sorted(
        name for name in os.listdir(source_dir)
        if name.lower().endswith(image_extensions)
    )
    if num_images > len(image_files):
        raise ValueError(
            f"Requested {num_images} images but only {len(image_files)} "
            f"image files were found in {source_dir!r}"
        )
    os.makedirs(destination_dir, exist_ok=True)
    rng = random.Random(seed)
    for image in rng.sample(image_files, num_images):
        shutil.copy(os.path.join(source_dir, image),
                    os.path.join(destination_dir, image))
copy_random_images(source_dir, destination_dir, num_images)

# Random selection of non-cracked deck images. The copy_random_images helper
# defined earlier in this cell is reused instead of being redefined.
source_dir = '/Users/User 1/OneDrive/Desktop/Decks/Non-cracked'
destination_dir = '/Users/User 1/OneDrive/Desktop/Non-cracked images'
num_images = 4500

# NOTE(review): walls and decks are copied into the same folder, so files
# sharing a name would overwrite each other - confirm names are unique.
copy_random_images(source_dir, destination_dir, num_images)

In [None]:
## Defining a function to split the images into Train, Validation and Test sets

def split_data(source, train_dest, val_dest, test_dest, train_size=0.9, random_state=42):
    """Copy the files found in ``source`` into train/validation/test folders.

    The split is done with two successive ``train_test_split`` calls: first
    ``train_size`` of the files are kept and the remainder becomes the test
    set; then ``train_size`` of the kept files becomes the training set and
    the remainder the validation set. With the default 0.9 this is roughly
    81% / 9% / 10%.

    Args:
        source: folder holding the files of one class.
        train_dest: folder receiving the training files (created if missing).
        val_dest: folder receiving the validation files (created if missing).
        test_dest: folder receiving the test files (created if missing).
        train_size: fraction passed as ``train_size`` to both splits.
        random_state: seed passed to both splits.
    """
    for destination in (train_dest, val_dest, test_dest):
        os.makedirs(destination, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so a fixed
    # random_state always yields the same split.
    files = sorted(
        name for name in os.listdir(source)
        if os.path.isfile(os.path.join(source, name))
    )
    train_files, test_files = train_test_split(
        files, train_size=train_size, random_state=random_state)
    train_files, val_files = train_test_split(
        train_files, train_size=train_size, random_state=random_state)

    for subset, destination in ((train_files, train_dest),
                                (val_files, val_dest),
                                (test_files, test_dest)):
        for name in subset:
            shutil.copy(os.path.join(source, name), os.path.join(destination, name))

# Specify the source directories for cracked and non-cracked images
# NOTE(review): absolute, machine-specific paths - adjust before running elsewhere.
cracked_source = '/Users/User 1/OneDrive/Desktop/Cracked images'
non_cracked_source = '/Users/User 1/OneDrive/Desktop/Non-cracked images'

# Base directory to collect the image data for Train, Validation, and Test datasets
base_dir = '/Users/User 1/OneDrive/Desktop/Base'

def count_images(directory):
    """Return how many regular files (sub-folders excluded) sit directly in ``directory``."""
    total = 0
    for entry in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, entry)):
            total += 1
    return total

# Tally the files available per class before splitting
num_cracked_images, num_non_cracked_images = (
    count_images(folder) for folder in (cracked_source, non_cracked_source)
)

print(f"Number of images in cracked source: {num_cracked_images}")
print(f"Number of images in non-cracked source: {num_non_cracked_images}")

In [None]:
# Destination folders (train / validation / test) per class, then the split itself:

cracked_dirs = [
    os.path.join(base_dir, rel)
    for rel in ('train/cracked', 'validation/cracked', 'test/cracked')
]
train_cracked, val_cracked, test_cracked = cracked_dirs

non_cracked_dirs = [
    os.path.join(base_dir, rel)
    for rel in ('train/non cracked', 'validation/non cracked', 'test/non cracked')
]
train_non_cracked, val_non_cracked, test_non_cracked = non_cracked_dirs

# Split each class separately so the images end up under the Base directory
split_data(cracked_source, *cracked_dirs)
split_data(non_cracked_source, *non_cracked_dirs)

**VGG16 NOTEBOOK**

In [None]:
train_dir = '/Users/User 1/OneDrive/Desktop/Base/train'
validation_dir = '/Users/User 1/OneDrive/Desktop/Base/validation'
test_dir = '/Users/User 1/OneDrive/Desktop/Base/test'

# One generator per split; each only rescales pixel values by 1/255.
train_datagen, validation_datagen, test_datagen = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

# Settings shared by all three directory iterators.
flow_settings = dict(target_size=(224, 224), batch_size=32, class_mode='binary')

train_generator = train_datagen.flow_from_directory(train_dir, **flow_settings)

validation_generator = validation_datagen.flow_from_directory(validation_dir, **flow_settings)

# The test iterator is not shuffled.
test_generator = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_settings)

def generator_to_dataset(generator):
    """Wrap a batch generator in a repeating ``tf.data.Dataset``.

    Each element is declared as a pair of float32 tensors: images of shape
    (batch, 224, 224, 3) and labels of shape (batch,).
    """
    batch_signature = (
        tf.TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.float32),
    )

    def _batches():
        return generator

    dataset = tf.data.Dataset.from_generator(_batches, output_signature=batch_signature)
    return dataset.repeat()

# Convert the three Keras iterators into tf.data datasets
train_dataset, validation_dataset, test_dataset = map(
    generator_to_dataset,
    (train_generator, validation_generator, test_generator),
)

In [None]:
# VGG16 convolutional base with ImageNet weights and no classifier top
base_model = VGG16(include_top=False,
                   weights='imagenet',
                   input_shape=(224, 224, 3))

# Freeze the pre-trained layers so only the new head is trained
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# Binary classification head
x = Flatten()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation='sigmoid')(x)

VGG16_model = Model(inputs=base_model.input, outputs=predictions)

sgd_optimizer = SGD(learning_rate=0.0001, momentum=0.9)
VGG16_model.compile(loss='binary_crossentropy',
                    optimizer=sgd_optimizer,
                    metrics=['accuracy'])

# Stop after 5 epochs without val_loss improvement and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Full batches per epoch (a trailing partial batch is not counted)
train_steps = train_generator.samples // train_generator.batch_size
val_steps = validation_generator.samples // validation_generator.batch_size

# Train the model
history = VGG16_model.fit(
    train_dataset,
    epochs=20,
    steps_per_epoch=train_steps,
    validation_data=validation_dataset,
    validation_steps=val_steps,
    callbacks=[early_stopping]
)

In [None]:
# Test-set evaluation of the trained VGG16 model
test_loss, test_accuracy = VGG16_model.evaluate(test_generator)

# Rewind the iterator before predicting, presumably so the predictions line up
# with test_generator.classes.
test_generator.reset()
predictions = VGG16_model.predict(test_generator)
y_true = test_generator.classes
# Round the sigmoid outputs to 0/1 and flatten to a 1-D label vector
y_pred = np.round(predictions).astype(int).ravel()

# NOTE(review): precision/recall/F1 use scikit-learn's default positive label (1),
# which presumably maps to the non-cracked folder - confirm this is intended.
accuracy, precision, recall, f1 = (
    score(y_true, y_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

print(classification_report(y_true, y_pred, target_names=['cracked', 'non-cracked']))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.show()

**OPTIMIZATION VGG16 (Optimizer=Adam)**

In [None]:
# VGG16 convolutional base with ImageNet weights and no classifier top
base_model = VGG16(include_top=False,
                   weights='imagenet',
                   input_shape=(224, 224, 3))

# Freeze the pre-trained layers so only the new head is trained
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# Binary classification head
x = Flatten()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation='sigmoid')(x)

VGG16_model = Model(inputs=base_model.input, outputs=predictions)

adam_optimizer = Adam(learning_rate=0.0001)
VGG16_model.compile(loss='binary_crossentropy',
                    optimizer=adam_optimizer,
                    metrics=['accuracy'])

# Stop after 5 epochs without val_loss improvement and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Full batches per epoch (a trailing partial batch is not counted)
train_steps = train_generator.samples // train_generator.batch_size
val_steps = validation_generator.samples // validation_generator.batch_size

# Train the model
history = VGG16_model.fit(
    train_dataset,
    epochs=20,
    steps_per_epoch=train_steps,
    validation_data=validation_dataset,
    validation_steps=val_steps,
    callbacks=[early_stopping]
)

In [None]:
# Test-set evaluation of the trained VGG16 model
test_loss, test_accuracy = VGG16_model.evaluate(test_generator)

# Rewind the iterator before predicting, presumably so the predictions line up
# with test_generator.classes.
test_generator.reset()
predictions = VGG16_model.predict(test_generator)
y_true = test_generator.classes
# Round the sigmoid outputs to 0/1 and flatten to a 1-D label vector
y_pred = np.round(predictions).astype(int).ravel()

# NOTE(review): precision/recall/F1 use scikit-learn's default positive label (1),
# which presumably maps to the non-cracked folder - confirm this is intended.
accuracy, precision, recall, f1 = (
    score(y_true, y_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

print(classification_report(y_true, y_pred, target_names=['cracked', 'non-cracked']))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.show()

**VGG19 NOTEBOOK**

In [None]:
# Loading VGG19 model pre-trained on ImageNet, without top layers

base_model = VGG19(weights='imagenet',
                   include_top=False,
                   input_shape=(224, 224, 3))
# Freeze the pre-trained layers so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Binary classification head
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation='sigmoid')(x)

VGG19_model = Model(inputs=base_model.input, outputs=predictions)

VGG19_model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Create the early-stopping callback here (same settings as the VGG16 runs)
# so this cell does not depend on an object left over from another cell.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# NOTE(review): the VGG16 runs derive their step counts from
# generator.samples // batch_size, whereas this run is fixed at 100 training
# and 10 validation batches per epoch - confirm before comparing the models.
# Train the model
history = VGG19_model.fit(
    train_dataset,
    steps_per_epoch=100,
    validation_data=validation_dataset,
    validation_steps=10,
    epochs=20,
    callbacks=[early_stopping]
)

In [None]:
# Test-set evaluation of the trained VGG19 model
test_loss, test_accuracy = VGG19_model.evaluate(test_generator)

# Rewind the iterator before predicting, presumably so the predictions line up
# with test_generator.classes.
test_generator.reset()
predictions = VGG19_model.predict(test_generator)
y_true = test_generator.classes
# Round the sigmoid outputs to 0/1 and flatten to a 1-D label vector
y_pred = np.round(predictions).astype(int).ravel()

# NOTE(review): precision/recall/F1 use scikit-learn's default positive label (1),
# which presumably maps to the non-cracked folder - confirm this is intended.
accuracy, precision, recall, f1 = (
    score(y_true, y_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

print(classification_report(y_true, y_pred, target_names=['cracked', 'non-cracked']))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.show()

**OPTIMIZATION VGG19 (Optimizer=Adam)**

In [None]:
# Load VGG19 model pre-trained on ImageNet, without top layers

base_model = VGG19(weights='imagenet',
                   include_top=False,
                   input_shape=(224, 224, 3))

# Freeze the pre-trained layers so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Binary classification head
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation='sigmoid')(x)

VGG19_model = Model(inputs=base_model.input, outputs=predictions)

VGG19_model.compile(optimizer=Adam(learning_rate=0.0001),
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

# Create the early-stopping callback here (same settings as the VGG16 runs)
# so this cell does not depend on an object left over from another cell.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# NOTE(review): this run feeds the Keras generators directly and is fixed at
# 100 training / 10 validation batches per epoch, unlike the VGG16 runs -
# confirm this is intended before comparing the models.
# Train the model
history = VGG19_model.fit(
    train_generator,
    steps_per_epoch=100,
    validation_data=validation_generator,
    validation_steps=10,
    epochs=20,
    callbacks=[early_stopping]
)

In [None]:
# Test-set evaluation of the trained VGG19 model
test_loss, test_accuracy = VGG19_model.evaluate(test_generator)

# Rewind the iterator before predicting, presumably so the predictions line up
# with test_generator.classes.
test_generator.reset()
predictions = VGG19_model.predict(test_generator)
y_true = test_generator.classes
# Round the sigmoid outputs to 0/1 and flatten to a 1-D label vector
y_pred = np.round(predictions).astype(int).ravel()

# NOTE(review): precision/recall/F1 use scikit-learn's default positive label (1),
# which presumably maps to the non-cracked folder - confirm this is intended.
accuracy, precision, recall, f1 = (
    score(y_true, y_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

print(classification_report(y_true, y_pred, target_names=['cracked', 'non-cracked']))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.show()

**RESNET18 NOTEBOOK**

In [None]:
# Image transformations, datasets and loaders shared by the ResNet models:

base_dir = '/Users/s4210323/OneDrive - University of Gloucestershire/Base'


def _finishing_steps():
    """Tensor conversion followed by channel-wise normalisation (used by every split)."""
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]


# Training images get random augmentation on top of the resize.
data_tf = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        *_finishing_steps(),
    ]),
}
# Validation and test images are only resized, converted and normalised.
data_tf.update({
    split: transforms.Compose([transforms.Resize((224, 224)), *_finishing_steps()])
    for split in ('validation', 'test')
})

splits = ['train', 'validation', 'test']
img_datasets = {
    split: datasets.ImageFolder(os.path.join(base_dir, split), data_tf[split])
    for split in splits
}
# Every loader shuffles except the test loader.
img_loaders = {
    split: DataLoader(img_datasets[split], batch_size=32,
                      shuffle=(split != 'test'), num_workers=4)
    for split in splits
}
ds_sizes = {split: len(img_datasets[split]) for split in splits}
class_names = img_datasets['train'].classes

In [None]:
# Initializing the pretrained ResNet18 model:

device = torch.device("cpu")
num_epochs = 20

RN18_model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# Swap the final fully connected layer for dropout + a 2-output linear layer
num_ftrs = RN18_model.fc.in_features
RN18_model.fc = nn.Sequential(nn.Dropout(p=0.5), nn.Linear(num_ftrs, 2))

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(RN18_model.parameters(), lr=0.001, weight_decay=1e-4)

RN18_model = RN18_model.to(device)

def train_model(model, criterion, optimizer, img_loaders, ds_sizes, num_epochs, patience=5):
    """Train ``model`` with per-epoch validation and early stopping on validation loss.

    Args:
        model: network to optimise; it is trained in place.
        criterion: callable ``criterion(outputs, labels)`` returning the batch loss.
        optimizer: optimizer that steps ``model``'s parameters.
        img_loaders: mapping with ``'train'`` and ``'validation'`` iterables
            yielding ``(inputs, labels)`` batches.
        ds_sizes: mapping with the sample count behind each of those loaders;
            used to average the loss and the accuracy.
        num_epochs: maximum number of epochs to run.
        patience: number of consecutive epochs without a new best validation
            loss after which training stops early.

    Returns:
        The same ``model`` object, loaded with the weights of the epoch that
        reached the lowest validation loss.
    """
    # Use the device the model lives on instead of a notebook-level ``device``.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')
    trigger_times = 0
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)
        for phase in ['train', 'validation']:
            is_training = phase == 'train'
            model.train(is_training)  # train(False) is eval mode
            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in img_loaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if is_training:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                # Weight the batch loss by batch size to get a per-sample mean later.
                running_loss += loss.item() * inputs.size(0)
                # Keep a plain int so an empty loader cannot break the division below.
                running_corrects += int(torch.sum(preds == labels).item())
            epoch_loss = running_loss / ds_sizes[phase]
            epoch_acc = running_corrects / ds_sizes[phase]
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Early stopping:
            if phase == 'validation':
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())
                    trigger_times = 0
                else:
                    trigger_times += 1
                    print(f'Early stopping trigger times: {trigger_times}/{patience}')

                if trigger_times >= patience:
                    print('Early stopping!')
                    model.load_state_dict(best_model_wts)
                    return model
    model.load_state_dict(best_model_wts)
    return model

# Fine-tune ResNet18; train_model reloads the best-validation-loss weights before returning.
RN18_model = train_model(RN18_model, criterion, optimizer, img_loaders, ds_sizes, num_epochs)

In [None]:
def evaluate_model(model, dataloader):
    """Run ``model`` over ``dataloader`` and collect true and predicted labels.

    Args:
        model: trained network; it is switched to evaluation mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``(y_true, y_pred)``: two lists with one label per sample, in the
        order the loader produced them. The predicted label is the index of
        the largest model output.
    """
    # Use the device the model lives on instead of a notebook-level ``device``.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    return y_true, y_pred
# The loaders dict built in the data-loading cell is named img_loaders.
y_true_test, y_pred_test = evaluate_model(RN18_model, img_loaders['test'])

# Calculate performance metrics
# NOTE(review): precision/recall/F1 use scikit-learn's default positive label (1),
# which presumably maps to the non-cracked folder - confirm this is intended.
accuracy_test = accuracy_score(y_true_test, y_pred_test)
precision_test = precision_score(y_true_test, y_pred_test)
recall_test = recall_score(y_true_test, y_pred_test)
f1_test = f1_score(y_true_test, y_pred_test)

print(f'Test Accuracy: {accuracy_test:.4f}')
print(f'Test Precision: {precision_test:.4f}')
print(f'Test Recall: {recall_test:.4f}')
print(f'Test F1 Score: {f1_test:.4f}')

# Classification report
print(classification_report(y_true_test, y_pred_test, target_names=['cracked', 'non-cracked']))

# Confusion matrix for test set
cm = confusion_matrix(y_true_test, y_pred_test)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix - Test Set')
plt.show()

**OPTIMIZATION RESNET18 (Optimizer=SGD)**

In [None]:
# Initializing the pretrained ResNet18 model (SGD optimizer variant):

RN18_model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# Swap the final fully connected layer for dropout + a 2-output linear layer
num_ftrs = RN18_model.fc.in_features
RN18_model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(num_ftrs, 2)
)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(RN18_model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)

device = torch.device("cpu")
RN18_model = RN18_model.to(device)

num_epochs = 20

# train_model is defined in the first ResNet18 training cell; it is reused here
# instead of repeating an identical definition.
RN18_model = train_model(RN18_model, criterion, optimizer, img_loaders, ds_sizes, num_epochs)

In [None]:
# evaluate_model is defined in the first ResNet18 evaluation cell; it is reused
# here instead of repeating an identical definition.
# The loaders dict built in the data-loading cell is named img_loaders.
y_true_test, y_pred_test = evaluate_model(RN18_model, img_loaders['test'])

# Calculate performance metrics
# NOTE(review): precision/recall/F1 use scikit-learn's default positive label (1),
# which presumably maps to the non-cracked folder - confirm this is intended.
accuracy_test = accuracy_score(y_true_test, y_pred_test)
precision_test = precision_score(y_true_test, y_pred_test)
recall_test = recall_score(y_true_test, y_pred_test)
f1_test = f1_score(y_true_test, y_pred_test)

print(f'Test Accuracy: {accuracy_test:.4f}')
print(f'Test Precision: {precision_test:.4f}')
print(f'Test Recall: {recall_test:.4f}')
print(f'Test F1 Score: {f1_test:.4f}')

# Classification report
print(classification_report(y_true_test, y_pred_test, target_names=['cracked', 'non-cracked']))

# Confusion matrix for test set
cm = confusion_matrix(y_true_test, y_pred_test)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix - Test Set')
plt.show()

**RESNET34 NOTEBOOK**

In [None]:
# Initializing the pretrained ResNet34 model:

RN34_model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
# Swap the final fully connected layer for dropout + a 2-output linear layer
num_ftrs = RN34_model.fc.in_features
RN34_model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(num_ftrs, 2)
)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(RN34_model.parameters(), lr=0.001, weight_decay=1e-4)

device = torch.device("cpu")
RN34_model = RN34_model.to(device)

num_epochs = 20

# train_model is defined in the first ResNet18 training cell; it is reused here
# instead of repeating an identical definition.
RN34_model = train_model(RN34_model, criterion, optimizer, img_loaders, ds_sizes, num_epochs)

In [None]:
# evaluate_model is defined in the first ResNet18 evaluation cell; it is reused
# here instead of repeating an identical definition.
# The loaders dict built in the data-loading cell is named img_loaders.
y_true_test, y_pred_test = evaluate_model(RN34_model, img_loaders['test'])

# Calculate performance metrics
# NOTE(review): precision/recall/F1 use scikit-learn's default positive label (1),
# which presumably maps to the non-cracked folder - confirm this is intended.
accuracy_test = accuracy_score(y_true_test, y_pred_test)
precision_test = precision_score(y_true_test, y_pred_test)
recall_test = recall_score(y_true_test, y_pred_test)
f1_test = f1_score(y_true_test, y_pred_test)

print(f'Test Accuracy: {accuracy_test:.4f}')
print(f'Test Precision: {precision_test:.4f}')
print(f'Test Recall: {recall_test:.4f}')
print(f'Test F1 Score: {f1_test:.4f}')

# Classification report
print(classification_report(y_true_test, y_pred_test, target_names=['cracked', 'non-cracked']))

# Confusion matrix for test set
cm = confusion_matrix(y_true_test, y_pred_test)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix - Test Set')
plt.show()

**OPTIMIZATION RESNET34 (Optimizer=SGD)**

In [None]:
# Initializing the pretrained ResNet34 model (SGD optimizer variant):

RN34_model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
# Swap the final fully connected layer for dropout + a 2-output linear layer
num_ftrs = RN34_model.fc.in_features
RN34_model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(num_ftrs, 2)
)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(RN34_model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)

device = torch.device("cpu")
RN34_model = RN34_model.to(device)

num_epochs = 20

# train_model is defined in the first ResNet18 training cell; it is reused here
# instead of repeating an identical definition.
RN34_model = train_model(RN34_model, criterion, optimizer, img_loaders, ds_sizes, num_epochs)

In [None]:
# evaluate_model is defined in the first ResNet18 evaluation cell; it is reused
# here instead of repeating an identical definition.
# The loaders dict built in the data-loading cell is named img_loaders.
y_true_test, y_pred_test = evaluate_model(RN34_model, img_loaders['test'])

# Calculate performance metrics
# NOTE(review): precision/recall/F1 use scikit-learn's default positive label (1),
# which presumably maps to the non-cracked folder - confirm this is intended.
accuracy_test = accuracy_score(y_true_test, y_pred_test)
precision_test = precision_score(y_true_test, y_pred_test)
recall_test = recall_score(y_true_test, y_pred_test)
f1_test = f1_score(y_true_test, y_pred_test)

print(f'Test Accuracy: {accuracy_test:.4f}')
print(f'Test Precision: {precision_test:.4f}')
print(f'Test Recall: {recall_test:.4f}')
print(f'Test F1 Score: {f1_test:.4f}')

# Classification report
print(classification_report(y_true_test, y_pred_test, target_names=['cracked', 'non-cracked']))

# Confusion matrix for test set
cm = confusion_matrix(y_true_test, y_pred_test)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix - Test Set')
plt.show()

**RESNET50 NOTEBOOK**

In [None]:
# Initializing the pretrained ResNet50 model:

device = torch.device("cpu")
num_epochs = 20

RN50_model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
# Swap the final fully connected layer for dropout + a 2-output linear layer
num_ftrs = RN50_model.fc.in_features
RN50_model.fc = nn.Sequential(nn.Dropout(p=0.5), nn.Linear(num_ftrs, 2))

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(RN50_model.parameters(), lr=0.001, weight_decay=1e-4)

RN50_model = RN50_model.to(device)

def train_model(model, criterion, optimizer, img_loaders, ds_sizes, num_epochs, patience=5):
    """Train ``model`` with per-epoch validation and early stopping.

    Each epoch runs a 'train' phase (weights updated) followed by a
    'validation' phase (no gradient tracking). The weights with the lowest
    validation loss seen so far are remembered and restored before returning.

    Args:
        model: ``torch.nn.Module`` to train; it is modified in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        img_loaders: Mapping with 'train' and 'validation' keys, each an
            iterable of ``(inputs, labels)`` tensor batches.
        ds_sizes: Mapping with the same keys giving the number of samples per
            phase; used to average the loss and accuracy.
        num_epochs: Maximum number of epochs to run.
        patience: Number of consecutive epochs without a validation-loss
            improvement after which training stops early. Defaults to 5.

    Returns:
        The same ``model`` object with the best-validation-loss weights loaded.
    """
    # Train on whatever device the model already lives on rather than relying
    # on a notebook-level `device` global that may be stale or undefined.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')
    trigger_times = 0  # consecutive epochs without validation improvement

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)
        for phase in ['train', 'validation']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in img_loaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if is_train:
                        loss.backward()
                        optimizer.step()
                # loss is a batch mean, so weight it by the batch size
                running_loss += loss.item() * inputs.size(0)
                # keep the counter a plain int so an empty loader cannot
                # leave it without tensor methods
                running_corrects += int(torch.sum(preds == labels).item())

            epoch_loss = running_loss / ds_sizes[phase]
            epoch_acc = running_corrects / ds_sizes[phase]
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Early stopping:
            if phase == 'validation':
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())
                    trigger_times = 0
                else:
                    trigger_times += 1
                    print(f'Early stopping trigger times: {trigger_times}/{patience}')

                if trigger_times >= patience:
                    print('Early stopping!')
                    model.load_state_dict(best_model_wts)
                    return model

    model.load_state_dict(best_model_wts)
    return model

# Train the ResNet50 model (Adam optimizer); train_model returns the model with
# the lowest-validation-loss weights loaded.
RN50_model = train_model(RN50_model, criterion, optimizer, img_loaders, ds_sizes, num_epochs)

In [None]:
def evaluate_model(model, dataloader):
    """Run inference over a dataloader and collect true and predicted labels.

    Args:
        model: A ``torch.nn.Module`` classifier whose forward pass returns
            per-class scores with the class axis at dimension 1.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.

    Returns:
        tuple[list, list]: ``(y_true, y_pred)`` — the labels yielded by the
        dataloader and the arg-max predictions, as flat lists of Python ints
        in iteration order.
    """
    model.eval()  # inference mode: dropout off, batch-norm uses running stats

    # Use the device the model's weights live on instead of a notebook-level
    # `device` global, so inputs can never end up on a different device.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    y_true = []
    y_pred = []

    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, labels in dataloader:
            outputs = model(inputs.to(eval_device))
            _, preds = torch.max(outputs, 1)  # index of the highest score per sample

            y_true.extend(labels.cpu().tolist())
            y_pred.extend(preds.cpu().tolist())

    return y_true, y_pred
# Collect ground-truth and predicted class indices for the test split.
# NOTE(review): the training call passes `img_loaders`, but this line reads
# `dataloaders` — confirm `dataloaders` exists and has a 'test' key.
y_true_test, y_pred_test = evaluate_model(RN50_model, dataloaders['test'])

# Calculate performance metrics
# NOTE(review): precision/recall/F1 use scikit-learn's default pos_label=1.
# With the label order used in the report below (0 = 'cracked',
# 1 = 'non-cracked') these scores presumably describe the 'non-cracked'
# class — confirm that is the intended positive class.
accuracy_test = accuracy_score(y_true_test, y_pred_test)
precision_test = precision_score(y_true_test, y_pred_test)
recall_test = recall_score(y_true_test, y_pred_test)
f1_test = f1_score(y_true_test, y_pred_test)

print(f'Test Accuracy: {accuracy_test:.4f}')
print(f'Test Precision: {precision_test:.4f}')
print(f'Test Recall: {recall_test:.4f}')
print(f'Test F1 Score: {f1_test:.4f}')

# Classification report (per-class precision/recall/F1; names given in label-index order)
print(classification_report(y_true_test, y_pred_test, target_names=['cracked', 'non-cracked']))

# Confusion matrix for test set
cm = confusion_matrix(y_true_test, y_pred_test)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix - Test Set')
plt.show()

**OPTIMIZATION RESNET50 (Optimizer=SGD)**

In [None]:
# Initializing the pretrained ResNet50 model (ImageNet weights) and replacing
# its final fully connected layer with dropout + a 2-class linear head:

RN50_model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
num_ftrs = RN50_model.fc.in_features
RN50_model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(num_ftrs, 2)
)

# Same loss as the Adam run; the optimizer is SGD with momentum for this experiment.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(RN50_model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)

# Training is pinned to the CPU.
device = torch.device("cpu")
RN50_model = RN50_model.to(device)

# Upper bound on epochs; train_model may stop earlier via early stopping.
num_epochs = 20

def train_model(model, criterion, optimizer, img_loaders, ds_sizes, num_epochs, patience=5):
    """Train ``model`` with per-epoch validation and early stopping.

    Each epoch runs a 'train' phase (weights updated) followed by a
    'validation' phase (no gradient tracking). The weights with the lowest
    validation loss seen so far are remembered and restored before returning.

    Args:
        model: ``torch.nn.Module`` to train; it is modified in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        img_loaders: Mapping with 'train' and 'validation' keys, each an
            iterable of ``(inputs, labels)`` tensor batches.
        ds_sizes: Mapping with the same keys giving the number of samples per
            phase; used to average the loss and accuracy.
        num_epochs: Maximum number of epochs to run.
        patience: Number of consecutive epochs without a validation-loss
            improvement after which training stops early. Defaults to 5.

    Returns:
        The same ``model`` object with the best-validation-loss weights loaded.
    """
    # Train on whatever device the model already lives on rather than relying
    # on a notebook-level `device` global that may be stale or undefined.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')
    trigger_times = 0  # consecutive epochs without validation improvement

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)
        for phase in ['train', 'validation']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in img_loaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if is_train:
                        loss.backward()
                        optimizer.step()
                # loss is a batch mean, so weight it by the batch size
                running_loss += loss.item() * inputs.size(0)
                # keep the counter a plain int so an empty loader cannot
                # leave it without tensor methods
                running_corrects += int(torch.sum(preds == labels).item())

            epoch_loss = running_loss / ds_sizes[phase]
            epoch_acc = running_corrects / ds_sizes[phase]
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Early stopping:
            if phase == 'validation':
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())
                    trigger_times = 0
                else:
                    trigger_times += 1
                    print(f'Early stopping trigger times: {trigger_times}/{patience}')

                if trigger_times >= patience:
                    print('Early stopping!')
                    model.load_state_dict(best_model_wts)
                    return model

    model.load_state_dict(best_model_wts)
    return model

# Train the ResNet50 model (SGD optimizer); train_model returns the model with
# the lowest-validation-loss weights loaded.
RN50_model = train_model(RN50_model, criterion, optimizer, img_loaders, ds_sizes, num_epochs)

In [None]:
def evaluate_model(model, dataloader):
    """Run inference over a dataloader and collect true and predicted labels.

    Args:
        model: A ``torch.nn.Module`` classifier whose forward pass returns
            per-class scores with the class axis at dimension 1.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.

    Returns:
        tuple[list, list]: ``(y_true, y_pred)`` — the labels yielded by the
        dataloader and the arg-max predictions, as flat lists of Python ints
        in iteration order.
    """
    model.eval()  # inference mode: dropout off, batch-norm uses running stats

    # Use the device the model's weights live on instead of a notebook-level
    # `device` global, so inputs can never end up on a different device.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    y_true = []
    y_pred = []

    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, labels in dataloader:
            outputs = model(inputs.to(eval_device))
            _, preds = torch.max(outputs, 1)  # index of the highest score per sample

            y_true.extend(labels.cpu().tolist())
            y_pred.extend(preds.cpu().tolist())

    return y_true, y_pred
# Collect ground-truth and predicted class indices for the test split.
# NOTE(review): the training call passes `img_loaders`, but this line reads
# `dataloaders` — confirm `dataloaders` exists and has a 'test' key.
y_true_test, y_pred_test = evaluate_model(RN50_model, dataloaders['test'])

# Calculate performance metrics
# NOTE(review): precision/recall/F1 use scikit-learn's default pos_label=1.
# With the label order used in the report below (0 = 'cracked',
# 1 = 'non-cracked') these scores presumably describe the 'non-cracked'
# class — confirm that is the intended positive class.
accuracy_test = accuracy_score(y_true_test, y_pred_test)
precision_test = precision_score(y_true_test, y_pred_test)
recall_test = recall_score(y_true_test, y_pred_test)
f1_test = f1_score(y_true_test, y_pred_test)

print(f'Test Accuracy: {accuracy_test:.4f}')
print(f'Test Precision: {precision_test:.4f}')
print(f'Test Recall: {recall_test:.4f}')
print(f'Test F1 Score: {f1_test:.4f}')

# Classification report (per-class precision/recall/F1; names given in label-index order)
print(classification_report(y_true_test, y_pred_test, target_names=['cracked', 'non-cracked']))

# Confusion matrix for test set
cm = confusion_matrix(y_true_test, y_pred_test)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix - Test Set')
plt.show()

**MOBILENET NOTEBOOK**

In [None]:
# Set paths to the dataset (absolute, machine-specific locations of the three splits)

train_dir = '/Users/User 1/OneDrive/Desktop/Base/train'
validation_dir = '/Users/User 1/OneDrive/Desktop/Base/validation'
test_dir = '/Users/User 1/OneDrive/Desktop/Base/test'

# Image Data Generators: only pixel rescaling to [0, 1] is applied to every
# split — no augmentation is configured here.
# NOTE(review): Keras documents MobileNetV2's ImageNet weights as expecting
# inputs scaled to [-1, 1] via `mobilenet_v2.preprocess_input`; confirm that
# 1/255 rescaling is intended for the frozen base model used with these generators.
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Each generator reads images from class sub-folders, resizes them to 224x224
# and yields batches of 32 with a single 0/1 label per image.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

# shuffle=False keeps the test batches in file order so predictions can be
# compared against `test_generator.classes`.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

In [None]:
# Instantiating model, Training and Training history:

# ImageNet-pretrained MobileNetV2 without its classification head, used as a
# frozen feature extractor (its weights are not updated during fit).
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

# New head: global average pooling followed by one sigmoid unit for the binary label.
MNet_model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(1, activation='sigmoid')
])

# Baseline run: SGD with momentum.
MNet_model.compile(optimizer=SGD(learning_rate=0.0001,momentum=0.9),
              loss='binary_crossentropy', 
              metrics=['accuracy'])

# `history` keeps the per-epoch training/validation metrics returned by fit.
history = MNet_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20
)

In [None]:
# Evaluate the model
test_loss, test_acc = MNet_model.evaluate(test_generator)

# Generate predictions
# The model ends in a single sigmoid unit, so predict() returns one probability
# per image in a column of shape (n_samples, 1). Threshold at 0.5 and flatten
# to 1-D so the predictions have the same shape as `y_true`.
y_pred_prob = MNet_model.predict(test_generator)
y_pred = (y_pred_prob > 0.5).astype(int).ravel()
# test_generator was created with shuffle=False, so `.classes` is in the same
# order as the predictions.
y_true = test_generator.classes

# Calculate performance metrics
# NOTE(review): precision/recall/F1 use scikit-learn's default pos_label=1.
# With the label order used in the report below (0 = 'cracked',
# 1 = 'non-cracked') these scores presumably describe the 'non-cracked'
# class — confirm that is the intended positive class.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

print(f'Test Accuracy: {accuracy:.2f}')
print(f'Test Precision: {precision:.2f}')
print(f'Test Recall: {recall:.2f}')
print(f'Test F1 Score: {f1:.2f}')

# Classification report
print(classification_report(y_true, y_pred, target_names=['cracked', 'non-cracked']))

# Confusion matrix for test set
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix - Test Set')
plt.show()

**OPTIMIZATION MOBILENET (Optimizer=Adam)**

In [None]:
# Instantiating model, Training and Training history:

# ImageNet-pretrained MobileNetV2 without its classification head, used as a
# frozen feature extractor (its weights are not updated during fit).
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

# New head: global average pooling followed by one sigmoid unit for the binary label.
MNet_model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(1, activation='sigmoid')
])

# Optimization run: Adam with Keras' default settings (selected by name).
MNet_model.compile(optimizer= 'adam',
              loss='binary_crossentropy', 
              metrics=['accuracy'])

# `history` keeps the per-epoch training/validation metrics returned by fit.
history = MNet_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20
)

In [None]:
# Evaluate the model
test_loss, test_acc = MNet_model.evaluate(test_generator)

# Generate predictions
# The model ends in a single sigmoid unit, so predict() returns one probability
# per image in a column of shape (n_samples, 1). Threshold at 0.5 and flatten
# to 1-D so the predictions have the same shape as `y_true`.
y_pred_prob = MNet_model.predict(test_generator)
y_pred = (y_pred_prob > 0.5).astype(int).ravel()
# test_generator was created with shuffle=False, so `.classes` is in the same
# order as the predictions.
y_true = test_generator.classes

# Calculate performance metrics
# NOTE(review): precision/recall/F1 use scikit-learn's default pos_label=1.
# With the label order used in the report below (0 = 'cracked',
# 1 = 'non-cracked') these scores presumably describe the 'non-cracked'
# class — confirm that is the intended positive class.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

print(f'Test Accuracy: {accuracy:.2f}')
print(f'Test Precision: {precision:.2f}')
print(f'Test Recall: {recall:.2f}')
print(f'Test F1 Score: {f1:.2f}')

# Classification report
print(classification_report(y_true, y_pred, target_names=['cracked', 'non-cracked']))

# Confusion matrix for test set
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix - Test Set')
plt.show()

**EVALUATING OPTIMIZED RESNET18 MODEL ON CHELTENHAM CAPTURED IMAGES**

In [None]:
# Root folder containing the `CHL` image folder (one sub-folder per class).
# NOTE(review): this absolute path differs from the '/Users/User 1/...' paths
# used by other cells in this notebook — confirm it points at the same data.
base_dir = '/Users/s4210323/OneDrive - University of Gloucestershire/Base'

# Evaluation-only preprocessing: resize to 224x224, convert to a tensor and
# normalize with the ImageNet channel means / standard deviations.
data_tf = {
    'CHL': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

img_datasets = {x: datasets.ImageFolder(os.path.join(base_dir, x), data_tf[x]) for x in ['CHL']}
# shuffle=False keeps a stable sample order for evaluation.
img_loaders = {
    'CHL': DataLoader(img_datasets['CHL'], batch_size=16, shuffle=False, num_workers=4)
}
ds_sizes = {x: len(img_datasets[x]) for x in ['CHL']}
class_names = img_datasets['CHL'].classes

# ImageFolder stores every sample's class index in `.targets`, so the class
# distribution can be tallied without decoding and transforming each image.
class_counts = defaultdict(int)
for label in img_datasets['CHL'].targets:
    class_counts[class_names[label]] += 1

print(f"Dataset sizes: {ds_sizes}")
print(f"Class names: {class_names}")
print("Class distribution:")
for class_name, count in class_counts.items():
    print(f"{class_name}: {count} Images")

In [None]:
def evaluate_model(model, dataloader):
    """Run inference over a dataloader and collect true and predicted labels.

    Args:
        model: A ``torch.nn.Module`` classifier whose forward pass returns
            per-class scores with the class axis at dimension 1.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.

    Returns:
        tuple[list, list]: ``(y_true, y_pred)`` — the labels yielded by the
        dataloader and the arg-max predictions, as flat lists of Python ints
        in iteration order.
    """
    model.eval()  # inference mode: dropout off, batch-norm uses running stats

    # Use the device the model's weights live on instead of a notebook-level
    # `device` global, so inputs can never end up on a different device.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    y_true = []
    y_pred = []

    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, labels in dataloader:
            outputs = model(inputs.to(eval_device))
            _, preds = torch.max(outputs, 1)  # index of the highest score per sample

            y_true.extend(labels.cpu().tolist())
            y_pred.extend(preds.cpu().tolist())

    return y_true, y_pred
# Evaluate the ResNet18 model on the Cheltenham images. The CHL loader is
# defined as `img_loaders['CHL']` in the dataset cell of this section; the
# previous `dataloaders['CHL']` lookup referred to a name that cell never creates.
y_true_CHL, y_pred_CHL = evaluate_model(RN18_model, img_loaders['CHL'])

# performance metrics calculation:
# NOTE(review): precision/recall/F1 use scikit-learn's default pos_label=1.
# With the label order used in the report below (0 = 'cracked',
# 1 = 'non-cracked') these scores presumably describe the 'non-cracked'
# class — confirm that is the intended positive class.
accuracy_test = accuracy_score(y_true_CHL, y_pred_CHL)
precision_test = precision_score(y_true_CHL, y_pred_CHL)
recall_test = recall_score(y_true_CHL, y_pred_CHL)
f1_test = f1_score(y_true_CHL, y_pred_CHL)

print(f'Test Accuracy: {accuracy_test:.4f}')
print(f'Test Precision: {precision_test:.4f}')
print(f'Test Recall: {recall_test:.4f}')
print(f'Test F1 Score: {f1_test:.4f}')

# Classification report
print(classification_report(y_true_CHL, y_pred_CHL, target_names=['cracked', 'non-cracked']))

# Confusion matrix for CHL set
cm = confusion_matrix(y_true_CHL, y_pred_CHL)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['cracked', 'non-cracked'])
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix - Cheltenham Images')
plt.show()

**CRACK ANALYSIS ON CHELTENHAM IMAGES**

In [None]:
## Defining a function for crack feature extraction:

def _show_crack_feature_panels(image, edges, dilated, contours, lbp):
    """Show the intermediate images of the feature pipeline in a 2x2 figure."""
    plt.figure(figsize=(10, 6))

    plt.subplot(2, 2, 1)
    plt.imshow(edges, cmap='gray')
    plt.title('Edge Detection')

    plt.subplot(2, 2, 2)
    plt.imshow(dilated, cmap='gray')
    plt.title('Dilation')

    # Draw on a copy so the caller's image is left untouched; OpenCV images
    # are BGR, matplotlib expects RGB.
    contour_image = cv2.drawContours(image.copy(), contours, -1, (0, 255, 0), 3)
    plt.subplot(2, 2, 3)
    plt.imshow(cv2.cvtColor(contour_image, cv2.COLOR_BGR2RGB))
    plt.title('Contours')

    plt.subplot(2, 2, 4)
    plt.imshow(lbp, cmap='gray')
    plt.title('Local Binary Patterns')

    plt.tight_layout()
    plt.show()


def calculate_crack_features(image_path, visualize=False):
    """Extract a contour-area feature and a texture feature from one image.

    Pipeline: grayscale -> Canny edges -> 3x3 dilation -> external contours
    (areas summed), plus a 'uniform' Local Binary Pattern histogram of the
    grayscale image (variance taken).

    Args:
        image_path: Path of the image file to analyse.
        visualize: When True, display a 2x2 figure with the edge map, the
            dilated edges, the drawn contours and the LBP image.

    Returns:
        tuple: ``(total_area, texture_uniformity)`` where ``total_area`` is the
        sum of ``cv2.contourArea`` over the external contours of the dilated
        edge map and ``texture_uniformity`` is the variance of the normalised
        LBP histogram.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cvtColor.
    if image is None:
        raise OSError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    edges = cv2.Canny(gray, threshold1=30, threshold2=100)
    # Dilation thickens the edges so fragments of the same edge join up
    dilated = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)

    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    total_area = sum(cv2.contourArea(contour) for contour in contours)

    radius = 3
    n_points = 8 * radius
    lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
    _, counts = np.unique(lbp.ravel(), return_counts=True)
    hist = counts / counts.sum()  # normalise to relative frequencies
    texture_uniformity = np.var(hist)

    if visualize:
        _show_crack_feature_panels(image, edges, dilated, contours, lbp)

    return total_area, texture_uniformity

In [None]:
def process_folder(folder_path, visualize=False, extensions=(".jpg", ".jpeg", ".png")):
    """Compute crack features for every image file in a folder.

    Args:
        folder_path: Directory whose files are scanned (not recursive).
        visualize: When True, the intermediate-step figure is shown for the
            first 5 processed images only.
        extensions: File-name suffixes treated as images. Matching is
            case-insensitive, so e.g. ``IMG_001.JPG`` is included.

    Returns:
        list: One ``calculate_crack_features`` result per image, in sorted
        file-name order.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    crack_features = []
    visualized_count = 0
    # os.listdir order is arbitrary; sort so results (and which images get
    # visualized) are reproducible between runs.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(suffixes):
            continue
        path = os.path.join(folder_path, filename)
        should_visualize = visualize and visualized_count < 5
        crack_features.append(calculate_crack_features(path, visualize=should_visualize))
        if should_visualize:
            visualized_count += 1
    return crack_features

def categorize_crack_sizes(crack_features, small_max=1000, medium_max=1500):
    """Count how many feature tuples fall into each crack-size category.

    Args:
        crack_features: Iterable of ``(area, texture)`` pairs; only ``area``
            is used.
        small_max: Areas strictly below this value count as 'small'.
        medium_max: Areas from ``small_max`` up to (but not including) this
            value count as 'medium'; anything larger or equal is 'large'.

    Returns:
        dict: ``{'small': int, 'medium': int, 'large': int}``.
    """
    categories = {'small': 0, 'medium': 0, 'large': 0}
    for area, _ in crack_features:
        # NOTE(review): the thresholds are compared against the raw area value
        # with no scaling, so they presumably assume a fixed image
        # resolution — confirm.
        if area < small_max:
            categories['small'] += 1
        elif area < medium_max:
            categories['medium'] += 1
        else:
            categories['large'] += 1
    return categories

def plot_crack_size_distribution(categories, title):
    """Show a bar chart of the number of images per crack-size category.

    Args:
        categories: Mapping of category name to count; bars are drawn in the
            mapping's iteration order.
        title: Title placed above the chart.
    """
    labels = [*categories]
    heights = [categories[label] for label in labels]

    plt.figure(figsize=(8, 5))
    plt.bar(labels, heights, color=['blue', 'orange', 'green'])
    plt.xlabel('Crack Size Category')
    plt.ylabel('Count')
    plt.title(title)
    plt.show()

In [None]:
# Folder with the Cheltenham images of the 'cracked' class.
# NOTE(review): this path is under '/Users/User 1/...', while the CHL dataset
# cell builds its path from a different base directory — confirm both point
# at the same images.
cracked_images = '/Users/User 1/OneDrive/Desktop/Base/CHL/cracked'

# Set visualize=True in process_folder call to see the visualizations for the first 5 images
crack_features = process_folder(cracked_images, visualize=True)

# Bucket each image's contour area into small / medium / large
crack_categories = categorize_crack_sizes(crack_features)

plot_crack_size_distribution(crack_categories, 'Crack Size Distribution')

In [None]:
# Additional function for plotting the numerical distribution of crack sizes

def plot_crack_size_numerical_distribution(crack_categories, title):
    """Show a bar chart of crack-size counts with each count written above its bar.

    Args:
        crack_categories: Mapping that contains the keys 'small', 'medium'
            and 'large' with their counts.
        title: Title placed above the chart.
    """
    size_labels = ['small', 'medium', 'large']
    heights = [crack_categories[name] for name in size_labels]
    positions = np.arange(len(size_labels))
    bar_width = 0.35

    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(positions, heights, bar_width, label='crack')

    ax.set_xlabel('Crack Size Category')
    ax.set_ylabel('Counts')
    ax.set_title(title)
    ax.set_xticks(positions)
    ax.set_xticklabels(size_labels)

    # Write each bar's height 3 points above the centre of its top edge
    for bar in bars:
        bar_height = bar.get_height()
        bar_centre = bar.get_x() + bar.get_width() / 2
        ax.annotate(f'{bar_height}',
                    xy=(bar_centre, bar_height),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom')

    fig.tight_layout()
    plt.show()

In [None]:
# categorize the crack sizes
# (features are recomputed for the whole folder here, this time without the
# per-image visualizations)
crack_features = process_folder(cracked_images)  

crack_categories = categorize_crack_sizes(crack_features)

# Plot the numerical distribution of crack sizes
plot_crack_size_numerical_distribution(crack_categories,'Numerical Distribution of Crack Sizes')