# 1. Data preprocessing

In [None]:
import os
import numpy as np
import time
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import cv2


In [None]:
# Configurable parameters (tune these to change the experiment)
IMG_SIZE = (32, 32)  # size every image is resized to; passed to cv2.resize as dsize (width, height)
NUM_CONV_LAYERS = 5  # number of Conv2D + MaxPooling2D stages in the CNN
BATCH_SIZE = 32  # batch size passed to model.fit
EPOCHS = 20  # number of training epochs passed to model.fit

In [None]:
# Data paths
# Alternative dataset locations (presumably for running on Kaggle), left disabled:
# train_data_dir = "/kaggle/input/surface-crack-train/training/"
# test_data_dir = "/kaggle/input/surface-crack-test/test/"
# Active locations, relative to the current working directory
train_data_dir = 'concrete_crack_images/training'
test_data_dir = 'concrete_crack_images/test'

In [None]:
# Load the training data and partition it into training and validation sets
def load_and_split_train_data(data_dir, img_size=IMG_SIZE, test_size=0.2):
    """Load labelled images from ``data_dir`` and split them into train/validation sets.

    ``data_dir`` must contain the sub-directories ``Negative`` (label 0) and
    ``Positive`` (label 1). Every file that ``cv2.imread`` can decode is
    resized to ``img_size`` and its pixel values are divided by 255.

    Args:
        data_dir: Directory holding the ``Negative`` and ``Positive`` folders.
        img_size: Target size handed to ``cv2.resize`` as ``dsize``, i.e.
            ``(width, height)``.
        test_size: Share of the samples held out for validation; forwarded to
            ``train_test_split``.

    Returns:
        ``(train_images, val_images, train_labels, val_labels)`` as produced by
        ``train_test_split`` with ``random_state=42``.

    Raises:
        ValueError: If no readable image is found under ``data_dir``.
    """
    import warnings

    images = []
    labels = []
    skipped = 0

    for label, category in enumerate(['Negative', 'Positive']):
        category_path = os.path.join(data_dir, category)
        # os.listdir() returns entries in arbitrary order; sorting fixes the
        # sample order so the seeded split below is reproducible everywhere.
        for file in sorted(os.listdir(category_path)):
            img_path = os.path.join(category_path, file)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None for entries it cannot decode
                skipped += 1
                continue
            images.append(cv2.resize(img, img_size))
            labels.append(label)

    if skipped:
        warnings.warn(
            f"Skipped {skipped} unreadable file(s) under {data_dir}",
            stacklevel=2,
        )
    if not images:
        raise ValueError(f"No readable images found under {data_dir}")

    images = np.array(images) / 255.0  # Data Normalization
    labels = np.array(labels)

    return train_test_split(images, labels, test_size=test_size, random_state=42)


In [None]:
# Load test data
def load_test_data(data_dir, img_size=IMG_SIZE):
    """Load every readable image directly inside ``data_dir``.

    Each file that ``cv2.imread`` can decode is resized to ``img_size`` and
    its pixel values are divided by 255. Entries that cannot be decoded are
    skipped and reported through a single warning.

    Args:
        data_dir: Directory containing the test images.
        img_size: Target size handed to ``cv2.resize`` as ``dsize``, i.e.
            ``(width, height)``.

    Returns:
        ``(images, filenames)`` where ``images`` is a NumPy array of the
        loaded images and ``filenames`` lists the matching file names in the
        same order.
    """
    import warnings

    images = []
    filenames = []
    skipped = 0

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # order of the returned images (and of anything derived from it)
    # deterministic.
    for file in sorted(os.listdir(data_dir)):
        img_path = os.path.join(data_dir, file)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for entries it cannot decode
            skipped += 1
            continue
        images.append(cv2.resize(img, img_size))
        filenames.append(file)

    if skipped:
        warnings.warn(
            f"Skipped {skipped} unreadable file(s) under {data_dir}",
            stacklevel=2,
        )

    images = np.array(images) / 255.0  # Data Normalization
    return images, filenames


In [None]:
# Load the labelled images and split them into training and validation sets
# (default test_size=0.2 of load_and_split_train_data)
train_images, val_images, train_labels, val_labels = load_and_split_train_data(train_data_dir)

# Load the test images together with their file names (no labels are returned)
test_images, test_filenames = load_test_data(test_data_dir)

# Report how many images ended up in each set
print(f"Training dataset size: {train_images.shape[0]}")
print(f"Validation dataset size: {val_images.shape[0]}")
print(f"Testing dataset size: {test_images.shape[0]}")

# 2. Define and train the CNN model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Define the CNN model
def create_cnn_model(input_shape=(IMG_SIZE[1], IMG_SIZE[0], 3), num_conv_layers=NUM_CONV_LAYERS):
    """Build and compile a binary-classification CNN.

    The network stacks ``num_conv_layers`` stages of 3x3 ``Conv2D`` (ReLU,
    ``same`` padding) followed by 2x2 ``MaxPooling2D``; the filter count
    starts at 32 and doubles after every stage. The feature maps are then
    flattened and passed through ``Dense(128)`` + ``Dropout(0.5)`` and a
    single sigmoid output unit.

    Args:
        input_shape: Shape of one input image as ``(height, width, channels)``.
            The default is derived from ``IMG_SIZE``, which the loaders pass
            to ``cv2.resize`` as ``(width, height)`` -- hence the swapped
            indices. For a square ``IMG_SIZE`` the order makes no difference.
        num_conv_layers: Number of convolution + pooling stages.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, binary
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()

    filters = 32
    for i in range(num_conv_layers):
        # Only the first layer declares the input shape. None is not a valid
        # shape, so the argument is omitted entirely for the later layers.
        shape_kwargs = {"input_shape": input_shape} if i == 0 else {}

        # Adding Convolutional and Pooling Layers
        model.add(Conv2D(filters, (3, 3), activation='relu', padding="same", **shape_kwargs))
        model.add(MaxPooling2D(pool_size=(2, 2), padding="same"))
        filters *= 2  # Double the number of filters per layer

    # Flatten the feature maps into a vector
    model.add(Flatten())

    # Fully Connected Layer
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))

    # Output Layer: one sigmoid unit for the binary decision
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Build the model with the default input shape and depth, then print its layer summary
cnn_model = create_cnn_model()
cnn_model.summary()


In [None]:
# Start the timer. perf_counter() is a monotonic, high-resolution clock meant
# for measuring durations; time.time() follows the wall clock and can jump if
# the system time is adjusted while training runs.
start_time = time.perf_counter()

# Train the model, evaluating on the validation set after every epoch
history = cnn_model.fit(
    train_images,
    train_labels,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(val_images, val_labels),
    verbose=1
)

# End the timer
end_time = time.perf_counter()

# Calculate the computational time (seconds)
training_time = end_time - start_time

# Print the computational time
print(f"Training Time: {training_time:.2f} seconds")


# 3. Evaluate the model

In [None]:
# Predicted probabilities for the validation set, flattened to a 1-D array
val_predictions = cnn_model.predict(val_images).flatten()

# Histogram of the predicted probabilities, drawn on an explicit Axes
fig, ax = plt.subplots()
ax.hist(val_predictions, bins=20, color='blue', edgecolor='black', alpha=0.7)
ax.set_xlabel('Predicted Probability')
ax.set_ylabel('Frequency')
ax.set_title('Validation Set Prediction Probability Distribution')
plt.show()

# Summary statistics of the same distribution
print(f"Min: {val_predictions.min()}, Max: {val_predictions.max()}")
print(f"Mean: {val_predictions.mean()}, Std: {val_predictions.std()}")


In [None]:
def plot_history_metric(history_dict, metric, display_name):
    """Plot the per-epoch training and validation curves of one metric.

    Both curves are drawn with point markers and every point is annotated
    with its value formatted to four decimals. The figure is shown before
    the function returns.

    Args:
        history_dict: Mapping of metric name to a list of per-epoch values
            (e.g. ``history.history``); must contain ``metric`` and
            ``'val_' + metric``.
        metric: Key of the training series, e.g. ``'accuracy'`` or ``'loss'``.
        display_name: Human-readable metric name used in the title, the
            y-axis label and the legend entries.
    """
    series = (
        (metric, f'Train {display_name}'),
        (f'val_{metric}', f'Validation {display_name}'),
    )

    for key, label in series:
        plt.plot(history_dict[key], label=label, marker='o')
    plt.title(f'Model {display_name}')
    plt.xlabel('Epochs')
    plt.ylabel(display_name)
    plt.legend()

    # Annotate each point with its value; x positions are the 0-based epoch indices
    for key, _ in series:
        for epoch_idx, value in enumerate(history_dict[key]):
            plt.text(epoch_idx, value, f"{value:.4f}", ha='center', va='bottom', fontsize=8)

    plt.show()


# Plotting accuracy curves for training and validation
plot_history_metric(history.history, 'accuracy', 'Accuracy')

# Plotting loss curves for training and validation
plot_history_metric(history.history, 'loss', 'Loss')

# 4. Predict on the test set

In [None]:
import pandas as pd

# Predict on the test images; the model's single sigmoid unit yields one value per image
predictions = cnn_model.predict(test_images)
# Threshold at 0.5 and flatten to 1-D: 1 = 'Positive', 0 = 'Negative'
# (label order used when loading the training data)
predicted_classes = (predictions > 0.5).astype(int).flatten()

# Pair every test file name with its predicted class
output_df = pd.DataFrame({
    'image_id': test_filenames,
    'predicted_class': predicted_classes
})

# Save as CSV file; index=False leaves the DataFrame row index out of the file
output_csv_path = "predictions.csv"
output_df.to_csv(output_csv_path, index=False)

print(f"Predictions have been saved to {output_csv_path}")
