# In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
import matplotlib.pyplot as plt
import numpy as np
import os


# Define functions for evaluation metrics and plotting
def plot_training_history(history):
    """Plot training vs. validation curves for accuracy and loss.

    Draws one 12x6 figure with two side-by-side panels (accuracy on the
    left, loss on the right) and displays it with ``plt.show()``.

    Args:
        history: Object whose ``history`` attribute is a mapping with the
            keys ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and
            ``'val_loss'``, each holding one value per epoch.
    """
    # (key in history.history, human-readable name used in the labels)
    panels = (
        ('accuracy', 'Accuracy'),
        ('loss', 'Loss'),
    )
    records = history.history

    plt.figure(figsize=(12, 6))

    for position, (metric, pretty_name) in enumerate(panels, start=1):
        plt.subplot(1, len(panels), position)
        plt.plot(records[metric], label=f'Training {pretty_name}')
        plt.plot(records[f'val_{metric}'], label=f'Validation {pretty_name}')
        plt.xlabel('Epoch')
        plt.ylabel(pretty_name)
        plt.title(f'Training vs Validation {pretty_name}')
        plt.legend()

    plt.tight_layout()
    plt.show()


# Dataset location and pipeline configuration.
# drive_dir is the root folder; each entry of data_dirs is a sub-folder of it
# holding the images of one class.
drive_dir = '/content/drive/MyDrive/datathon2024'
data_dirs = [
    'underdeveloped_atl',
    'ground_broken_atl',
    'concrete_pad_atl',
    'framing_up_atl',
    'near_completion_atl',
]
# Class labels are the folder names themselves (kept as an independent list).
labels = list(data_dirs)
batch_size = 32
input_shape = (224, 224, 3)  # (height, width, channels) fed to the model

# Load and preprocess images function
def load_and_preprocess_image(image_path, label):
    """Read one image file and turn it into a model-ready tensor.

    Args:
        image_path: Path of the image file to read (string or scalar string
            tensor).
        label: Label associated with the image; returned unchanged.

    Returns:
        A tuple ``(image, label)`` where ``image`` is a float32 tensor of
        shape ``(input_shape[0], input_shape[1], 3)`` with pixel values
        divided by 255.
    """
    image = tf.io.read_file(image_path)
    # expand_animations=False guarantees a 3-D (height, width, channels)
    # result: without it an animated GIF decodes to a 4-D tensor, which
    # contradicts the shape declared below and breaks resizing/batching.
    image = tf.image.decode_image(image, channels=3, expand_animations=False)
    # decode_image cannot infer a static shape inside Dataset.map, and
    # tf.image.resize needs at least a known rank.
    image.set_shape([None, None, 3])
    image = tf.image.resize(image, [input_shape[0], input_shape[1]])
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

# Load data and split into train, validation, and test sets
image_paths = []
image_labels = []
for data_dir, label in zip(data_dirs, labels):
    folder_path = os.path.join(drive_dir, data_dir)
    # os.listdir returns entries in arbitrary order; sorting makes the
    # fixed random_state below actually produce the same split on every run.
    images = sorted(os.listdir(folder_path))
    for image in images:
        image_path = os.path.join(folder_path, image)
        # Skip anything that is not a regular file (e.g. the
        # .ipynb_checkpoints folders notebooks create), which would
        # otherwise fail when the pipeline tries to read it as an image.
        if not os.path.isfile(image_path):
            continue
        image_paths.append(image_path)
        image_labels.append(label)

# Encode string labels to integer labels
label_encoder = LabelEncoder()
image_labels_encoded = label_encoder.fit_transform(image_labels)

# First hold out 20% for testing, then 20% of the remainder for validation
# (roughly 64% train / 16% validation / 20% test overall).
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths, image_labels_encoded, test_size=0.2, random_state=42)
train_paths, val_paths, train_labels, val_labels = train_test_split(
    train_paths, train_labels, test_size=0.2, random_state=42)

# Data augmentation settings (random geometric transforms).
# NOTE(review): `datagen` is not referenced anywhere else in the visible
# code, so the tf.data pipelines below appear to receive un-augmented
# images - confirm whether it is used further down or should be wired in.
datagen = ImageDataGenerator(
    # Rotation and mirroring
    rotation_range=20,
    horizontal_flip=True,
    # Translations
    width_shift_range=0.2,
    height_shift_range=0.2,
    # Shear and zoom
    shear_range=0.2,
    zoom_range=0.2,
    # How pixels exposed by a transform are filled in
    fill_mode='nearest',
)

# Create TensorFlow datasets
def create_dataset(paths, labels, shuffle=True):
    """Build a batched ``tf.data.Dataset`` of preprocessed images.

    Args:
        paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``paths``.
        shuffle: When True (the default, matching the previous behaviour)
            the examples are reshuffled on every pass. Pass False for
            validation/test data so that the order stays aligned with the
            input ``labels``.

    Returns:
        A dataset yielding ``(images, labels)`` batches of up to
        ``batch_size`` elements.
    """
    dataset = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        # Shuffle the (path, label) pairs *before* decoding: the buffer then
        # holds short strings instead of every decoded image in memory.
        dataset = dataset.shuffle(len(paths))
    dataset = dataset.map(load_and_preprocess_image,
                          num_parallel_calls=tf.data.AUTOTUNE)
    # Prefetch so that image decoding overlaps with model execution.
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

train_dataset = create_dataset(train_paths, train_labels)
# Evaluation data is kept in its original order so that predictions can be
# compared element-by-element with val_labels / test_labels.
val_dataset = create_dataset(val_paths, val_labels, shuffle=False)
test_dataset = create_dataset(test_paths, test_labels, shuffle=False)

# Define and compile the model
# Three Conv -> BatchNorm -> MaxPool -> Dropout stages with 32, 64 and 128
# filters, followed by a small dense classifier head.
model = tf.keras.Sequential([
    # Only the first layer declares the input shape; Sequential ignores
    # input_shape on later layers, so it is not repeated below.
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.25),

    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.25),

    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Dropout(0.25),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(32, activation='relu'),
    # One softmax output per class.
    tf.keras.layers.Dense(len(labels), activation='softmax')
])

# Labels are integer-encoded (LabelEncoder), hence the *sparse* variant of
# categorical cross-entropy.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Training callbacks; both track the validation loss.
callbacks = [
    # Stop once val_loss has not improved for 3 epochs and roll the model
    # back to the best weights seen.
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    # Keep only the best-scoring model on disk.
    ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True),
]

# Fit for at most 10 epochs, validating after each one.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
    callbacks=callbacks,
)

# Show the accuracy/loss curves recorded during training.
plot_training_history(history)

# Score the in-memory model on the held-out test set.
# Note: the figure printed here comes from `model`, not from `best_model`,
# which is only loaded afterwards.
test_loss, test_accuracy = model.evaluate(x=test_dataset)
print(f'Test accuracy: {test_accuracy}')

# Reload the checkpoint written by ModelCheckpoint during training.
best_model = tf.keras.models.load_model(filepath='best_model.h5')

