In [124]:
import os
import zipfile
import numpy as np
import pandas as pd
import cv2
import albumentations as A
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input, Concatenate, LayerNormalization)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [114]:
# Load and preprocess the data
def load_data(image_folder, metadata_path, num_samples=None):
    """Load the images listed in a metadata CSV and scale them to [0, 1].

    Args:
        image_folder: Directory containing one ``<id>.jpg`` file per metadata row.
        metadata_path: Path to a CSV file that has an ``id`` column.
        num_samples: If truthy, only the first ``num_samples`` rows are used.

    Returns:
        Tuple ``(images, metadata)``. ``images`` is the array built from the
        successfully read images, divided by 255. ``metadata`` contains only
        the rows whose image could be read, in the same order, so
        ``images[i]`` always belongs to ``metadata.iloc[i]``.
    """
    # Load metadata
    metadata = pd.read_csv(metadata_path)

    # If num_samples is provided, take the first 'num_samples' rows
    if num_samples:
        metadata = metadata.head(num_samples)

    images = []
    loaded_positions = []
    missing_images = []

    # Walk the id column itself rather than iterrows(): iterrows() builds one
    # Series per row and may upcast the id (e.g. to float) on mixed-dtype
    # tables, which would yield file names such as "12.0.jpg".
    for position, image_id in enumerate(metadata['id']):
        image_file = os.path.join(image_folder, str(image_id) + '.jpg')  # Adjust 'id' column if necessary
        image = cv2.imread(image_file)

        # Check if the image was loaded successfully
        if image is not None:
            images.append(image)
            loaded_positions.append(position)
        else:
            missing_images.append(image_file)
            print(f"Warning: Image {image_file} could not be loaded.")

    # Drop metadata rows without an image so rows and images stay aligned
    if missing_images:
        metadata = metadata.iloc[loaded_positions].reset_index(drop=True)

    # Normalize pixel values to [0, 1]
    images = np.array(images) / 255.0

    return images, metadata

In [115]:
# Load the data
# NOTE(review): no later cell visible in this notebook reads `images`; the
# training cell re-reads the files from disk instead. Confirm this array is
# still needed before keeping the full load.
image_folder = '/content/train'
metadata_path = 'train.csv'
images, metadata = load_data(
    image_folder=image_folder,
    metadata_path=metadata_path,
)

# Summarise the metadata table (columns, non-null counts, dtypes)
metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7680 entries, 0 to 7679
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   id                7680 non-null   int64
 1   shapeset          7680 non-null   int64
 2   type              7680 non-null   int64
 3   total_height      7680 non-null   int64
 4   instability_type  7680 non-null   int64
 5   cam_angle         7680 non-null   int64
 6   stable_height     7680 non-null   int64
dtypes: int64(7)
memory usage: 420.1 KB


In [116]:
def _make_normalize():
    """Build the per-channel normalization step that ends both pipelines."""
    return A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])


# Training pipeline: random horizontal flip and rotation, then normalization
train_augmentation = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=20, p=0.5),
    _make_normalize(),
])

# Validation/test pipeline: normalization only, no random transforms
val_test_augmentation = A.Compose([_make_normalize()])

In [117]:
# Hold out 20% of the rows for validation; the target is the 'stable_height' column
train_metadata, val_metadata, train_labels, val_labels = train_test_split(
    metadata,
    metadata['stable_height'],
    test_size=0.2,
    random_state=42,
)

# Mini-batch size passed to model.fit
batch_size = 64

def load_and_preprocess_image(filepath):
    """Open the image at ``filepath`` with ``load_img`` and return it unchanged.

    Despite the name, no resizing or scaling is applied here.
    """
    return load_img(filepath)

def _load_transformed_images(image_ids, transform):
    """Read ``<id>.jpg`` for every id and return the transformed images as one array.

    Args:
        image_ids: Iterable of image ids; each resolves to ``<image_folder>/<id>.jpg``.
        transform: Albumentations pipeline, called as ``transform(image=...)``.

    Returns:
        float32 array holding the pipeline output for every image, in input order.
    """
    transformed = []
    for image_id in image_ids:
        path = os.path.join(image_folder, str(image_id) + '.jpg')
        # The pipeline works on numpy arrays, so convert the loaded image first
        pixels = np.asarray(load_and_preprocess_image(path), dtype=np.uint8)
        # Store the pipeline output: rebinding a loop variable (as the previous
        # code did) silently discards the augmented/normalized image
        transformed.append(transform(image=pixels)['image'])
    return np.asarray(transformed, dtype=np.float32)


# Training images: the random flip/rotation is sampled once per image here,
# not re-sampled every epoch
train_images = _load_transformed_images(train_metadata['id'], train_augmentation)

# Load images for validation
val_images = _load_transformed_images(val_metadata['id'], val_test_augmentation)

In [127]:
# Define CNN model for image input
image_input = Input(shape=(train_images.shape[1], train_images.shape[2], 3))

# Normalize each sample over height, width and channels. The result must be
# the input of the first convolution; feeding `image_input` there instead
# leaves this layer disconnected from the graph.
x = LayerNormalization(axis=[-1, -2, -3])(image_input)

# Three conv + max-pool stages with a growing number of filters
x = Conv2D(32, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(3, 3))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(3, 3))(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(3, 3))(x)

# Regression head: flatten, regularize, then a single linear output unit
x = Flatten()(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dense(1, activation='linear')(x)

In [128]:
# Wrap the graph into a trainable model
model = Model(inputs=image_input, outputs=x)

# The head is a single linear unit trained with MSE, so track mean absolute
# error instead of classification accuracy. The metric's default name,
# 'mean_absolute_error', is also the history key that plot_history reads.
model.compile(optimizer='adam', loss=MeanSquaredError(), metrics=[MeanAbsoluteError()])

In [129]:
# Train the model
# Fit on the training arrays; the held-out split is passed as validation data
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=batch_size,
    epochs=20,
    validation_data=(val_images, val_labels),
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Plot training & validation metric (MAE by default) and loss (optional)
def plot_history(history, metric='mean_absolute_error'):
    """Plot a tracked metric and the loss, each next to its validation curve.

    Args:
        history: Object whose ``history`` attribute maps series names to
            per-epoch values (the object returned by ``model.fit``).
        metric: Key of the metric shown in the left panel; the series
            ``'val_' + metric`` is drawn alongside it.

    Raises:
        KeyError: If any required series is missing from ``history.history``.
    """
    logs = history.history

    # Fail early with an actionable message instead of a bare KeyError mid-plot
    required = [metric, f'val_{metric}', 'loss', 'val_loss']
    missing = [key for key in required if key not in logs]
    if missing:
        raise KeyError(
            f"History has no series {missing}; available series: {sorted(logs)}. "
            f"Compile the model with the '{metric}' metric and pass validation data."
        )

    long_name = metric.replace('_', ' ').title()
    short_name = {'mean_absolute_error': 'MAE'}.get(metric, long_name)

    _, (metric_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

    metric_ax.plot(logs[metric], label=f'Train {short_name}')
    metric_ax.plot(logs[f'val_{metric}'], label=f'Validation {short_name}')
    metric_ax.set(title=f'Model {long_name}', ylabel=short_name, xlabel='Epoch')
    metric_ax.legend()

    loss_ax.plot(logs['loss'], label='Train Loss')
    loss_ax.plot(logs['val_loss'], label='Validation Loss')
    loss_ax.set(title='Model Loss', ylabel='Loss', xlabel='Epoch')
    loss_ax.legend()

    plt.show()

# Draw the learning curves recorded by the training run
plot_history(history)

# Save the model
# NOTE(review): the file name mentions metadata, but the model built in this
# notebook takes only the image as input — confirm the intended name.
model.save('cnn_with_metadata_model.h5')

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array # type: ignore

# Define function to load and preprocess a single image
def load_and_preprocess_single_image(filepath, target_size=None):
    """Load one image and preprocess it like the validation images.

    The image goes through ``val_test_augmentation``, the pipeline this
    notebook defines for validation/test data, rather than an ad-hoc ``/255``
    scaling, so the inputs seen at prediction time are normalized the same way.

    Args:
        filepath: Path of the image file to read.
        target_size: Optional ``(height, width)`` to resize to. The default
            ``None`` keeps the native size, which is how the training images
            are loaded.

    Returns:
        The normalized image array, without a batch dimension.
    """
    image = np.asarray(load_img(filepath, target_size=target_size), dtype=np.uint8)
    return val_test_augmentation(image=image)['image']

# Path to the image you want to test
test_image_path = './COMP90086_2024_Project_test/test/33287.jpg'

# Load and preprocess the test image
test_image = load_and_preprocess_single_image(test_image_path)

# Expand dimensions to match the input shape (1, height, width, channels)
test_image = np.expand_dims(test_image, axis=0)

# Make a prediction. The model built in this notebook has a single image
# input, so only the image batch is passed; supplying an extra metadata
# array does not match the model's inputs.
predicted_value = model.predict(test_image)

# Print the prediction result
print(f'Predicted Value: {predicted_value[0][0]}')  # Assuming a single numerical output
