In [1]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam # type: ignore
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers # type: ignore
import numpy as np
import pandas as pd
import cv2
import os
from tensorflow.keras.models import Model # type: ignore
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input, Concatenate, GlobalAveragePooling2D # type: ignore
from tensorflow.keras.losses import SparseCategoricalCrossentropy # type: ignore
from tensorflow.keras.metrics import SparseCategoricalAccuracy # type: ignore
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.regularizers import l2 # type: ignore


In [2]:
# Load and preprocess the data
def load_data(image_folder, metadata_path, num_samples=None):
    # Load metadata
    metadata = pd.read_csv(metadata_path)
    
    # If num_samples is provided, take the first 'num_samples' rows
    if num_samples:
        metadata = metadata.head(num_samples)
    
    images = []
    missing_images = []
    
    for index, row in metadata.iterrows():
        image_file = os.path.join(image_folder, str(row['id'])+'.jpg')  # Adjust 'id' column if necessary
        image = cv2.imread(image_file)
        
        # Check if the image was loaded successfully
        if image is not None:
            image = cv2.resize(image, (128, 128))  # Resize to 128x128
            images.append(image)
        else:
            missing_images.append(image_file)
            print(f"Warning: Image {image_file} could not be loaded.")
    
    # Normalize pixel values to [0, 1]
    images = np.array(images) / 255.0  
    
    return images, metadata

In [3]:
# Load the data
image_folder = './COMP90086_2024_Project_train/train'  
metadata_path = './COMP90086_2024_Project_train/train.csv'
test_image_folder = './COMP90086_2024_Project_test/test' 
test_metadata_path = './COMP90086_2024_Project_test/test.csv'

images, metadata = load_data(image_folder, metadata_path)
test_images, test_metadata = load_data(test_image_folder, test_metadata_path)

metadata.info()
test_metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7680 entries, 0 to 7679
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   id                7680 non-null   int64
 1   shapeset          7680 non-null   int64
 2   type              7680 non-null   int64
 3   total_height      7680 non-null   int64
 4   instability_type  7680 non-null   int64
 5   cam_angle         7680 non-null   int64
 6   stable_height     7680 non-null   int64
dtypes: int64(7)
memory usage: 420.1 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1920 entries, 0 to 1919
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   id      1920 non-null   int64
dtypes: int64(1)
memory usage: 15.1 KB


In [75]:
def create_cnn_model(input_shape):
    cnn_input = Input(shape=input_shape)
    
    x = Conv2D(32, (3, 3), activation='relu')(cnn_input)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization()(x)
    
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization()(x)
    
    x = Conv2D(128, (3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization()(x)
    
    x = GlobalAveragePooling2D()(x)

    cnn_output = Dense(128, activation='relu')(x)
    
    return Model(inputs=cnn_input, outputs=cnn_output)

# Define the neural network for metadata
def create_metadata_model(input_shape):
    metadata_input = Input(shape=input_shape)
    print(metadata_input)
    
    y = Dense(64, activation='relu')(metadata_input)
    y = Dropout(0.3)(y)
    y = Dense(32, activation='relu')(y)
    
    return Model(inputs=metadata_input, outputs=y)

# Combine CNN and metadata models
def create_combined_model(image_shape, metadata_shape):
    cnn_model = create_cnn_model(image_shape)
    metadata_model = create_metadata_model(metadata_shape)
    
    # Concatenate the outputs of the CNN and metadata model
    combined = Concatenate()([cnn_model.output, metadata_model.output])
    
    # Add some fully connected layers after the concatenation
    z = Dense(128, activation='relu', kernel_regularizer=l2(0.001))(combined)
    z = Dropout(0.1)(z)
    z = Dense(64, activation='relu', kernel_regularizer=l2(0.001))(z)
    
    # Final output layer for predicting stable height
    output = Dense(6, activation='softmax')(z)
    
    # Create the complete model
    model = Model(inputs=[cnn_model.input, metadata_model.input], outputs=output)
    
    return model

# Create the model
image_shape = (128, 128, 3)  
metadata_shape = (3,)  

In [34]:
def Simple_Model(image_shape, num_categories):
    # Define the input shape
    cnn_input = Input(shape=image_shape)

    # Define the architecture
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(cnn_input)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization()(x)

    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization()(x)

    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization()(x)

    # Global Average Pooling instead of Flatten
    x = GlobalAveragePooling2D()(x)

    # Dense layers with increased regularization
    x = Dense(128, activation='relu', kernel_regularizer=l2(0.001))(x)
    x = Dropout(0.1)(x)  # Increased dropout
    x = Dense(64, activation='relu', kernel_regularizer=l2(0.001))(x)

    # Output layer
    output = Dense(num_categories, activation='softmax')(x)

    # Create the model
    cnn_model = Model(inputs=cnn_input, outputs=output)

    return cnn_model

In [67]:
# Model to predict shapeset
features = ["shapeset", "type", "total_height", "instability_type", "cam_angle"]

X_train_images = images 
X_test_images = test_images


label_encoder = LabelEncoder()

for feature in features:
    print(feature)
    y_train = metadata[feature]
    print("before",np.unique(y_train))
    y_train = label_encoder.fit_transform(y_train)
    print("after",np.unique(y_train))
    model = Simple_Model(image_shape, len(np.unique(y_train)))

    # Compile the model
    model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train the model
    model.fit(X_train_images, y_train, batch_size=32, epochs=20, validation_split=0.2)   
    predictions = model.predict(X_test_images)
    predicted_classes = predictions.argmax(axis=1)
    print("before",np.unique(predicted_classes))
    predicted_classes = label_encoder.inverse_transform(predicted_classes)
    print("after",np.unique(predicted_classes))
    test_metadata[feature]= predicted_classes


shapeset
before [1 2]
after [0 1]
Epoch 1/20
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 303ms/step - accuracy: 0.6624 - loss: 0.8384 - val_accuracy: 0.6673 - val_loss: 0.7911
Epoch 2/20
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 289ms/step - accuracy: 0.6704 - loss: 0.7471 - val_accuracy: 0.6348 - val_loss: 0.7527
Epoch 3/20
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 253ms/step - accuracy: 0.7119 - loss: 0.6251 - val_accuracy: 0.4030 - val_loss: 0.8305
Epoch 4/20
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 251ms/step - accuracy: 0.7453 - loss: 0.5370 - val_accuracy: 0.7233 - val_loss: 0.5400
Epoch 5/20
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 249ms/step - accuracy: 0.7877 - loss: 0.4645 - val_accuracy: 0.7956 - val_loss: 0.4318
Epoch 6/20
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 252ms/step - accuracy: 0.8256 - loss: 0.4028 - val_accuracy: 0.

In [68]:
test_metadata.describe()

Unnamed: 0,id,shapeset,type,total_height,instability_type,cam_angle
count,1920.0,1920.0,1920.0,1920.0,1920.0,1920.0
mean,508859.54375,1.669792,1.699479,4.443229,1.205208,1.25
std,288604.389809,0.470411,0.458604,1.32633,0.805514,0.433126
min,95.0,1.0,1.0,2.0,0.0,1.0
25%,265807.75,1.0,1.0,3.0,1.0,1.0
50%,502188.5,2.0,2.0,5.0,1.0,1.0
75%,757418.0,2.0,2.0,6.0,2.0,1.25
max,999651.0,2.0,2.0,6.0,2.0,2.0


In [69]:
test_metadata.to_csv('update_test_metadata.csv', index=False) 

In [77]:
# Final model 

combined_model = create_combined_model(image_shape, metadata_shape)

# Compile the model
combined_model.compile(optimizer=Adam(learning_rate=0.001), loss=SparseCategoricalCrossentropy(), metrics=[SparseCategoricalAccuracy()])
X_train_images = images  # Preprocessed image data
# X_train_metadata = metadata[["shapeset","type","total_height","instability_type","cam_angle"]]  # Select relevant metadata columns
X_train_metadata = metadata[["shapeset","total_height","cam_angle"]]  # Select relevant metadata columns

y_train = metadata['stable_height']-1  # Target variable (stable height)
metadata_input = layers.Input(shape=(3,)) 

<KerasTensor shape=(None, 3), dtype=float32, sparse=False, name=keras_tensor_332>


In [78]:
combined_model.fit([X_train_images, X_train_metadata], y_train, epochs=20, batch_size=32, validation_split=0.2)

Epoch 1/20
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 255ms/step - loss: 1.8997 - sparse_categorical_accuracy: 0.2505 - val_loss: 2.4386 - val_sparse_categorical_accuracy: 0.2337
Epoch 2/20
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 218ms/step - loss: 1.7138 - sparse_categorical_accuracy: 0.2578 - val_loss: 1.7727 - val_sparse_categorical_accuracy: 0.2415
Epoch 3/20
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 220ms/step - loss: 1.6532 - sparse_categorical_accuracy: 0.2438 - val_loss: 1.6239 - val_sparse_categorical_accuracy: 0.2233
Epoch 4/20
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 216ms/step - loss: 1.6108 - sparse_categorical_accuracy: 0.2491 - val_loss: 1.6151 - val_sparse_categorical_accuracy: 0.2311
Epoch 5/20
[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 225ms/step - loss: 1.5820 - sparse_categorical_accuracy: 0.2478 - val_loss: 1.5817 - val_sparse_categorical_ac

<keras.src.callbacks.history.History at 0x27fc3c92870>

In [79]:
test_metadata_new = pd.read_csv('update_test_metadata.csv')
y_test = pd.read_csv('./COMP90086_2024_Project_test/sample-solution.csv')
df_merged = pd.merge(y_test, test_metadata_new, on='id', how='inner')

In [82]:
from sklearn.metrics import accuracy_score

label_encoder = LabelEncoder()

X_test_images = test_images
X_test_metadata = df_merged[["shapeset","total_height","cam_angle"]]
y_test = df_merged['stable_height']
y_test = label_encoder.fit_transform(y_test)

predictions = combined_model.predict([X_test_images, X_test_metadata])
predicted_classes = predictions.argmax(axis=1)

df_merged['stable_height']= predicted_classes

accuracy = accuracy_score(y_test, predicted_classes)
print(accuracy)

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 57ms/step
0.15260416666666668


In [83]:
df_merged[['id','stable_height']].to_csv('result.csv', index=False) 