In [1]:
# importing the zipfile module
from zipfile import ZipFile

# Open the processed-images archive read-only and unpack all of its members.
# The extraction path is the empty string, so member paths are written
# relative to the current working directory.
with ZipFile("processed-images.zip", mode='r') as archive:
    archive.extractall(path="")

In [1]:
import os
import pandas as pd
import numpy as np

# Load the CSV data. The file has no header row, so the column names are
# supplied explicitly.
df = pd.read_csv(
    'images.csv',
    header=None,
    names=["image_path", "label", "timestamp", "moon_phase", "wind", "weather", "temp", "humidity", "pressure"],
)

# Keep only the rows whose image file exists on disk.
# - A missing path is parsed as NaN (a float), which os.path.exists rejects
#   with a TypeError, so the type is checked first.
# - astype(bool) keeps the mask a boolean indexer even when the frame is empty.
# - .copy() makes the filtered frame independent of the original, so the
#   column assignments below do not trigger SettingWithCopyWarning.
has_image = df['image_path'].apply(
    lambda path: isinstance(path, str) and os.path.exists(path)
).astype(bool)
df = df[has_image].copy()

# Parse timestamps as timezone-aware UTC values; anything that cannot be
# parsed becomes NaT instead of raising.
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce', utc=True)

# Drop every row with a missing value in ANY column. This removes rows whose
# timestamp failed to parse (NaT) and also rows with any other missing field,
# so no NaN reaches the scaler or the model further down.
df = df.dropna().copy()

# Derive time features from the (UTC) timestamp.
df['hour'] = df['timestamp'].dt.hour  # Hour of the day, 0-23, in UTC
df['day_of_week'] = df['timestamp'].dt.dayofweek  # Monday=0 ... Sunday=6
# NOTE(review): the night window is evaluated on UTC hours; confirm this
# matches night-time at the camera location.
df['is_night'] = (df['hour'] < 6) | (df['hour'] > 18)  # True before 06:00 or from 19:00 onward

# One-hot encode the categorical columns. drop_first=True omits the first
# category of each column to avoid a redundant indicator.
df = pd.get_dummies(df, columns=['moon_phase', 'weather'], drop_first=True)

from sklearn.preprocessing import StandardScaler

# Standardize the continuous weather readings column by column.
# NOTE(review): the scaler is fitted on every row before the train/validation
# split further down, so validation rows contribute to the scaling statistics.
numeric_columns = ['wind', 'temp', 'humidity', 'pressure']
scaler = StandardScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])
from tensorflow.keras.preprocessing import image
from concurrent.futures import ThreadPoolExecutor, as_completed

# Image loading function with normalization
def load_image(image_path, target_size=(224, 224)):
    """Read one image from disk and return its pixels as a float32 array.

    Args:
        image_path: Location of the image file, forwarded to ``image.load_img``.
        target_size: Size tuple forwarded to ``image.load_img``.

    Returns:
        The array produced by ``image.img_to_array``, divided by 255.0 and
        cast to ``np.float32``.
    """
    pixels = image.img_to_array(image.load_img(image_path, target_size=target_size))
    # Dividing by 255 maps 8-bit channel values onto the [0, 1] range.
    return (pixels / 255.0).astype(np.float32)

# Load all images in parallel using ThreadPoolExecutor
def load_images_in_parallel(image_paths, batch_size=32, target_size=(224, 224)):
    """Load many images with a thread pool and stack them into one array.

    Paths are processed in consecutive chunks of ``batch_size``. The images of
    a chunk are loaded concurrently through ``load_image`` and copied straight
    into a preallocated output array, so only one chunk of per-image arrays is
    alive at a time (instead of a Python list of every image plus its stacked
    copy).

    Args:
        image_paths: Sized, sliceable sequence of image paths (list or 1-D array).
        batch_size: Number of paths handed to the pool per chunk; must be >= 1.
        target_size: Size tuple forwarded to ``load_image`` for every path.

    Returns:
        ``np.ndarray`` whose first axis follows the order of ``image_paths``.
        An empty input yields an empty array.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. A negative value
            would otherwise produce an empty result without any error.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    total = len(image_paths)
    if total == 0:
        return np.array([])

    stacked = None
    with ThreadPoolExecutor() as executor:
        for start in range(0, total, batch_size):
            chunk_paths = image_paths[start:start + batch_size]
            # executor.map preserves input order, so rows stay aligned with paths.
            chunk = np.asarray(
                list(executor.map(lambda path: load_image(path, target_size), chunk_paths))
            )
            if stacked is None:
                # Allocate once, using the shape and dtype of the first chunk.
                stacked = np.empty((total,) + chunk.shape[1:], dtype=chunk.dtype)
            stacked[start:start + len(chunk)] = chunk
    return stacked

# Read every remaining image into a single array, in DataFrame row order.
images = load_images_in_parallel(df['image_path'].values)

# Assemble the tabular features: the hand-built time and weather columns
# first, followed by every one-hot column generated for moon phase and weather.
base_columns = ['hour', 'day_of_week', 'is_night', 'wind', 'temp', 'humidity', 'pressure']
one_hot_columns = [col for col in df.columns if col.startswith(('moon_phase_', 'weather_'))]
metadata_columns = base_columns + one_hot_columns
metadata = df[metadata_columns].values.astype(np.float32)  # boolean columns become 0.0 / 1.0

# Binary target: 1 where the label is exactly 'deer', 0 for anything else.
is_deer = df['label'] == 'deer'
labels = is_deer.astype(int).values


from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation. Passing the three arrays to one
# call keeps images, metadata and labels aligned row for row; random_state
# fixes the shuffle.
# NOTE(review): the split is not stratified by label.
(
    X_train_images, X_val_images,
    X_train_metadata, X_val_metadata,
    y_train, y_val,
) = train_test_split(images, metadata, labels, test_size=0.2, random_state=42)


from tensorflow.keras import layers, models, optimizers
from concurrent.futures import ThreadPoolExecutor
import tensorflow as tf

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so random operations such as weight initialisation and dropout
# start from the same state on every run. (GPU kernels may still introduce
# small run-to-run differences.)
tf.keras.utils.set_random_seed(42)

# ----- Image branch: three Conv2D -> MaxPooling2D stages, then flatten -----
image_input = layers.Input(shape=(224, 224, 3))  # 224x224 RGB images
x = image_input
for filters in (32, 64, 128):
    x = layers.Conv2D(filters, (3, 3), activation='relu')(x)
    x = layers.MaxPooling2D()(x)
x = layers.Flatten()(x)

# ----- Metadata branch: the feature vector is joined to the image features -----
metadata_input = layers.Input(shape=(metadata.shape[1],))  # one entry per metadata column
combined = layers.concatenate([x, metadata_input])

# ----- Classification head -----
x = layers.Dense(128, activation='relu')(combined)
x = layers.Dropout(0.5)(x)  # Dropout to reduce overfitting
x = layers.Dense(1, activation='sigmoid')(x)  # Sigmoid for binary classification

# Two-input model: [images, metadata] -> probability of the positive class.
model = models.Model(inputs=[image_input, metadata_input], outputs=x)

# ----- Optimizer with a stepwise-decaying learning rate -----
# decay_steps counts optimizer steps (batches), not epochs. A fixed value of
# 10000 is never reached in a 50-epoch run of about 107 steps per epoch (as in
# the recorded output), so with staircase=True the rate would stay at
# initial_lr for the whole training. The interval is therefore derived from
# the training-set size.
initial_lr = 0.001
fit_batch_size = 16   # keep equal to the batch_size passed to model.fit
lr_decay_epochs = 5   # multiply the rate by decay_rate once every this many epochs
steps_per_epoch = max(1, (len(X_train_images) + fit_batch_size - 1) // fit_batch_size)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_lr,
    decay_steps=steps_per_epoch * lr_decay_epochs,
    decay_rate=0.9,
    staircase=True,
)

optimizer = optimizers.Adam(learning_rate=lr_schedule)

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import EarlyStopping

# Stop once validation accuracy has gone five consecutive epochs without
# improving, then roll the model back to the weights of its best epoch.
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

# Fit on the training split; the validation split is scored after every epoch.
train_inputs = [X_train_images, X_train_metadata]
val_inputs = [X_val_images, X_val_metadata]
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    epochs=50,
    batch_size=16,
    callbacks=[early_stopping],
)

# Score the trained model on the held-out validation split.
validation_inputs = [X_val_images, X_val_metadata]
val_loss, val_acc = model.evaluate(validation_inputs, y_val)
print(f"Validation Loss: {val_loss}")
print(f"Validation Accuracy: {val_acc}")

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class predictions.
probabilities = model.predict(validation_inputs)
y_pred = (probabilities > 0.5).astype(int)

# Per-class precision, recall and F1-score on the validation split.
print("\nClassification Report:")
print(classification_report(y_val, y_pred))


2024-11-20 12:53:18.015036: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2024-11-20 12:53:18.015084: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-11-20 12:53:18.015092: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-11-20 12:53:18.015503: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-11-20 12:53:18.015535: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/50


2024-11-20 12:53:20.149376: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 102ms/step - accuracy: 0.6397 - loss: 0.7990 - val_accuracy: 0.7042 - val_loss: 0.5833
Epoch 2/50
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 92ms/step - accuracy: 0.7274 - loss: 0.5588 - val_accuracy: 0.6878 - val_loss: 0.5630
Epoch 3/50
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 90ms/step - accuracy: 0.7604 - loss: 0.5402 - val_accuracy: 0.7441 - val_loss: 0.5281
Epoch 4/50
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 88ms/step - accuracy: 0.7513 - loss: 0.5165 - val_accuracy: 0.7488 - val_loss: 0.5308
Epoch 5/50
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 88ms/step - accuracy: 0.8125 - loss: 0.4707 - val_accuracy: 0.8122 - val_loss: 0.4208
Epoch 6/50
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 91ms/step - accuracy: 0.7984 - loss: 0.4421 - val_accuracy: 0.7700 - val_loss: 0.4302
Epoch 7/50
[1m107/107[0m 