In [None]:
import pandas as pd
import numpy as np
import os
import time
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import joblib
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.applications.densenet import DenseNet121, preprocess_input as preprocess_input_densenet
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image

In [None]:
# Colab-only helper used to attach Google Drive to this runtime
from google.colab import drive

# Mount Drive at /content/drive; force_remount=True asks for a fresh mount on every run
drive.mount(mountpoint='/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Project folder on the mounted Drive; every file path used by this notebook is built from it
base_dir = "/content/drive/MyDrive/Data 298B Project Data/Test Dataset - Workbook 2"

# Folder holding the sample images; the file names in the CSV's 'Id' column are joined onto it later
image_folder = f'{base_dir}/Sample Images'

In [None]:
# Restoring the previously saved preprocessing objects from their joblib files.
# NOTE: joblib.load unpickles arbitrary Python objects, so only load files from a trusted source.

# Label encoder: used further down to translate model output positions into class labels
label_encoder_path = f'{base_dir}/label_encoder_v2_hybrid_model.joblib'
label_encoder = joblib.load(label_encoder_path)

# Scaler for the numerical features
scaler_path = f'{base_dir}/scaler.joblib'
scaler = joblib.load(scaler_path)

In [None]:
# Loading the hybrid DenseNet121 model from its .h5 file and reporting the wall-clock load time
model_path = f'{base_dir}/Best_DenseNet121_Hybrid_Model.h5'

start_time = time.time()
model = load_model(model_path)
elapsed_time = time.time() - start_time

print(f"It took {elapsed_time:.4f} seconds to load the model.")

It took 6.4737 seconds to load the model.


In [None]:
# Reading the numerical data from the combined CSV file into a dataframe
numerical_csv_path = f"{base_dir}/combined_data.csv"
numerical_df = pd.read_csv(numerical_csv_path)

In [None]:
# Numerical features that are passed through the saved scaler before prediction
features_to_standardize = [
    'Avg Temp 14d',
    'Avg Humidity 14d',
    'Total Precipitation 14d',
    'Avg Wind Speed 14d',
]

# Full, ordered list of numerical model inputs: the standardized features first,
# followed by the vegetation-index columns (list concatenation builds a separate list)
all_numerical_features = features_to_standardize + [
    'NDVI MODIS',
    'NDVI - 1 MODIS',
    'NDVI - 2 MODIS',
    'EVI MODIS',
    'EVI - 1 MODIS',
    'EVI - 2 MODIS',
    'NDVI 1 Decrease',
    'NDVI 2 Decrease',
    'EVI 1 Decrease',
    'EVI 2 Decrease',
]

In [None]:
# Standardizing the specified numerical features using the previously saved scaler

if features_to_standardize:
    # The scaler stored in scaler.joblib was already loaded into `scaler` by the loading cell
    # near the top of the notebook, so it is reused here instead of reading and unpickling
    # the same file a second time. `loaded_scaler` is kept as an alias for that object in
    # case other cells refer to it by this name.
    loaded_scaler = scaler
    # Transforming the sample numerical data using the loaded scaler.
    # NOTE: the columns are overwritten in place, so re-running this cell without first
    # re-reading numerical_df from the CSV would apply the scaling twice.
    numerical_df[features_to_standardize] = loaded_scaler.transform(numerical_df[features_to_standardize])

In [None]:
# Image preprocessing helper matching the base DenseNet121 pre-trained model

def preprocess_image_densenet121(image_path):
    """Read one image file and convert it into a single-image batch for the model.

    Args:
        image_path: Path of the image file; its contents are decoded as a
            3-channel JPEG.

    Returns:
        The preprocessed image with a leading batch axis of size 1, i.e. an
        array of shape (1, 224, 224, 3).
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Resize to 224x224 without distorting the aspect ratio; the remaining area is padded
    padded = tf.image.resize_with_pad(decoded, 224, 224, antialias=True)
    # Apply the DenseNet input preprocessing from keras.applications
    normalized = preprocess_input_densenet(padded)
    # Add the batch axis expected by model.predict
    return np.expand_dims(normalized, axis=0)

In [None]:
# Replacing each file name in the 'Id' column with its full path inside the image folder
numerical_df['Id'] = numerical_df['Id'].map(
    lambda file_name: os.path.join(image_folder, file_name)
)

In [None]:
# NOTE: disabled code. This entire cell is a single triple-quoted string literal, so none of
# the statements inside it are executed; the notebook only echoes the string back as the
# cell's output. The text is a variant of the prediction loop that records just the top
# class and its confidence; the active loop is in the next cell.
'''
# Generating predictions for each instance, and showing the confidence level of the predicted class
start_time = time.time()

# Preparing columns in the dataframe for predictions
numerical_df['Class Confidence Levels'] = np.nan
numerical_df['Class Prediction'] = np.nan

# Predicting and filling the dataframe with the predicted class and confidence levels
for index, row in numerical_df.iterrows():
    img_array = preprocess_image_densenet121(row['Id'])
    num_data = row[all_numerical_features].to_numpy().reshape(1, -1)
    num_data = np.array(num_data, dtype=np.float32)

    # Generating class probability predictions
    prediction = model.predict([img_array, num_data])[0]

    # Determining the predicted class and its confidence
    predicted_class_idx = np.argmax(prediction)
    predicted_class = label_encoder.inverse_transform([predicted_class_idx])[0]
    class_confidence = prediction[predicted_class_idx]

    # Updating the DataFrame with the prediction and confidence
    numerical_df.at[index, 'Class Confidence Levels'] = f"{predicted_class}: {class_confidence:.4f}"
    numerical_df.at[index, 'Class Prediction'] = predicted_class

elapsed_time = time.time() - start_time
print(f"It took {elapsed_time:.4f} seconds to generate predictions for each instance.")
'''

'\n# Generating predictions for each instance, and showing the confidence level of the predicted class\nstart_time = time.time()\n\n# Preparing columns in the dataframe for predictions\nnumerical_df[\'Class Confidence Levels\'] = np.nan\nnumerical_df[\'Class Prediction\'] = np.nan\n\n# Predicting and filling the dataframe with the predicted class and confidence levels\nfor index, row in numerical_df.iterrows():\n    img_array = preprocess_image_densenet121(row[\'Id\'])\n    num_data = row[all_numerical_features].to_numpy().reshape(1, -1)\n    num_data = np.array(num_data, dtype=np.float32)\n\n    # Generating class probability predictions\n    prediction = model.predict([img_array, num_data])[0]\n\n    # Determining the predicted class and its confidence\n    predicted_class_idx = np.argmax(prediction)\n    predicted_class = label_encoder.inverse_transform([predicted_class_idx])[0]\n    class_confidence = prediction[predicted_class_idx]\n\n    # Updating the DataFrame with the predicti

In [None]:
# Generating predictions for each instance, and showing the confidence level of every class

start_time = time.time()

# Class labels in the order used by the label encoder, converted to native Python objects.
# If classes_ holds NumPy scalars, their repr on NumPy >= 2 looks like "np.str_('x')" and
# would otherwise leak into the stringified confidence dictionaries built below.
class_labels = label_encoder.classes_.tolist()

# Converting the numerical model inputs to float32 once, instead of once per row
numerical_inputs = numerical_df[all_numerical_features].to_numpy(dtype=np.float32)

# Results are collected in lists and attached as columns after the loop. Pre-filling the
# columns with np.nan would make them float columns, and writing strings into a float
# column cell by cell is an incompatible-dtype assignment that newer pandas deprecates.
confidence_levels = []
class_predictions = []

# Predicting the class probabilities for every row (one image + one numerical vector each)
for image_path, numerical_row in zip(numerical_df['Id'], numerical_inputs):
    img_array = preprocess_image_densenet121(image_path)
    num_data = numerical_row.reshape(1, -1)

    # Generating class probability predictions
    prediction = model.predict([img_array, num_data])[0]

    # Formatting the predicted confidence levels for all classes (rounded for display only)
    confidences = {class_labels[i]: round(float(prediction[i]), 4) for i in range(len(prediction))}

    # Sorting confidences so that the highest confidence is first
    sorted_confidences = dict(sorted(confidences.items(), key=lambda item: item[1], reverse=True))

    # Determining the predicted class from the unrounded probabilities, so that two classes
    # whose confidences round to the same 4-decimal value cannot swap places
    predicted_class = class_labels[int(np.argmax(prediction))]

    confidence_levels.append(str(sorted_confidences))
    class_predictions.append(predicted_class)

# Updating the dataframe with the predictions and confidence levels
numerical_df['Class Confidence Levels'] = confidence_levels
numerical_df['Class Prediction'] = class_predictions

elapsed_time = time.time() - start_time
print(f"It took {elapsed_time:.4f} seconds to generate predictions for all of the instances.")

It took 8.6912 seconds to generate predictions for all of the instances.


In [None]:
# Keeping only the identifying columns and the two prediction columns for export
columns_to_save = [
    'Id',
    'Latitude',
    'Longitude',
    'Date',
    'Class Confidence Levels',
    'Class Prediction',
]
export_df = numerical_df.loc[:, columns_to_save]

In [None]:
# Writing the selected columns to a new CSV file in the project folder (row index omitted)
csv_output_path = f'{base_dir}/predictions_with_confidences.csv'
export_df.to_csv(csv_output_path, index=False)

In [None]:
# Writing the selected columns to a new JSON file, one JSON object per dataframe row
json_output_path = f'{base_dir}/predictions_with_confidences.json'
export_df.to_json(json_output_path, orient='records')