In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# IPython magic (not plain Python): change the working directory into the
# mounted drive so the relative paths used later in the notebook resolve there.
%cd drive/MyDrive

Mounted at /content/drive
/content/drive/MyDrive


In [None]:
"""
Using the trained models to make predictions
"""

import numpy as np
import pandas as pd
from keras.src.saving.saving_api import load_model

def normalise_data(data: list) -> np.ndarray:
    """
    Performs min max normalisation on the data, scaling it into the range [0, 1].

    The minimum and maximum are taken over every element of ``data`` (no axis
    is used), so a multi-dimensional input is scaled as a whole.

    :param data: The data that is going to be normalised (a list or NumPy
        array of numbers)
    :return: A NumPy float array with the same shape as ``data``. Empty input
        is returned as an empty array, and input whose values are all equal
        is returned as all zeros.
    """
    values = np.asarray(data, dtype=float)

    # np.min / np.max raise on empty input; there is nothing to scale.
    if values.size == 0:
        return values

    min_val = np.min(values)
    value_range = np.max(values) - min_val

    # A constant input would otherwise divide 0 by 0 and fill the result with
    # NaN, which then propagates into anything that consumes it.
    if value_range == 0:
        return np.zeros_like(values)

    return (values - min_val) / value_range


def pre_processing(pos_file_name: str, y_column: str, extra_x_columns: list = None,
                   window_size: int = 5) -> tuple[np.ndarray, np.ndarray]:
    """
    Builds sliding-window model inputs and their targets from a position CSV.

    The CSV is sorted by "UserID" and then "UnixReceiveTime" and processed one
    "UserID" group at a time. Every run of ``window_size`` consecutive rows in
    a group becomes one input window, and the ``y_column`` value of the row
    that immediately follows the window becomes its target. Each feature of
    the stacked input array is then min-max normalised with ``normalise_data``
    over all windows at once. The targets are not normalised, but because
    "Cog" is converted to radians before the targets are read, "Cog" targets
    are in radians.

    :param pos_file_name: The position CSV file to load from. It must contain
        "UserID", "UnixReceiveTime", "Cog", "Sog", "Longitude" and "Latitude",
        plus ``y_column`` and any ``extra_x_columns``.
    :param y_column: The column to predict, used to build the y array
    :param extra_x_columns: The extra columns to append after the five default
        feature columns; None adds nothing
    :param window_size: The number of consecutive positions in each window
    :return: ``(x_array, y_array)`` as NumPy arrays. When at least one window
        was built, x_array is indexed as (window, position in window, feature)
        and y_array holds one target per window.
    """

    # Load the position data from the CSV file
    dataframe = pd.read_csv(pos_file_name)

    print(f"Loaded dataset {pos_file_name} for column {y_column}")

    # Order the rows by vessel and then by receive time. Only sorting happens
    # here; no datetime conversion is performed.
    dataframe = dataframe.sort_values(by=["UserID", "UnixReceiveTime"])

    x_list = []
    y_list = []
    # "UnixReceiveTime" sits at position 4 of every feature row; the time
    # rewrite in the loop below relies on that position.
    x_columns = ["Cog", "Sog", "Longitude",
                 "Latitude", "UnixReceiveTime"]
    if extra_x_columns is not None:
        x_columns.extend(extra_x_columns)

    # Converts the course to radians
    dataframe["Cog"] = np.radians(dataframe["Cog"])

    for _, vessel in dataframe.groupby("UserID"):
        # Renumber this vessel's rows from 0 so positions and labels agree
        vessel = vessel.reset_index()
        # One window per start position; the last window_size rows never start
        # a window because each window needs a following row as its target.
        x_section = [vessel.iloc[index:index + window_size]
                     [x_columns].values.tolist()
                     for index in range(len(vessel) - window_size)]

        # Overwrites the time feature of every window entry with the absolute
        # difference between a looked-up receive time and the entry's own.
        # NOTE(review): the lookup goes through `dataframe`, which still
        # carries the row labels assigned by read_csv (sort_values does not
        # relabel), while x_index + sequence_index is a position inside the
        # current vessel. The reference time is therefore the CSV row with
        # that label, not necessarily a row of this vessel. This looks
        # unintended ("time since the first position" was presumably meant).
        # Confirm against the training pipeline before changing it: the
        # trained models presumably expect the feature exactly as computed
        # here.
        for x_index in range(len(x_section)):
            for sequence_index in range(0, window_size):
                x_section[x_index][sequence_index][4] = abs(dataframe.at[x_index + sequence_index,
                                                                         "UnixReceiveTime"] -
                                                            x_section[x_index][sequence_index][4])

        x_list.extend(x_section)

        # Target for window i is the y_column value of row i + window_size
        y_section = vessel.iloc[window_size:][y_column].values.flatten().tolist()
        y_list.extend(y_section)

    x_array = np.array(x_list)
    y_array = np.array(y_list)

    # Normalisation: each feature is scaled using the min and max taken across
    # every window and every position in the window.
    for index in range(x_array.shape[-1]):
        x_array[:, :, index] = normalise_data(x_array[:, :, index])

    print(f"Formatting Complete {pos_file_name} for column {y_column}")
    return x_array, y_array

def calculate_mae(prediction: list, actual: list) -> float:
    """
    Calculates the mean absolute error of the two lists.

    :param prediction: a list (or 1-D array) of predicted data
    :param actual: a list (or 1-D array) of actual data
    :return: float - the mean absolute error
    :raises ValueError: if the inputs differ in length or are empty
    """

    act_len = len(actual)
    if act_len != len(prediction):
        raise ValueError("The lengths of the two lists must be the same.")
    # Without this check an empty input fails with an unexplained
    # ZeroDivisionError when the mean is taken.
    if act_len == 0:
        raise ValueError("Cannot calculate the mean absolute error of empty lists.")

    # Vectorised so large prediction sets are not walked element by element
    # in Python.
    errors = np.abs(np.asarray(actual, dtype=float) -
                    np.asarray(prediction, dtype=float))
    return float(np.mean(errors))


def calculate_mse(prediction: list, actual: list) -> float:
    """
    Calculates the mean squared error of the two lists.

    :param prediction: a list (or 1-D array) of predicted data
    :param actual: a list (or 1-D array) of actual data
    :return: float - the mean squared error
    :raises ValueError: if the inputs differ in length or are empty
    """

    act_len = len(actual)
    if act_len != len(prediction):
        raise ValueError("The lengths of the two lists must be the same.")
    # Without this check an empty input fails with an unexplained
    # ZeroDivisionError when the mean is taken.
    if act_len == 0:
        raise ValueError("Cannot calculate the mean squared error of empty lists.")

    # Vectorised so large prediction sets are not walked element by element
    # in Python.
    differences = (np.asarray(actual, dtype=float) -
                   np.asarray(prediction, dtype=float))
    return float(np.mean(np.square(differences)))


def print_stats(prediction: list, actual: list,
                column: str, save_name: str) -> None:
    """
    Appends one line of error statistics for a column to the stats file.

    The line reports the mean absolute error and the mean squared error
    between the predicted and the actual values. Both are calculated before
    the file is opened, and the file is opened in append mode so earlier
    lines are kept.
    :param prediction: a list of predicted data
    :param actual: a list of actual data
    :param column: the label written for the column being compared
    :param save_name: the file path the stats line is appended to
    :return: none
    """

    mae, mse = calculate_mae(prediction, actual), calculate_mse(prediction, actual)
    stats_line = f"Column: {column}, MAE: {mae}, MSE: {mse}\n"

    with open(save_name, mode='a') as stats_file:
        stats_file.write(stats_line)


def predict(pos_file_name: str, lat_model: str, lon_model: str,
            cog_model: str, sog_model: str, stats_save_file: str,
            pred_save_file: str, windows_size=7) -> None:
    """
    Uses the trained models to make predictions for one position file.

    The position file is pre-processed once per target column, each saved
    model is loaded and run on the input built for its own column, the error
    statistics are appended to the stats file and the predictions are written
    to a CSV file.
    :param pos_file_name: the position file to load from
    :param lat_model: the path of the saved keras latitude model
    :param lon_model: the path of the saved keras longitude model
    :param cog_model: the path of the saved keras course model
    :param sog_model: the path of the saved keras speed model
    :param stats_save_file: the file path the stats lines are appended to
    :param pred_save_file: the file path to save the predictions to
    :param windows_size: the size of the window (must match the trained keras file)
    :return: none
    """

    # Preprocesses the prediction file once per target column
    lat_x, lat_y = pre_processing(pos_file_name, "Latitude",
                                  window_size=windows_size,
                                  extra_x_columns=["RateOfTurn"])
    lon_x, lon_y = pre_processing(pos_file_name, "Longitude",
                                  window_size=windows_size,
                                  extra_x_columns=["RateOfTurn"])
    cog_x, cog_y = pre_processing(pos_file_name, "Cog",
                                  window_size=windows_size,
                                  extra_x_columns=["Labels",
                                                   "NavigationalStatus"])
    sog_x, sog_y = pre_processing(pos_file_name, "Sog",
                                  window_size=windows_size,
                                  extra_x_columns=["Labels",
                                                   "NavigationalStatus"])

    # Loads the trained models from their saved files. From here on the
    # *_model names refer to the loaded models rather than to their paths.
    lat_model = load_model(lat_model)
    lon_model = load_model(lon_model)
    cog_model = load_model(cog_model)
    sog_model = load_model(sog_model)

    # Creates predictions, feeding every model the input that was built for
    # its own column (longitude and speed previously reused the latitude and
    # course inputs).
    pred_lat = lat_model.predict(lat_x, verbose=0).flatten().tolist()
    pred_lon = lon_model.predict(lon_x, verbose=0).flatten().tolist()
    pred_cog = cog_model.predict(cog_x, verbose=0).flatten().tolist()
    pred_sog = sog_model.predict(sog_x, verbose=0).flatten().tolist()

    # Compares the predictions to the actual values and appends the resulting
    # error statistics to the stats file
    print_stats(pred_lat, lat_y, "Lat", stats_save_file)
    print_stats(pred_lon, lon_y, "Lon", stats_save_file)
    print_stats(pred_cog, cog_y, "Cog", stats_save_file)
    print_stats(pred_sog, sog_y, "Sog", stats_save_file)

    # Creates a csv file of the predictions
    predictions = {"Sog": pred_sog, "Longitude": pred_lon,
                   "Latitude": pred_lat, "Cog": pred_cog}
    pred_dataframe = pd.DataFrame.from_dict(predictions)
    pred_dataframe.to_csv(pred_save_file, index=False)


def main():
    """
    Runs the prediction pipeline for the small and the large demo datasets.

    Each run reads a labelled position file, uses the four saved models of the
    matching size and writes a stats log and a predictions file into
    Demo/Results.
    """
    print("\nPredicting...")

    predict("Demo/Labelled/SmallLabelled.csv",
            "Models/SmallModelLatitude.keras",
            "Models/SmallModelLongitude.keras",
            "Models/SmallModelCog.keras",
            "Models/SmallModelSog.keras",
            "Demo/Results/SmallLog.csv",
            "Demo/Results/SmallPredictions.csv")

    predict("Demo/Labelled/LargeLabelled.csv",
            "Models/LargeModelLatitude.keras",
            "Models/LargeModelLongitude.keras",
            "Models/LargeModelCog.keras",
            "Models/LargeModelSog.keras",
            "Demo/Results/LargeLog.csv",
            "Demo/Results/LargePredictions.csv")


# Guarded so that importing this code does not start the whole prediction run.
# Inside a notebook kernel or when run as a script, __name__ is "__main__",
# so main() is still called.
if __name__ == "__main__":
    main()



Predicting...
Loaded dataset Demo/Labelled/SmallLabelled.csv for column Latitude
Formatting Complete Demo/Labelled/SmallLabelled.csv for column Latitude
Loaded dataset Demo/Labelled/SmallLabelled.csv for column Longitude
Formatting Complete Demo/Labelled/SmallLabelled.csv for column Longitude
Loaded dataset Demo/Labelled/SmallLabelled.csv for column Cog
Formatting Complete Demo/Labelled/SmallLabelled.csv for column Cog
Loaded dataset Demo/Labelled/SmallLabelled.csv for column Sog
Formatting Complete Demo/Labelled/SmallLabelled.csv for column Sog
Loaded dataset Demo/Labelled/LargeLabelled.csv for column Latitude
Formatting Complete Demo/Labelled/LargeLabelled.csv for column Latitude
Loaded dataset Demo/Labelled/LargeLabelled.csv for column Longitude
Formatting Complete Demo/Labelled/LargeLabelled.csv for column Longitude
Loaded dataset Demo/Labelled/LargeLabelled.csv for column Cog
Formatting Complete Demo/Labelled/LargeLabelled.csv for column Cog
Loaded dataset Demo/Labelled/LargeLabe