# Model comparison

## Table of Contents
1. [Libraries](#libraries)
2. [Load models](#load-models)
3. [Load data](#load-data)
4. [Results](#results)

## 1. Libraries
Import the libraries used throughout this notebook.

In [None]:
# Import necessary libraries
from tensorflow import keras
import os
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, mean_squared_error, classification_report, mean_absolute_error
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

## 2. Load models
Load final models of all three riders.

In [None]:
# Mount Google Drive at /content/drive so that the model and data files
# referenced by the following cells can be read (only works inside Colab).
from google.colab import drive
drive.mount('/content/drive')

#### 2.1. Neural Network Models

In [None]:
def _build_speed_regressor():
    """Build the untrained speed-regression network used for riders 3 and 7.

    The network takes 6 input features and produces a single regression
    output. Only the layer structure is created here; the caller is expected
    to load trained weights into the returned model.

    Returns:
        A ``Sequential`` model with layers
        Dense(128) -> Dense(256) -> Dropout -> Dense(128) -> Dropout -> Dense(1).
    """
    model = Sequential()
    model.add(Dense(128, input_dim=6, activation='relu'))
    model.add(Dense(256, activation='relu'))  # Large hidden layer to capture more complex patterns
    model.add(Dropout(0.2))  # Dropout to prevent overfitting
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))  # Dropout again after each hidden layer
    model.add(Dense(1))  # Regression output
    return model


# Rider 1: the file is loaded as a complete model (architecture + weights)
r1_model = keras.models.load_model('/content/drive/MyDrive/opencampus_all_files/models/r1_mse_nn_model.keras')

# Riders 3 and 7: rebuild the architecture, then load only the weights.
# NOTE(review): this assumes both files were saved from a model with exactly
# this layer structure - confirm against the training notebooks.
r3_model = _build_speed_regressor()
r3_model.load_weights('/content/drive/MyDrive/opencampus_all_files/models/r3_mse_nn_model.keras')

r7_model = _build_speed_regressor()
r7_model.load_weights('/content/drive/MyDrive/opencampus_all_files/models/r7_nn_model.keras')


#### 2.2. Linear Models

In [None]:
# Load the linear models for riders 1, 3 and 7.
# NOTE: these are bound to the same names (r1_model, r3_model, r7_model) as
# the neural-network models, so running this cell replaces whichever models
# were loaded before it.
r1_model, r3_model, r7_model = (
    joblib.load(model_path)
    for model_path in (
        '/content/drive/MyDrive/opencampus_all_files/models/r1_init_lr_model.joblib',
        '/content/drive/MyDrive/opencampus_all_files/models/r3_init_lr_model.joblib',
        '/content/drive/MyDrive/opencampus_all_files/models/r7_lr_model.joblib',
    )
)

## 3. Load data
Load the test dataset for rider 3 and fit the feature scaler on rider 3's combined data.

In [None]:
# Extract the rider 3 test archive from Google Drive into the current
# working directory (IPython shell escape, so this only runs in a notebook).
# NOTE(review): later cells read the files from '/content/content/Rider3_test',
# which presumably relies on the archive's internal folder layout and on the
# working directory being /content - confirm.
!unzip /content/drive/MyDrive/opencampus_all_files/rider3/Rider3_test.zip

In [None]:
# Scaler configuration: rebuild the feature scaler from rider 3's combined data.
# NOTE(review): this one scaler is later applied before all three riders'
# models - confirm that each model was trained on features scaled this way.
df = pd.read_csv('/content/drive/MyDrive/opencampus_all_files/rider3/combined_data_r3.csv')

# Time column, model input columns and regression target
Time_real = df['Time']
X = df[['Elevation', 'Slope_prev', 'Slope_next',  'Angle', 'Distance', 'Cumulative_Slope']]
y = df['Speed']

# Hold out 10 % of the rows; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.1,
)

# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

#### 3.1. Rider 1

In [None]:
# Apply the rider 1 model to every rider 3 test file: predict the speed per
# row, derive a cumulative predicted time from it and save the annotated file.
input_folder_path = '/content/content/Rider3_test'
output_folder_path = '/content/content/Rider3_test_r1/'

# Model input columns, in the order used when the scaler was fitted
feature_columns = ['Elevation', 'Slope_prev', 'Slope_next', 'Angle', 'Distance', 'Cumulative_Slope']

# First row whose predicted time is advanced; earlier rows keep the start time.
# NOTE(review): with a value of 2 the segment between rows 0 and 1 never
# contributes any time. This is kept to reproduce the existing results, but
# it looks like an off-by-one - confirm whether it should be 1.
first_predicted_row = 2

# Create the output folder; exist_ok avoids the separate existence check
os.makedirs(output_folder_path, exist_ok=True)

# List all CSV files in the folder (sorted so the log order is reproducible)
csv_files = sorted(f for f in os.listdir(input_folder_path) if f.endswith('.csv'))

for file in csv_files:
    file_path = os.path.join(input_folder_path, file)
    df = pd.read_csv(file_path)

    if df.empty:
        # Without rows there is no start time to read and nothing to predict
        print(f"File: {file} | skipped (file contains no rows)")
        continue

    # Scale the features with the previously fitted scaler and predict.
    # np.ravel yields a 1-D array whether the model returns shape (n, 1)
    # (Keras with a single output unit) or (n,) (scikit-learn regressors).
    X = df[feature_columns]
    X_new_scaled = scaler.transform(X)
    y_pred = np.ravel(r1_model.predict(X_new_scaled))
    df['Speed_pred'] = y_pred

    # MAE is computed on the raw predictions, before negative values are clamped
    mae = mean_absolute_error(df['Speed'], df['Speed_pred'])
    print(f"File: {file} | Mean Absolute Error: {mae}")

    # A negative speed is not meaningful: clamp to zero on every row
    df['Speed_pred'] = df['Speed_pred'].clip(lower=0)

    speed = df['Speed_pred'].to_numpy()
    distance = df['Distance'].to_numpy()

    # Time needed for each row's segment: distance covered since the previous
    # row divided by the predicted speed. Rows with zero (or missing) speed
    # and rows before first_predicted_row add no time.
    moving = speed > 0
    moving[:first_predicted_row] = False
    segment_length = np.diff(distance, prepend=distance[0])

    step_time = np.zeros(len(df), dtype=float)
    step_time[moving] = segment_length[moving] / speed[moving]

    # Seeding the first step with the start time makes the running sum equal
    # to "previous predicted time + segment time" for every row
    step_time[0] = float(df['Time'].iloc[0])
    df['Time_pred'] = np.cumsum(step_time)

    # Save the annotated file; the "nn_" prefix is used regardless of which
    # kind of model is currently bound to r1_model
    output_file_path = os.path.join(output_folder_path, f"nn_{file}")
    df.to_csv(output_file_path, index=False)

#### 3.2. Rider 3

In [None]:
# Apply the rider 3 model to every rider 3 test file: predict the speed per
# row, derive a cumulative predicted time from it and save the annotated file.
input_folder_path = '/content/content/Rider3_test'
output_folder_path = '/content/content/Rider3_test_r3/'

# Model input columns, in the order used when the scaler was fitted
feature_columns = ['Elevation', 'Slope_prev', 'Slope_next', 'Angle', 'Distance', 'Cumulative_Slope']

# First row whose predicted time is advanced; earlier rows keep the start time.
# NOTE(review): with a value of 2 the segment between rows 0 and 1 never
# contributes any time. This is kept to reproduce the existing results, but
# it looks like an off-by-one - confirm whether it should be 1.
first_predicted_row = 2

# Create the output folder; exist_ok avoids the separate existence check
os.makedirs(output_folder_path, exist_ok=True)

# List all CSV files in the folder (sorted so the log order is reproducible)
csv_files = sorted(f for f in os.listdir(input_folder_path) if f.endswith('.csv'))

for file in csv_files:
    file_path = os.path.join(input_folder_path, file)
    df = pd.read_csv(file_path)

    if df.empty:
        # Without rows there is no start time to read and nothing to predict
        print(f"File: {file} | skipped (file contains no rows)")
        continue

    # Scale the features with the previously fitted scaler and predict.
    # np.ravel yields a 1-D array whether the model returns shape (n, 1)
    # (Keras with a single output unit) or (n,) (scikit-learn regressors).
    X = df[feature_columns]
    X_new_scaled = scaler.transform(X)
    y_pred = np.ravel(r3_model.predict(X_new_scaled))
    df['Speed_pred'] = y_pred

    # MAE is computed on the raw predictions, before negative values are clamped
    mae = mean_absolute_error(df['Speed'], df['Speed_pred'])
    print(f"File: {file} | Mean Absolute Error: {mae}")

    # A negative speed is not meaningful: clamp to zero on every row
    df['Speed_pred'] = df['Speed_pred'].clip(lower=0)

    speed = df['Speed_pred'].to_numpy()
    distance = df['Distance'].to_numpy()

    # Time needed for each row's segment: distance covered since the previous
    # row divided by the predicted speed. Rows with zero (or missing) speed
    # and rows before first_predicted_row add no time.
    moving = speed > 0
    moving[:first_predicted_row] = False
    segment_length = np.diff(distance, prepend=distance[0])

    step_time = np.zeros(len(df), dtype=float)
    step_time[moving] = segment_length[moving] / speed[moving]

    # Seeding the first step with the start time makes the running sum equal
    # to "previous predicted time + segment time" for every row
    step_time[0] = float(df['Time'].iloc[0])
    df['Time_pred'] = np.cumsum(step_time)

    # Save the annotated file; the "nn_" prefix is used regardless of which
    # kind of model is currently bound to r3_model
    output_file_path = os.path.join(output_folder_path, f"nn_{file}")
    df.to_csv(output_file_path, index=False)

#### 3.3. Rider 7

In [None]:
# Apply the rider 7 model to every rider 3 test file: predict the speed per
# row, derive a cumulative predicted time from it and save the annotated file.
input_folder_path = '/content/content/Rider3_test'
output_folder_path = '/content/content/Rider3_test_r7/'

# Model input columns, in the order used when the scaler was fitted
feature_columns = ['Elevation', 'Slope_prev', 'Slope_next', 'Angle', 'Distance', 'Cumulative_Slope']

# First row whose predicted time is advanced; earlier rows keep the start time.
# NOTE(review): with a value of 2 the segment between rows 0 and 1 never
# contributes any time. This is kept to reproduce the existing results, but
# it looks like an off-by-one - confirm whether it should be 1.
first_predicted_row = 2

# Create the output folder; exist_ok avoids the separate existence check
os.makedirs(output_folder_path, exist_ok=True)

# List all CSV files in the folder (sorted so the log order is reproducible)
csv_files = sorted(f for f in os.listdir(input_folder_path) if f.endswith('.csv'))

for file in csv_files:
    file_path = os.path.join(input_folder_path, file)
    df = pd.read_csv(file_path)

    if df.empty:
        # Without rows there is no start time to read and nothing to predict
        print(f"File: {file} | skipped (file contains no rows)")
        continue

    # Scale the features with the previously fitted scaler and predict.
    # np.ravel yields a 1-D array whether the model returns shape (n, 1)
    # (Keras with a single output unit) or (n,) (scikit-learn regressors).
    X = df[feature_columns]
    X_new_scaled = scaler.transform(X)
    y_pred = np.ravel(r7_model.predict(X_new_scaled))
    df['Speed_pred'] = y_pred

    # MAE is computed on the raw predictions, before negative values are clamped
    mae = mean_absolute_error(df['Speed'], df['Speed_pred'])
    print(f"File: {file} | Mean Absolute Error: {mae}")

    # A negative speed is not meaningful: clamp to zero on every row
    df['Speed_pred'] = df['Speed_pred'].clip(lower=0)

    speed = df['Speed_pred'].to_numpy()
    distance = df['Distance'].to_numpy()

    # Time needed for each row's segment: distance covered since the previous
    # row divided by the predicted speed. Rows with zero (or missing) speed
    # and rows before first_predicted_row add no time.
    moving = speed > 0
    moving[:first_predicted_row] = False
    segment_length = np.diff(distance, prepend=distance[0])

    step_time = np.zeros(len(df), dtype=float)
    step_time[moving] = segment_length[moving] / speed[moving]

    # Seeding the first step with the start time makes the running sum equal
    # to "previous predicted time + segment time" for every row
    step_time[0] = float(df['Time'].iloc[0])
    df['Time_pred'] = np.cumsum(step_time)

    # Save the annotated file; the "nn_" prefix is used regardless of which
    # kind of model is currently bound to r7_model
    output_file_path = os.path.join(output_folder_path, f"nn_{file}")
    df.to_csv(output_file_path, index=False)

## 4. Results
Compare results of different models on the same files.

#### 4.1. Rider 1

In [None]:
# Collect, for each processed rider 1 output file, the final real time, the
# final predicted time and their relative difference in percent.
output_folder_path = '/content/content/Rider3_test_r1/'
processed_files = [
    name for name in os.listdir(output_folder_path) if name.endswith('.csv')
]

summary_r1 = []
for file in processed_files:
    df = pd.read_csv(os.path.join(output_folder_path, file))

    # Values of 'Time' and 'Time_pred' in the last row of the file
    last_time, last_time_pred = df['Time'].iloc[-1], df['Time_pred'].iloc[-1]

    # Relative error in percent; left undefined when the real time is zero
    if last_time != 0:
        percentage_diff = ((last_time_pred - last_time) / last_time) * 100
    else:
        percentage_diff = None

    summary_r1.append(dict(
        file=file,
        last_time=last_time,
        last_time_pred=last_time_pred,
        percentage_diff=percentage_diff,
    ))

#### 4.2. Rider 3

In [None]:
# Collect, for each processed rider 3 output file, the final real time, the
# final predicted time and their relative difference in percent.
output_folder_path = '/content/content/Rider3_test_r3/'
processed_files = [
    name for name in os.listdir(output_folder_path) if name.endswith('.csv')
]

summary_r3 = []
for file in processed_files:
    df = pd.read_csv(os.path.join(output_folder_path, file))

    # Values of 'Time' and 'Time_pred' in the last row of the file
    last_time, last_time_pred = df['Time'].iloc[-1], df['Time_pred'].iloc[-1]

    # Relative error in percent; left undefined when the real time is zero
    if last_time != 0:
        percentage_diff = ((last_time_pred - last_time) / last_time) * 100
    else:
        percentage_diff = None

    summary_r3.append(dict(
        file=file,
        last_time=last_time,
        last_time_pred=last_time_pred,
        percentage_diff=percentage_diff,
    ))

#### 4.3. Rider 7

In [None]:
# Collect, for each processed rider 7 output file, the final real time, the
# final predicted time and their relative difference in percent.
output_folder_path = '/content/content/Rider3_test_r7/'
processed_files = [
    name for name in os.listdir(output_folder_path) if name.endswith('.csv')
]

summary_r7 = []
for file in processed_files:
    df = pd.read_csv(os.path.join(output_folder_path, file))

    # Values of 'Time' and 'Time_pred' in the last row of the file
    last_time, last_time_pred = df['Time'].iloc[-1], df['Time_pred'].iloc[-1]

    # Relative error in percent; left undefined when the real time is zero
    if last_time != 0:
        percentage_diff = ((last_time_pred - last_time) / last_time) * 100
    else:
        percentage_diff = None

    summary_r7.append(dict(
        file=file,
        last_time=last_time,
        last_time_pred=last_time_pred,
        percentage_diff=percentage_diff,
    ))

#### 4.4. Summary

In [None]:
# Build one table with the real rider 3 time and the time predicted by each
# rider's model, one row per test file.
summary_r1_df = pd.DataFrame(summary_r1).sort_values(by=['file'])
summary_r3_df = pd.DataFrame(summary_r3).sort_values(by=['file'])
summary_r7_df = pd.DataFrame(summary_r7).sort_values(by=['file'])

# Select and rename the columns of each summary. 'file' is kept in every
# selection because it is the key the tables are joined on.
r1_selected = summary_r1_df[['file', 'last_time', 'last_time_pred']].rename(columns={
    'last_time': 'real_r3_time',
    'last_time_pred': 'pred_r1_time'
})

r3_selected = summary_r3_df[['file', 'last_time_pred']].rename(columns={
    'last_time_pred': 'pred_r3_time'
})

r7_selected = summary_r7_df[['file', 'last_time_pred']].rename(columns={
    'last_time_pred': 'pred_r7_time'
})

# Join on the file name. Plain column assignment would align rows by index
# label, and those labels come from the directory listing order of each
# output folder, so sorting alone does not guarantee that a row's values all
# belong to the same file. A left join keeps every rider 1 row and leaves
# NaN where another summary has no entry for that file.
combined_df = (
    r1_selected
    .merge(r3_selected, on='file', how='left')
    .merge(r7_selected, on='file', how='left')
)

# Display or use the combined DataFrame
print(combined_df)