In [8]:
import pandas as pd

df = pd.read_csv("./truck_fuel_efficiency_dataset.csv")


In [9]:
df

Unnamed: 0,Date,Brake_Counter,Stop_Counter,Cruise_Control,PTO_Time,Engine_Load,Fuel_Efficiency_kmpl
0,01-01-2023,63,40,0,5.002356,64.364785,22.522255
1,02-01-2023,9,0,1,0.993171,34.683037,36.920809
2,03-01-2023,94,13,0,4.980936,35.756102,28.350327
3,04-01-2023,69,6,0,7.937064,16.969616,38.046497
4,05-01-2023,64,22,0,0.905704,26.918691,33.733081
...,...,...,...,...,...,...,...
360,27-12-2023,79,5,0,5.630054,2.884018,40.454188
361,28-12-2023,84,1,0,8.407182,24.372602,33.504063
362,29-12-2023,87,29,0,8.845980,57.582111,21.182840
363,30-12-2023,27,17,0,4.110475,16.776012,40.414117


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv('truck_fuel_efficiency_dataset.csv', index_col='Date')

# Split the data into features (X) and target (y)
X = data.drop('Fuel_Efficiency_kmpl', axis=1)
y = data['Fuel_Efficiency_kmpl']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest regression model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict fuel efficiency on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")


Mean Squared Error: 2.06
R-squared: 0.97


In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Define the number of trucks and time periods
num_trucks = 5
num_time_periods = 24  # One day with hourly data

# Generate truck IDs
truck_ids = [f"TRK00{i+1}" for i in range(num_trucks)]

# Generate timestamps for one day
start_date = datetime(2023, 1, 1)
timestamps = [start_date + timedelta(hours=i) for i in range(num_time_periods)]

# Generate synthetic data
data = []
for truck_id in truck_ids:
    for timestamp in timestamps:
        fuel_consumption = np.random.uniform(8.0, 12.0)  # Random fuel consumption
        engine_type = np.random.choice(["Diesel", "Gasoline"])
        maintenance_history = np.random.choice(["No history", "Service recently"])
        weather_temp = np.random.uniform(10, 35)  # Random temperature
        avg_speed = np.random.uniform(40, 100)  # Random speed

        data.append([timestamp, truck_id, engine_type, maintenance_history, weather_temp, avg_speed, fuel_consumption])

# Create a pandas DataFrame
columns = ["Timestamp", "Truck_ID", "Engine_Type", "Maintenance_History", "Weather_Temperature", "Average_Speed", "Fuel_Consumption"]
df = pd.DataFrame(data, columns=columns)

# Save the DataFrame to a CSV file
df.to_csv("Vol_fuel_efficiency_data.csv", index=False)


In [2]:
df

Unnamed: 0,Timestamp,Truck_ID,Engine_Type,Maintenance_History,Weather_Temperature,Average_Speed,Fuel_Consumption
0,2023-01-01 00:00:00,TRK001,Gasoline,No history,23.889392,92.482529,9.712944
1,2023-01-01 01:00:00,TRK001,Gasoline,No history,22.023105,75.861589,10.728945
2,2023-01-01 02:00:00,TRK001,Diesel,No history,11.289818,54.941086,11.583613
3,2023-01-01 03:00:00,TRK001,Diesel,No history,10.137305,52.658868,11.176255
4,2023-01-01 04:00:00,TRK001,Diesel,No history,20.150700,86.757627,10.295938
...,...,...,...,...,...,...,...
115,2023-01-01 19:00:00,TRK005,Gasoline,No history,19.633839,57.852596,10.881121
116,2023-01-01 20:00:00,TRK005,Diesel,Service recently,25.326517,70.795798,11.658100
117,2023-01-01 21:00:00,TRK005,Gasoline,Service recently,23.263413,73.347791,9.326941
118,2023-01-01 22:00:00,TRK005,Gasoline,Service recently,24.613533,63.821579,11.992151


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv("Vol_fuel_efficiency_data.csv")

# Feature columns (excluding timestamp and target)
feature_columns = ["Truck_ID", "Engine_Type", "Maintenance_History", "Weather_Temperature", "Average_Speed"]

# Splitting data into features (X) and target (y)
X = data[feature_columns]
y = data["Fuel_Consumption"]

# Convert categorical features into numerical using one-hot encoding
X = pd.get_dummies(X, columns=["Truck_ID", "Engine_Type", "Maintenance_History"])

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train a Random Forest regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the testing set
predictions = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, predictions)
rmse = mean_squared_error(y_test, predictions, squared=False)
r2 = r2_score(y_test, predictions)

print(f"Mean Absolute Error: {mae:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"R-squared: {r2:.2f}")


Mean Absolute Error: 1.02
Root Mean Squared Error: 1.28
R-squared: -0.52


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv("Vol_fuel_efficiency_data.csv")

# Feature columns (excluding timestamp and target)
feature_columns = ["Truck_ID", "Engine_Type", "Maintenance_History", "Weather_Temperature", "Average_Speed"]

# Splitting data into features (X) and target (y)
X = data[feature_columns]
y = data["Fuel_Consumption"]

# Convert categorical features into numerical using one-hot encoding
X = pd.get_dummies(X, columns=["Truck_ID", "Engine_Type", "Maintenance_History"])

# Initialize the model
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Initialize TimeSeriesSplit for cross-validation
tscv = TimeSeriesSplit(n_splits=5)

# Lists to store evaluation metrics
mae_list = []
rmse_list = []
r2_list = []

# Time series cross-validation
for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train the model
    model.fit(X_train, y_train)

    # Make predictions
    predictions = model.predict(X_test)

    # Calculate evaluation metrics
    mae = mean_absolute_error(y_test, predictions)
    rmse = mean_squared_error(y_test, predictions, squared=False)
    r2 = r2_score(y_test, predictions)

    mae_list.append(mae)
    rmse_list.append(rmse)
    r2_list.append(r2)

# Calculate average metrics
avg_mae = np.mean(mae_list)
avg_rmse = np.mean(rmse_list)
avg_r2 = np.mean(r2_list)

print(f"Average Mean Absolute Error: {avg_mae:.2f}")
print(f"Average Root Mean Squared Error: {avg_rmse:.2f}")
print(f"Average R-squared: {avg_r2:.2f}")


Average Mean Absolute Error: 1.12
Average Root Mean Squared Error: 1.32
Average R-squared: -0.32


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv("Vol_fuel_efficiency_data.csv")

# Convert timestamp to datetime
data["Timestamp"] = pd.to_datetime(data["Timestamp"])

# Extract time-related features from the timestamp
data["Hour"] = data["Timestamp"].dt.hour
data["DayOfWeek"] = data["Timestamp"].dt.dayofweek
data["Month"] = data["Timestamp"].dt.month

# Feature columns (excluding target)
feature_columns = ["Hour", "DayOfWeek", "Month", "Truck_ID", "Engine_Type", "Maintenance_History", "Weather_Temperature", "Average_Speed"]

# Splitting data into features (X) and target (y)
X = data[feature_columns]
y = data["Fuel_Consumption"]

# Convert categorical features into numerical using one-hot encoding
X = pd.get_dummies(X, columns=["Truck_ID", "Engine_Type", "Maintenance_History"])

# Initialize the model
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Initialize TimeSeriesSplit for cross-validation
tscv = TimeSeriesSplit(n_splits=5)

# Lists to store evaluation metrics
mae_list = []
rmse_list = []
r2_list = []

# Time series cross-validation
for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train the model
    model.fit(X_train, y_train)

    # Make predictions
    predictions = model.predict(X_test)

    # Calculate evaluation metrics
    mae = mean_absolute_error(y_test, predictions)
    rmse = mean_squared_error(y_test, predictions, squared=False)
    r2 = r2_score(y_test, predictions)

    mae_list.append(mae)
    rmse_list.append(rmse)
    r2_list.append(r2)

# Calculate average metrics
avg_mae = np.mean(mae_list)
avg_rmse = np.mean(rmse_list)
avg_r2 = np.mean(r2_list)

print(f"Average Mean Absolute Error: {avg_mae:.2f}")
print(f"Average Root Mean Squared Error: {avg_rmse:.2f}")
print(f"Average R-squared: {avg_r2:.2f}")


Average Mean Absolute Error: 1.09
Average Root Mean Squared Error: 1.29
Average R-squared: -0.28


In [8]:
import pandas as pd
from prophet import Prophet
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv("Vol_fuel_efficiency_data.csv")

# Convert timestamp to datetime
data["Timestamp"] = pd.to_datetime(data["Timestamp"])

# Rename columns for Prophet
data = data.rename(columns={"Timestamp": "ds", "Fuel_Consumption": "y"})

# Initialize TimeSeriesSplit for cross-validation
tscv = TimeSeriesSplit(n_splits=5)

# Lists to store evaluation metrics
mae_list = []
rmse_list = []
r2_list = []

# Time series cross-validation
for train_index, test_index in tscv.split(data):
    train_data = data.iloc[train_index]
    test_data = data.iloc[test_index]

    # Initialize a new Prophet model for each fold
    model = Prophet()

    # Fit the model
    model.fit(train_data)

    # Make predictions
    future = model.make_future_dataframe(periods=len(test_data), freq="H")
    forecast = model.predict(future)

    # Filter forecast for test period
    forecast_test = forecast[-len(test_data):]

    # Calculate evaluation metrics
    mae = mean_absolute_error(test_data["y"], forecast_test["yhat"])
    rmse = mean_squared_error(test_data["y"], forecast_test["yhat"], squared=False)
    r2 = r2_score(test_data["y"], forecast_test["yhat"])

    mae_list.append(mae)
    rmse_list.append(rmse)
    r2_list.append(r2)

# Calculate average metrics
avg_mae = sum(mae_list) / len(mae_list)
avg_rmse = sum(rmse_list) / len(rmse_list)
avg_r2 = sum(r2_list) / len(r2_list)

print(f"Average Mean Absolute Error: {avg_mae:.2f}")
print(f"Average Root Mean Squared Error: {avg_rmse:.2f}")
print(f"Average R-squared: {avg_r2:.2f}")


21:08:47 - cmdstanpy - INFO - Chain [1] start processing
21:08:47 - cmdstanpy - INFO - Chain [1] done processing
21:08:47 - cmdstanpy - INFO - Chain [1] start processing
21:08:47 - cmdstanpy - INFO - Chain [1] done processing
21:08:48 - cmdstanpy - INFO - Chain [1] start processing
21:08:48 - cmdstanpy - INFO - Chain [1] done processing
21:08:48 - cmdstanpy - INFO - Chain [1] start processing
21:08:48 - cmdstanpy - INFO - Chain [1] done processing
21:08:48 - cmdstanpy - INFO - Chain [1] start processing
21:08:48 - cmdstanpy - INFO - Chain [1] done processing


Average Mean Absolute Error: 1.02
Average Root Mean Squared Error: 1.19
Average R-squared: -0.08


In [9]:
train_data

Unnamed: 0,ds,Truck_ID,Engine_Type,Maintenance_History,Weather_Temperature,Average_Speed,y
0,2023-01-01 00:00:00,TRK001,Gasoline,No history,23.889392,92.482529,9.712944
1,2023-01-01 01:00:00,TRK001,Gasoline,No history,22.023105,75.861589,10.728945
2,2023-01-01 02:00:00,TRK001,Diesel,No history,11.289818,54.941086,11.583613
3,2023-01-01 03:00:00,TRK001,Diesel,No history,10.137305,52.658868,11.176255
4,2023-01-01 04:00:00,TRK001,Diesel,No history,20.150700,86.757627,10.295938
...,...,...,...,...,...,...,...
95,2023-01-01 23:00:00,TRK004,Gasoline,No history,15.254112,99.768248,11.748583
96,2023-01-01 00:00:00,TRK005,Gasoline,Service recently,16.452113,87.095474,10.512187
97,2023-01-01 01:00:00,TRK005,Diesel,Service recently,17.198463,94.527949,10.124191
98,2023-01-01 02:00:00,TRK005,Gasoline,No history,32.651878,79.317802,11.334660


In [11]:
test_data

Unnamed: 0,ds,Truck_ID,Engine_Type,Maintenance_History,Weather_Temperature,Average_Speed,y
100,2023-01-01 04:00:00,TRK005,Gasoline,Service recently,25.15328,69.414578,8.721176
101,2023-01-01 05:00:00,TRK005,Diesel,No history,10.223529,65.153089,8.287782
102,2023-01-01 06:00:00,TRK005,Gasoline,No history,25.055951,49.168053,10.826027
103,2023-01-01 07:00:00,TRK005,Diesel,No history,16.970301,49.483844,11.97847
104,2023-01-01 08:00:00,TRK005,Diesel,Service recently,30.188706,99.636096,8.870838
105,2023-01-01 09:00:00,TRK005,Diesel,No history,18.769134,85.075278,9.450428
106,2023-01-01 10:00:00,TRK005,Diesel,Service recently,21.347985,51.151355,8.300623
107,2023-01-01 11:00:00,TRK005,Gasoline,Service recently,23.331168,83.677698,9.46607
108,2023-01-01 12:00:00,TRK005,Diesel,No history,33.42811,68.568253,9.522479
109,2023-01-01 13:00:00,TRK005,Gasoline,No history,27.153489,81.842531,9.486867


In [12]:
forecast_test

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
24,2023-01-02 00:00:00,10.190809,8.727959,11.627244,10.190807,10.19081,0.0,0.0,0.0,0.0,0.0,0.0,10.190809
25,2023-01-02 01:00:00,10.195803,8.73634,11.695452,10.195799,10.195808,0.0,0.0,0.0,0.0,0.0,0.0,10.195803
26,2023-01-02 02:00:00,10.200798,8.736326,11.657976,10.200789,10.200807,0.0,0.0,0.0,0.0,0.0,0.0,10.200798
27,2023-01-02 03:00:00,10.205793,8.797379,11.670623,10.205779,10.205807,0.0,0.0,0.0,0.0,0.0,0.0,10.205793
28,2023-01-02 04:00:00,10.210787,8.931851,11.599734,10.210768,10.210808,0.0,0.0,0.0,0.0,0.0,0.0,10.210787
29,2023-01-02 05:00:00,10.215782,8.830786,11.628603,10.215756,10.215808,0.0,0.0,0.0,0.0,0.0,0.0,10.215782
30,2023-01-02 06:00:00,10.220776,8.853064,11.720633,10.220744,10.22081,0.0,0.0,0.0,0.0,0.0,0.0,10.220776
31,2023-01-02 07:00:00,10.225771,8.817075,11.720085,10.225733,10.225813,0.0,0.0,0.0,0.0,0.0,0.0,10.225771
32,2023-01-02 08:00:00,10.230766,8.781092,11.767053,10.230719,10.230815,0.0,0.0,0.0,0.0,0.0,0.0,10.230766
33,2023-01-02 09:00:00,10.23576,8.887232,11.568393,10.235706,10.235819,0.0,0.0,0.0,0.0,0.0,0.0,10.23576


In [13]:
tscv

TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)

In [14]:
model

<prophet.forecaster.Prophet at 0x1c31f361f50>

# Time series prediction task evaluation metrics

In [6]:
data

Unnamed: 0,ds,Truck_ID,Engine_Type,Maintenance_History,Weather_Temperature,Average_Speed,y
0,2023-01-01 00:00:00,TRK001,Gasoline,No history,23.889392,92.482529,9.712944
1,2023-01-01 01:00:00,TRK001,Gasoline,No history,22.023105,75.861589,10.728945
2,2023-01-01 02:00:00,TRK001,Diesel,No history,11.289818,54.941086,11.583613
3,2023-01-01 03:00:00,TRK001,Diesel,No history,10.137305,52.658868,11.176255
4,2023-01-01 04:00:00,TRK001,Diesel,No history,20.150700,86.757627,10.295938
...,...,...,...,...,...,...,...
115,2023-01-01 19:00:00,TRK005,Gasoline,No history,19.633839,57.852596,10.881121
116,2023-01-01 20:00:00,TRK005,Diesel,Service recently,25.326517,70.795798,11.658100
117,2023-01-01 21:00:00,TRK005,Gasoline,Service recently,23.263413,73.347791,9.326941
118,2023-01-01 22:00:00,TRK005,Gasoline,Service recently,24.613533,63.821579,11.992151


In [9]:
train_data

Unnamed: 0,ds,Truck_ID,Engine_Type,Maintenance_History,Weather_Temperature,Average_Speed,y
0,2023-01-01 00:00:00,TRK001,Gasoline,No history,23.889392,92.482529,9.712944
1,2023-01-01 01:00:00,TRK001,Gasoline,No history,22.023105,75.861589,10.728945
2,2023-01-01 02:00:00,TRK001,Diesel,No history,11.289818,54.941086,11.583613
3,2023-01-01 03:00:00,TRK001,Diesel,No history,10.137305,52.658868,11.176255
4,2023-01-01 04:00:00,TRK001,Diesel,No history,20.150700,86.757627,10.295938
...,...,...,...,...,...,...,...
95,2023-01-01 23:00:00,TRK004,Gasoline,No history,15.254112,99.768248,11.748583
96,2023-01-01 00:00:00,TRK005,Gasoline,Service recently,16.452113,87.095474,10.512187
97,2023-01-01 01:00:00,TRK005,Diesel,Service recently,17.198463,94.527949,10.124191
98,2023-01-01 02:00:00,TRK005,Gasoline,No history,32.651878,79.317802,11.334660


In [10]:
test_data

Unnamed: 0,ds,Truck_ID,Engine_Type,Maintenance_History,Weather_Temperature,Average_Speed,y
100,2023-01-01 04:00:00,TRK005,Gasoline,Service recently,25.15328,69.414578,8.721176
101,2023-01-01 05:00:00,TRK005,Diesel,No history,10.223529,65.153089,8.287782
102,2023-01-01 06:00:00,TRK005,Gasoline,No history,25.055951,49.168053,10.826027
103,2023-01-01 07:00:00,TRK005,Diesel,No history,16.970301,49.483844,11.97847
104,2023-01-01 08:00:00,TRK005,Diesel,Service recently,30.188706,99.636096,8.870838
105,2023-01-01 09:00:00,TRK005,Diesel,No history,18.769134,85.075278,9.450428
106,2023-01-01 10:00:00,TRK005,Diesel,Service recently,21.347985,51.151355,8.300623
107,2023-01-01 11:00:00,TRK005,Gasoline,Service recently,23.331168,83.677698,9.46607
108,2023-01-01 12:00:00,TRK005,Diesel,No history,33.42811,68.568253,9.522479
109,2023-01-01 13:00:00,TRK005,Gasoline,No history,27.153489,81.842531,9.486867


In [8]:
forecast_test

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
24,2023-01-02 00:00:00,10.190809,8.829824,11.632763,10.190807,10.19081,0.0,0.0,0.0,0.0,0.0,0.0,10.190809
25,2023-01-02 01:00:00,10.195803,8.6524,11.636455,10.195799,10.195808,0.0,0.0,0.0,0.0,0.0,0.0,10.195803
26,2023-01-02 02:00:00,10.200798,8.772421,11.524592,10.20079,10.200807,0.0,0.0,0.0,0.0,0.0,0.0,10.200798
27,2023-01-02 03:00:00,10.205793,8.74834,11.634987,10.205779,10.205807,0.0,0.0,0.0,0.0,0.0,0.0,10.205793
28,2023-01-02 04:00:00,10.210787,8.800186,11.652831,10.210768,10.210807,0.0,0.0,0.0,0.0,0.0,0.0,10.210787
29,2023-01-02 05:00:00,10.215782,8.700465,11.636608,10.215756,10.215809,0.0,0.0,0.0,0.0,0.0,0.0,10.215782
30,2023-01-02 06:00:00,10.220776,8.821337,11.697201,10.220744,10.220811,0.0,0.0,0.0,0.0,0.0,0.0,10.220776
31,2023-01-02 07:00:00,10.225771,8.830013,11.7185,10.225731,10.225815,0.0,0.0,0.0,0.0,0.0,0.0,10.225771
32,2023-01-02 08:00:00,10.230766,8.810512,11.666683,10.230719,10.230819,0.0,0.0,0.0,0.0,0.0,0.0,10.230766
33,2023-01-02 09:00:00,10.23576,8.790471,11.645255,10.235705,10.235822,0.0,0.0,0.0,0.0,0.0,0.0,10.23576


In [58]:
import numpy as np
import pandas as pd
from prophet import Prophet
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from pmdarima.metrics import smape

# mape function
def mean_absolute_percentage_error(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


# Load the dataset
data = pd.read_csv("Vol_fuel_efficiency_data.csv")

# Convert timestamp to datetime
data["Timestamp"] = pd.to_datetime(data["Timestamp"])

# Rename columns for Prophet
data = data.rename(columns={"Timestamp": "ds", "Fuel_Consumption": "y"})

# Initialize TimeSeriesSplit for cross-validation
tscv = TimeSeriesSplit(n_splits=5)

# Lists to store evaluation metrics
mse_list = []
mae_list = []
rmse_list = []
mape_list = []
smape_list = []
r2_list = []
mfe_list = []
mad_list = []
theils_u_list = []
forecast_bias_list = []
forecast_efficiency_list = []

# Time series cross-validation
for train_index, test_index in tscv.split(data):
    train_data = data.iloc[train_index]
    test_data = data.iloc[test_index]

    # Initialize a new Prophet model for each fold
    model = Prophet()

    # Fit the model
    model.fit(train_data)

    # Make predictions
    future = model.make_future_dataframe(periods=len(test_data), freq="H")
    forecast = model.predict(future)

    # Filter forecast for test period
    forecast_test = forecast[-len(test_data):]

    # Calculate evaluation metrics
    
    # 1. mean squared error
    mse = mean_squared_error(test_data["y"], forecast_test["yhat"])
    
    # 2. mean absolute error
    mae = mean_absolute_error(test_data["y"], forecast_test["yhat"])
    
    # 3. root mean squared error
    rmse = mean_squared_error(test_data["y"], forecast_test["yhat"], squared=False)
    
    # 4. Mean Absolute Percentage Error
    mape = mean_absolute_percentage_error(test_data["y"], forecast_test["yhat"])
    
    # 5. r2 score
    r2_value = r2_score(test_data["y"], forecast_test["yhat"])
    
    # mean forecast error
    mfe = np.mean(forecast_test["yhat"] - test_data["y"])
    
    
    # Append metrics to respective lists
    mse_list.append(mse)
    mae_list.append(mae)
    rmse_list.append(rmse)
    mape_list.append(mape)
    r2_list.append(r2_value)
    mfe_list.append(mfe)


# Calculate average metrics
avg_mse = sum(mse_list) / len(mse_list)
avg_mae = sum(mae_list) / len(mae_list)
avg_rmse = sum(rmse_list) / len(rmse_list)
avg_mape = sum(mape_list) / len(mape_list)
avg_r2 = sum(r2_list) / len(r2_list)
avg_mfe = sum(mfe_list) / len(mfe_list)

print(f"Average Mean Squared Error: {avg_mse:.2f}")
print(f"Average Mean Absolute Error: {avg_mae:.2f}")
print(f"Average Root Mean Squared Error: {avg_rmse:.2f}")
print(f"Average Mean Absolute Percentage Error: {avg_mape:.2f}")
print(f"Average R-squared: {avg_r2:.2f}")
print(f"Average Mean Forecast Error: {avg_mfe:.2f}")


14:31:51 - cmdstanpy - INFO - Chain [1] start processing
14:31:51 - cmdstanpy - INFO - Chain [1] done processing
14:31:51 - cmdstanpy - INFO - Chain [1] start processing
14:31:51 - cmdstanpy - INFO - Chain [1] done processing
14:31:51 - cmdstanpy - INFO - Chain [1] start processing
14:31:51 - cmdstanpy - INFO - Chain [1] done processing
14:31:51 - cmdstanpy - INFO - Chain [1] start processing
14:31:51 - cmdstanpy - INFO - Chain [1] done processing
14:31:51 - cmdstanpy - INFO - Chain [1] start processing
14:31:51 - cmdstanpy - INFO - Chain [1] done processing


Average Mean Squared Error: 1.43
Average Mean Absolute Error: 1.02
Average Root Mean Squared Error: 1.19
Average Mean Absolute Percentage Error: 10.32
Average R-squared: -0.08
Average Mean Forecast Error: nan


#### example2: Truck fuel efficiency prediction

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
from statsmodels.tools.eval_measures import rmse, meanabs
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_log_error
from scipy.stats import variation
from pmdarima.metrics import smape




# Load the dataset
data = pd.read_csv('truck_fuel_efficiency_dataset.csv', index_col='Date')

# Split the data into features (X) and target (y)
X = data.drop('Fuel_Efficiency_kmpl', axis=1)
y = data['Fuel_Efficiency_kmpl']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest regression model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict fuel efficiency on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse_value = rmse(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
smape_score = smape(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mfe = np.mean(y_pred - y_test)

rmse_forecast = np.sqrt(mean_squared_error(y_test, y_pred))
rmse_naive = np.sqrt(mean_squared_error(y_test[1:], y_test[:-1]))  # Naïve forecast
theils_u = rmse_forecast / rmse_naive
forecast_bias = np.mean(y_pred - y_test)



print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse_value:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}")
print(f"Symmetric Mean Absolute Percentage Error (SMAPE): {smape_score:.2f}")
print(f"R-squared (R2): {r2:.2f}")
print(f"Mean Forecast Error (MFE): {mfe:.2f}")
print(f"Theil's U statistic: {theils_u:.2f}")
print(f"Forecast Bias: {forecast_bias:.2f}")

Mean Squared Error (MSE): 2.06
Mean Absolute Error (MAE): 1.13
Root Mean Squared Error (RMSE): 1.43
Mean Absolute Percentage Error (MAPE): 0.04
Symmetric Mean Absolute Percentage Error (SMAPE): 4.35
R-squared (R2): 0.97
Mean Forecast Error (MFE): -0.06
Theil's U statistic: 0.13
Forecast Bias: -0.06


#### 5. Symmetric Mean Absolute Percentage Error (SMAPE)

In [4]:
from pmdarima.metrics import smape

smape_score = smape(y_test, y_pred)
print(f"Symmetric Mean Absolute Percentage Error (SMAPE): {smape_score:.2f}")

Symmetric Mean Absolute Percentage Error (SMAPE): 4.35


#### 6. R2 score

In [5]:
from sklearn.metrics import r2_score

# Assuming y_pred and y_test are the predicted values and true values, respectively
r2 = r2_score(y_test, y_pred)
print("R-squared (R2) Score:", r2)

R-squared (R2) Score: 0.9719781384334707


#### 7. mean forecast error

In [6]:
import numpy as np

# Assuming y_pred and y_test are the predicted values and true values, respectively
mfe = np.mean(y_pred - y_test)
print("Mean Forecast Error (MFE):", mfe)


Mean Forecast Error (MFE): -0.05838593795726235


#### 8. Theil's U statistic

In [7]:
import numpy as np
from sklearn.metrics import mean_squared_error


rmse_forecast = np.sqrt(mean_squared_error(y_test, y_pred))
rmse_naive = np.sqrt(mean_squared_error(y_test[1:], y_test[:-1]))  # Naïve forecast
theils_u = rmse_forecast / rmse_naive

print("Theil's U Statistic:", theils_u)


Theil's U Statistic: 0.1260026047339642


#### 9. Forecast Bias

In [8]:
import numpy as np

# Assuming y_pred and y_test are the predicted values and true values, respectively
forecast_bias = np.mean(y_pred - y_test)

print("Forecast Bias:", forecast_bias)


Forecast Bias: -0.05838593795726235


#### 10. Forecast efficiency

In [12]:
import numpy as np
from sklearn.metrics import mean_squared_error


mse_forecast = mean_squared_error(y_test, y_pred)

# Calculate the mean of y_test to use as the benchmark prediction
mean_y_test = np.mean(y_test)
benchmark_predictions = np.full_like(y_test, mean_y_test)

mse_benchmark = mean_squared_error(y_test, benchmark_predictions)

forecast_efficiency = mse_benchmark / mse_forecast

print("Forecast Efficiency:", forecast_efficiency)


Forecast Efficiency: 35.686422817620766
