In [53]:
# model_pipeline.py

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# Load data
# Read the raw fleet export into a DataFrame; the path is relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer="fleet_data.csv")

# Preview the first five rows to sanity-check the parsed columns.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# row index that was written into the CSV — confirm, and consider index_col=0.
df.head(n=5)


Unnamed: 0,ExtractDate,OrganisationURI,OrganisationLabel,ServiceTypeURI,ServiceTypeLabel,Fleet,Vehicle,Fuel,Dist.Run,MPG,Type
0,11/03/2015,http://opendatacommunities.org/id/metropolitan...,Leeds,http://id.esd.org.uk/subject/7539,Fleet Management,84600,GULLY TANK,865.55,1216K*,5.05,D
1,11/03/2015,http://opendatacommunities.org/id/metropolitan...,Leeds,http://id.esd.org.uk/subject/7539,Fleet Management,86904,LOADING SH,292.87,1691M*,30.42,D
2,11/03/2015,http://opendatacommunities.org/id/metropolitan...,Leeds,http://id.esd.org.uk/subject/7539,Fleet Management,AY61ESG,SMALL VAN,864.35,8904M*,47.9,D
3,11/03/2015,http://opendatacommunities.org/id/metropolitan...,Leeds,http://id.esd.org.uk/subject/7539,Fleet Management,AY61ESN,SMALL VAN,482.71,14980M*,153.5,D
4,11/03/2015,http://opendatacommunities.org/id/metropolitan...,Leeds,http://id.esd.org.uk/subject/7539,Fleet Management,98052,MINIBUS,65.0,ONLY,ONE AUDIT,D


In [7]:
# Clean 'Dist.Run'
def extract_distance(value):
    """Parse the numeric part of a raw 'Dist.Run' reading.

    Keeps the ASCII digits and decimal points found in ``value`` and converts
    the result to a float, so ``"1216K*"`` becomes ``1216.0`` and ``"12.5M"``
    becomes ``12.5`` (a digits-only filter would turn the latter into 125.0).

    Parameters
    ----------
    value : object
        Raw cell value. Only ``str`` inputs are parsed.

    Returns
    -------
    float
        The parsed number, or ``np.nan`` when ``value`` is not a string or
        contains no parseable number (for example ``"ONLY"`` or ``"1.2.3"``).

    Notes
    -----
    Letters and symbols around the number (such as the trailing ``K*`` / ``M*``
    seen in the data preview) are discarded without any conversion.
    NOTE(review): if those markers denote different units, the returned values
    are on mixed scales — confirm against the data source.
    """
    if not isinstance(value, str):
        # Non-string cells (e.g. a float NaN or None) carry nothing to parse.
        return np.nan

    numeric_text = ''.join(ch for ch in value if ch in '0123456789.')
    try:
        return float(numeric_text)
    except ValueError:
        # No digits at all, or a malformed number such as "1.2.3".
        return np.nan

# Derive a numeric 'Distance' column from the raw 'Dist.Run' text, one cell at
# a time. The same assignment is repeated in the later cleaning cell.
df['Distance'] = df['Dist.Run'].apply(func=extract_distance)

In [9]:

# Clean 'MPG'
def clean_mpg(value):
    """Convert a raw 'MPG' cell to a float.

    Parameters
    ----------
    value : object
        Raw cell value, e.g. ``"5.05"`` or a free-text marker such as
        ``"ONE AUDIT"``.

    Returns
    -------
    float
        ``float(value)`` when the conversion succeeds, otherwise ``np.nan``.

    Notes
    -----
    Only the errors ``float()`` raises for bad input are treated as "not a
    number". Anything else (for example ``KeyboardInterrupt``) propagates
    instead of being silently turned into NaN.
    """
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type such as None; ValueError: unparsable
        # text; OverflowError: an integer too large for a float.
        return np.nan



In [17]:
# Factor multiplied with 'Fuel' to derive the CO2 target column.
# NOTE(review): 2.68 looks like kg of CO2 per litre of diesel — confirm the
# unit of 'Fuel' and the source of this factor.
CO2_PER_FUEL_UNIT = 2.68

# Turn the free-text distance and MPG columns into floats; unparsable cells
# become NaN.
df['Distance'] = df['Dist.Run'].apply(extract_distance)
df['MPG'] = df['MPG'].apply(clean_mpg)

# Coerce 'Fuel' the same way so a stray text cell becomes NaN (and is dropped
# below) instead of making the multiplication further down raise.
df['Fuel'] = pd.to_numeric(df['Fuel'], errors='coerce')

# Keep only the rows where all three numeric inputs are present.
df.dropna(subset=['Fuel', 'Distance', 'MPG'], inplace=True)

# Integer code per distinct 'Type' value (pandas assigns -1 to missing values).
df['Type_encoded'] = df['Type'].astype('category').cat.codes

# Target column: fuel scaled by the constant factor above.
df['CO2_Emissions'] = df['Fuel'] * CO2_PER_FUEL_UNIT

In [19]:

# Model inputs and the regression target.
# NOTE(review): 'CO2_Emissions' is computed as a constant multiple of 'Fuel'
# in the cleaning cell, and 'Fuel' is also one of the features here, so the
# target is fully determined by a single input — confirm this is intended.
X = df.loc[:, ['Fuel', 'Distance', 'MPG', 'Type_encoded']]
y = df.loc[:, 'CO2_Emissions']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training rows (fit() returns the
# estimator itself, so construction and fitting can be chained).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predictions for the held-out rows, consumed by the evaluation cell.
y_pred = model.predict(X_test)



In [59]:
# Score the held-out predictions: mean absolute error, root mean squared
# error (square root of the MSE), and the coefficient of determination.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
#percent_error = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
r2 = r2_score(y_test, y_pred)

# Report one metric per line.
print(
    f"MAE: {mae:.2f}",
    f"RMSE: {rmse:.2f}",
    #f"Percent Error: {percent_error:.2f}%",
    f"R² Score: {r2:.4f}",
    sep="\n",
)

MAE: 453.28
RMSE: 6732.70
R² Score: 0.7444


In [27]:

# Persist the fitted estimator next to the notebook so it can be loaded
# elsewhere without retraining, then confirm on stdout.
joblib.dump(model, filename="carbon_rf_model.pkl")
print("✅ Model trained and saved as carbon_rf_model.pkl")

✅ Model trained and saved as carbon_rf_model.pkl


In [45]:
# import streamlit as st
# import urllib
# import requests
# def get_route_alternatives(origin, destination, api_key):
#     if not api_key:
#         st.warning("❗ Google Maps API key is missing.")
#         return []

#     origin_encoded = urllib.parse.quote(origin)
#     destination_encoded = urllib.parse.quote(destination)

#     url = (
#         f"https://maps.googleapis.com/maps/api/directions/json"
#         f"?origin={origin_encoded}&destination={destination_encoded}&alternatives=true&key={api_key}"
#     )
#     r = requests.get(url)
#     routes = []

#     if r.status_code == 200:
#         data = r.json()
#         if data.get("status") != "OK":
#             st.error(f"Google Maps API Error: {data.get('status')} - {data.get('error_message', '')}")
#             return []
#         for route in data.get("routes", []):
#             leg = route["legs"][0]
#             routes.append({
#                 "summary": route.get("summary", "Unnamed"),
#                 "distance_km": leg["distance"]["value"] / 1000,
#                 "duration_min": leg["duration"]["value"] / 60,
#                 "start_location": leg["start_location"],
#                 "end_location": leg["end_location"]
#             })
#     else:
#         st.error(f"Google Maps API HTTP Error: {r.status_code} - {r.text}")
#     return routes

In [47]:
# origin = "Leeds, UK"
# destination = "Manchester, UK"
# api_key = os.environ["GOOGLE_MAPS_API_KEY"]  # key removed: never commit credentials; rotate the previously exposed key
# print(get_route_alternatives(origin, destination, api_key))

In [49]:
# Keys removed: load secrets from the environment and rotate the previously exposed values.
# OPENWEATHER_API_KEY = os.environ["OPENWEATHER_API_KEY"]
# GOOGLE_MAPS_API_KEY = os.environ["GOOGLE_MAPS_API_KEY"]