In [1]:
import pandas as pd
import numpy as np


<h2 id="data">Data Information</h2>

### `FuelConsumption.csv`:
We have downloaded a fuel consumption dataset, **`FuelConsumption.csv`**, which contains model-specific fuel consumption ratings and estimated carbon dioxide emissions for new light-duty vehicles for retail sale in Canada. [Dataset source](http://open.canada.ca/data/en/dataset/98f1a129-f628-4ce4-b24d-6f16bf24dd64)

- **MODELYEAR** e.g. 2014
- **MAKE** e.g. Acura
- **MODEL** e.g. ILX
- **VEHICLE CLASS** e.g. SUV
- **ENGINE SIZE** e.g. 4.7
- **CYLINDERS** e.g. 6
- **TRANSMISSION** e.g. A6
- **FUELTYPE** e.g. Z
- **FUEL CONSUMPTION in CITY(L/100 km)** e.g. 9.9
- **FUEL CONSUMPTION in HWY (L/100 km)** e.g. 8.9
- **FUEL CONSUMPTION COMB (L/100 km)** e.g. 9.2
- **CO2 EMISSIONS (g/km)** e.g. 182   --> low --> 0


In [2]:
# Load the fuel-consumption ratings from the CSV in the working directory.
group = pd.read_csv("FuelConsumption.csv")

# Take a look at the dataset (the notebook renders the frame as the cell output).
group

Unnamed: 0,MODELYEAR,MAKE,MODEL,VEHICLECLASS,ENGINESIZE,CYLINDERS,TRANSMISSION,FUELTYPE,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,2014,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,2014,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,2014,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,2014,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,2014,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1062,2014,VOLVO,XC60 AWD,SUV - SMALL,3.0,6,AS6,X,13.4,9.8,11.8,24,271
1063,2014,VOLVO,XC60 AWD,SUV - SMALL,3.2,6,AS6,X,13.2,9.5,11.5,25,264
1064,2014,VOLVO,XC70 AWD,SUV - SMALL,3.0,6,AS6,X,13.4,9.8,11.8,24,271
1065,2014,VOLVO,XC70 AWD,SUV - SMALL,3.2,6,AS6,X,12.9,9.3,11.3,25,260


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
group.describe()

Unnamed: 0,MODELYEAR,ENGINESIZE,CYLINDERS,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
count,1067.0,1067.0,1067.0,1067.0,1067.0,1067.0,1067.0,1067.0
mean,2014.0,3.346298,5.794752,13.296532,9.474602,11.580881,26.441425,256.228679
std,0.0,1.415895,1.797447,4.101253,2.79451,3.485595,7.468702,63.372304
min,2014.0,1.0,3.0,4.6,4.9,4.7,11.0,108.0
25%,2014.0,2.0,4.0,10.25,7.5,9.0,21.0,207.0
50%,2014.0,3.4,6.0,12.6,8.8,10.9,26.0,251.0
75%,2014.0,4.3,8.0,15.55,10.85,13.35,31.0,294.0
max,2014.0,8.4,12.0,30.2,20.5,25.8,60.0,488.0


In [4]:
# Count the missing values in each column.
group.isnull().sum()

MODELYEAR                   0
MAKE                        0
MODEL                       0
VEHICLECLASS                0
ENGINESIZE                  0
CYLINDERS                   0
TRANSMISSION                0
FUELTYPE                    0
FUELCONSUMPTION_CITY        0
FUELCONSUMPTION_HWY         0
FUELCONSUMPTION_COMB        0
FUELCONSUMPTION_COMB_MPG    0
CO2EMISSIONS                0
dtype: int64

In [5]:
# Keep only the numeric engine / fuel-consumption columns plus the CO2 target.
group1 = group[
    [
        "ENGINESIZE",
        "CYLINDERS",
        "FUELCONSUMPTION_CITY",
        "FUELCONSUMPTION_HWY",
        "FUELCONSUMPTION_COMB",
        "FUELCONSUMPTION_COMB_MPG",
        "CO2EMISSIONS",
    ]
]

In [6]:
# Display the reduced frame to check the column selection.
group1

Unnamed: 0,ENGINESIZE,CYLINDERS,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,2.0,4,9.9,6.7,8.5,33,196
1,2.4,4,11.2,7.7,9.6,29,221
2,1.5,4,6.0,5.8,5.9,48,136
3,3.5,6,12.7,9.1,11.1,25,255
4,3.5,6,12.1,8.7,10.6,27,244
...,...,...,...,...,...,...,...
1062,3.0,6,13.4,9.8,11.8,24,271
1063,3.2,6,13.2,9.5,11.5,25,264
1064,3.0,6,13.4,9.8,11.8,24,271
1065,3.2,6,12.9,9.3,11.3,25,260


In [7]:
# Pairwise correlation between the selected columns, including the CO2 target.
group1.corr()

Unnamed: 0,ENGINESIZE,CYLINDERS,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
ENGINESIZE,1.0,0.934011,0.832225,0.778746,0.819482,-0.808554,0.874154
CYLINDERS,0.934011,1.0,0.796473,0.724594,0.776788,-0.77043,0.849685
FUELCONSUMPTION_CITY,0.832225,0.796473,1.0,0.965718,0.995542,-0.935613,0.898039
FUELCONSUMPTION_HWY,0.778746,0.724594,0.965718,1.0,0.985804,-0.893809,0.861748
FUELCONSUMPTION_COMB,0.819482,0.776788,0.995542,0.985804,1.0,-0.927965,0.892129
FUELCONSUMPTION_COMB_MPG,-0.808554,-0.77043,-0.935613,-0.893809,-0.927965,1.0,-0.906394
CO2EMISSIONS,0.874154,0.849685,0.898039,0.861748,0.892129,-0.906394,1.0


In [8]:
# Randomly assign roughly 80% of the rows to training and the rest to testing.
# A seeded generator keeps the split -- and therefore the fitted model and the
# pickle written later -- identical across "Restart Kernel -> Run All".
rng = np.random.default_rng(42)
msk = rng.random(len(group1)) < 0.8
train = group1[msk]
test = group1[~msk]

In [9]:
from sklearn.linear_model import LinearRegression
# Fit a linear regression from four vehicle features to CO2 emissions,
# using the training rows only.
groupletu = LinearRegression()
x = np.asanyarray(
    train[["ENGINESIZE", "CYLINDERS", "FUELCONSUMPTION_CITY", "FUELCONSUMPTION_HWY"]]
)
# Double brackets keep the target as a single-column 2-D array.
y = np.asanyarray(train[["CO2EMISSIONS"]])
groupletu.fit(x, y)

In [10]:
##deploying a model
import joblib 
# Serialize the fitted regressor to the working directory, then load it back
# and display it to confirm the saved file can be restored.
joblib.dump(groupletu,"carbon_dioxide_rate.pkl")
groupletu_model=joblib.load("carbon_dioxide_rate.pkl")
groupletu_model

In [19]:
import numpy as np
import joblib
import pymongo

# Connect to MongoDB (adjust if needed for cloud MongoDB)
# Predictions are written to the "co2_emissions" collection of "co2_database";
# `collection` is read as a module-level name by predict_co2_emissions().
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["co2_database"]
collection = db["co2_emissions"]

# Function to safely take numeric input
def masunga_input(prompt):
    """Prompt repeatedly until the user enters a finite number.

    Args:
        prompt: Text shown to the user by ``input``.

    Returns:
        float: The first entry that parses as a finite floating-point value.

    Entries that do not parse as a number, as well as ``nan`` / ``inf``
    (which ``float`` accepts but which cannot be converted with ``int`` or
    used as a model feature), print a warning and trigger a new prompt.
    """
    import math  # local import keeps this helper self-contained

    while True:
        try:
            value = float(input(prompt))
            # float() happily parses "nan" and "inf"; treat them as invalid
            # so they are handled by the same retry path as unparsable text.
            if not math.isfinite(value):
                raise ValueError("non-finite value")
        except ValueError:
            print("⚠️ Invalid input. Please enter a numeric value.")
        else:
            return value

# Function to predict and store CO2 emissions
def predict_co2_emissions():
    """Interactively predict a vehicle's CO2 emissions and store the result.

    Loads the regressor saved as ``carbon_dioxide_rate.pkl``, prompts for
    engine size, cylinder count and city/highway fuel consumption, prints the
    predicted emission rate in g/km, and inserts the inputs together with the
    prediction into the module-level MongoDB ``collection``.

    Nothing is returned and nothing is raised: a missing model file prints a
    dedicated message, any other exception prints a generic one.
    """
    try:
        # The pickle must be present in the current working directory.
        regressor = joblib.load("carbon_dioxide_rate.pkl")

        print("\n🚗 Enter the following vehicle details to predict CO2 emissions:")

        # Prompts follow the feature order the model was fitted with.
        engine_size = masunga_input("Engine Size (ENGINESIZE): ")
        # int() truncates any fractional part the user may have typed.
        cylinders = int(masunga_input("Number of Cylinders (CYLINDERS): "))
        fuel_consumption_city = masunga_input("Fuel Consumption in City (FUELCONSUMPTION_CITY): ")
        fuel_consumption_hwy = masunga_input("Fuel Consumption on Highway (FUELCONSUMPTION_HWY): ")

        # One row, four columns.
        features = np.array(
            [[engine_size, cylinders, fuel_consumption_city, fuel_consumption_hwy]]
        )

        # Take the first prediction and unwrap it into a plain Python number.
        predicted_co2 = regressor.predict(features)[0].item()

        print(f"\n🌍 Estimated Carbon Dioxide Emission Rate: **{predicted_co2:.2f} g/km**")

        # Persist the inputs alongside the prediction.
        collection.insert_one(
            {
                "engine_size": engine_size,
                "cylinders": cylinders,
                "fuel_consumption_city": fuel_consumption_city,
                "fuel_consumption_hwy": fuel_consumption_hwy,
                "predicted_co2": predicted_co2,
            }
        )
        print("✅ Prediction saved to MongoDB successfully!")

    except FileNotFoundError:
        print("❌ Error: The model file 'carbon_dioxide_rate.pkl' was not found.")
    except Exception as e:
        print(f"❌ An unexpected error occurred: {e}")

# Run the prediction function
# (only when this code is executed as the main program, not when it is imported)
if __name__ == "__main__":
    predict_co2_emissions()



🚗 Enter the following vehicle details to predict CO2 emissions:


Engine Size (ENGINESIZE):  12
Number of Cylinders (CYLINDERS):  3
Fuel Consumption in City (FUELCONSUMPTION_CITY):  4
Fuel Consumption on Highway (FUELCONSUMPTION_HWY):  5



🌍 Estimated Carbon Dioxide Emission Rate: **240.32 g/km**
✅ Prediction saved to MongoDB successfully!


In [12]:
#pip install flask

In [13]:
%pwd

'C:\\Users\\abel2\\Downloads\\WORK1\\multipleLinearRegression\\fuel_regression\\mbogo_co2'

In [14]:
#pip install pandas numpy scikit-learn joblib pymongo streamlit streamlit-jupyter

In [23]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pymongo
import joblib

# ------------------ App Configuration ------------------ #
# Page title and wide layout first, then the visible heading and a one-line description.
st.set_page_config(page_title="CO2 Emissions Explorer", layout="wide")
st.title("🌍 CO2 Emissions Analysis App")
st.markdown("Visualize and predict CO2 emissions based on vehicle fuel consumption.")

# ------------------ Load Data ------------------ #
@st.cache_data
def load_data():
    """Read ``FuelConsumption.csv`` from the working directory into a DataFrame.

    Decorated with ``st.cache_data`` -- presumably so the file is not
    re-read on every script rerun.
    """
    fuel_df = pd.read_csv("FuelConsumption.csv")
    return fuel_df


df = load_data()

# ------------------ MongoDB Connection ------------------ #
# MongoClient connects lazily, so constructing it does not fail when the
# server is unreachable. Use a short server-selection timeout and an explicit
# ping so a dead server is detected here (and reported below) instead of
# stalling the first insert.
try:
    client = pymongo.MongoClient(
        "mongodb://localhost:27017/", serverSelectionTimeoutMS=2000
    )
    client.admin.command("ping")
    db = client["co2_database"]
    collection = db["co2_emissions"]
except Exception as e:
    # Downstream code treats `collection is None` as "persistence unavailable".
    collection = None
    st.error(f"❌ Failed to connect to MongoDB: {e}")

# ------------------ Data Editor ------------------ #
# The editor returns the frame including any changes the user makes in the UI.
st.subheader("📝 Edit/View Raw CO2 Emissions Data")
edited_df = st.data_editor(df)

# ------------------ Data Display ------------------ #
# Shows the edited frame as-is; no filtering is applied here.
st.subheader("📊 CO2 Emissions Data")
st.dataframe(edited_df)

# ------------------ Metric: Average CO2 Emissions ------------------ #
st.subheader("📉 Average CO2 Emissions")
# Mean over the (possibly edited) table. CO2EMISSIONS is a per-vehicle rate in
# grams per kilometre, so the metric is labelled in g/km.
avg_emissions = edited_df["CO2EMISSIONS"].mean()
st.metric(label="Average CO2 Emissions", value=f"{avg_emissions:.2f} g/km")

# ------------------ CO2 Prediction Section ------------------ #
st.subheader("🤖 Predict CO2 Emissions from Vehicle Features")

# The four inputs are grouped in a form; the prediction runs only after the
# submit button is pressed.
with st.form("prediction_form"):
    engine_size = st.number_input("Engine Size (L)", 0.5, 10.0, 2.0, 0.1)
    cylinders = st.slider("Number of Cylinders", 2, 16, 4)
    fuel_city = st.number_input("Fuel Consumption - City (L/100km)", 1.0, 30.0, 10.0)
    fuel_hwy = st.number_input("Fuel Consumption - Highway (L/100km)", 1.0, 30.0, 7.0)
    submitted = st.form_submit_button("Predict CO2 Emission")

    if submitted:
        try:
            model = joblib.load("carbon_dioxide_rate.pkl")
            # One row; column order matches the features the model was fitted on
            # (engine size, cylinders, city consumption, highway consumption).
            input_data = np.array([[engine_size, cylinders, fuel_city, fuel_hwy]])
            # Take the first prediction and unwrap it into a plain Python number.
            prediction = model.predict(input_data)[0].item()
            st.success(f"✅ Estimated CO2 Emission: **{prediction:.2f} g/km**")

            # Save to MongoDB.
            # PyMongo Collection objects raise NotImplementedError on truth-value
            # testing, so availability must be checked with an explicit None test.
            if collection is not None:
                # Field names match the command-line predictor, which writes to
                # the same collection, so all records share one schema.
                record = {
                    "engine_size": engine_size,
                    "cylinders": cylinders,
                    "fuel_consumption_city": fuel_city,
                    "fuel_consumption_hwy": fuel_hwy,
                    "predicted_co2": prediction,
                }
                collection.insert_one(record)
                st.info("📦 Prediction saved to MongoDB!")

        except FileNotFoundError:
            st.error("❌ Model file `carbon_dioxide_rate.pkl` not found.")
        except Exception as e:
            st.error(f"❌ Error during prediction: {e}")

# ------------------ Footer ------------------ #
# Horizontal rule followed by the author credit.
st.markdown("---")
st.caption("Created by Abel Mbogo Masunga • Data Scientist 🌐")




# 🌍 CO2 Emissions Analysis App

Visualize and predict CO2 emissions based on vehicle fuel consumption.

2025-05-27 17:32:06.931 No runtime found, using MemoryCacheStorageManager


### 📝 Edit/View Raw CO2 Emissions Data



### 📊 CO2 Emissions Data

Unnamed: 0,MODELYEAR,MAKE,MODEL,VEHICLECLASS,ENGINESIZE,CYLINDERS,TRANSMISSION,FUELTYPE,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,2014,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,2014,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,2014,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,2014,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,2014,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1062,2014,VOLVO,XC60 AWD,SUV - SMALL,3.0,6,AS6,X,13.4,9.8,11.8,24,271
1063,2014,VOLVO,XC60 AWD,SUV - SMALL,3.2,6,AS6,X,13.2,9.5,11.5,25,264
1064,2014,VOLVO,XC70 AWD,SUV - SMALL,3.0,6,AS6,X,13.4,9.8,11.8,24,271
1065,2014,VOLVO,XC70 AWD,SUV - SMALL,3.2,6,AS6,X,12.9,9.3,11.3,25,260


### 📉 Average CO2 Emissions



### 🤖 Predict CO2 Emissions from Vehicle Features



---

> Created by Abel Mbogo Masunga • Data Scientist 🌐