# CO2 Emissions Prediction Model
This project analyzes vehicle characteristics such as engine size, cylinder count, and fuel consumption to predict CO2 emissions.

## Import Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

## Data Loading and Exploration

In [None]:
# Relative path: resolved against the kernel's current working directory.
DATA_FILE = 'FuelConsumption.csv'

df = pd.read_csv(DATA_FILE)
df.head()

## Data Preparation

In [None]:
# Narrow the raw frame to the three predictors plus the target column
# (CO2EMISSIONS) that the rest of the notebook works with.
selected_columns = ['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB', 'CO2EMISSIONS']
cdf = df[selected_columns]
cdf.head()

## Exploratory Data Analysis

In [None]:

# Pull each predictor and the target out of the working frame as Series.
x1 = cdf.FUELCONSUMPTION_COMB
x2 = cdf.ENGINESIZE
x3 = cdf.CYLINDERS
y = cdf.CO2EMISSIONS

# Draw one scatter plot per predictor against CO2 emissions; plt.show() is
# called after each one so every predictor gets its own figure.
for predictor, axis_label in (
    (x1, 'Fuel Consumption'),
    (x2, 'Engine Size'),
    (x3, 'Cylinders'),
):
    plt.scatter(predictor, y)
    plt.xlabel(axis_label)
    plt.ylabel('CO2 Emissions')
    plt.show()

## Data Split

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Predictor columns (X) and the column to be predicted (y).
feature_columns = ['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB']
target_column = 'CO2EMISSIONS'

X = cdf[feature_columns]
y = cdf[target_column]

# Hold out 20% of the rows for testing (the remaining 80% is used for training);
# the fixed random_state makes the split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Training the Model
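We fit a multiple linear regression model: CO2 emissions are modelled as a linear combination of engine size, cylinder count, and combined fuel consumption.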

In [None]:
from sklearn import linear_model
# Build the ordinary least-squares model and fit it on the training split in
# one expression; fit() returns the fitted estimator itself.
regr = linear_model.LinearRegression().fit(x_train, y_train)

# Report the learned weights (one per feature column) and the intercept term.
print("Coeffs:", regr.coef_)
print("intercept", regr.intercept_)

## Model Evaluation

In [None]:
from sklearn.metrics import  r2_score
# Predict on the held-out test split and keep the per-row error around,
# since both MSE and MAE are derived from it.
y_test_hat = regr.predict(x_test)
residuals = y_test - y_test_hat

# Coefficient of determination on the test split
r2 = r2_score(y_test, y_test_hat)

# Squared-error metrics: mean squared error and its square root
mse = np.mean(residuals ** 2)
rmse = np.sqrt(mse)

# Mean absolute error
mae = np.mean(np.abs(residuals))

print("MODEL PERFORMANCE METRICS:")
print(f"R² Score: {r2:.6f}")
print(f"Mean Squared Error (MSE): {mse:.6f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.6f}")
print(f"Mean Absolute Error (MAE): {mae:.6f}")

# Results Interpretation:

R² Score of 0.8760: The model explains approximately 87.60% of the variance in CO2 emissions on the test set.

Good model performance: R² > 0.8 indicates a very good predictive model


# Example:
Assume a vehicle with the following features (enter these values when prompted by the cell below):

- Engine Size = 4
- Number of Cylinders = 4
- Fuel Consumption = 8

In [None]:
def _prompt_number(prompt, cast, low, high, range_error, type_error):
    """Ask the user for a number until it parses and lies within bounds.

    Parameters
    ----------
    prompt : str
        Text shown by ``input()``.
    cast : callable
        Converter applied to the typed text (``int`` or ``float``); a
        ``ValueError`` raised by it is treated as invalid input.
    low : int or float
        Inclusive lower bound.
    high : int, float or None
        Inclusive upper bound; ``None`` means no upper bound.
    range_error : str
        Message printed when the value parses but is out of bounds.
    type_error : str
        Message printed when the text cannot be converted by ``cast``.

    Returns
    -------
    The converted value, guaranteed to satisfy ``low <= value`` and, when
    ``high`` is given, ``value <= high``.
    """
    while True:
        try:
            value = cast(input(prompt))
        except ValueError:
            print(type_error)
            continue
        if value >= low and (high is None or value <= high):
            return value
        print(range_error)


exmp_eng_size = _prompt_number(
    'Enter Engine Size: ', float, 1, 8,
    'Engine Size must be between 1 and 8',
    "Please enter a valid number (e.g., 5.5).",
)

# The range check applies to the cylinder count itself (the original cell
# re-checked the engine size here, so any integer was accepted).
exmp_cyl = _prompt_number(
    'Enter number of cylinders: ', int, 1, 12,
    'Number of cylinders must be between 1 and 12',
    "Please enter a whole number (e.g., 2).",
)

# Fuel consumption only has a lower bound; 4 itself is accepted.
exmp_fuel_cons = _prompt_number(
    'Fuel Consumption: ', float, 4, None,
    'Fuel Consumption must be at least 4',
    "Please enter a valid number (e.g., 12.5).",
)

# Prepare input as a one-row DataFrame whose columns match, in name and order,
# the frame the model was fitted on. Recent scikit-learn versions warn when a
# model fitted with feature names is given unnamed data such as a nested list.
x_exmp = pd.DataFrame(
    [[exmp_eng_size, exmp_cyl, exmp_fuel_cons]],
    columns=['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB'],
)

# Make prediction
y_exmp = regr.predict(x_exmp)

# Format output nicely
print('\nPrediction Results:')
print(f'Estimated Emission: {y_exmp[0]:,.2f}')