In [35]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [36]:
df = pd.DataFrame(pd.read_csv('datasets/Fuel_Consumption.csv'))

In [37]:
df.head()

Unnamed: 0,Year,MAKE,MODEL,VEHICLE CLASS,ENGINE SIZE,CYLINDERS,TRANSMISSION,FUEL,FUEL CONSUMPTION,COEMISSIONS
0,2000,ACURA,1.6EL,COMPACT,1.6,4,A4,X,10.5,216
1,2000,ACURA,1.6EL,COMPACT,1.6,4,M5,X,9.8,205
2,2000,ACURA,3.2TL,MID-SIZE,3.2,6,AS5,Z,13.7,265
3,2000,ACURA,3.5RL,MID-SIZE,3.5,6,A4,Z,15.0,301
4,2000,ACURA,INTEGRA,SUBCOMPACT,1.8,4,A4,X,11.4,230


In [38]:
# identify categorical columns
categorical_columns = ['MAKE', 'MODEL', 'VEHICLE CLASS', 'TRANSMISSION', 'FUEL']

# Apply Label encoding

label_encoders = {}

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [39]:
df.head()

Unnamed: 0,Year,MAKE,MODEL,VEHICLE CLASS,ENGINE SIZE,CYLINDERS,TRANSMISSION,FUEL,FUEL CONSUMPTION,COEMISSIONS
0,2000,0,0,0,1.6,4,1,3,10.5,216
1,2000,0,0,0,1.6,4,6,3,9.8,205
2,2000,0,1,2,3.2,6,4,4,13.7,265
3,2000,0,2,2,3.5,6,1,4,15.0,301
4,2000,0,164,9,1.8,4,1,3,11.4,230


In [40]:
# Define features and target variable
x = df.drop(['FUEL CONSUMPTION'], axis=1)
y = df['FUEL CONSUMPTION']

In [41]:
# split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=100)

In [42]:
# train the model
model = LinearRegression()
model.fit(x_train, y_train)

In [43]:
# make predictions on the test set
y_pred = model.predict(x_test)

In [44]:
# evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 0.1391504021832355
R-squared: 0.9852186331063967


**Using Decision Tree Classifier**

In [45]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

In [46]:
# Define features and target variable
y = df['FUEL']  
x = df.drop(['FUEL'], axis=1)  

In [47]:
# split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=100)  

In [48]:
# train the model
model = DecisionTreeClassifier(random_state=100)    
model.fit(x_train, y_train)

In [49]:
# make predictions on the test set
y_pred = model.predict(x_test)

In [50]:
# evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.9296875

Classification Report:
              precision    recall  f1-score   support

           0       0.67      1.00      0.80         2
           3       0.96      0.95      0.95        99
           4       0.85      0.85      0.85        27

    accuracy                           0.93       128
   macro avg       0.83      0.93      0.87       128
weighted avg       0.93      0.93      0.93       128



In [51]:
# Trim spaces from column names
df.columns = df.columns.str.strip()

In [52]:
# Make a prediction for new data
new_data = pd.DataFrame({
    "Year": [2000],
    "MAKE": ["ACURA"],
    "MODEL": ["1.6EL"],
    "VEHICLE CLASS": ["COMPACT"],
    "ENGINE SIZE": [1.6],
    "CYLINDERS": [4],
    "TRANSMISSION": ["A4"],
    "FUEL CONSUMPTION": [10],
    "COEMISSIONS ": [216]
})

# Apply label encoding to the categorical columns
for col in categorical_columns:
    if col != "FUEL":  # Skip FUEL since it's not in our features anymore
        new_data[col] = label_encoders[col].transform(new_data[col])

# Make prediction
prediction = model.predict(new_data)
print(f"Predicted Fuel Type: {label_encoders['FUEL'].inverse_transform(prediction)[0]}")

Predicted Fuel Type: X


**Another Method**

In [53]:
x1 = df.drop(['COEMISSIONS'], axis=1)
y1 = pd.qcut(df['COEMISSIONS'], q=3, labels=['low', 'medium', 'high'])


In [54]:
# split the data into training and testing sets
x1_train, x1_test, y1_train, y1_test = train_test_split(x1, y1, test_size=0.2, random_state=100)


In [55]:
# train the model
model1 = DecisionTreeClassifier(random_state=100)
model1.fit(x1_train, y1_train)

In [56]:
# make predictions on the test set
y1_pred = model1.predict(x1_test)

In [57]:
# evaluate the model
accuracy = accuracy_score(y1_test, y1_pred)
print(f"Accuracy: {accuracy}")
print("\nClassification Report:")
print(classification_report(y1_test, y1_pred))

Accuracy: 0.984375

Classification Report:
              precision    recall  f1-score   support

        high       0.98      0.98      0.98        43
         low       1.00      1.00      1.00        50
      medium       0.97      0.97      0.97        35

    accuracy                           0.98       128
   macro avg       0.98      0.98      0.98       128
weighted avg       0.98      0.98      0.98       128



In [58]:
# To check the Label assigned
for col, le in label_encoders.items():
    print(f"Lables for {col}:")
    print(dict(zip(le.classes_, le.transform(le.classes_))))
    print()


Lables for MAKE:
{'ACURA': 0, 'AUDI': 1, 'BMW': 2, 'BUICK': 3, 'CADILLAC': 4, 'CHEVROLET': 5, 'CHRYSLER': 6, 'DAEWOO': 7, 'DODGE': 8, 'FERRARI': 9, 'FORD': 10, 'GMC': 11, 'HONDA': 12, 'HYUNDAI': 13, 'INFINITI': 14, 'ISUZU': 15, 'JAGUAR': 16, 'JEEP': 17, 'KIA': 18, 'LAND ROVER': 19, 'LEXUS': 20, 'LINCOLN': 21, 'MAZDA': 22, 'MERCEDES-BENZ': 23, 'NISSAN': 24, 'OLDSMOBILE': 25, 'PLYMOUTH': 26, 'PONTIAC': 27, 'PORSCHE': 28, 'SAAB': 29, 'SATURN': 30, 'SUBARU': 31, 'SUZUKI': 32, 'TOYOTA': 33, 'VOLKSWAGEN': 34, 'VOLVO': 35}

Lables for MODEL:
{'1.6EL': 0, '3.2TL': 1, '3.5RL': 2, '300M': 3, '323 CONVERTIBLE': 4, '323Ci': 5, '323i': 6, '328Ci': 7, '328i': 8, '360 MODENA F1': 9, '456 MGT': 10, '456 MGTA': 11, '4RUNNER 4X4': 12, '528i': 13, '528i TOURING': 14, '540i': 15, '540i TOURING': 16, '550 MARANELLO': 17, '626': 18, '740i': 19, '740iL': 20, '750iL': 21, '9-3 TURBO': 22, '9-5 TURBO': 23, '9-5 WAGON TURBO': 24, '911 CARRERA': 25, '911 CARRERA 4': 26, 'A4': 27, 'A4 QUATTRO': 28, 'A6': 29, 'A6 

In [59]:
def get_user_input():
    print("Enter the following details:")
    year = int(input("Enter the Year (Encoded): "))
    make = int(input("Enter the Make (Encoded): " ))
    model = int(input("Enter the Model (Encoded): "))
    vehicle_class = int(input("Enter the Vehicle Class (Encoded): "))
    engine_size = float(input("Enter the Engine Size (Encoded): "))
    cylinders = int(input("Enter the Cylinders (Encoded): "))
    transmission = int(input("Enter the Transmission (Encoded): "))
    fuel = float(input("Enter the Fuel  (Encoded): "))
    co2_emissions = float(input("Enter the CO2 Emissions (Encoded): "))
    # return year, make, model, vehicle_class, engine_size, cylinders, transmission, fuel_consumption, co2_emissions

# Convert to Numpy Array and reshape
    user_input_array = np.array([year, make, model, vehicle_class, engine_size, cylinders, transmission, fuel, co2_emissions]).reshape(1, -1)

    return user_input_array

#Get user input and make prediction
user_input = get_user_input()
prediction = model.predict(user_input)[0]
print(f"Predicted Fuel Consumption: {prediction:.2f} liters per 100 km")


Enter the following details:


Predicted Fuel Consumption: 3.00 liters per 100 km


In [60]:
for col, le in label_encoders.items():
    print(f"Lables for {col}: {list(le.classes_)}")

Lables for MAKE: ['ACURA', 'AUDI', 'BMW', 'BUICK', 'CADILLAC', 'CHEVROLET', 'CHRYSLER', 'DAEWOO', 'DODGE', 'FERRARI', 'FORD', 'GMC', 'HONDA', 'HYUNDAI', 'INFINITI', 'ISUZU', 'JAGUAR', 'JEEP', 'KIA', 'LAND ROVER', 'LEXUS', 'LINCOLN', 'MAZDA', 'MERCEDES-BENZ', 'NISSAN', 'OLDSMOBILE', 'PLYMOUTH', 'PONTIAC', 'PORSCHE', 'SAAB', 'SATURN', 'SUBARU', 'SUZUKI', 'TOYOTA', 'VOLKSWAGEN', 'VOLVO']
Lables for MODEL: ['1.6EL', '3.2TL', '3.5RL', '300M', '323 CONVERTIBLE', '323Ci', '323i', '328Ci', '328i', '360 MODENA F1', '456 MGT', '456 MGTA', '4RUNNER 4X4', '528i', '528i TOURING', '540i', '540i TOURING', '550 MARANELLO', '626', '740i', '740iL', '750iL', '9-3 TURBO', '9-5 TURBO', '9-5 WAGON TURBO', '911 CARRERA', '911 CARRERA 4', 'A4', 'A4 QUATTRO', 'A6', 'A6 AVANT QUATTRO', 'A6 QUATTRO', 'A8 QUATTRO', 'ACCENT', 'ACCORD', 'ACCORD DX', 'ACCORD EX/LX', 'ALERO', 'ALTIMA', 'ASTRO AWD CARGO', 'ASTRO AWD PASSENGER', 'ASTRO CARGO', 'ASTRO PASSENGER', 'AVALON', 'B3000', 'B4000', 'B4000 4X4', 'BLAZER', 'BLAZE

In [63]:
# identify categorical columns
categorical_columns = ['MAKE', 'MODEL', 'VEHICLE CLASS', 'TRANSMISSION', 'FUEL']

# DEcode only categorical values
decode_values = {
    col: label_encoders[col].inverse_transform([int(user_input [0, i])])[0]
    for i, col in enumerate(categorical_columns, start=1) # skip year (index 0)
}

# Print user inputs in original form
print("\nUser Inputs (Original Values):")
for col, val in decode_values.items():
    print(f"{col}: {val}")  

# Print predicted fuel consumption
print(f"\nPredicted Fuel Consumption: {prediction:.2f} liters per 100 km")
    


User Inputs (Original Values):
MAKE: AUDI
MODEL: 3.2TL
VEHICLE CLASS: FULL-SIZE
TRANSMISSION: A4
FUEL: Z

Predicted Fuel Consumption: 3.00 liters per 100 km
