In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the dataset
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
columns = ["MPG", "Cylinders", "Displacement", "Horsepower", "Weight", "Acceleration", "Model Year", "Origin"]
df = pd.read_csv(url, delim_whitespace=True, names=columns, na_values='?')


In [None]:
df = pd.read_csv('your_dataset.csv', index_col=0)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Float64Index: 398 entries, 18.0 to 31.0
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   MPG           398 non-null    int64  
 1   Cylinders     398 non-null    float64
 2   Displacement  392 non-null    float64
 3   Horsepower    398 non-null    float64
 4   Weight        398 non-null    float64
 5   Acceleration  398 non-null    int64  
 6   Model Year    398 non-null    int64  
 7   Origin        398 non-null    object 
dtypes: float64(4), int64(3), object(1)
memory usage: 28.0+ KB


In [4]:
# Fill missing values
df['Horsepower'] = df['Horsepower'].fillna(df['Horsepower'].median())



In [17]:
Total = df.isnull().sum().sort_values(ascending=False)
Percent = (df.isnull().sum()*100/df.isnull().count()).sort_values(ascending=False)
missing_data = pd.concat([Total, Percent], axis = 1, keys = ['Total', 'Percentage of Missing Values'])
missing_data

Unnamed: 0,Total,Percentage of Missing Values
MPG,0,0.0
Cylinders,0,0.0
Displacement,0,0.0
Horsepower,0,0.0
Weight,0,0.0
Acceleration,0,0.0
Model Year,0,0.0
Origin,0,0.0


In [16]:
df['Displacement'] = df['Displacement'].fillna(df['Displacement'].mean())

In [18]:
Total = df.isnull().sum().sort_values(ascending=False)
Percent = (df.isnull().sum()*100/df.isnull().count()).sort_values(ascending=False)
missing_data = pd.concat([Total, Percent], axis = 1, keys = ['Total', 'Percentage of Missing Values'])
missing_data

Unnamed: 0,Total,Percentage of Missing Values
MPG,0,0.0
Cylinders,0,0.0
Displacement,0,0.0
Horsepower,0,0.0
Weight,0,0.0
Acceleration,0,0.0
Model Year,0,0.0
Origin,0,0.0


In [32]:
df

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...
27.0,4,140.0,86.0,2790.0,15.6,82,1,ford mustang gl
44.0,4,97.0,52.0,2130.0,24.6,82,2,vw pickup
32.0,4,135.0,84.0,2295.0,11.6,82,1,dodge rampage
28.0,4,120.0,79.0,2625.0,18.6,82,1,ford ranger


In [19]:
# Split into features (X) and target (y)
X = df.drop(columns=['MPG','Origin'])
y = df['MPG']



In [20]:
X 

Unnamed: 0,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year
18.0,307.0,130.0,3504.0,12.0,70,1
15.0,350.0,165.0,3693.0,11.5,70,1
18.0,318.0,150.0,3436.0,11.0,70,1
16.0,304.0,150.0,3433.0,12.0,70,1
17.0,302.0,140.0,3449.0,10.5,70,1
...,...,...,...,...,...,...
27.0,140.0,86.0,2790.0,15.6,82,1
44.0,97.0,52.0,2130.0,24.6,82,2
32.0,135.0,84.0,2295.0,11.6,82,1
28.0,120.0,79.0,2625.0,18.6,82,1


In [21]:
y

18.0    8
15.0    8
18.0    8
16.0    8
17.0    8
       ..
27.0    4
44.0    4
32.0    4
28.0    4
31.0    4
Name: MPG, Length: 398, dtype: int64

In [22]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Model training
model = LinearRegression()
model.fit(X_train, y_train)




In [23]:
# Model prediction
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Model Mean Squared Error: {mse}")

Model Mean Squared Error: 0.26748907834267277


In [24]:
pip install flask

Note: you may need to restart the kernel to use updated packages.




In [26]:
pip install werkzeug==2.0.3

Collecting werkzeug==2.0.3
  Downloading Werkzeug-2.0.3-py3-none-any.whl (289 kB)
Installing collected packages: werkzeug
  Attempting uninstall: werkzeug
    Found existing installation: werkzeug 3.0.3
    Uninstalling werkzeug-3.0.3:
      Successfully uninstalled werkzeug-3.0.3
Successfully installed werkzeug-2.0.3
Note: you may need to restart the kernel to use updated packages.




In [27]:
from flask import Flask, request, jsonify
import numpy as np

app = Flask(__name__)

# Use the trained model
@app.route('/predict', methods=['POST'])
def predict_mpg():
    # Get the JSON data
    data = request.get_json()
    
    # Extract variables
    cylinders = data['Cylinders']
    displacement = data['Displacement']
    horsepower = data['Horsepower']
    weight = data['Weight']
    acceleration = data['Acceleration']
    model_year = data['Model Year']
    
    
    # Create feature vector for prediction
    features = np.array([[cylinders, displacement, horsepower, weight, acceleration, model_year, origin]])
    
    # Predict MPG
    prediction = model.predict(features)
    
    return jsonify({'MPG': prediction[0]})

if __name__ == '__main__':
    app.run(debug=True)

ImportError: cannot import name 'BaseResponse' from 'werkzeug.wrappers' (C:\Users\mayur\AppData\Roaming\Python\Python39\site-packages\werkzeug\wrappers\__init__.py)

In [30]:
import requests

url = 'http://127.0.0.1:5000/predict'
data = {
    'Cylinders': 4,
    'Displacement': 140,
    'Horsepower': 90,
    'Weight': 2400,
    'Acceleration': 19,
    'Model Year': 1972
    
}

response = requests.post(url, json=data)
print(response.json())

ConnectionError: HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /predict (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000002E09768CD00>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

In [33]:
import tkinter as tk
from tkinter import messagebox
import numpy as np

def predict_mpg():
    try:
        # Get values from the entry fields
        cylinders = float(cyl_entry.get())
        displacement = float(disp_entry.get())
        horsepower = float(hp_entry.get())
        weight = float(weight_entry.get())
        acceleration = float(accel_entry.get())
        model_year = float(year_entry.get())
        
        
        # Create feature vector for prediction
        features = np.array([[cylinders, displacement, horsepower, weight, acceleration, model_year]])
        
        # Predict MPG
        prediction = model.predict(features)
        messagebox.showinfo('Prediction', f'Predicted MPG: {prediction[0]:.2f}')
    except Exception as e:
        messagebox.showerror('Error', str(e))

# Tkinter GUI
root = tk.Tk()
root.title("MPG Predictor")

# Create labels and entry fields
tk.Label(root, text="Cylinders").grid(row=0)
tk.Label(root, text="Displacement").grid(row=1)
tk.Label(root, text="Horsepower").grid(row=2)
tk.Label(root, text="Weight").grid(row=3)
tk.Label(root, text="Acceleration").grid(row=4)
tk.Label(root, text="Model Year").grid(row=5)


cyl_entry = tk.Entry(root)
disp_entry = tk.Entry(root)
hp_entry = tk.Entry(root)
weight_entry = tk.Entry(root)
accel_entry = tk.Entry(root)
year_entry = tk.Entry(root)


cyl_entry.grid(row=0, column=1)
disp_entry.grid(row=1, column=1)
hp_entry.grid(row=2, column=1)
weight_entry.grid(row=3, column=1)
accel_entry.grid(row=4, column=1)
year_entry.grid(row=5, column=1)


# Predict button
predict_btn = tk.Button(root, text="Predict MPG", command=predict_mpg)
predict_btn.grid(row=7, column=1)

# Run the application
root.mainloop()

