In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load the dataset.
# The CSV location can be overridden with the CAR_PRICE_CSV environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset, the original absolute path is used unchanged.
DATA_PATH = os.environ.get(
    "CAR_PRICE_CSV",
    "C:\\Users\\LENOVO\\Downloads\\CarPrice_Assignment.csv",
)
df = pd.read_csv(DATA_PATH)

# Explore the dataset: shape, per-column dtypes, summary statistics of the
# numeric columns, and the number of missing values in each column.
print("Dimensions:", df.shape)
print("Data Types:\n", df.dtypes)
print("Summary Statistics:\n", df.describe())
print("Missing Values:\n", df.isnull().sum())



# Drop the car_ID column as it's a row identifier, not a feature for prediction
df = df.drop(columns=['car_ID'])

# Record the numeric predictor columns BEFORE one-hot encoding, so the dummy
# columns created below are never picked up as features to scale.
numeric_features = df.select_dtypes(include=[np.number]).columns.tolist()
numeric_features.remove('price')  # price is the target variable

# Identify categorical features
categorical_features = df.select_dtypes(include=[object]).columns.tolist()

# Perform one-hot encoding (drop_first=True drops one level per category to
# avoid perfectly collinear dummy columns)
df = pd.get_dummies(df, columns=categorical_features, drop_first=True)

# Separate target variable 'price' from the features
X = df.drop('price', axis=1)
y = df['price']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the assignments below modify only the split
# frames (and do not trigger pandas' SettingWithCopyWarning).
X_train = X_train.copy()
X_test = X_test.copy()

# Feature scaling using Min-Max scaling.
# The scaler is fitted on the TRAINING rows only and then applied to the test
# rows; fitting on the full dataset would leak test-set min/max values into
# the training features. As a consequence, scaled test values may fall
# slightly outside [0, 1]. Note that `df` and `X` are left unscaled.
scaler = MinMaxScaler()
X_train[numeric_features] = scaler.fit_transform(X_train[numeric_features])
X_test[numeric_features] = scaler.transform(X_test[numeric_features])

# Build the linear regression model and fit it on the training split
# (fit() returns the estimator itself, so the two steps are chained).
model = LinearRegression().fit(X_train, y_train)

# Predict prices for the held-out test rows
y_pred = model.predict(X_test)

# Evaluate the predictions: apply each metric function to the same
# (true, predicted) pair and unpack the results in order.
mae, mse, r2 = (
    score(y_test, y_pred)
    for score in (mean_absolute_error, mean_squared_error, r2_score)
)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared Score:", r2)

# Example: predict the price of a single car from raw (unscaled) feature values.
# `new_car` and `feature_names` are parallel lists: the i-th value belongs to
# the i-th name.
new_car = [
    3,      # symboling
    88.6,   # wheelbase
    168.8,  # carlength
    64.1,   # carwidth
    48.8,   # carheight
    2548,   # curbweight
    130,    # enginesize
    3.47,   # boreratio
    2.68,   # stroke
    9.00,   # compressionratio
    111,    # horsepower
    5000,   # peakrpm
    21,     # citympg
    27,     # highwaympg
    1,      # fueltype_gas
    0,      # aspiration_turbo
    1,      # doornumber_two
    0,      # carbody_wagon
    1,      # drivewheel_fwd
    0       # enginelocation_rear
]

# List of feature names in the same order as new_car list
feature_names = [
    'symboling', 'wheelbase', 'carlength', 'carwidth', 'carheight',
    'curbweight', 'enginesize', 'boreratio', 'stroke', 'compressionratio',
    'horsepower', 'peakrpm', 'citympg', 'highwaympg', 'fueltype_gas',
    'aspiration_turbo', 'doornumber_two', 'carbody_wagon', 'drivewheel_fwd',
    'enginelocation_rear'
]

# Fail loudly on misspelled or unknown feature names: the column alignment
# below would otherwise discard them silently and predict from zeros instead.
unknown_features = [name for name in feature_names if name not in X_train.columns]
if unknown_features:
    raise ValueError(
        f"Features not present in the training data: {unknown_features}"
    )

# Build a one-row DataFrame and align it to the training columns in a single
# step: columns are put in training order and every training column that was
# not supplied (the remaining one-hot dummies) is filled with 0. Doing this
# with one reindex, rather than inserting columns one by one in a loop,
# avoids the repeated pandas warnings the loop produced.
new_car_df = pd.DataFrame([new_car], columns=feature_names).reindex(
    columns=X_train.columns, fill_value=0
)

# Apply the same (already fitted) scaling to the numeric features
new_car_df[numeric_features] = scaler.transform(new_car_df[numeric_features])

# Predict the price for the new car
new_car_price_pred = model.predict(new_car_df)

# Convert the prediction array to a plain Python list
new_car_price_pred_list = new_car_price_pred.tolist()

print("Predicted Price for the new car:", new_car_price_pred_list[0])


Dimensions: (205, 26)
Data Types:
 car_ID                int64
symboling             int64
CarName              object
fueltype             object
aspiration           object
doornumber           object
carbody              object
drivewheel           object
enginelocation       object
wheelbase           float64
carlength           float64
carwidth            float64
carheight           float64
curbweight            int64
enginetype           object
cylindernumber       object
enginesize            int64
fuelsystem           object
boreratio           float64
stroke              float64
compressionratio    float64
horsepower            int64
peakrpm               int64
citympg               int64
highwaympg            int64
price               float64
dtype: object
Summary Statistics:
            car_ID   symboling   wheelbase   carlength    carwidth   carheight  \
count  205.000000  205.000000  205.000000  205.000000  205.000000  205.000000   
mean   103.000000    0.834146   98.75658

  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_df[col] = 0  # Add missing columns with default value 0
  new_car_