In [1]:
import os

import pandas as pd
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

## Step 1: Load the Dataset

In [2]:
# Location of the car age/price CSV. The default is the original author's local
# path; set the CAR_AGE_PRICE_CSV environment variable to run this notebook on
# another machine without editing the cell.
DATA_PATH = os.environ.get(
    "CAR_AGE_PRICE_CSV",
    r'C:\Users\musta\Downloads\datasets\car_age_price.csv',
)
data = pd.read_csv(DATA_PATH)

## Step 2: Explore and Preprocess the Data

In [3]:
# Preview the first five rows (five is head()'s default) to check the columns.
data.head(n=5)

Unnamed: 0,Year,Price
0,2018,465000
1,2019,755000
2,2019,700000
3,2018,465000
4,2018,465000


In [4]:
# Count missing values per column; isna() performs the same check as isnull().
data.isna().sum()

Year     0
Price    0
dtype: int64

In [5]:
# Feature matrix: selecting with a list keeps `Year` as a single-column frame,
# so the resulting array is 2-D with one column.
X = data[['Year']].to_numpy()
# Target vector: the `Price` column as a 1-D array.
y = data['Price'].to_numpy()


## Step 3: Split the Data into Training and Testing Sets

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Step 4: Train the Linear Regression Model

In [8]:
# fit() returns the estimator itself, so construction and training can be chained.
linear_model = LinearRegression().fit(X_train, y_train)
# Bare last expression so the cell still displays the fitted estimator.
linear_model

LinearRegression()

## Step 5: Train the Lasso Regression Model

In [9]:
# Lasso regression with alpha=0.1, trained on the same split as the linear model.
# NOTE(review): alpha=0.1 looks very small next to prices in the hundreds of
# thousands; the recorded metrics for the two models are nearly identical, so
# the penalty is probably having almost no effect. Confirm whether that is intended.
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)
# Bare last expression so the cell still displays the fitted estimator.
lasso_model

Lasso(alpha=0.1)

## Step 6: Evaluate Model Performance

In [10]:
def _score_and_report(heading, y_true, y_pred):
    """Print MSE and R-squared for one model's predictions and return them.

    Parameters
    ----------
    heading : str
        Line printed before the metrics.
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Model predictions for the same rows.

    Returns
    -------
    tuple
        ``(mse, r2)`` as returned by ``mean_squared_error`` and ``r2_score``.
    """
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print(heading)
    print("MSE:", mse)
    print("R-squared:", r2)
    return mse, r2


# Predictions on the held-out rows for both fitted models.
linear_pred = linear_model.predict(X_test)
lasso_pred = lasso_model.predict(X_test)

# The leading "\n" in the second heading keeps a blank line between the reports.
linear_mse, linear_r2 = _score_and_report("Linear Regression:", y_test, linear_pred)
lasso_mse, lasso_r2 = _score_and_report("\nLasso Regression:", y_test, lasso_pred)

Linear Regression:
MSE: 4326906256.829671
R-squared: 0.36759313425902185

Lasso Regression:
MSE: 4326901608.19506
R-squared: 0.36759381368868127


## Step 7: Predict the Price for a 2022 Model

In [11]:
# Keep whichever model has the strictly higher R-squared; a tie goes to Lasso.
chosen_model = linear_model if linear_r2 > lasso_r2 else lasso_model

In [12]:
# One sample with one feature, given as a nested list to match the 2-D shape
# the models were trained on.
year_2022 = [[2022]]
predicted_price = chosen_model.predict(year_2022)
# predict() returns an array, so the single value prints inside brackets.
print(
    "Predicted price for a 2022 model:",
    predicted_price,
)

Predicted price for a 2022 model: [743601.39177768]
