In [1]:
import pandas as pd
import requests
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline
import numpy as np

# Fetch data from URL
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMSkillsNetwork-AI0271EN-SkillsNetwork/labs/v1/m3/data/used_car_price_analysis.csv"
# response = requests.get(url)
#
# if response.status_code != 200:
#     print("Failed to retrieve data. Status code:", response.status_code)
# else:
#     # Read CSV into DataFrame
df = pd.read_csv(url)

# Data Preparation
df = df.dropna(axis=0, how='any', inplace=False)
x = df[['year', 'mileage', 'tax', 'mpg', 'engineSize']]
y = df['price']

# Split the dataset (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Polynomial Features (degree 2)
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Define Ridge Regression Model with Grid Search over alpha values
param_grid = {'ridge__alpha': np.logspace(-4, 4, 100)}

ridge = Ridge(alpha=0.5)

# Pipeline to orchestrate polynomial transformation and regression
ridge_pipeline = Pipeline([
        ('poly', poly),
           ('ridge', ridge)
    ])

ridge_cv = GridSearchCV(
ridge_pipeline, param_grid=param_grid, cv=4, scoring='neg_mean_squared_error', verbose=1, n_jobs=-1
    )



# Fit on the training data
ridge_cv.fit(X_train_poly, y_train)

# Predict on the test set
y_pred = ridge_cv.predict(X_test_poly)

# Evaluation Metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"R^2 Score: {r2}")

print(f"Mean Squared Error (MSE): {mse}")

# Best Parameters
print(f"Best Alpha: {ridge_cv.best_params_['ridge__alpha']}")

Fitting 4 folds for each of 100 candidates, totalling 400 fits
R^2 Score: -4.557210025968131
Mean Squared Error (MSE): 125977939.51151864
Best Alpha: 10000.0
