# Polynomial Regression

In [1]:
# Imports
import pandas as pd
from data_cleaning import clean_car_data
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Read the raw USA cars dataset; the relative path is resolved against the
# current working directory.
cars_raw = pd.read_csv(filepath_or_buffer="USA_cars_datasets.csv")

In [4]:
# Clean the data
# clean_car_data is a project-local helper imported from data_cleaning; its
# cleaning rules are not visible in this notebook.
# NOTE(review): the one-hot encoding step that follows selects columns by the
# pandas 'category' dtype, so it presumably relies on this helper returning
# categorical fields with that dtype — TODO confirm against data_cleaning.
cars = clean_car_data(cars_raw)

In [5]:
# One-hot encode the categorical variables

# Collect the names of every column whose dtype is 'category'
category_columns = cars.select_dtypes(include='category').columns

# Replace each of those columns with 0/1 indicator columns. drop_first=True
# omits the indicator for the first level of every variable, so that level is
# represented by all of the variable's remaining indicators being zero.
cars = pd.get_dummies(
    data=cars,
    columns=category_columns,
    drop_first=True,
)

In [7]:
# Split the table into the target (price) and the predictors (everything else)
y = cars["price"]
x = cars.drop(columns=["price"])

In [8]:
# Partition the data, then min-max scale the numeric variables
#
# The split is done BEFORE scaling so the scaler never sees the held-out rows.
# Fitting it on the full table would leak the test set's per-column min/max
# into the training features.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=42)

# Work on explicit copies so the column assignments below cannot write into
# (or warn about writing into) slices of x.
x_train = x_train.copy()
x_test = x_test.copy()

# MinMaxScaler rescales each column to the [0, 1] range seen during fit
# (normalization, not z-score standardization).
scaler = MinMaxScaler()

# Identify numeric columns
number_columns = x.select_dtypes('number').columns

# Learn each column's min/max from the training rows only, then apply that
# same mapping to the test rows. Test values outside the training range will
# therefore fall outside [0, 1], which is expected.
x_train[number_columns] = scaler.fit_transform(x_train[number_columns])
x_test[number_columns] = scaler.transform(x_test[number_columns])

In [11]:
# Set up the degree-2 polynomial feature expansion. The keyword values spelled
# out here are the library defaults: squares and pairwise products are both
# generated, and a constant bias column is included.
poly = PolynomialFeatures(
    degree=2,
    interaction_only=False,
    include_bias=True,
)

In [12]:
# Expand the training and testing features into polynomial terms.
# The expansion is fitted on the training features only; the fitted
# transformer is then reused, without refitting, on the testing features.
poly.fit(x_train)
x_train_poly = poly.transform(x_train)
x_test_poly = poly.transform(x_test)