In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Load the vehicle listing CSV from the Kaggle input directory.
csv_path = '/kaggle/input/vehicle-dataset-from-cardekho/car data.csv'
carPrice_dataset = pd.read_csv(csv_path)

# Preview the first rows to sanity-check the load.
carPrice_dataset.head()

In [None]:
# Print each column's dtype and non-null count to spot missing values early.
carPrice_dataset.info()

In [None]:
# Summary statistics for every column; include='all' also covers the
# non-numeric columns, not only the numeric ones.
carPrice_dataset.describe(include= 'all')

In [None]:
# Peek at a random handful of car names. The fixed random_state keeps the
# displayed sample identical across Restart & Run All.
carPrice_dataset['Car_Name'].sample(10, random_state=0)

In [None]:
# Remove Car_Name from the working frame. Rebinding the name (instead of
# inplace=True) together with errors='ignore' keeps this cell safe to re-run:
# a second execution is a no-op rather than a KeyError.
carPrice_dataset = carPrice_dataset.drop(columns='Car_Name', errors='ignore')

In [None]:
# Re-store Owner with object dtype.
owner_labels = carPrice_dataset['Owner'].astype('object')
carPrice_dataset['Owner'] = owner_labels

In [None]:
# Count how many rows fall under each distinct Owner value.
carPrice_dataset['Owner'].value_counts()

In [None]:
def transform_year(x):
    """Bucket a model year into a coarse age label.

    Parameters
    ----------
    x : number
        The year to classify.

    Returns
    -------
    str
        "Old" when x < 2012, "moderate" when 2012 <= x < 2016, and "new"
        for everything else (any value that fails both comparisons).
    """
    # Exclusive upper bounds, checked in ascending order.
    buckets = ((2012, "Old"), (2016, "moderate"))
    for upper_bound, label in buckets:
        if x < upper_bound:
            return label
    return "new"

# Derive an age-bucket feature from the model year and store it with object
# dtype.
year_labels = carPrice_dataset['Year'].apply(transform_year)
carPrice_dataset['year_cat'] = year_labels.astype('object')

In [None]:
# Count plots for four categorical features on a single 2x2 grid.
count_columns = ['Fuel_Type', 'Seller_Type', 'year_cat', 'Transmission']

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# axs.flat walks the grid row by row, so every column lands in the same panel
# as before: (0,0), (0,1), (1,0), (1,1).
for column, ax in zip(count_columns, axs.flat):
    sns.countplot(x=column, data=carPrice_dataset, ax=ax)
    ax.set_title(f'{column} counts')

# Keep titles and tick labels of neighbouring panels from overlapping.
fig.tight_layout()

In [None]:
# Year has already been bucketed into year_cat, so remove the raw column.
# Rebinding the name (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution is a no-op rather than a
# KeyError.
carPrice_dataset = carPrice_dataset.drop(columns='Year', errors='ignore')

In [None]:
# Every non-numeric column is treated as categorical. Testing for "not
# numeric" rather than dtype == 'object' also catches pandas' dedicated string
# dtype, which does not compare equal to 'object' and would otherwise leave
# text columns unencoded.
categorical_cols = [
    col
    for col in carPrice_dataset.columns
    if not pd.api.types.is_numeric_dtype(carPrice_dataset[col])
]

categorical_cols

In [None]:
# Columns with a numeric dtype. Using is_numeric_dtype instead of
# dtype != 'object' keeps text columns stored with pandas' dedicated string
# dtype out of this list.
# NOTE(review): computed on the full frame, so the target column is included
# here if it is numeric - confirm that is intended before using this list.
numerical_cols = [
    col
    for col in carPrice_dataset.columns
    if pd.api.types.is_numeric_dtype(carPrice_dataset[col])
]

numerical_cols

In [None]:
# Separate the regression target from the feature columns.
target_col = 'Selling_Price'
y = carPrice_dataset[target_col]
X = carPrice_dataset.drop(columns=[target_col])

X.head()

In [None]:
# One-hot encode the categorical columns, then swap them in for the raw
# categorical columns: remaining columns first, indicator columns after.
x_cat = pd.get_dummies(X[categorical_cols])

non_categorical = X.drop(columns=categorical_cols)
X = pd.concat([non_categorical, x_cat], axis='columns')

In [None]:
# Preview the first rows of the feature matrix.
X.head()

In [None]:
# Hold out 10% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

In [None]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics reach the fit.
sc = StandardScaler()
sc.fit(x_train)

x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [None]:
from sklearn.linear_model import RANSACRegressor, ARDRegression, HuberRegressor, LinearRegression,SGDRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, BaggingRegressor, RandomForestRegressor

# Candidate regressors to compare on the same train/test split.
# Every estimator that accepts random_state is seeded so repeated runs report
# the same scores; HuberRegressor, LinearRegression and SVR expose no such
# parameter.
MODEL_SEED = 0  # same seed value the train/test split uses

models = [
    RandomForestRegressor(random_state=MODEL_SEED),
    RANSACRegressor(random_state=MODEL_SEED),
    HuberRegressor(),
    LinearRegression(),
    SGDRegressor(random_state=MODEL_SEED),
    SVR(),
    DecisionTreeRegressor(random_state=MODEL_SEED),
    AdaBoostRegressor(random_state=MODEL_SEED),
    GradientBoostingRegressor(random_state=MODEL_SEED),
    BaggingRegressor(random_state=MODEL_SEED),
    RandomForestRegressor(n_estimators=10, random_state=MODEL_SEED),
]

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Fit every candidate on the training split and report its hold-out errors.
for regressor in models:
    print("\n********************************************")
    # The estimator repr includes its non-default parameters, which tells
    # apart list entries of the same class (e.g. the two random forests).
    print(f'Regressor: {regressor}')

    regressor.fit(x_train, y_train)

    pred = regressor.predict(x_test)
    # scikit-learn metrics take (y_true, y_pred), in that order.
    mse = mean_squared_error(y_test, pred)
    mae = mean_absolute_error(y_test, pred)
    print(f'Mean Squared Error: {mse}\tMean Absolute Error: {mae}\n')