In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem at /content/drive so that the
# dataset stored in Drive can be opened by an ordinary file path.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Path of the Toyota Corolla CSV inside the mounted Google Drive folder.
file_path = '/content/drive/My Drive/ToyotaCorolla.csv'

In [None]:
# Read the Toyota Corolla listings from the CSV referenced by `file_path`.
data = pd.read_csv(file_path)

# Target is the `Price` column; every remaining column is kept as a predictor.
y = data.loc[:, 'Price']
X = data.drop('Price', axis=1)

# 60/40 train/validation partition with a fixed seed so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

# Report the (rows, columns) of the predictors and the length of the target
# for each partition.
for label, features_part, target_part in (
    ("Training set shape:", X_train, y_train),
    ("Validation set shape:", X_val, y_val),
):
    print(label, features_part.shape, target_part.shape)


Training set shape: (861, 38) (861,)
Validation set shape: (575, 38) (575,)


A. Regression tree: predicting Price from selected car specifications

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Predictors for the regression tree: a hand-picked set of car specifications.
# `Fuel_Type` is the only column that is dummy-encoded (first level dropped);
# the remaining columns are passed through as they are.
X = pd.get_dummies(
    data[[
        'Age_08_04', 'KM', 'Fuel_Type', 'HP', 'Automatic', 'Doors', 'Quarterly_Tax',
        'Mfr_Guarantee', 'Guarantee_Period', 'Airco', 'Automatic_airco', 'CD_Player',
        'Powered_Windows', 'Sport_Model', 'Tow_Bar',
    ]],
    columns=['Fuel_Type'],
    drop_first=True,
)
y = data['Price']

# 60/40 train/validation partition, seeded with 1.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.4, random_state=1
)

# Fit a regression tree with default growth settings on the training partition.
# `fit` returns the estimator itself, so construction and fitting are chained.
tree_reg = DecisionTreeRegressor(random_state=1).fit(X_train, y_train)

print("Regression Tree Model:", tree_reg)


Regression Tree Model: DecisionTreeRegressor(random_state=1)


In [None]:
# Pair every model input column with the importance score the fitted tree
# assigned to it, ordered from most to least important.
feature_names = X.columns
importance = tree_reg.feature_importances_

feature_importance_df = pd.DataFrame(
    {'Feature': feature_names, 'Importance': importance}
).sort_values(by='Importance', ascending=False)

# Show only the four highest-ranked specifications.
print("Top three or four most important car specifications:")
print(feature_importance_df.head(4))


Top three or four most important car specifications:
           Feature  Importance
0        Age_08_04    0.844867
2               HP    0.053789
1               KM    0.049601
9  Automatic_airco    0.013358


In [None]:
from sklearn.metrics import mean_squared_error

# Score the fitted tree on the partition it was trained on ...
y_train_pred = tree_reg.predict(X_train)
mse_train = mean_squared_error(y_true=y_train, y_pred=y_train_pred)

# ... and on the held-out validation partition. A large gap between the two
# errors suggests the tree has memorised the training data.
y_val_pred = tree_reg.predict(X_val)
mse_val = mean_squared_error(y_true=y_val, y_pred=y_val_pred)

print("Mean Squared Error (Training Set):", mse_train)
print("Mean Squared Error (Validation Set):", mse_val)


Mean Squared Error (Training Set): 0.0
Mean Squared Error (Validation Set): 2227068.1339130434


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Hyperparameter grid used to constrain tree growth: limits on depth, on the
# number of samples needed to split a node, and on the number of samples per leaf.
param_grid = {
    'max_depth': [2, 4, 6, 8, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Keep the unfitted template estimator under its own name. Rebinding `tree_reg`
# here would replace the already-fitted full tree with an unfitted one
# (GridSearchCV fits clones, never the object passed in), so re-running the
# feature-importance or MSE cells afterwards would fail with NotFittedError.
base_tree_reg = DecisionTreeRegressor(random_state=1)

# 5-fold cross-validation on the training partition only. scikit-learn
# maximises the score, hence the negated mean squared error.
grid_search = GridSearchCV(base_tree_reg, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# With the default refit=True, best_estimator_ is the best-scoring
# configuration refitted on the whole of X_train.
best_tree_reg = grid_search.best_estimator_

y_pred_val = best_tree_reg.predict(X_val)

# NOTE: `mse_val` now holds the tuned tree's validation error and overwrites
# the value computed for the unconstrained tree.
mse_val = mean_squared_error(y_val, y_pred_val)

print("Mean Squared Error on the Validation Set:", mse_val)


Mean Squared Error on the Validation Set: 1473298.779229101


B. Classification tree: predicting Bin_Price from the same car specifications

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

X = data[['Age_08_04', 'KM', 'Fuel_Type', 'HP', 'Automatic', 'Doors', 'Quarterly_Tax',
          'Mfr_Guarantee', 'Guarantee_Period', 'Airco', 'Automatic_airco', 'CD_Player',
          'Powered_Windows', 'Sport_Model', 'Tow_Bar']]
y = data['Bin_Price']