In [70]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder, MinMaxScaler


from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from xgboost import XGBRegressor


from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

In [30]:
df = pd.read_csv('Computer_Data.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,price,speed,hd,ram,screen,cd,multi,premium,ads,trend
0,1,1499,25,80,4,14,no,no,yes,94,1
1,2,1795,33,85,2,14,no,no,yes,94,1
2,3,1595,25,170,4,15,no,no,yes,94,1
3,4,1849,25,170,8,14,no,no,no,94,1
4,5,3295,33,340,16,14,no,no,yes,94,1


### Checking shapde of the data :

In [58]:
df.shape

(6259, 10)

There are :  
Rows : 6259  
Columns : 10

### Checking for Null values :

In [59]:
df.isnull().sum()

price      0
speed      0
hd         0
ram        0
screen     0
cd         0
multi      0
premium    0
ads        0
trend      0
dtype: int64

There is no null values in the daatset

#### Dropping the unnecessary column from the data:

In [31]:
df.drop('Unnamed: 0', axis = 'columns', inplace = True)

In [32]:
df.head()

Unnamed: 0,price,speed,hd,ram,screen,cd,multi,premium,ads,trend
0,1499,25,80,4,14,no,no,yes,94,1
1,1795,33,85,2,14,no,no,yes,94,1
2,1595,25,170,4,15,no,no,yes,94,1
3,1849,25,170,8,14,no,no,no,94,1
4,3295,33,340,16,14,no,no,yes,94,1


#### Splitting the data into dependent and independent variables:

In [33]:
x = df.drop('price', axis = 'columns')
y = df['price']

#### Seperating the categorical and Numerical Columns :

In [34]:
cat_col = x.select_dtypes(include = 'object')
num_col = x.select_dtypes(exclude = 'object')

In [35]:
cat_col.head()

Unnamed: 0,cd,multi,premium
0,no,no,yes
1,no,no,yes
2,no,no,yes
3,no,no,no
4,no,no,yes


In [36]:
num_col.head()

Unnamed: 0,speed,hd,ram,screen,ads,trend
0,25,80,4,14,94,1
1,33,85,2,14,94,1
2,25,170,4,15,94,1
3,25,170,8,14,94,1
4,33,340,16,14,94,1


#### OneHotEncoding for the Categorical Columns:

In [42]:
ohe = OneHotEncoder(drop = 'first')

In [43]:
data1 = ohe.fit_transform(cat_col)

In [44]:
data1.toarray()

array([[0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       ...,
       [1., 0., 1.],
       [1., 0., 1.],
       [1., 0., 1.]])

In [45]:
data1 = pd.DataFrame(data1.toarray(), columns = ohe.get_feature_names_out(cat_col.columns))

In [46]:
data1.head()

Unnamed: 0,cd_yes,multi_yes,premium_yes
0,0.0,0.0,1.0
1,0.0,0.0,1.0
2,0.0,0.0,1.0
3,0.0,0.0,0.0
4,0.0,0.0,1.0


#### MinMaxScaler for the Numerical columns:

In [47]:
scaller = MinMaxScaler()

In [49]:
data2 = scaller.fit_transform(num_col)

In [52]:
data2 = pd.DataFrame(data2, columns = scaller.get_feature_names_out(num_col.columns))

In [53]:
data2.head()

Unnamed: 0,speed,hd,ram,screen,ads,trend
0,0.0,0.0,0.066667,0.0,0.183333,0.0
1,0.106667,0.002475,0.0,0.0,0.183333,0.0
2,0.0,0.044554,0.066667,0.333333,0.183333,0.0
3,0.0,0.044554,0.2,0.0,0.183333,0.0
4,0.106667,0.128713,0.466667,0.0,0.183333,0.0


In [56]:
X = pd.concat([data1, data2], axis = 'columns')

In [57]:
X.head()

Unnamed: 0,cd_yes,multi_yes,premium_yes,speed,hd,ram,screen,ads,trend
0,0.0,0.0,1.0,0.0,0.0,0.066667,0.0,0.183333,0.0
1,0.0,0.0,1.0,0.106667,0.002475,0.0,0.0,0.183333,0.0
2,0.0,0.0,1.0,0.0,0.044554,0.066667,0.333333,0.183333,0.0
3,0.0,0.0,0.0,0.0,0.044554,0.2,0.0,0.183333,0.0
4,0.0,0.0,1.0,0.106667,0.128713,0.466667,0.0,0.183333,0.0


In [69]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

In [71]:
def eval_model(true, predicted):
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mean_squared_error(true , predicted))
    mae = mean_absolute_error(true, predicted)
    r2_scr = r2_score(true, predicted)
    return mse, rmse, mae, r2_scr 

In [83]:
models = {
    'Linear Regression':LinearRegression(),
    'Decision Tree Regressor': DecisionTreeRegressor(),
    'Random Forest Regressor': RandomForestRegressor(),
    'Ada Boost Regressor': AdaBoostRegressor(),
    'XGB Regressor': XGBRegressor()
}

mdl = []
scr = []

for i in range(len(list(models.keys()))):
    model = list(models.values())[i]
    model.fit(X_train,y_train)

    # Recording prediction of the model
    pred = model.predict(X_test)

    # Capturing all evalution score of Model:
    mse, rmse, mae, r2_scr = eval_model(y_test, pred)

    print('-------------------------------------------------------------------------\n \n')
    mdl.append(list(models.keys())[i])
    scr.append(rmse)
    print('{} Model has :'.format(list(models.keys())[i]))
    print('Root Mean Squared Error : {}'.format(rmse))
    print('Mean Squared Error : {}'.format(mse))
    print('Mean Absolute Error : {}'.format(mae))
    print('R2 Score : {}'.format(r2_scr))

    print('-------------------------------------------------------------------------\n \n')


-------------------------------------------------------------------------
 

Linear Regression Model has :
Root Mean Squared Error : 278.4351667647062
Mean Squared Error : 77526.14209128976
Mean Absolute Error : 205.1612139787958
R2 Score : 0.7740079554773142
-------------------------------------------------------------------------
 

-------------------------------------------------------------------------
 

Decision Tree Regressor Model has :
Root Mean Squared Error : 185.92236953926243
Mean Squared Error : 34567.127495094064
Mean Absolute Error : 132.66213525026623
R2 Score : 0.8992353339768426
-------------------------------------------------------------------------
 

-------------------------------------------------------------------------
 

Random Forest Regressor Model has :
Root Mean Squared Error : 163.41168895942621
Mean Squared Error : 26703.380088572256
Mean Absolute Error : 121.20267031382295
R2 Score : 0.9221584964878468
------------------------------------------------

In [84]:
mdl

['Linear Regression',
 'Decision Tree Regressor',
 'Random Forest Regressor',
 'Ada Boost Regressor',
 'XGB Regressor']

In [85]:
scr

[278.4351667647062,
 185.92236953926243,
 163.41168895942621,
 315.8783724102738,
 144.5861547125903]

In [90]:
a = list(zip(mdl, scr))

In [99]:
model_report = pd.DataFrame(a, columns = ['Model','Root Mean Squared Error']).sort_values(by = 'Root Mean Squared Error',ascending=True)

In [100]:
model_report

Unnamed: 0,Model,Root Mean Squared Error
4,XGB Regressor,144.586155
2,Random Forest Regressor,163.411689
1,Decision Tree Regressor,185.92237
0,Linear Regression,278.435167
3,Ada Boost Regressor,315.878372


Based On RMSE :  
    XGB Regressor model has best fit.  
    Random Forest Regression model also has good fit.  
    Ada Boost Regressor model has the worst fit.