In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns

In [2]:
df = pd.read_csv("laptop.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Brand             1000 non-null   object 
 1   Processor_Speed   1000 non-null   float64
 2   RAM_Size          1000 non-null   int64  
 3   Storage_Capacity  1000 non-null   int64  
 4   Screen_Size       1000 non-null   float64
 5   Weight            1000 non-null   float64
 6   Price             1000 non-null   float64
dtypes: float64(4), int64(2), object(1)
memory usage: 54.8+ KB


### **Data Preparation**
1. Handle categorical columns
2. Split data into Features & Target
3. Split data into train & test

In [3]:
from sklearn.preprocessing import OneHotEncoder

# 1. Create Onehot Object and set the param
onehot = OneHotEncoder(sparse_output=False, drop='first')

# 2. fit and transform the brand column and save it in variable
# result : array of one hot
encoded_onehot = onehot.fit_transform(df[['Brand']]) # use 2 [] because it single column and will read it in series

# 3. Create column from one hot object
col_onehot = [col for col in onehot.get_feature_names_out(['Brand'])]

# Join the name column and the array in DF
df_onehot = pd.DataFrame(data=encoded_onehot, columns=col_onehot)

# Join the df and the onehot
df = pd.concat([df, df_onehot], axis=1)

# drop brand column
df = df.drop(columns='Brand')

In [4]:
#from sklearn.preprocessing import LabelEncoder

#labelcoder = LabelEncoder()

#df['Brand'] = labelcoder.fit_transform(df['Brand'])

#df.head()

In [5]:
X = df[['Processor_Speed','RAM_Size','Storage_Capacity']]
y = df['Price']

In [6]:
from sklearn.model_selection import train_test_split 

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3)

### **Model Building**
1. Import Model
2. Hyperparameter tuning using gridsearch

In [7]:
from sklearn.ensemble import RandomForestRegressor

rfmodel = RandomForestRegressor()


In [8]:
# 1. import gridsearchcv
from sklearn.model_selection import GridSearchCV

# 2 Built option for hyperparameter tuning
parameter_gridfr = { 'max_depth': [2,5,10,15,20,25,50,70,100,120,150],
                'max_features': ["auto","log2"],
                'n_estimators': [2,4,6,8,10,12,25,30,50,80,100]}

# 3.make variable to combine lib gridsearchcv, tuning specifitaction and model
gridfr = GridSearchCV(rfmodel, parameter_gridfr)

In [9]:
#4. Fit the model into data
gridfr.fit(X_train, y_train)

605 fits failed out of a total of 1210.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
605 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\LENOVO\anakonda\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\LENOVO\anakonda\Lib\site-packages\sklearn\base.py", line 1466, in wrapper
    estimator._validate_params()
  File "c:\Users\LENOVO\anakonda\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\LENOVO\anakonda\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParam

In [10]:
# 5. See the best parameters

gridfr.best_params_

{'max_depth': 25, 'max_features': 'log2', 'n_estimators': 80}

In [11]:
# 6. Save the y_predict into variable

y_pred = gridfr.predict(X_test)

In [12]:
# 7. Evaluate the model

from sklearn.metrics import mean_absolute_error

mean_absolute_error(y_test, y_pred)

348.6762192062144

### **Export Model**

In [13]:
import joblib

joblib.dump(gridfr, "rf_model.pkl")

['rf_model.pkl']