## IMPORTS

In [39]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import *
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

## DATA ANALYTICS

In [41]:
data = pd.read_csv('mobile_price_classification.csv')
data.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [42]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  2000 non-null   int64  
 1   blue           2000 non-null   int64  
 2   clock_speed    2000 non-null   float64
 3   dual_sim       2000 non-null   int64  
 4   fc             2000 non-null   int64  
 5   four_g         2000 non-null   int64  
 6   int_memory     2000 non-null   int64  
 7   m_dep          2000 non-null   float64
 8   mobile_wt      2000 non-null   int64  
 9   n_cores        1841 non-null   object 
 10  pc             2000 non-null   int64  
 11  px_height      2000 non-null   int64  
 12  px_width       2000 non-null   int64  
 13  ram            2000 non-null   int64  
 14  sc_h           2000 non-null   int64  
 15  sc_w           2000 non-null   int64  
 16  talk_time      2000 non-null   int64  
 17  three_g        2000 non-null   int64  
 18  touch_sc

In [43]:
data.drop(columns='n_cores',inplace=True)

## TRAIN TEST SPLIT

In [44]:
x = data.iloc[:,:-1].values
y = data.iloc[:,-1].values

In [45]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,random_state=9)

## NORMALISATION

In [46]:
STD = StandardScaler()
x_train = STD.fit_transform(x_train)
x_test = STD.fit_transform(x_test)

## LOGISTIC REGRESSION

In [47]:
LR = LogisticRegression()
LR.fit(x_train,y_train)


LogisticRegression()

In [48]:
y_pred =LR.predict(x_test)
print(y_pred)

[1 0 1 3 2 2 0 0 2 3 1 0 1 0 2 1 2 2 0 3 3 1 0 2 2 2 1 3 0 1 1 1 0 1 0 1 3
 1 3 3 3 3 0 0 0 2 0 3 2 3 0 2 3 3 1 0 2 2 3 3 0 1 0 0 1 1 2 3 0 0 1 3 3 3
 0 1 3 2 1 0 2 0 0 1 3 0 2 0 3 0 1 1 0 3 2 3 3 1 3 3 1 1 1 0 0 3 2 3 1 2 3
 2 0 1 3 0 3 2 1 0 2 2 2 3 2 2 1 2 1 3 3 1 2 1 3 0 2 0 2 1 3 3 3 2 3 0 3 0
 2 2 0 3 3 1 0 1 2 3 3 3 2 1 0 3 3 2 2 1 0 1 1 2 2 1 3 1 3 3 2 2 0 0 0 2 2
 1 0 3 3 2 1 1 0 1 3 2 1 1 3 3 1 2 1 2 0 3 2 3 0 1 0 0 3 3 0 1 2 0 2 0 0 3
 3 2 0 3 0 0 0 3 1 1 3 1 2 2 1 1 3 1 0 2 0 0 1 1 3 3 0 1 1 3 1 3 1 2 3 2 1
 1 2 0 1 0 2 1 1 0 0 0 1 2 1 0 1 1 0 2 3 2 1 0 0 2 3 3 1 1 3 1 0 0 3 3 1 1
 1 0 0 3 0 2 0 3 0 2 0 1 3 0 0 1 2 3 1 2 0 1 3 1 2 3 2 3 1 1 2 2 1 2 2 3 1
 2 3 3 0 2 2 0 0 2 0 1 3 2 2 1 3 0 3 2 0 0 2 0 0 1 0 1 3 0 1 2 2 1 3 2 2 3
 2 2 1 0 3 1 0 3 1 0 3 2 0 0 2 1 2 2 2 3 1 0 0 2 1 3 3 1 1 2 2 0 3 0 1 3 0
 0 3 3 2 1 2 0 1 0 3 2 1 2 3 0 1 2 3 3 2 1 1 3 1 1 2 2 3 3 3 2 3 0 3 3 1 2
 1 2 0 2 0 2 3 1 3 0 1 3 1 3 0 3 0 3 1 0 0 2 2 1 0 1 1 0 3 1 0 2 0 1 3 3 3
 0 2 1 0 3 0 2 1 2 1 3 0 

In [49]:
LR_acc = accuracy_score(y_pred,y_test)*100
print(f"Accuracy Score for Logistic Regression: {LR_acc}")

Accuracy Score for Logistic Regression: 93.83333333333333


In [50]:
params = {'C':np.logspace(-5,5,7),
         'penalty':['l1','l2']}
rnd_search = RandomizedSearchCV(LR,params,cv=9)
rnd_search.fit(x_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

RandomizedSearchCV(cv=9, estimator=LogisticRegression(),
                   param_distributions={'C': array([1.00000000e-05, 4.64158883e-04, 2.15443469e-02, 1.00000000e+00,
       4.64158883e+01, 2.15443469e+03, 1.00000000e+05]),
                                        'penalty': ['l1', 'l2']})

In [51]:
print("Best Params:",rnd_search.best_params_)

Best Params: {'penalty': 'l2', 'C': 46.41588833612782}


## KNN

In [52]:
c = KNeighborsClassifier(n_neighbors=5)
c.fit(x_train,y_train)

KNeighborsClassifier()

In [53]:
y_pred2=c.predict(x_test)

In [54]:
print(classification_report(y_test,y_pred2))
print(confusion_matrix(y_test,y_pred2))

              precision    recall  f1-score   support

           0       0.59      0.65      0.62       152
           1       0.34      0.42      0.38       148
           2       0.37      0.36      0.36       149
           3       0.71      0.50      0.59       151

    accuracy                           0.48       600
   macro avg       0.50      0.48      0.49       600
weighted avg       0.51      0.48      0.49       600

[[99 49  3  1]
 [44 62 36  6]
 [23 49 54 23]
 [ 2 20 54 75]]


In [55]:
KN_acc=accuracy_score(y_pred2,y_test)*100
print(f"Accuracy Score for KNN Classifier : {KN_acc}")


Accuracy Score for KNN Classifier : 48.333333333333336


In [56]:
x_train_knn = data.iloc[:,:-1].values
y_train_knn = data.iloc[:,-1].values
x_tr,x_te,y_tr,y_te = train_test_split(x_train_knn,y_train_knn,random_state=9,test_size=0.3)

In [57]:
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(x_tr,y_tr)
y_pred = knn.predict(x_te)
accuracy_score(y_pred,y_te)

0.9033333333333333

In [58]:
kval_range = list(range(1,31))
params = dict(n_neighbors = kval_range)
grid = GridSearchCV(knn,params,cv=10,scoring='accuracy')
grid.fit(x_tr,y_tr)

GridSearchCV(cv=10, estimator=KNeighborsClassifier(n_neighbors=1),
             param_grid={'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                         23, 24, 25, 26, 27, 28, 29, 30]},
             scoring='accuracy')

In [59]:
print("Best Parameters :", grid.best_params_)

Best Parameters : {'n_neighbors': 11}


## NAIVE BAYES 

In [31]:
params_grid = {'var_smoothing':np.logspace(0,-9,num=100)}
grid = GridSearchCV(estimator=GaussianNB(),param_grid=params_grid,cv=10)
grid.fit(x_train,y_train)

GridSearchCV(cv=10, estimator=GaussianNB(),
             param_grid={'var_smoothing': array([1.00000000e+00, 8.11130831e-01, 6.57933225e-01, 5.33669923e-01,
       4.32876128e-01, 3.51119173e-01, 2.84803587e-01, 2.31012970e-01,
       1.87381742e-01, 1.51991108e-01, 1.23284674e-01, 1.00000000e-01,
       8.11130831e-02, 6.57933225e-02, 5.33669923e-02, 4.32876128e-02,
       3.51119173e-02, 2.84803587e-02, 2.3...
       1.23284674e-07, 1.00000000e-07, 8.11130831e-08, 6.57933225e-08,
       5.33669923e-08, 4.32876128e-08, 3.51119173e-08, 2.84803587e-08,
       2.31012970e-08, 1.87381742e-08, 1.51991108e-08, 1.23284674e-08,
       1.00000000e-08, 8.11130831e-09, 6.57933225e-09, 5.33669923e-09,
       4.32876128e-09, 3.51119173e-09, 2.84803587e-09, 2.31012970e-09,
       1.87381742e-09, 1.51991108e-09, 1.23284674e-09, 1.00000000e-09])})

In [32]:
print("Best Parameters : ",grid.best_params_)
print("Best Accuracy :",grid.best_score_)

Best Parameters :  {'var_smoothing': 0.1}
Best Accuracy : 0.8214285714285714


## DECISION TREE 

In [33]:
max_depth_range = np.arange(1, 21)
min_samples_leaf_range = [1, 5, 10, 20, 50, 100]

In [34]:
tree_param = [{'criterion': ['entropy', 'gini'], 'max_depth': max_depth_range},
              {'min_samples_leaf': min_samples_leaf_range}]

## Grid Search CV

In [64]:
grid = GridSearchCV(estimator=DecisionTreeClassifier(), param_grid=tree_param, cv=10)
grid.fit(x_train,y_train)

GridSearchCV(cv=10, estimator=DecisionTreeClassifier(),
             param_grid=[{'criterion': ['entropy', 'gini'],
                          'max_depth': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20])},
                         {'min_samples_leaf': [1, 5, 10, 20, 50, 100]}])

In [61]:
print(f"Best Parameters : {grid.best_params_}")
print(f"Best Accuracy : {grid.best_score_}")

Best Parameters : {'criterion': 'entropy', 'max_depth': 12}
Best Accuracy : 0.8528571428571429


## Randomized Search CV

In [62]:
r_grid = RandomizedSearchCV(estimator=DecisionTreeClassifier(), param_distributions=tree_param, cv=10)
r_grid.fit(x_train,y_train)

RandomizedSearchCV(cv=10, estimator=DecisionTreeClassifier(),
                   param_distributions=[{'criterion': ['entropy', 'gini'],
                                         'max_depth': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20])},
                                        {'min_samples_leaf': [1, 5, 10, 20, 50,
                                                              100]}])

In [63]:
print(f"Best Parameters : {r_grid.best_params_}")
print(f"Best Accuracy : {r_grid.best_score_}")

Best Parameters : {'max_depth': 19, 'criterion': 'entropy'}
Best Accuracy : 0.8442857142857143
