**CLASSIFYING MOBILE PRICE USING VARIOUS CLASSIFICATION MODELS**

**DATASET DESCRIPTION**
* battery_power: Total energy the battery can store on a single charge, measured in mAh
* blue: Has bluetooth or not
* clock_speed: speed at which microprocessor executes instructions
* dual_sim: Has dual sim support or not
* fc: Front Camera mega pixels
* four_g: Has 4G or not
* int_memory: Internal Memory in Gigabytes
* m_dep: Mobile Depth in cm
* mobile_wt: Weight of mobile phone
* n_cores: Number of cores of processor
* pc: Primary Camera mega pixels
* px_height: Pixel Resolution Height
* px_width: Pixel Resolution Width
* ram: Random Access Memory in Megabytes
* sc_h: Screen Height of mobile in cm
* sc_w: Screen Width of mobile in cm
* talk_time: longest time that a single battery charge will last when you are talking on the phone
* three_g: Has 3G or not
* touch_screen: Has touch screen or not
* wifi: Has wifi or not
* price_range: This is the target variable with value of 0(low cost), 1(medium cost), 2(high cost) and 3(very high cost).

In [None]:
#importing libraries 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#importing dataset
#use the fully qualified option name: on recent pandas versions the bare 'max_columns'
#pattern matches more than one option (the styler has one too) and raises OptionError
pd.set_option('display.max_columns',None)
ds = pd.read_csv('../input/mobile-price-classification/train.csv')
#preview the first rows with every column visible
ds.head()

In [None]:
#shape is reported as (rows, columns); the 21 columns are the 20 features plus the price_range target
ds.shape

In [None]:
#column dtypes and non-null counts for every column
ds.info()

In [None]:
#per-column flag: True if the column contains at least one missing value
ds.isna().any()

**NO NULL VALUES**

In [None]:
#list the column labels of the dataset
ds.columns

In [None]:
#count plots for the yes/no feature columns and for the target, laid out on a 2x3 grid
col = ['blue','dual_sim','four_g','three_g','touch_screen','price_range']
fig, axes = plt.subplots(2, 3, figsize=(12,10))
#pair every column with its own axes instead of tracking a manual subplot counter
for ax, col_name in zip(axes.ravel(), col):
    sns.countplot(x = col_name, data = ds, ax = ax)
    ax.set_xlabel(col_name)
#the layout only needs to be fixed once, after all panels are drawn
fig.tight_layout()

In [None]:
#mean ram per price class
#relplot is a figure-level function that builds its own figure, so a preceding plt.figure()
#would only leave an empty extra figure behind; the 12x10 size is requested through
#height (inches) and aspect (width / height) instead
sns.relplot(x = 'price_range',y = 'ram',data = ds,kind = 'line',height = 10,aspect = 1.2)

**MORE RAM MEANS A HIGHER PRICE**

In [None]:
#mean battery_power per price class
#relplot is a figure-level function that builds its own figure, so a preceding plt.figure()
#would only leave an empty extra figure behind; the 12x10 size is requested through
#height (inches) and aspect (width / height) instead
sns.relplot(x = 'price_range',y = 'battery_power',data = ds,kind = 'line',height = 10,aspect = 1.2)

In [None]:
#mean int_memory per price class
#relplot is a figure-level function that builds its own figure, so a preceding plt.figure()
#would only leave an empty extra figure behind; the 12x10 size is requested through
#height (inches) and aspect (width / height) instead
sns.relplot(x = 'price_range',y = 'int_memory',data = ds,kind = 'line',height = 10,aspect = 1.2)

In [None]:
#ram of every phone, grouped by price class (jitter spreads overlapping points apart)
strip_fig, strip_ax = plt.subplots(figsize = (10,8))
sns.stripplot(data = ds, x = 'price_range', y = 'ram', jitter = True, ax = strip_ax)

In [None]:
#pairwise correlation of all columns drawn as a heatmap
fig, heat_ax = plt.subplots(figsize = (15,12))
corr_matrix = ds.corr()
sns.heatmap(corr_matrix, ax = heat_ax)

**NO SIGNIFICANT CORRELATION AMONG MOST FEATURES (RAM VS PRICE_RANGE, NOTED ABOVE, IS AN EXCEPTION)**

In [None]:
#the price class is the target; every remaining column is used as a feature
y = ds['price_range']
x = ds.drop(columns = ['price_range'])

In [None]:
#hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
from sklearn.model_selection import train_test_split
split_parts = train_test_split(x, y, test_size = 0.2, random_state = 0)
x_train, x_test, y_train, y_test = split_parts

**APPLYING MODELS**

In [None]:
#Logistic Regression
#training model
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

#the features are on very different scales (ram in megabytes, battery_power in mAh,
#0/1 flags), which typically stops the default solver from converging within its default
#iteration budget; standardise inside a pipeline so the scaler is fitted on the training
#split only, and allow more iterations
lr = make_pipeline(StandardScaler(), LogisticRegression(max_iter = 1000))
lr.fit(x_train,y_train)

#getting confusion matrix
y_pred = lr.predict(x_test)
cm = confusion_matrix(y_test,y_pred)
print('confusion matrix:\n',cm)

#checking accuracy (kept in lra for the model comparison)
lra = accuracy_score(y_test,y_pred)
print('accuracy score = ',lra)

In [None]:
#KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

#training model: 5 nearest neighbours, minkowski metric with p = 2
knn = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2).fit(x_train, y_train)

#confusion matrix on the held-out split
y_pred = knn.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print('confusion matrix:\n',cm)

#accuracy (kept in knna for the model comparison)
knna = accuracy_score(y_test, y_pred)
print('accuracy score = ',knna)

In [None]:
#SVM
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

#training model: support vector classifier with a linear kernel
svc = SVC(kernel = 'linear', random_state = 0).fit(x_train, y_train)

#confusion matrix on the held-out split
y_pred = svc.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print('confusion matrix:\n',cm)

#accuracy (kept in sva for the model comparison)
sva = accuracy_score(y_test, y_pred)
print('accuracy score = ',sva)

In [None]:
#Kernel SVM
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

#training model: same classifier as before but with the rbf kernel (rebinds svc)
svc = SVC(kernel = 'rbf', random_state = 0).fit(x_train, y_train)

#confusion matrix on the held-out split
y_pred = svc.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print('confusion matrix:\n',cm)

#accuracy (kept in sva2 for the model comparison)
sva2 = accuracy_score(y_test, y_pred)
print('accuracy score = ',sva2)

In [None]:
#applying hyperparameter tuning on SVM
from sklearn.model_selection import GridSearchCV
#two sub-grids: linear kernel over C, and rbf kernel over C x gamma
parameters = [{'C': [0.25, 0.5, 0.75, 1], 'kernel': ['linear']},
              {'C': [0.25, 0.5, 0.75, 1], 'kernel': ['rbf'], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}]
grid_search = GridSearchCV(estimator = svc,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10,
                           n_jobs = -1)
grid_search.fit(x_train, y_train)
best_parameters = grid_search.best_params_
#best_score_ is the mean 10-fold cross-validated accuracy on the training split; it is
#reported here but is not comparable with the held-out test accuracies of the other models
print("Best Accuracy: {:.2f} %".format(grid_search.best_score_*100))
print("Best Parameters:", best_parameters)

#score the refitted best estimator on the same test split as every other model, so that
#best_accuracy can be compared with them like for like
best_accuracy = grid_search.score(x_test, y_test)
print("Test Accuracy: {:.2f} %".format(best_accuracy*100))

In [None]:
#Naive Bayes
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix

#training model: gaussian naive bayes with default settings
nb = GaussianNB().fit(x_train, y_train)

#confusion matrix on the held-out split
y_pred = nb.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print('confusion matrix:\n',cm)

#accuracy (kept in nba for the model comparison)
nba = accuracy_score(y_test, y_pred)
print('accuracy score = ',nba)

In [None]:
#Decision Tree
#training model
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

#seed the tree like the SVC and random forest cells do, so that re-running the notebook
#reproduces the same tree and the same accuracy
dt = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
dt.fit(x_train,y_train)

#getting confusion matrix
y_pred = dt.predict(x_test)
cm = confusion_matrix(y_test,y_pred)
print('confusion matrix:\n',cm)

#checking accuracy (kept in dta for the model comparison)
dta = accuracy_score(y_test,y_pred)
print('accuracy score = ',dta)

In [None]:
#Random Forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

#training model: 140 entropy-split trees with a fixed seed
rf = RandomForestClassifier(n_estimators = 140, criterion = 'entropy', random_state = 0).fit(x_train, y_train)

#confusion matrix on the held-out split
y_pred = rf.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print('confusion matrix:\n',cm)

#accuracy (kept in rfa for the model comparison)
rfa = accuracy_score(y_test, y_pred)
print('accuracy score = ',rfa)

In [None]:
#XGB Classifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

#training model: default booster settings, label encoder switched off
xc = XGBClassifier(use_label_encoder = False)
xc.fit(x_train, y_train)

#confusion matrix on the held-out split
y_pred = xc.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print('confusion matrix:\n',cm)

#accuracy (kept in xca for the model comparison)
xca = accuracy_score(y_test, y_pred)
print('accuracy score = ',xca)

In [None]:
#Cat Boost
#training model
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

#verbose = 0 silences the per-iteration training log, which would otherwise flood the
#cell output and bury the confusion matrix and accuracy printed below
cb = CatBoostClassifier(verbose = 0)
cb.fit(x_train,y_train)

#getting confusion matrix
y_pred = cb.predict(x_test)
cm = confusion_matrix(y_test,y_pred)
print('confusion matrix:\n',cm)

#checking accuracy (kept in cba for the model comparison)
cba = accuracy_score(y_test,y_pred)
print('accuracy score = ',cba)

In [None]:
#comparing accuracies
#model label -> accuracy, in the order the models were trained (tuned SVM last)
model_scores = {
    'Logistic Regression': lra,
    'knn': knna,
    'svm': sva,
    'Kernel Svm': sva2,
    'Naive Bayes': nba,
    'Decision Tree': dta,
    'Random Forest': rfa,
    'XG Boost': xca,
    'Cat Boost': cba,
    'SVM (Hyperparameter Tuning)': best_accuracy,
}
name = list(model_scores.keys())
ac = list(model_scores.values())

#horizontal bars: one bar per model, bar length = accuracy
plt.figure(figsize = (8,7))
bar_ax = sns.barplot(x = ac, y = name, palette = 'pastel')
bar_ax.set_title("Plotting the Model Accuracies", fontsize=16, fontweight="bold")

**BEST ACCURACY WAS OBTAINED FROM SVM**

In [None]:
#one (model label, accuracy) record per trained model
score_records = [
    ('Logistic Regression', lra),
    ('knn', knna),
    ('svm', sva),
    ('Kernel Svm', sva2),
    ('Naive Bayes', nba),
    ('Decision Tree', dta),
    ('Random Forest', rfa),
    ('XG Boost', xca),
    ('Cat Boost', cba),
    ('SVM (Hyperparameter Tuning)', best_accuracy),
]
df = pd.DataFrame(score_records, columns = ['models', 'Accuracy Score'])

In [None]:
#rank the models from highest to lowest accuracy
df.sort_values('Accuracy Score', ascending = False)