#  **MOBILE PRICE CLASSIFICATION**


The dataset describes mobile phones and their price range. It has 21 attributes. We apply the following classification algorithms and compare their results:

- Decision tree
- Random forest
- Naive Bayes

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, entry)
    for folder, _, entries in os.walk('/kaggle/input')
    for entry in entries
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt
import warnings
from sklearn import model_selection
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import (FunctionTransformer , StandardScaler)
from sklearn.pipeline import Pipeline

In [None]:
%%bash
pip install seaborn

In [None]:
import seaborn as sns

In [None]:
# Load the training CSV of the mobile-price-classification dataset into a DataFrame.
dataset = pd.read_csv('/kaggle/input/mobile-price-classification/train.csv')


In [None]:
# Preview the first 10 rows.
dataset.head(10)

In [None]:
# Data type of each column.
dataset.dtypes

In [None]:
# Summary statistics of the DataFrame's columns.
dataset.describe()

In [None]:
# Number of rows for each distinct value of the 'blue' column.
dataset['blue'].value_counts() 

In [None]:
# (number of rows, number of columns)
dataset.shape

In [None]:
# Box plot of the DataFrame's columns drawn together on one set of axes.
dataset.boxplot()

In [None]:
# Every column name except the last one, as a plain Python list.
features = list(dataset.columns[:-1])

In [None]:
# Univariate plots: one 30-bin count histogram per feature, with the
# feature's skewness shown in the legend.
# sns.distplot is deprecated; sns.histplot draws the same count histogram.
for feat in features:
    skew = dataset[feat].skew()
    sns.histplot(dataset[feat], kde=False, bins=30, label='Skew = %.3f' % (skew))
    plt.legend(loc='best')
    plt.show()

In [None]:
# One figure per column (the last column is skipped): a box plot of that
# column's values grouped by the 'blue' column.
feature_names = dataset.columns
for column in feature_names[:-1]:
    plt.figure()
    sns.boxplot(x='blue', y=column, data=dataset)


In [None]:
# Multivariate plot - to examine correlation between features.
# pairplot creates its own figure, so a preceding plt.figure() would only
# leave an empty extra figure behind. `palette` is not passed because it has
# no effect when no `hue` variable is given.
sns.pairplot(dataset[features])
plt.show()

In [None]:
# Annotated heat map of the pairwise correlations between the feature columns.
corr = dataset[features].corr()
plt.figure(figsize=(10, 10))
sns.heatmap(
    corr,
    cmap='coolwarm',
    alpha=0.7,
    square=True,
    cbar=True,
    annot=True,
    fmt='.2f',
    annot_kws={'size': 10},
    xticklabels=features,
    yticklabels=features,
)
plt.show()

In [None]:
# Print the column dtypes and non-null counts, used here to check for null values.
dataset.info()

In [None]:
# Bar chart of how many rows take each value of the 'blue' column.
# The bars are row counts, so the y-axis is labelled as a count, and the
# title names the plotted column ('blue' is a feature, not the target).
dataset['blue'].value_counts().plot(kind='bar')
plt.title("Frequency of 'blue' values")
plt.xlabel("blue")
plt.ylabel("count")
plt.show()

# X & Y array

In [None]:
# Feature matrix: every column except the 'price_range' target.
X = dataset.drop(columns='price_range')

In [None]:
# Target vector: the 'price_range' column.
y = dataset.loc[:, 'price_range']

# Splitting the data

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 33% of the rows as a test set; random_state is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=101)

# Creating & Training Decision Tree Model

In [None]:
# Decision tree classifier with default hyperparameters.
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier()

In [None]:
# Fit the decision tree on the training split.
dtree.fit(X_train,y_train)

In [None]:
# Score the fitted tree on the held-out test split.
dtree.score(X_test,y_test)

In [None]:
# Names of the 20 feature columns, listed explicitly.
feature_names = [
    'battery_power',
    'blue',
    'clock_speed',
    'dual_sim',
    'fc',
    'four_g',
    'int_memory',
    'm_dep',
    'mobile_wt',
    'n_cores',
    'pc',
    'px_height',
    'px_width',
    'ram',
    'sc_h',
    'sc_w',
    'talk_time',
    'three_g',
    'touch_screen',
    'wifi',
]


In [None]:
# Count plot of the 'battery_power' values.
# The Series is passed explicitly as `x`: newer seaborn releases treat the
# first positional argument as `data`, so relying on position is ambiguous.
sns.countplot(x=dataset['battery_power'])
plt.show()

In [None]:
# Data transformation / feature scaling.
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test split are used for scaling.
sc_X = StandardScaler()
sc_X.fit(X_train)
X_train, X_test = sc_X.transform(X_train), sc_X.transform(X_test)

In [None]:
#confusion matrix 
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Plot a confusion matrix as an annotated heat map and show it.

    Parameters
    ----------
    cm : array-like, 2-D
        Confusion matrix. Rows are labelled as true classes and columns as
        predicted classes.
    classes : sequence
        Tick labels, one per class, in the same order as the rows/columns
        of `cm`.
    normalize : bool, default False
        If True, every row is divided by its row sum before plotting, so
        both the colours and the cell text show per-row fractions. Rows
        whose sum is zero are shown as zeros instead of NaN.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the image.

    Returns
    -------
    None
        The plot is drawn on the current figure and displayed with
        `plt.show()`.
    """
    cm = np.asarray(cm)

    if normalize:
        # Normalise BEFORE drawing so the image colours, the colour bar and
        # the cell annotations all describe the same values.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is; fractions are rounded to 2 decimals.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()


In [None]:
# Compare the classifiers with 10-fold cross-validation on the full data and
# with a confusion matrix / accuracy score on the held-out test split.
seed = 1
scoring = 'accuracy'

# prepare models (the tree-based models are seeded so reruns are repeatable)
models = [
    ("DecisionTree", DecisionTreeClassifier(random_state=seed)),
    ("RandomForest", RandomForestClassifier(random_state=seed)),
    ("NaiveBayes", GaussianNB()),
]

# Class labels are taken from the target itself instead of being hard-coded,
# so the confusion-matrix ticks always match the classes actually present.
class_labels = np.unique(y)

# The same shuffled folds are used for every model so the scores are
# comparable. `random_state` only takes effect together with shuffle=True.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)

# evaluate each model in turn
results = []
names = []
for name, model in models:
    cv_results = model_selection.cross_val_score(model, X, y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    cross_val_result = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print("Print the Corss Validation Result {}".format(name))
    print(cross_val_result)

    # Hold-out evaluation on the train/test split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Pin the label order so matrix rows/columns line up with the tick labels.
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)
    plot_confusion_matrix(cm=cm, classes=class_labels)
    acc_score = accuracy_score(y_test, y_pred)
    print("Accuracy Score of {} is {}".format(name, acc_score))
