In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing Libraries

In [None]:
import matplotlib.pyplot as plt              # Data Visualization
import seaborn as sns

In [None]:
from sklearn.model_selection import train_test_split     # For Train/Test Split

## Loading Data

In [None]:
# Load the training CSV of the Kaggle "mobile-price-classification" dataset into a DataFrame.
Dataset = pd.read_csv("/kaggle/input/mobile-price-classification/train.csv")

In [None]:
# Preview the first few rows to sanity-check the load.
Dataset.head()

In [None]:
# List the column names available in the loaded data.
Dataset.columns

In [None]:
# (number of rows, number of columns) of the loaded data.
Dataset.shape

## Analysing the Data

In [None]:
Dataset.info()                             # Column dtypes, non-null counts and memory usage

In [None]:
Dataset.isnull().sum()                      # Number of missing values in each column

In [None]:
# Summary statistics of the columns (count, mean, spread, quartiles).
Dataset.describe()

## Data Visualization

In [None]:
# Bar chart of how many samples fall into each price_range class
fig, ax = plt.subplots(figsize=(10, 4))
sns.countplot(data=Dataset, x='price_range', ax=ax)
ax.set_xlabel("Class Label")
ax.set_ylabel("Number of Samples")
plt.show()

So, the data is perfectly balanced: every price range class has the same number of samples.

In [None]:
# Pairwise correlation between all columns of Dataset (the features and the price_range target)
corrmat = Dataset.corr()

In [None]:
# Heatmap of the column-by-column correlation matrix
f, ax = plt.subplots(figsize=(9, 8))
sns.heatmap(
    corrmat,
    cmap="YlGnBu",
    linewidths=0.1,
    ax=ax,
)

In [None]:
# Signed correlation of every column with the price_range target
corrmat['price_range']

In [None]:
# Keep only the magnitude of each correlation with the target, so that strongly
# negative correlations rank as high as strongly positive ones.
corrmat['price_range'] = corrmat['price_range'].abs()

In [None]:
corrmat['price_range']                                        # Now holds absolute (non-negative) correlations

In [None]:
# Order the rows of the correlation matrix from weakest to strongest correlation with the target
new = corrmat.sort_values('price_range', ascending=True)

In [None]:
new['price_range']                                            # Absolute correlations with price_range, sorted ascending

In [None]:
# Columns kept for modelling: the predictors chosen for having an absolute correlation
# with price_range above 0.022 (threshold as stated by the author), followed by the
# price_range target itself, which is split off later.
features = [
    'ram',
    'battery_power',
    'px_width',
    'px_height',
    'int_memory',
    'sc_w',
    'pc',
    'touch_screen',
    'mobile_wt',
    'three_g',
    'sc_h',
    'price_range',
]

In [None]:
# Keep only the selected columns. This rebinds Dataset, so the full frame is no
# longer reachable under this name after the cell runs.
Dataset = Dataset.loc[:, features]

In [None]:
# Preview the reduced frame to confirm only the selected columns remain.
Dataset.head()

## Splitting Data and Targets

In [None]:
# Separate the labels from the predictors without mutating Dataset.
# Selecting/dropping (rather than Dataset.pop) leaves the frame intact, so this cell
# can be re-run safely; pop() removes the column in place and raises KeyError the
# second time the cell is executed.
Target = np.array(Dataset['price_range'])                  # Target: price_range labels
Data   = np.array(Dataset.drop(columns=['price_range']))   # Data: all remaining feature columns

In [None]:
# Print the feature matrix and the label vector, followed by their shapes.
print (Data)
print (Target)
print ("Shape of input Data is: ", Data.shape)
print ("Shape of Target is:     ", Target.shape)   # was mislabelled as "input Data"

## Creating Training and Testing Dataset

In [None]:
# Hold out 20% of the samples as a test set; the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(Data, Target, test_size=0.2, random_state=100)

In [None]:
# Report the dimensions of each array produced by the train/test split
for caption, split in (
    ("Shape of Train Data is:  ", X_train),
    ("Shape of Test  Data is:  ", X_test),
    ("Shape of Train Label is: ", Y_train),
    ("Shape of Test  Label is: ", Y_test),
):
    print (caption, split.shape)

## Implementing Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
from sklearn.metrics import accuracy_score 

In [None]:
# Sweep the tree's max_depth and record the test-split accuracy (in percent) for each value.
# NOTE(review): the depth is chosen by looking at the test split, so the accuracy reported
# for the chosen depth is optimistic; a validation split or cross-validation would avoid that.
acc = []
x_axis_DT = range(3,12)                      # Candidate max_depth values: 3..11
for depth in x_axis_DT:                      # Iterate the same range the plot uses, so they cannot drift apart
    DT = DecisionTreeClassifier(criterion = "gini",
                                random_state = 100,
                                max_depth=depth,
                                min_samples_leaf=5)
    DT.fit(X_train, Y_train)
    y_pred = DT.predict(X_test)
    accuracy = accuracy_score(Y_test,y_pred)*100
    acc.append(accuracy)
    print ("Accuracy for Decision Tree for max depth ",depth," is: ", accuracy)

In [None]:
# Line plot of test-split accuracy against the tree's maximum depth
dt_fig, dt_ax = plt.subplots(figsize=(10, 4))
dt_ax.plot(x_axis_DT, acc)
dt_ax.set_xlabel('Maximum Depth')
dt_ax.set_ylabel('Accuracy')
plt.show()

Choosing maximum depth = 6

## Implementing Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Sweep the number of trees in the forest and record the test-split accuracy (in percent).
# NOTE(review): n_estimators is chosen by looking at the test split, so the accuracy
# reported for the chosen value is optimistic.
acc_RF = []
x_axis_RF = range(5,31)                      # Candidate n_estimators values: 5..30
for n_trees in x_axis_RF:                    # Iterate the same range the plot uses
    RF = RandomForestClassifier(n_estimators = n_trees, random_state = 0)
    RF.fit(X_train, Y_train)
    y_pred = RF.predict(X_test)
    accuracy = accuracy_score(Y_test,y_pred)*100
    acc_RF.append(accuracy)
    # The swept parameter is n_estimators, not max depth, so the message says so.
    print ("Accuracy for Random Forest for n_estimators = ",n_trees," is: ", accuracy)

In [None]:
# Line plot of test-split accuracy against the number of trees in the forest
rf_fig, rf_ax = plt.subplots(figsize=(10, 4))
rf_ax.plot(x_axis_RF, acc_RF)
rf_ax.set_xlabel('Number of Estimators')
rf_ax.set_ylabel('Accuracy')
plt.show()

Choosing n_estimators = 26

## Implementing KNN Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier 

In [None]:
# Evaluate k-nearest-neighbours for k = 5..15 and keep the test-split accuracy (percent) of each
x_axis_KNN = range(5, 16)
acc_KNN = []
for k in x_axis_KNN:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, Y_train)
    y_pred = knn.predict(X_test)
    accuracy = 100 * accuracy_score(Y_test, y_pred)
    acc_KNN.append(accuracy)
    print ("Accuracy for KNN for k = ",k," is: ", accuracy)

In [None]:
# Line plot of test-split accuracy against the number of neighbours k
plt.subplots(figsize = (10, 4))
plt.plot(x_axis_KNN,acc_KNN)
plt.xlabel('Number of Neighbors (k)')        # x-axis is k for KNN, not a number of estimators
plt.ylabel('Accuracy')
plt.show()

Choosing k = 13

## Implementing Naive Bayes

In [None]:
from sklearn.naive_bayes import MultinomialNB

In [None]:
# Fit multinomial Naive Bayes on the training split only and score it on the held-out test split.
# Fitting on the full Data/Target would put the test rows into the training set (data leakage)
# and inflate the reported accuracy, making it incomparable with the other models.
NB = MultinomialNB()
NB.fit(X_train, Y_train)
y_pred = NB.predict(X_test)
accuracy = accuracy_score(Y_test,y_pred)*100
print ("Accuracy of Naive Bayes Classifier is: ", accuracy)

## Implementing Support Vector Machine (SVM)

In [None]:
from sklearn.svm import SVC   

In [None]:
# Train a linear-kernel support vector classifier on the training split and
# report its accuracy (in percent) on the held-out test split.
svc = SVC(kernel='linear')
svc.fit(X_train, Y_train)
y_pred = svc.predict(X_test)
accuracy = accuracy_score(Y_test,y_pred)*100
# The model here is an SVM; the message previously said "Naive Bayes".
print ("Accuracy of SVM Classifier is: ", accuracy)