In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

import tensorflow.keras as keras
from sklearn.decomposition import PCA
from sklearn.decomposition import LatentDirichletAllocation as LDA
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tqdm import tqdm
from keras.layers import Dropout

In [None]:
# Read the Kaggle mobile-price train and test CSV files into two DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/mobile-price-classification/train.csv',
        '/kaggle/input/mobile-price-classification/test.csv',
    )
)

In [None]:
# Preview the first rows of the training frame.
train.head()


In [None]:
# (rows, columns) of the training frame.
train.shape

In [None]:
# Column labels available in the training frame.
train.columns

In [None]:
# Per-column dtype and non-null counts.
train.info()

In [None]:
# Summary statistics for the numeric columns.
train.describe()

# **Checking for duplicate rows**

In [None]:
# Boolean Series flagging rows that repeat an earlier row (pandas default keep='first').
duplicates=train.duplicated()
duplicates

In [None]:
# Number of rows flagged as duplicates.
duplicates.sum()

**Hence there are no duplicate values**

# Checking for missing values

In [None]:
# Element-wise missing-value mask with the same shape as train.
train.isnull()


In [None]:
# Missing-value count per column.
train.isnull().sum()


**Hence there are no missing values**

In [None]:
# Columns this notebook treats as categorical (six feature flags plus the target).
cat_data = train.loc[:, [
    'blue', 'dual_sim', 'four_g', 'three_g',
    'touch_screen', 'wifi', 'price_range',
]]
cat_data

In [None]:
# Everything that is not in the categorical view is treated as numeric.
num_data = train.drop(
    columns=[
        'blue', 'dual_sim', 'four_g',
        'three_g', 'touch_screen', 'wifi', 'price_range',
    ]
)
num_data

In [None]:
# One histogram figure per numeric column, titled with the column name.
for col_name, col_values in num_data.items():
    plt.hist(col_values)
    plt.title(col_name)
    plt.show()

In [None]:
# One bar chart per categorical column showing how often each level occurs.
for cat_column in cat_data.columns:
    # Count once and turn the result into a tidy two-column frame so x and y
    # can be passed by keyword; seaborn >= 0.12 no longer accepts positional x/y.
    level_counts = (
        cat_data[cat_column]
        .value_counts()
        .rename_axis(cat_column)
        .reset_index(name='count')
    )
    sns.barplot(data=level_counts, x=cat_column, y='count')
    plt.title(cat_column)
    plt.show()

Checking for outliers

In [None]:
# Box plot of every column of train on a 3-wide subplot grid, to eyeball outliers.
plt.figure(figsize = (20, 10))
n_grid_cols = 3
# Enough rows for all columns (7 rows when there are 21 columns, as before).
n_grid_rows = (len(train.columns) + n_grid_cols - 1) // n_grid_cols

for panel, column in enumerate(train.columns, start=1):
    plt.subplot(n_grid_rows, n_grid_cols, panel)
    # Pass the vector as x explicitly: seaborn >= 0.12 reads the first
    # positional argument as `data`, which changes how the plot is drawn.
    sns.boxplot(x=train[column])
plt.tight_layout()

In [None]:
# Pairwise correlation matrix of train, drawn as a square heatmap capped at 0.8.
corrmat = train.corr()
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(corrmat, square=True, vmax=0.8, ax=ax)

In [None]:
# Colour-graded correlation table rounded to two decimals.
# Styler.set_precision was removed in pandas 2.0; format(precision=...) replaces it.
corrmat.style.background_gradient(cmap="viridis").format(precision=2)

In [None]:
# battery_power values for each price_range class.
sns.catplot(data=train, x="price_range", y="battery_power")



In [None]:
# ram values for each price_range class.
sns.catplot(data=train, x="price_range", y="ram")


In [None]:
# ram by price_range, split by each flag column in turn (one panel per flag).
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
cat = ['blue', 'dual_sim', 'four_g', 'three_g', 'touch_screen', 'wifi']
# axes.flat walks the 2x3 grid row by row, matching the order of `cat`.
for panel, flag in zip(axes.flat, cat):
    sns.boxplot(ax=panel, x="price_range", y="ram", data=train, hue=flag)

In [None]:
# Distribution of eight selected features for each price_range class.
fig, axes = plt.subplots(2, 4, figsize=(18, 10))
imp = ['battery_power', 'px_height', 'px_width', 'ram', 'fc', 'pc', 'int_memory', 'mobile_wt']
# axes.flat walks the 2x4 grid row by row, matching the order of `imp`.
for panel, feature in zip(axes.flat, imp):
    sns.boxplot(ax=panel, x="price_range", y=feature, data=train)

In [None]:
# Scatter-plot matrix across the columns of train.
# NOTE(review): one panel is drawn per column pair, so this cell can be slow on the full frame.
g=sns.PairGrid(data=train)
g.map(plt.scatter)

In [None]:
# Pair plot restricted to a handful of features, coloured by price_range.
g = sns.pairplot(
    data=train[['ram', 'battery_power', 'px_height', 'px_width', 'pc', 'fc', 'price_range']],
    hue='price_range',
    palette='gist_earth_r',
)

In [None]:
# Density histogram with a KDE overlay for each numeric column, one panel per column.
fig, ax = plt.subplots(5, 3, figsize=(45, 45))
for i, j in zip(num_data, ax.flatten()):
    # sns.distplot is deprecated; this is the histplot equivalent given in
    # seaborn's migration notes (density-scaled bars, KDE extended past the data).
    sns.histplot(x=train[i], kde=True, stat="density", kde_kws=dict(cut=3), ax=j)
plt.show()

In [None]:
# Box plot for each numeric column, one panel per column on a 4x4 grid.
fig, ax = plt.subplots(4,4,figsize=(40,40))
for i, j in zip(num_data,ax.flatten()):
    # Pass the vector as x explicitly: seaborn >= 0.12 reads the first
    # positional argument as `data` rather than `x`.
    sns.boxplot(x=train[i],ax=j)
plt.show()


**Let's use point plots to show the relationship between the numerical features and price range**

In [None]:
# Point plot of ram for each price_range class.
fig = plt.figure(figsize=(10, 5))
sns.pointplot(data=train, x="price_range", y="ram")

In [None]:
# Point plot of battery_power for each price_range class.
fig = plt.figure(figsize=(10, 5))
sns.pointplot(data=train, x="price_range", y="battery_power")

In [None]:
# Point plot of px_height for each price_range class.
fig = plt.figure(figsize=(10, 5))
sns.pointplot(data=train, x="price_range", y="px_height")

In [None]:
# Point plot of px_width for each price_range class.
fig = plt.figure(figsize=(10, 5))
sns.pointplot(data=train, x="price_range", y="px_width")

In [None]:
# Side-by-side pie charts of the value frequencies of n_cores and pc.
pie, ax = plt.subplots(1, 2, figsize=(15, 15))
for panel, feature in zip(ax, ('n_cores', 'pc')):
    train[feature].value_counts().plot.pie(autopct='%1.2f%%', ax=panel)
    panel.set_title(feature)

plt.show()


In [None]:
# Share of phones with / without 3G and 4G support.
labels = ["Supported",'Not supported']
f,ax=plt.subplots(1,2,figsize=(15,15))
for panel, feature, title in zip(ax, ('three_g', 'four_g'), ('three_g ', ' four_g ')):
    # value_counts() orders by frequency, so the fixed `labels` list would be
    # swapped whenever 0 is the more common value. Pin the order to [1, 0] so
    # it always lines up with the labels (assumes 1 means "supported").
    flag_counts = train[feature].value_counts().reindex([1, 0], fill_value=0)
    flag_counts.plot.pie(autopct='%1.2f%%', ax=panel, labels=labels)
    panel.set_title(title)
plt.show()

In [None]:
# Share of phones with / without a touch screen and dual-SIM support.
labels = ["Supported",'Not supported']
f,ax=plt.subplots(1,2,figsize=(15,15))
for panel, feature, title in zip(ax, ('touch_screen', 'dual_sim'), ('touch_screen ', ' dual_sim ')):
    # value_counts() orders by frequency, so the fixed `labels` list would be
    # swapped whenever 0 is the more common value. Pin the order to [1, 0] so
    # it always lines up with the labels (assumes 1 means "supported").
    flag_counts = train[feature].value_counts().reindex([1, 0], fill_value=0)
    flag_counts.plot.pie(autopct='%1.2f%%', ax=panel, labels=labels)
    panel.set_title(title)
plt.show()

In [None]:
# Share of phones with / without Wi-Fi and Bluetooth support.
labels = ["supported",'Not supported']
f,ax=plt.subplots(1,2,figsize=(12,12))
for panel, feature, title in zip(ax, ('wifi', 'blue'), ('wifi ', ' bluetooth ')):
    # value_counts() orders by frequency, so the fixed `labels` list would be
    # swapped whenever 0 is the more common value. Pin the order to [1, 0] so
    # it always lines up with the labels (assumes 1 means "supported").
    flag_counts = train[feature].value_counts().reindex([1, 0], fill_value=0)
    flag_counts.plot.pie(autopct='%1.2f%%', ax=panel, labels=labels)
    panel.set_title(title)
plt.show()

In [None]:
# Line plot of px_width against price_range; the title names the plotted column.
sns.lineplot(x='price_range', y='px_width', data=train).set_title('the range of price based on px_width',fontsize=12);
# Render the figure (the bare `plt.figure` reference did nothing but print a function repr).
plt.show()

In [None]:
# Line plot of px_height against price_range; the title names the plotted column.
sns.lineplot(x='price_range', y='px_height', data=train).set_title('the range of price based on px_height',fontsize=12);
# Render the figure (the bare `plt.figure` reference did nothing but print a function repr).
plt.show()

In [None]:
# Line plot of battery_power against price_range.
sns.lineplot(x='price_range', y='battery_power', data=train).set_title('the range of price based on battery_power',fontsize=12);
# Render the figure (the bare `plt.figure` reference did nothing but print a function repr).
plt.show()

In [None]:
# Line plot of mobile_wt against price_range; the title names the plotted column.
sns.lineplot(x='price_range', y='mobile_wt', data=train).set_title('the range of price based on mobile_wt',fontsize=12);
# Render the figure (the bare `plt.figure` reference did nothing but print a function repr).
plt.show()

In [None]:
# Line plot of ram against price_range; the title names the plotted column.
sns.lineplot(x='price_range', y='ram', data=train).set_title('the range of price based on ram',fontsize=12);
# Render the figure (the bare `plt.figure` reference did nothing but print a function repr).
plt.show()

In [None]:
# Line plot of int_memory against price_range; the title names the plotted column.
sns.lineplot(x='price_range', y='int_memory', data=train).set_title('the range of price based on int_memory',fontsize=12);
# Render the figure (the bare `plt.figure` reference did nothing but print a function repr).
plt.show()

In [None]:
# Line plot of fc against price_range; the title names the plotted column.
sns.lineplot(x='price_range', y='fc', data=train).set_title('the range of price based on fc',fontsize=12);
# Render the figure (the bare `plt.figure` reference did nothing but print a function repr).
plt.show()

In [None]:
# Line plot of pc against price_range; the title names the plotted column.
sns.lineplot(x='price_range', y='pc', data=train).set_title('the range of price based on pc',fontsize=12);
# Render the figure (the bare `plt.figure` reference did nothing but print a function repr).
plt.show()

In [None]:
# Target is price_range; every other column of train is a feature.
Y = train['price_range']
X = train.drop(columns=['price_range'])

In [None]:
# Hold out 25% of the rows for evaluation; the seed is fixed so the split is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=52,
)

**Part 2 - Evaluation**


Model 1 - ANN (metric: accuracy)

In [None]:
# Fully connected classifier: 20 inputs -> 16 -> 16 -> 8 -> 4-way softmax.
model = Sequential([
    Dense(units=16, activation='relu', input_shape=(20,)),
    Dense(units=16, activation='relu'),
    Dense(units=8, activation='relu'),
    Dense(units=4, activation='softmax'),
])
# Sparse categorical cross-entropy takes integer class labels directly.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train the ANN for 300 epochs and keep the per-epoch history.
# NOTE(review): the held-out split (X_test, Y_test) is also used as validation data here,
# so the validation curve and the final accuracy are measured on the same rows.
history = model.fit(X_train, Y_train, epochs=300, verbose=1, validation_data=(X_test, Y_test))

# Per-row softmax outputs for the held-out split (one score per output unit).
y_pred_seq =model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Predicted class = index of the largest softmax output in each row.
# Stored under its own name so the `test` DataFrame loaded from test.csv
# is not overwritten by a list of predictions.
y_pred_ann = np.argmax(y_pred_seq, axis=1)
# accuracy of the model
acc = accuracy_score(Y_test, y_pred_ann)
print("Accuracy of ANN  = " + str(acc*100))

Model 2 - MLPClassifier (metric: accuracy)


In [None]:
from sklearn import metrics
from sklearn import preprocessing
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Three hidden layers of 70 units each, trained with Adam and a fixed seed.
mlp = MLPClassifier(
    solver='adam',
    alpha=1e-5,
    hidden_layer_sizes=(70, 70, 70),
    random_state=1,
)
# Standardise the features inside the pipeline so the scaler is fitted on the training split only.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('mlp', mlp),
])
pipe.fit(X_train, Y_train)
y_pred_MLP = pipe.predict(X_test)
mlp_accuracy = metrics.accuracy_score(Y_test, y_pred_MLP)
print("Accuracy for MLPClassifier:", mlp_accuracy*100)

Model 3 - RNN (LSTM) (metric: accuracy)


In [None]:
# Build, compile and train an Embedding -> bidirectional LSTM -> dense classifier.
# This rebinds `model`, replacing the ANN defined earlier.
import tensorflow
from tensorflow.keras.layers import LSTM
# NOTE(review): this imports from standalone `keras` while the model below uses `tensorflow.keras`;
# the names imported on these two lines are not referenced in this cell.
from keras.layers import Dense, Activation, Flatten
model = tensorflow.keras.Sequential([
    # Embedding table of 10,000,000 rows x 16 columns over inputs of length 20.
    # NOTE(review): the inputs are the raw feature columns of X_train, which an Embedding layer
    # would treat as integer token ids — presumably not intended; confirm.
    tensorflow.keras.layers.Embedding(10000000, 16, input_length=20),
    # NOTE(review): input_shape=(1500,20) sits on a non-first layer and does not match the
    # (20, 16) sequence the Embedding above emits — looks unused; confirm and remove.
    tensorflow.keras.layers.Bidirectional(tensorflow.keras.layers.LSTM(32,input_shape=(1500,20),recurrent_dropout=0.2,return_sequences=True)),
    # return_sequences=True keeps every timestep, so flatten before the dense head.
    tensorflow.keras.layers.Flatten() ,
    tensorflow.keras.layers.Dense(24, activation='relu'),
    # Four output units with softmax, paired with the sparse categorical loss below.
    tensorflow.keras.layers.Dense(4, activation='softmax')
])
model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])


# Five epochs, validated on the same held-out split that is later used for the accuracy score.
history_2 = model.fit(X_train, Y_train, epochs=5, batch_size=32,verbose=1, validation_data=(X_test, Y_test))

In [None]:
from sklearn.metrics import accuracy_score

# Per-row softmax outputs of the LSTM model for the held-out split.
y_pred_LSTM = model.predict(X_test)
# Score the LSTM's own predictions. Using `y_pred_seq` (the ANN output) here would
# report the ANN accuracy under the LSTM label. A dedicated name also avoids
# overwriting the `test` DataFrame loaded from test.csv.
y_pred_lstm_labels = np.argmax(y_pred_LSTM, axis=1)
# accuracy of the model
acc = accuracy_score(Y_test, y_pred_lstm_labels)
print("Accuracy of LSTM model is = " + str(acc*100))

- Model 4 - RandomForestClassifier
- Model 5 - LightGBM Classifier
- Model 6 - XGBClassifier
- Model 7 - Support Vector Machine

In [None]:
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgb


from xgboost import XGBClassifier

In [None]:
# (rows, columns) of the training split.
X_train.shape

In [None]:
# Fit three tree-ensemble classifiers on the training split and predict the held-out split.

# LightGBM with library defaults.
lgb_clf = lgb.LGBMClassifier()
lgb_clf.fit(X_train, Y_train)
y_pred_lgb = lgb_clf.predict(X_test)

# Random forest; seeded so the reported score is reproducible across runs
# (same seed as the train/test split).
RFG_model = RandomForestClassifier(random_state=52)
RFG_model.fit(X_train, Y_train)
y_pred_RFG = RFG_model.predict(X_test)

# XGBoost: `verbosity=0` replaces the removed `silent=True`, and `n_jobs`
# replaces the deprecated `nthread` alias; all other settings are unchanged.
XGB_model = XGBClassifier(
    colsample_bylevel=0.9,
    colsample_bytree=0.8,
    gamma=0.99,
    max_depth=5,
    min_child_weight=1,
    n_estimators=10,
    n_jobs=4,
    random_state=2,
    verbosity=0,
)
XGB_model.fit(X_train, Y_train)
y_pred_XGB = XGB_model.predict(X_test)

In [None]:
from sklearn import svm

# Linear-kernel support vector classifier; fit() returns the estimator itself.
svm_clf = svm.SVC(kernel='linear').fit(X_train, Y_train)
y_predsvm = svm_clf.predict(X_test)

In [None]:
from sklearn import metrics

# Accuracy, precision and recall on the held-out split for each classical model.
# (display name, predictions) pairs, in the order they are reported.
model_predictions = [
    ("support-vector machines", y_predsvm),
    ("LightGBM Classifier", y_pred_lgb),
    ("Random Forest Classifier", y_pred_RFG),
    ("XGBClassifier", y_pred_XGB),
]

for model_name, model_pred in model_predictions:
    print("Accuracy for " + model_name + ":",
          metrics.accuracy_score(Y_test, model_pred))
    # `pos_label` is dropped: scikit-learn ignores it unless average='binary'.
    # NOTE(review): for single-label multiclass targets, micro-averaged precision
    # and recall equal accuracy; consider average='macro' for a distinct figure.
    print("Precision for " + model_name + ":",
          metrics.precision_score(Y_test, model_pred, average='micro'))
    print("Recall for " + model_name + ":",
          metrics.recall_score(Y_test, model_pred, average='micro'))