# MOBILE PRICE CLASSIFICATION

## Context

Bob has started his own mobile company. He wants to give tough competition to big companies like Apple, Samsung, etc.

He does not know how to estimate the price of the mobiles his company makes. In this competitive mobile phone market, you cannot simply assume things. To solve this problem, he collects sales data on mobile phones from various companies.

Bob wants to find some relation between the features of a mobile phone (e.g., RAM, internal memory, etc.) and its selling price. But he is not so good at machine learning, so he needs your help to solve this problem.

In this problem you do not have to predict the actual price, but a price range indicating how high the price is.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [None]:
# Kaggle input paths of the training CSV and the test CSV read in the next cell.
train_url = "/kaggle/input/mobile-price-classification/train.csv"
test_url  = "/kaggle/input/mobile-price-classification/test.csv"

In [None]:
# Load both CSVs; the test file's "id" column is discarded right after loading.
df = pd.read_csv(train_url)
df_test = pd.read_csv(test_url).drop(columns=["id"])

In [None]:
# Preview the first rows of the training data.
df.head()

In [None]:
# (rows, columns) of the training frame.
df.shape

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Summary statistics of the training columns.
df.describe()

In [None]:
# Annotated heatmap of the pairwise correlations between the columns of df,
# with each cell printed as a whole-number percentage.
corr_matrix = df.corr()
plt.figure(figsize=(14, 7))
sns.heatmap(corr_matrix, annot=True, fmt=".0%", cmap="twilight_shifted_r")
plt.title("Correlation Map", fontsize=16)
plt.show()

In [None]:
# Six feature columns plus the target, reused by the parallel-coordinates and pair plots.
cor_df = df[["ram","pc","fc","battery_power","px_height","px_width","price_range"]]

In [None]:
# Parallel-coordinates view of cor_df, with each line coloured by its price_range.
fig = px.parallel_coordinates(
    cor_df,
    color="price_range",
    color_continuous_scale=px.colors.diverging.Tealrose,
    color_continuous_midpoint=2,
)
fig.show()

In [None]:
# Pairwise scatter/diagonal plots of the cor_df columns, coloured by price_range.
sns.pairplot(
    data=cor_df,
    hue="price_range",
    palette="twilight_shifted_r",
)
plt.show()

In [None]:
# Distributions of the two screen-resolution columns, followed by their joint 2D histogram.
plt.figure(figsize=(15, 5))
for position, column in enumerate(["px_width", "px_height"], start=1):
    plt.subplot(1, 3, position)
    plt.hist(df[column], bins=15, color="indigo", edgecolor="black", linewidth=3, density=True)
    plt.title(f"Histogram of {column}", fontsize=15)
    plt.xlabel(column)
    # density=True normalises the bars to a probability density, so the y axis
    # shows density values rather than raw counts.
    plt.ylabel("Density")
plt.subplot(1, 3, 3)
plt.hist2d(x=df["px_width"], y=df["px_height"])
plt.title("2D Histogram of px_width-px_height", fontsize=15)
plt.xlabel("px_width")
plt.ylabel("px_height")
# Keep the three panels' titles and axis labels from overlapping.
plt.tight_layout()
plt.show()

In [None]:
# Boxen plots of four numeric features, one panel each, split by price_range.
# Each entry is (column plotted on the y axis, label shown for that axis).
boxen_panels = [
    ("battery_power", "Battery Power"),
    ("ram", "Ram"),
    ("int_memory", "Memory"),
    ("talk_time", "Talk Time"),
]
plt.figure(figsize=(14, 7))
for position, (column, label) in enumerate(boxen_panels, start=1):
    plt.subplot(2, 2, position)
    sns.boxenplot(data=df, y=column, x="price_range", palette="twilight_shifted_r")
    plt.title("Boxenplot", fontsize=16)
    # Label each axis with the variable actually drawn on it:
    # price_range runs along x, the feature along y.
    plt.xlabel("Price Range")
    plt.ylabel(label)
plt.tight_layout()
plt.show()

In [None]:
# Count plots of four 0/1 feature columns, each split by a second 0/1 column.
# Each entry is (x column, hue column, x-axis label).
count_panels = [
    ("blue", "dual_sim", "Bluetooth"),
    ("four_g", "dual_sim", "4G"),
    ("three_g", "dual_sim", "3G"),
    ("touch_screen", "wifi", "Touch Screen"),
]
plt.figure(figsize=(14, 7))
for position, (column, hue_column, label) in enumerate(count_panels, start=1):
    plt.subplot(2, 2, position)
    sns.countplot(data=df, x=column, hue=hue_column, palette="twilight_shifted_r")
    plt.title("Count Plot", fontsize=16)
    plt.xlabel(label)
    plt.ylabel("Count")
    # Replace the 0/1 tick values with readable labels.
    plt.xticks([0, 1], ["No", "Yes"])
plt.tight_layout()
plt.show()

In [None]:
# Bin RAM into three equal-width ranges and count the phones of each price range per bin.
df_cut = pd.DataFrame()
df_cut["Ram"] = pd.cut(df["ram"], 3, labels=["low", "medium", "high"])
df_cut["Price Range"] = df["price_range"]

plt.figure(figsize=(14, 7))
sns.countplot(data=df_cut, x="Ram", hue="Price Range", palette="twilight_shifted_r")
plt.title("Count Plot", fontsize=16)
plt.xlabel("Ram")
plt.ylabel("Count")
plt.show()

In [None]:
# Bin battery power into three equal-width ranges and count the phones of each price range per bin.
df_cut["Battery Power"] = pd.cut(df["battery_power"], 3, labels=["low", "medium", "high"])

plt.figure(figsize=(14, 7))
sns.countplot(data=df_cut, x="Battery Power", hue="Price Range", palette="twilight_shifted_r")
plt.title("Count Plot", fontsize=16)
plt.xlabel("Battery Power")
plt.ylabel("Count")
plt.show()

In [None]:
# Bin talk time into three equal-width ranges and count the phones of each price range per bin.
df_cut["Talk Time"] = pd.cut(df["talk_time"], 3, labels=["low", "medium", "high"])

plt.figure(figsize=(14, 7))
sns.countplot(data=df_cut, x="Talk Time", hue="Price Range", palette="twilight_shifted_r")
plt.title("Count Plot", fontsize=16)
plt.xlabel("Talk Time")
plt.ylabel("Count")
plt.show()

In [None]:
# Bin internal memory (int_memory) into three equal-width ranges and count the
# phones of each price range per bin.
df_cut["Internal Memory"] = pd.cut(df["int_memory"], 3, labels=["low", "medium", "high"])

plt.figure(figsize=(14, 7))
sns.countplot(data=df_cut, x="Internal Memory", hue="Price Range", palette="twilight_shifted_r")
plt.title("Count Plot", fontsize=16)
plt.xlabel("Internal Memory")
plt.ylabel("Count")
plt.show()

In [None]:
# Horizontal bar chart of the mean price range for every (Ram bin, Talk Time bin) pair.
# A single figure is opened; the Series plot draws into it.
plt.figure(figsize=(14, 7))
# observed=False keeps every bin combination in the result, including pairs with no
# rows, independent of the installed pandas version's default.
mean_price = df_cut.groupby(["Ram", "Talk Time"], observed=False)["Price Range"].mean()
mean_price.plot(kind="barh", color="lightgreen", edgecolor="green", linewidth=3.5)
plt.xlabel("Avg of Price Range")
plt.grid()
plt.show()

In [None]:
from sklearn.model_selection import train_test_split
# Separate the target from the features, then hold out 25% of the rows for evaluation.
# The fixed random_state makes the split reproducible.
y = df["price_range"]
X = df.drop(columns="price_range")
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=100,
)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score, confusion_matrix
try:
    # These helpers were removed in scikit-learn 1.2; importing them unconditionally
    # makes this cell fail on current releases. They stay available where they exist.
    from sklearn.metrics import plot_roc_curve, plot_confusion_matrix
except ImportError:
    pass


def model(model):
    """Print a fitted classifier's held-out accuracy and plot its confusion matrix.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``
        Evaluated on the module-level ``X_test`` / ``y_test`` split.

    Returns
    -------
    None
        The class name and accuracy are printed and the matrix is shown as a figure.

    Notes
    -----
    The parameter shadows this function's own name inside the body; it is kept
    unchanged so that existing keyword callers keep working.
    """
    print(type(model).__name__)
    pred = model.predict(X_test)
    acs = accuracy_score(y_test, pred)
    print("Accuracy Score             :", acs)

    # Build the matrix from the predictions already computed above instead of
    # predicting a second time; the label set is every class seen in either
    # the true or the predicted values, in sorted order.
    labels = np.union1d(y_test, pred)
    cm = confusion_matrix(y_test, pred, labels=labels)
    ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels).plot(cmap="cividis")
    plt.title("Confusion Matrix")
    plt.show()

In [None]:
from sklearn.model_selection import cross_val_score
def CrossValidationScore(model_list):
    """Return a styled table of mean 5-fold cross-validation scores.

    Every estimator in ``model_list`` is scored with ``cross_val_score`` on the
    module-level ``X`` and ``y``. The result has one row per estimator with its
    class name ("Model Name") and mean score ("CVS"), shaded with a green gradient.
    """
    model_name = [type(estimator).__name__ for estimator in model_list]
    mean_cross_val_score = [
        cross_val_score(estimator, X, y, cv=5).mean() for estimator in model_list
    ]

    cvs = pd.DataFrame({"Model Name": model_name, "CVS": mean_cross_val_score})
    return cvs.style.background_gradient("Greens")

In [None]:
from sklearn.neighbors import KNeighborsClassifier
def best_param(lim):
    """Return a styled table of KNN accuracy for each ``n_neighbors`` value in ``lim``.

    For every value a ``KNeighborsClassifier`` is fitted on the module-level
    ``X_train`` / ``y_train`` and scored on ``X_test`` / ``y_test``. The result has
    the columns "Param" and "Acc", shaded with a green gradient.

    NOTE(review): the accuracies are measured on the same held-out split that
    ``model`` later reports on, so a k picked from this table is tuned on the
    test data — consider selecting it by cross-validation on the training split.
    """
    def _holdout_accuracy(k):
        # Fit on the training split and score on the held-out split.
        fitted = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
        return accuracy_score(y_test, fitted.predict(X_test))

    acc = [_holdout_accuracy(k) for k in lim]
    result = pd.DataFrame({"Param": lim, "Acc": acc})
    return result.style.background_gradient("Greens")

In [None]:
# Compare KNN accuracy for n_neighbors = 1 .. 19.
best_param(np.arange(1,20))

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest neighbours with 13 neighbours: fit on the training split, then evaluate.
knn = KNeighborsClassifier(n_neighbors=13)
knn.fit(X_train, y_train)
print("Model Installed!", "Please Wait for Results..", sep="\n")
model(knn)

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes with default settings: fit on the training split, then evaluate.
gnb = GaussianNB()
gnb.fit(X_train, y_train)
print("Model Installed!", "Please Wait for Results..", sep="\n")
model(gnb)

In [None]:
from sklearn.svm import SVC
# Support-vector classifier with default settings: fit on the training split, then evaluate.
svc = SVC()
svc.fit(X_train, y_train)
print("Model Installed!", "Please Wait for Results..", sep="\n")
model(svc)

In [None]:
from sklearn.neural_network import MLPClassifier
# Multi-layer perceptron with three hidden layers (64, 128, 64) and up to 1000
# iterations: fit on the training split, then evaluate.
ann = MLPClassifier(hidden_layer_sizes=(64, 128, 64), max_iter=1000)
ann.fit(X_train, y_train)
print("Model Installed!", "Please Wait for Results..", sep="\n")
model(ann)

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
# Extra-trees ensemble with default settings: fit on the training split, then evaluate.
etc = ExtraTreesClassifier()
etc.fit(X_train, y_train)
print("Model Installed!", "Please Wait for Results..", sep="\n")
model(etc)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with default settings: fit on the training split, then evaluate.
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)
print("Model Installed!", "Please Wait for Results..", sep="\n")
model(rfc)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Gradient boosting with default settings: fit on the training split, then evaluate.
gbc = GradientBoostingClassifier()
gbc.fit(X_train, y_train)
print("Model Installed!", "Please Wait for Results..", sep="\n")
model(gbc)

In [None]:
from xgboost import XGBClassifier
# XGBoost classifier with default settings: fit on the training split, then evaluate.
xgb = XGBClassifier()
xgb.fit(X_train, y_train)
print("Model Installed!", "Please Wait for Results..", sep="\n")
model(xgb)

In [None]:
# Cross-validate all eight classifiers; the last expression renders the styled score table.
model_list = [knn,gnb,svc,ann,etc,rfc,gbc,xgb]
CrossValidationScore(model_list)

In [None]:
# Refit an SVC on all labelled rows and predict a price range for every test row.
# NOTE(review): SVC is presumably picked from the cross-validation table — confirm.
svm = SVC().fit(X, y)
# Predict from exactly the training feature columns, in training order. This keeps the
# cell re-runnable: once "Predict" has been added to df_test, passing the whole frame
# would hand the model an extra column it was not fitted on.
pred_y = svm.predict(df_test[X.columns])
df_test["Predict"] = pred_y

In [None]:
# First few predicted price ranges.
df_test["Predict"].head()

In [None]:
# Last few predicted price ranges.
df_test["Predict"].tail()