In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns ; sns.set()

**Context**

Bob has started his own mobile company. He wants to give a tough fight to big companies like Apple, Samsung, etc.

He does not know how to estimate the price of the mobiles his company creates. In this competitive mobile phone market you cannot simply assume things. To solve this problem he collects sales data for mobile phones from various companies.

Bob wants to find out the relation between the features of a mobile phone (e.g., RAM, internal memory) and its selling price. But he is not so good at Machine Learning, so he needs your help to solve this problem.

In this problem you do not have to predict the actual price, but a price range indicating how high the price is.

### FEATURES THAT WE WILL USE 
 * battery_power 
 * blue
 * clock_speed
 * dual_sim
 * fc
 * four_g
 * int_memory
 * m_dep
 * mobile_wt
 * n_cores
 * pc
 * px_height
 * px_width
 * ram
 * sc_h
 * sc_w
 * talk_time
 * three_g
 * touch_screen
 * wifi
 
### OUR TARGET
 * price_range

In [None]:
# Load the training data and preview its first rows.
train_csv = "/kaggle/input/mobile-price-classification/train.csv"
train = pd.read_csv(train_csv)
train.head()

In [None]:
# Load the test data and preview its first rows.
test_csv = "/kaggle/input/mobile-price-classification/test.csv"
test = pd.read_csv(test_csv)
test.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the training columns.
train.describe()

In [None]:
# Column dtypes and non-null counts: a quick check for missing values.
train.info()

In [None]:
# Bar chart of how many rows fall into each value of the target column.
ax = sns.countplot(x=train["price_range"])
ax.set_title("Counts of Each Price Range");

In [None]:
# Annotated heatmap of the pairwise correlations between all training columns.
fig, ax = plt.subplots(figsize=(20, 18))
sns.heatmap(train.corr(), annot=True, ax=ax);

In [None]:
# Rank every feature by its correlation with the target, strongest first.
# The target's own entry is removed by label rather than by position, so the
# result does not depend on it happening to sort first.
(
    train.corr()["price_range"]
    .drop("price_range")
    .sort_values(ascending=False)
)

In [None]:
# Histogram of RAM with the median marked by a vertical red line.
# `sns.histplot` replaces the deprecated `sns.distplot(..., kde=False)`;
# both draw raw counts per bin.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(train["ram"], bins=25, ax=ax)
median_ram = train["ram"].median()
ax.axvline(median_ram, label="Median of Ram", color="red")
ax.set_title("RAM Distribution")
ax.legend()
plt.show()

In [None]:
# RAM values grouped by price range.
# `catplot` is figure-level and always creates its own figure, so a preceding
# `plt.figure(...)` only produces an empty extra figure. The size is set through
# `height`/`aspect` instead (6 * 4/3 = 8 wide by 6 high).
g = sns.catplot(x="price_range", y="ram", data=train, height=6, aspect=4 / 3)
g.ax.set_title("Ram effects on Price Range")
plt.show()

In [None]:
# Histogram of battery power with the median marked by a vertical red line.
# `sns.histplot` replaces the deprecated `sns.distplot(..., kde=False)`;
# both draw raw counts per bin.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(train["battery_power"], bins=25, ax=ax)
median_power = train["battery_power"].median()
ax.axvline(median_power, label="Median of Battery Power", color="red")
# Title added so the figure stands alone, matching the RAM histogram.
ax.set_title("Battery Power Distribution")
ax.legend()
plt.show()

In [None]:
# Violin plot of battery power for each price range.
# `catplot` is figure-level and creates its own figure, so the size is passed via
# `height`/`aspect` (8 wide by 6 high) rather than a separate `plt.figure(...)`,
# which would only leave an empty figure behind.
g = sns.catplot(x="price_range", y="battery_power", data=train, kind="violin",
                height=6, aspect=4 / 3)
g.ax.set_title("Battery Power effects on Price Range");

In [None]:
# Price-range counts, one facet per value of `wifi`.
# `catplot` builds its own figure, so no `plt.figure(...)` call is made (it would
# only emit an empty figure); `height` sizes each facet instead.
sns.catplot(x="price_range", col="wifi", data=train, kind="count",
            palette="Set2", height=6)
plt.show()

In [None]:
# Price-range counts, one facet per value of `blue`.
# `catplot` builds its own figure, so no `plt.figure(...)` call is made (it would
# only emit an empty figure); `height` sizes each facet instead.
sns.catplot(x="price_range", col="blue", data=train, kind="count",
            palette="Set3", height=6)
plt.show()

In [None]:
# Feature matrix: every training column except the target.
X = train.drop(columns=["price_range"]).values
# Label vector: the target column on its own.
y = train.loc[:, "price_range"].values

In [None]:
# Remove the `id` column (not one of the model features) and keep the raw array.
# NOTE: this rebinds `test` from a DataFrame to an ndarray, so the cell cannot be
# re-run without reloading `test` first (an ndarray has no `.drop`).
test = test.drop(columns=["id"]).values

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
import xgboost as xgb

In [None]:
# Hold out 20% of the labelled rows for validation (fixed seed for repeatability).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Learn the min/max scaling from the training rows only, then apply that same
# scaling to the training, validation and test features.
# NOTE: `test` is overwritten with its scaled version, so re-running this cell
# on its own would scale it a second time.
scaler = MinMaxScaler().fit(X_train)
X_train, X_val, test = (scaler.transform(part) for part in (X_train, X_val, test))

In [None]:
# Hyper-parameter grids explored by GridSearchCV, one per model family.

# NOTE: "auto" is no longer listed under max_features. For classifiers it was an
# alias of "sqrt" (so it only doubled the search cost), and recent scikit-learn
# releases reject it outright.
random_forest_pg = {"n_estimators" : [100,150,200,250,300],
                    "criterion" : ["gini","entropy"],
                    "max_depth" : list(range(3,11)),
                    "max_features" : ["sqrt"],
                    "bootstrap" : [True,False]}

################################################################

# NOTE(review): "minkowski" with the default p=2 is the euclidean metric, and the
# `algorithm` options presumably differ only in search speed, not in the
# neighbours found -- this grid may contain redundant combinations; confirm
# before trimming.
knn_pg = {"n_neighbors" : list(range(1,16,2)),
          "weights": ["uniform","distance"],
          "algorithm" : ["auto","ball_tree","kd_tree","brute"],
          "metric" : ['minkowski','euclidean','manhattan']}

################################################################

xgb_pg = {"n_estimators" : [100,150,200,250,300],
          "max_depth" : list(range(3,12,2)),
          "learning_rate" : [0.01,0.05,0.1,0.2]}

################################################################

# Base estimators handed to the grid searches; seeds are fixed where the
# estimator accepts one so the search is repeatable.
rfc = RandomForestClassifier(random_state=0)
knn = KNeighborsClassifier()
xgc = xgb.XGBClassifier(random_state=0)

In [None]:
def _run_grid_search(label, estimator, param_grid):
    """Run a 5-fold grid search on the training split and report the winner.

    Parameters
    ----------
    label : str
        Heading printed above the results.
    estimator : estimator object
        Unfitted model; GridSearchCV fits clones of it, leaving it untouched.
    param_grid : dict
        Mapping of parameter name to the list of candidate values.

    Returns
    -------
    GridSearchCV
        The fitted search object (exposes ``best_score_`` and ``best_params_``).
    """
    # n_jobs=-1 spreads the candidate fits over all available cores; the scores
    # are the same as a sequential run, only the wall-clock time changes.
    search = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=5, n_jobs=-1)
    search.fit(X_train, y_train)
    print(label)
    print(search.best_score_)
    print(search.best_params_)
    return search


rfc_gs = _run_grid_search("Random Forest Classifier", rfc, random_forest_pg)
print("#"*50)
knn_gs = _run_grid_search("KNN Classifier", knn, knn_pg)
print("#"*50)
xgb_gs = _run_grid_search("XGB Classifier", xgc, xgb_pg)

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Final models with hand-picked hyper-parameters.
# NOTE(review): these values presumably mirror the grid-search winners -- confirm
# against the printed `best_params_`.

# `objective` is left unset, exactly as in the grid-searched estimator, instead of
# hard-coding a binary objective for a price-range target; the seed is fixed to
# match the grid-searched estimator as well.
xgb_classifier = xgb.XGBClassifier(n_estimators=300, max_depth=3, learning_rate=0.2,
                                   random_state=0)

# max_features="sqrt" is what "auto" meant for classifiers; "auto" itself is
# rejected by recent scikit-learn releases. Seed fixed so scores are repeatable.
rfc = RandomForestClassifier(n_estimators=300, max_depth=9, max_features="sqrt",
                             criterion="entropy", bootstrap=True, random_state=0)

knnc = KNeighborsClassifier(n_neighbors=15, weights="distance", metric="manhattan",
                            algorithm="auto")

def look_train_val_scores(obj,X_train=X_train,y_train=y_train,X_val=X_val,y_val=y_val):
    """Fit ``obj`` on the training data and score it on both splits.

    Parameters
    ----------
    obj : estimator object
        Model exposing ``fit`` and ``predict``. It is fitted in place.
    X_train, y_train : array-like
        Training features and labels; default to the notebook-level split as it
        existed when this function was defined.
    X_val, y_val : array-like
        Validation features and labels; same defaulting rule.

    Returns
    -------
    tuple
        ``(train_accuracy, validation_accuracy)`` as computed by
        ``accuracy_score``.
    """
    obj.fit(X_train, y_train)

    # Score the fitted model on each split in turn: training first, then validation.
    train_accuracy, val_accuracy = (
        accuracy_score(labels, obj.predict(features))
        for features, labels in ((X_train, y_train), (X_val, y_val))
    )
    return train_accuracy, val_accuracy

# Report train/validation accuracy for each candidate model.
# Each model is fitted exactly once and both scores are taken from that single
# fit; calling the helper separately for each score would train every model
# twice and could pair a train score with a val score from a different fit.
for position, (label, model) in enumerate(
    (("XGB", xgb_classifier), ("RFC", rfc), ("KNN", knnc))
):
    if position:
        print()  # blank line between models
    train_acc, val_acc = look_train_val_scores(model)
    print(f"{label} train score : {train_acc}, val score : {val_acc}")

**XGBClassifier looks best of the three models compared above, so it is used for the final predictions.**

In [None]:
# Train the chosen model on the training split and predict a class for every
# row of the scaled test set.
prediction = xgb_classifier.fit(X_train, y_train).predict(test)

In [None]:
# Show the class predicted for each row of the test set.
print(prediction)