In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the Kaggle input
# directory, then print them one per line in walk order.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the training CSV into a DataFrame and preview its first rows.
train_csv = "/kaggle/input/mobile-price-classification/train.csv"
df = pd.read_csv(train_csv)
df.head()

In [None]:
# Separate the target column from the predictor columns.
target_col = "price_range"
y = df[target_col]
X = df.drop(columns=target_col)
X.head()

<h2>Feature Analysis</h2>

In [None]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

In [None]:
# Score every feature against the target with the chi-squared statistic.
best_features = SelectKBest(score_func=chi2, k=10)
fit = best_features.fit(X, y)

# One single-column frame for the scores and one for the column names,
# glued side by side into a two-column (Score, Spec) table.
df_scores = pd.DataFrame(fit.scores_)
df_specs = pd.DataFrame(X.columns)
df_combined = pd.concat(
    [
        df_scores.rename(columns={0: 'Score'}),
        df_specs.rename(columns={0: 'Spec'}),
    ],
    axis=1,
)

# Show the ten highest-scoring features.
print(df_combined.nlargest(10, 'Score'))

<h2>Make a heatmap</h2>

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Correlation matrix of every column in df (the price_range target included).
corr_mat = df.corr()
top_corr_features = corr_mat.index

plt.figure(figsize=(20, 20))
# corr_mat already holds the pairwise correlations of exactly the columns in
# top_corr_features, so it is plotted directly instead of recomputing
# df[top_corr_features].corr().
sns.heatmap(corr_mat, annot=True, cmap="RdYlGn")
plt.title("Feature correlation matrix")
plt.show()

<h2>Now, we will train a support vector classifier</h2>

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV


In [None]:
# Rows of the ten highest-scoring features, and their names as a NumPy array.
best_feat = df_combined.nlargest(10, 'Score')
feat_names = best_feat['Spec'].to_numpy()

In [None]:
# list() accepts both the NumPy array produced by the previous cell and an
# already-converted list, so this cell can be re-run safely (a plain list has
# no .tolist(), which made a second run fail with AttributeError).
feat_names = list(feat_names)

# Keep only the selected columns as model inputs.
X = df[feat_names]
X.head()

<h2>Hyperparameter tuning</h2>

In [None]:
from sklearn.preprocessing  import StandardScaler
from sklearn.model_selection import train_test_split

# Candidate values for the SVC hyper-parameters C and gamma (referenced by the
# commented-out grid search further down).
C = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000]
gamma = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature normalization: learn the scaling statistics from the training rows
# and apply them to those same rows.
cols = X.columns
scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns=cols)
X_train.head()

In [None]:
# Summary statistics of the scaled training features, each value rendered as
# a string with five decimals.
X_train.describe().apply(lambda column: column.map('{0:.5f}'.format))


In [None]:
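# NOTE: the grid search below is left disabled (presumably because of its run
# time -- TODO confirm). Uncomment it to re-tune C, gamma and the kernel.
# params={'C':C,'gamma':gamma,'kernel':['poly','rbf']}
# grid_s=GridSearchCV(SVC(),params,refit=True,verbose=3)
# grid_s.fit(X_train,y_train)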

In [None]:
# print(grid_s.best_estimator_)
# print(grid_s.best_params_)

In [None]:
# Hyper-parameters for the RBF-kernel support vector classifier.
svc_params = {'C': 3, 'gamma': 0.01, 'kernel': 'rbf'}
model = SVC(**svc_params)
model.fit(X_train, y_train)

<h2>Make Predictions</h2>

In [None]:
from sklearn.metrics import confusion_matrix,classification_report,roc_curve,roc_auc_score,auc,accuracy_score

In [None]:
# Scale the held-out rows with the scaler that was fitted on the training
# split. Fitting a second StandardScaler on the test rows would standardise
# them with different means/variances than the ones the model was trained on
# and would let test-set statistics influence the evaluation.
# NOTE: X_test is overwritten here; re-run the train/test split cell before
# re-running this one, otherwise the rows are scaled twice.
cols=X_test.columns
X_test=pd.DataFrame(scaler.transform(X_test),columns=cols)
X_test.head()

In [None]:
# Predict a price-range label for every row of the scaled test features.
predictions=model.predict(X_test)

In [None]:
def score(y_test,predictions):
    """Print a classification report, the predicted-class counts and the accuracy.

    Parameters
    ----------
    y_test : array-like
        True class labels.
    predictions : array-like
        Predicted class labels, in the same order as ``y_test``.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    report = classification_report(y_test, predictions)
    predicted_counts = pd.DataFrame(predictions).value_counts()
    accuracy = accuracy_score(y_test, predictions)

    print(report)
    print(predicted_counts)
    print("Accuracy score ={score}".format(score=accuracy))

def plot_matrix(y_test,predictions):
    """Draw the confusion matrix as an annotated heatmap.

    The matrix is transposed before plotting, so rows are the predicted
    classes and columns are the actual classes; both axes are labelled
    accordingly.

    Parameters
    ----------
    y_test : array-like
        True class labels.
    predictions : array-like
        Predicted class labels, in the same order as ``y_test``.
    """
    # Derive the tick labels from the data instead of assuming classes 0-3:
    # with hard-coded labels the DataFrame constructor fails whenever the
    # matrix is not exactly 4x4.
    classes=sorted(set(y_test)|set(predictions))
    matrix=confusion_matrix(y_test,predictions,labels=classes)
    matrix=matrix.transpose()
    names=[str(cls) for cls in classes]
    cm_df=pd.DataFrame(matrix,index=names,columns=names)
    ax=sns.heatmap(cm_df,annot_kws={"size":16},annot=True,fmt="d")
    # After the transpose the x axis runs over actual classes and the y axis
    # over predicted classes; say so on the figure.
    ax.set_xlabel("Actual class")
    ax.set_ylabel("Predicted class")
    
def plot_roc(y_test,predictions):
    """Plot one-vs-rest ROC curves for every class plus their micro-average.

    Both label vectors are one-hot encoded and the resulting 0/1 indicator
    columns are passed to ``roc_curve``; the curves are therefore built from
    hard class predictions, not from probabilities.

    Parameters
    ----------
    y_test : array-like
        True class labels.
    predictions : array-like
        Predicted class labels, in the same order as ``y_test``.
    """
    # Use every label seen on either side so that a class the model never
    # predicted still gets an (all-zero) indicator column. Without this the
    # two one-hot frames can end up with different columns, which breaks the
    # per-class lookups and the flattened micro-average.
    classes=sorted(set(y_test)|set(predictions))
    n_classes=len(classes)
    actual_vals=pd.get_dummies(y_test).reindex(columns=classes,fill_value=0).astype(int)
    predictions=pd.get_dummies(predictions).reindex(columns=classes,fill_value=0).astype(int)

    # Number of samples assigned to each class.
    for cls in classes:
        print('{}: {}'.format(cls,predictions[cls].sum()))

    #compute roc curve and roc area for each curve
    fpr=dict()
    tpr=dict()
    roc_auc=dict()

    #loop for each class
    for i,cls in enumerate(classes):
        fpr[i],tpr[i],_=roc_curve(actual_vals[cls],predictions[cls])
        roc_auc[i]=auc(fpr[i],tpr[i])
    #micro-average roc curve: pool all (sample, class) indicator pairs
    fpr["micro"],tpr["micro"],_=roc_curve(actual_vals.to_numpy().ravel(),predictions.to_numpy().ravel())
    roc_auc["micro"]=auc(fpr["micro"],tpr["micro"])

    colors = ['aqua', 'darkorange', 'cornflowerblue','darkred']
    #main plotter
    lw=2
    plt.figure(figsize=(10,8))
    plt.plot(fpr["micro"], tpr["micro"],
             label='micro-average ROC curve (area = {0:0.2f})'
                   ''.format(roc_auc["micro"]),
             color='deeppink', linestyle=':', linewidth=4)
    labels=[str(cls) for cls in classes]
    for i in range(n_classes):
        # Cycle through the palette so more than four classes still plot.
        color=colors[i%len(colors)]
        plt.plot(fpr[i], tpr[i], color=color,lw=lw,label='ROC curve of class {0} {name} (area = {area:0.2f})'
                 ''.format(i, name=labels[i],area=roc_auc[i]))
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], 'k--', lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend(loc="lower right")
    plt.show()
    

<h2>Accuracy Score</h2>

In [None]:
# Print the classification report, predicted-class counts and accuracy for the test split.
score(y_test,predictions)

<h2>Confusion Matrix</h2>

In [None]:
# Draw the confusion-matrix heatmap for the test-split predictions.
plot_matrix(y_test,predictions)

<h2>ROC Curve</h2>

In [None]:
# Plot the per-class and micro-average ROC curves for the test-split predictions.
plot_roc(y_test,predictions)