# Mobile Price Classification
## Workflow
- 1 Import the Data
- 2 Prepare the Data
- 3 Distribution Visualization
- 4 Develop a Model


In [None]:
import sklearn

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
import graphviz
from sklearn.tree import export_graphviz

In [None]:
# Load the train and test CSV files of the mobile price dataset.
train = pd.read_csv("../input/mobile-price-classification/train.csv")
test = pd.read_csv("../input/mobile-price-classification/test.csv")

# Separate the target from the predictors; the target is kept as a
# categorical series.
target_column = 'price_range'
y_train_rw = train[target_column].astype("category")
X_train_rw = train.drop(columns=[target_column])

# Preview the first rows of the training frame.
train.head()

In [None]:
# Confirm that the target series carries the categorical dtype.
y_train_rw.dtype

In [None]:
# List the column labels of the training frame (target included).
train.columns

## Check Missing Values

In [None]:
# Number of missing entries in each column of the training frame.
train.isna().sum()

In [None]:
# Number of missing entries in each column of the test frame.
test.isna().sum()

In [None]:
# Heatmap of the pairwise correlations between all training columns,
# drawn on an explicitly created 15x12 inch figure.
fig, ax = plt.subplots(figsize=(15, 12))
sns.heatmap(train.corr(), ax=ax)

## The price_range is correlated with 4 features

In [None]:
# Correlation matrix of the training frame, then the correlations with the
# target ranked from strongest positive downwards (rounded to 2 decimals).
corr_train = train.corr()
price_corr = corr_train['price_range']
price_corr.sort_values(ascending=False).round(2)

## Feature Engineering
- We add new columns only for the features with a high correlation to the target

In [None]:
# Add squared and cubed versions of the selected columns.
# Suffix "_1" holds the square and "_2" the cube of the source column.
for source_column in ('ram', 'battery_power'):
    for suffix, exponent in ((1, 2), (2, 3)):
        new_column = '{}_{}'.format(source_column, suffix)
        X_train_rw[new_column] = X_train_rw[source_column] ** exponent

In [None]:
# Hold out the test rows first, then take the validation rows from what is
# left, so that train, validation and test never share a sample.  Splitting
# the full table twice with different seeds would let test rows end up in
# the data used for model selection.
# random_state=5 is the seed of the train/test split performed again further
# down, so that later call reproduces exactly this partition.
X_train_v, X_test, y_train_v, y_test = train_test_split(X_train_rw, y_train_rw, random_state=5)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_v, y_train_v, random_state=0)
# The second shape reported is the validation split, so it is labelled as such.
print("Size of X_train set: {} size of X_valid set: {}  size of y_train set: {}".format(X_train.shape, X_valid.shape, y_train.shape))

In [None]:
# Train/test partition of the full feature table; the fixed seed keeps the
# partition reproducible between runs.
X_train_v, X_test, y_train_v, y_test = train_test_split(
    X_train_rw,
    y_train_rw,
    random_state=5,
)

## We can also use the PolynomialFeatures transformer
- For the SVM it makes no difference

In [None]:
from sklearn.preprocessing import PolynomialFeatures
# Disabled experiment: expand the training features with polynomial terms
# up to degree 3.
# The default "include_bias=True" adds a feature that is constantly 1, so it
# is switched off in the call below.
#poly = PolynomialFeatures(degree=3, include_bias=False)
#poly.fit(X_train)
#X_train_New = poly.transform(X_train)

## Scaling the Train and Test feature set 

In [None]:
# List the column labels of the test frame.
# NOTE(review): the ram/battery_power power features are only added to
# X_train_rw in the cells shown here - confirm whether `test` needs them too.
test.columns

In [None]:

from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to every split (the training split included).
scaler = StandardScaler().fit(X_train)
X_train, X_valid, X_train_v, X_test = (
    scaler.transform(split) for split in (X_train, X_valid, X_train_v, X_test)
)


## SVC Model
### Simple Grid Search

In [None]:
# Exhaustive search over every (gamma, C) pair, scored on the validation split.
candidate_values = [0.001, 0.01, 0.1, 1, 10, 100]
best_score = 0
for gamma, C in [(g, c) for g in candidate_values for c in candidate_values]:
    # Train one SVC for this parameter combination.
    svm = SVC(gamma=gamma, C=C).fit(X_train, y_train)
    # Accuracy on the held-out validation rows.
    score = svm.score(X_valid, y_valid)
    # Only a strictly better score replaces the current best, so the first
    # combination reaching the top score is the one that is kept.
    if score > best_score:
        best_score, best_parameters = score, {'C': C, 'gamma': gamma}

print("Best score: {:.2f}".format(best_score))
print("Best parameters: {}".format(best_parameters))

## Grid Search with Cross-Validation

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

## After feature engineering, the test accuracy improved from 92% to 93%


In [None]:
# Grid search in which every (gamma, C) pair is scored by the mean accuracy
# of a 10-fold cross-validation on the training split.

# Start from a clean slate: a best_score left over from an earlier search is
# measured on a different basis and could be higher than every CV mean, in
# which case no combination tried here would ever be selected.
best_score = 0
for gamma in [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]:
    for C in [0.001, 0.01, 0.1, 1, 10, 100, 105]:
        svm = SVC(gamma=gamma, C=C)
        # perform cross-validation
        scores = cross_val_score(svm, X_train, y_train, cv=10)
        # compute mean cross-validation accuracy
        score = np.mean(scores)
        # if we got a better score, store the score and parameters
        if score > best_score:
            best_score = score
            best_parameters = {'C': C, 'gamma': gamma}

# Rebuild the model with the winning parameters on the full non-test portion
# (X_train_v / y_train_v).  Fitting on the validation split alone would train
# the final model on only a fraction of the available rows.
svm = SVC(**best_parameters)
svm.fit(X_train_v, y_train_v)
print("Best score: {:.2f}".format(best_score))
print("Best parameters: {}".format(best_parameters))

# Final, one-off evaluation on the held-out test rows.
range_price = svm.predict(X_test)
print("Test Accuracy: {:.3f}".format(accuracy_score(y_test, range_price)))


# Now using PolynomialFeatures
- But there is no improvement

In [None]:
# Disabled experiment: the same cross-validated grid search, run on the
# polynomial feature matrix X_train_New.  X_train_New is only created by the
# (also commented-out) PolynomialFeatures code, so that code has to be
# re-enabled before this cell can run.
#for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
#    for C in [0.001, 0.01, 0.1, 1, 10, 100]:
#        svm = SVC(gamma=gamma, C=C)
#        # perform cross-validation
#        scores = cross_val_score(svm, X_train_New, y_train, cv=10)
#        # compute mean cross-validation accuracy
#        score = np.mean(scores)
#        # if we got a better score, store the score and parameters
#        if score > best_score:
#            best_score = score
#            best_parameters = {'C': C, 'gamma': gamma}
## rebuild a model on the combined training and validation set
#svm = SVC(**best_parameters)
#svm.fit(X_valid, y_valid)
#print("Best score: {:.2f}".format(best_score))
#print("Best parameters: {}".format(best_parameters))
#range_price = svm.predict(X_test)
#print("Accuracy: {:.3f}".format(accuracy_score(y_test, range_price)))