In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Library Import
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style("darkgrid")
import pandas_profiling as ProfileReport
import plotly.graph_objs as go
import plotly.express as px
import cufflinks as cf

import warnings
warnings.filterwarnings("ignore")

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected = True)
cf.go_offline();

# Base Model
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import label_binarize

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import auc
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve
from lightgbm import LGBMClassifier

## Data Load

In [None]:
# Read the labelled training set and the unlabelled test set.
train = pd.read_csv('../input/mobile-price-classification/train.csv')
test = pd.read_csv('../input/mobile-price-classification/test.csv')

# Report both shapes, separated by a visual divider.
divider = '*' * 25
print('Train Shape:', train.shape)
print(divider)
print('Test Shape:', test.shape)

# Work on a copy so the raw training frame stays untouched.
df = train.copy()

In [None]:
# Preview the first rows of the working copy of the training data.
df.head()

In [None]:
# (rows, columns) of the working training frame.
df.shape

In [None]:
# Column dtypes and non-null counts.
df.info()

## Missing Value Analysis

In [None]:
import missingno as msno
# Bar chart with one bar per column of df, used here to eyeball missing values.
msno.bar(df)
plt.show()

In [None]:
# df.profile_report()

## Data Analysis

In [None]:
# Summary statistics with one row per variable and one column per statistic.
desc = df.describe().T

# Row and column labels the heatmap should show, in display order.
variable_rows = ['battery_power', 'blue', 'clock_speed', 'dual_sim',
                 'fc', 'four_g', 'int_memory', 'm_dep', 'mobile_wt',
                 'n_cores', 'pc', 'px_height', 'px_width', 'ram',
                 'sc_h', 'sc_w', 'talk_time', 'three_g', 'touch_screen',
                 'wifi', 'price_range']
stat_columns = ["count", "mean", "std", "min",
                "25%", "50%", "75%", "max"]

# Re-label the summary onto exactly those rows/columns.
df1 = pd.DataFrame(data=desc, index=variable_rows, columns=stat_columns)

f, ax = plt.subplots(figsize=(12, 12))

# Annotated table-style heatmap: whole-number labels, no colour bar.
sns.heatmap(
    df1,
    annot=True,
    fmt='.0f',
    cmap="Blues",
    linewidths=5,
    cbar=False,
    annot_kws={"size": 16},
    ax=ax,
)

plt.title("Descriptive Statistics", size=16)
plt.ylabel("Variables")
plt.xticks(size=18)
plt.yticks(size=12, rotation=0)
plt.show()

In [None]:
# Correlations

# Pairwise correlation matrix of all columns, drawn as a heatmap.
corr = df.corr()

fig = plt.figure(figsize=(15, 12))
r = sns.heatmap(data=corr, cmap='Purples')
r.set_title(label='Correlations')

In [None]:
# price range correlations

# Correlation of every feature with the target, strongest first.
# Read the price_range column directly instead of sorting the whole matrix and
# relying on the self-correlation row (value 1.0) landing in first position.
corr['price_range'].sort_values(ascending=False)

## Data Visualization

In [None]:
#Ram vs Price Range
# Point estimate of ram for each price_range class.
sns.pointplot(data=df, x='price_range', y='ram');

In [None]:
#Battery Power vs Price Range
# Point estimate of battery_power for each price_range class.
sns.pointplot(data=df, x='price_range', y='battery_power');

In [None]:
# Distribution of battery_power within each price_range class.
sns.boxplot(data=df, y='battery_power', x='price_range')

In [None]:
#Int Memory vs Price Range
# Point estimate of int_memory for each price_range class.
sns.pointplot(data=df, x='price_range', y='int_memory');

**X and Y**

In [None]:
# Target is the price_range column; every other column is a feature.
y = df['price_range']
X = df.drop(columns='price_range')

## Splitting the Data into Train and Test Sets

In [None]:
# Hold out 33% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Creating and Training Models

## 1) Linear Regression

In [None]:
# Fit an ordinary least-squares regressor on the numeric class label.
lrm = LinearRegression().fit(X_train, y_train)

# NOTE: for a regressor, .score() is the R^2 coefficient, not classification
# accuracy, so this number is not directly comparable with the classifiers below.
lrm.score(X_test, y_test)

## 2) KNN

In [None]:
# k-nearest-neighbours classifier with k = 10.
knnm = KNeighborsClassifier(n_neighbors=10)
knnm.fit(X_train, y_train)

# The score used to be a bare mid-cell expression, so its value was computed
# and silently discarded; print it so the accuracy is actually shown.
print('Accuracy:', knnm.score(X_test, y_test))

# Per-class precision / recall / F1 on the held-out split.
y_knn_pred = knnm.predict(X_test)
print(classification_report(y_test, y_knn_pred))

## 4) Decision Tree

In [None]:
# Seed the tree so the reported accuracy is reproducible across runs,
# consistent with the seeded split and the seeded LGBM model.
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(X_train, y_train)

# Mean accuracy on the held-out split.
dtc.score(X_test, y_test)

## 5) Random Forest Classifier

In [None]:
# Seed the forest so the reported accuracy is reproducible across runs,
# consistent with the seeded split and the seeded LGBM model.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Mean accuracy on the held-out split.
rf.score(X_test, y_test)

## 6) LGBM Classifier

In [None]:
# Gradient-boosted trees with a fixed seed; fit() returns the estimator itself.
lgbmc = LGBMClassifier(random_state=42).fit(X_train, y_train)

# Mean accuracy on the held-out split.
lgbmc.score(X_test, y_test)

## 7) SVM Classifier

In [None]:
# RBF-kernel support vector classifier with regularisation strength C = 0.1.
svmcm = SVC(C=0.1, kernel='rbf')
svmcm.fit(X_train, y_train)

# The score used to be a bare mid-cell expression, so its value was computed
# and silently discarded; print it so the accuracy is actually shown.
print('Accuracy:', svmcm.score(X_test, y_test))

# Per-class precision / recall / F1 on the held-out split.
y_svmcm_pred = svmcm.predict(X_test)
print(classification_report(y_test, y_svmcm_pred))

In [None]:
# True class (x-axis) against the SVC's predicted class (y-axis).
plt.scatter(x=y_test, y=y_svmcm_pred)

In [None]:
# Confusion matrix of the SVC on the held-out split (first argument is the
# true labels, second the predictions).
val = confusion_matrix(y_true=y_test, y_pred=y_svmcm_pred)
print(val)

In [None]:
plt.figure(figsize=(10, 7))

# fmt='d' prints the integer counts as-is; the default annotation format
# ('.2g') would render counts of 100 or more in scientific notation (1.6e+02).
sns.heatmap(val, annot=True, fmt='d')

# confusion_matrix puts true classes on rows and predicted classes on columns.
plt.xlabel('Predicted label')
plt.ylabel('True label');

## Classification of test.csv

In [None]:
# test.csv was already read into `test` in the data-load cell; take a copy
# instead of hitting the disk a second time. The copy keeps `test` (and its
# id column) intact while df_test is modified below.
df_test = test.copy()
df_test.head()

In [None]:
# The id column is not a feature the models were trained on, so remove it.
# errors='ignore' keeps the cell re-runnable: an in-place drop raises KeyError
# the second time because the column is already gone.
df_test = df_test.drop(columns='id', errors='ignore')
df_test.head()

In [None]:
# Predict with feature columns only. A later cell adds a price_range column to
# df_test; excluding it here (if present) keeps this cell re-runnable instead
# of failing on a feature mismatch.
test_features = df_test.drop(columns='price_range', errors='ignore')
pred_svmcm = svmcm.predict(test_features)
pred_svmcm

In [None]:
# Store the SVC predictions on the test frame as its price_range column.
df_test['price_range'] = pred_svmcm

In [None]:
# Display the test frame, which now includes the predicted price_range column.
df_test

In [None]:
# CAUTION: price_range on df_test holds the first SVC's *predictions*, not
# ground truth, so the score below measures how well a second SVC reproduces
# those predictions -- it is not an accuracy estimate on unseen data.
# This cell also rebinds X, y, X_train, X_test, y_train and y_test, replacing
# the training-data split created earlier in the notebook.
X = df_test.drop(columns='price_range')
y = df_test['price_range']

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Default (RBF) kernel with C = 0.1; fit() returns the estimator itself.
svmcm_t = SVC(C=0.1).fit(X_train, y_train)
svmcm_t.score(X_test, y_test)

In [None]:
# Evaluate the second SVC on its held-out part of the pseudo-labelled test data.
y_svmcm_t_pred = svmcm_t.predict(X_test)
print(classification_report(y_test, y_svmcm_t_pred))

# Scatter only: the earlier line plot joined consecutive samples of unordered,
# discrete class labels, which produced meaningless zig-zag segments.
plt.scatter(y_test, y_svmcm_t_pred)
plt.xlabel('Held-out label')
plt.ylabel('Predicted label');

In [None]:
# Pair every prediction with its original id from test.csv. Writing the bare
# Series emitted a 0-based positional index in place of the dropped id column,
# so rows could not be matched back to the devices they describe.
# df_test was derived from the same file without reordering rows, so the
# positional pairing is valid.
submission = test[['id']].assign(price_range=df_test['price_range'].to_numpy())
submission.to_csv('submission.csv', index=False)