In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

In [None]:
# Read the train and test CSVs from the Kaggle input directory into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        r'/kaggle/input/mobile-price-classification/train.csv',
        r'/kaggle/input/mobile-price-classification/test.csv',
    )
)

In [None]:
# Preview the first five rows as a quick sanity check on the load.
train.head(n=5)

In [None]:
# Report (rows, columns) for both frames, one per line.
print(
    f'train data shape: {train.shape}',
    f'test data shape: {test.shape}',
    sep='\n',
)

In [None]:
# Number of rows per target class, to check how balanced the classes are.
price_range_counts = train['price_range'].value_counts()
price_range_counts

In [None]:
# Missing-value count per column (`isna` is the canonical name; `isnull` is its alias).
train.isna().sum()

In [None]:
# Print the frame summary (per-column dtype and non-null count).
train.info()

In [None]:
# Summary statistics, transposed so each feature is a row and each statistic a column.
train.describe().transpose()

In [None]:
# Feature groups used by the plotting cells below.
# Columns plotted as count plots (one bar group per value).
bin_cols = [
    'blue', 'dual_sim', 'four_g',
    'three_g', 'touch_screen', 'wifi',
]
# Columns plotted as per-class density curves.
cont_cols = [
    'battery_power', 'int_memory', 'm_dep', 'mobile_wt',
    'px_height', 'px_width', 'ram', 'clock_speed',
    'fc', 'pc', 'n_cores', 'sc_h', 'sc_w', 'talk_time',
]

In [None]:
# One count plot per binary feature on a 3x2 grid: bars are grouped by
# price_range on the x-axis and split (hue) by the feature's value.
fig = plt.figure(figsize=(15, 20))
for position, feature in enumerate(bin_cols, start=1):
    ax = fig.add_subplot(3, 2, position)
    # Draw on the subplot explicitly rather than relying on pyplot's current axes.
    sns.countplot(data=train, x='price_range', hue=feature, ax=ax)
    ax.set_xlabel('')
    ax.set_title(feature)

In [None]:
# Per-class density of every continuous feature on a 7x2 grid (one panel per
# feature, one curve per price_range value).
#
# Changes from the earlier version of this cell:
#   * `fill=True` replaces `shade=True`, which seaborn has deprecated.
#   * The four copy-pasted calls are a loop over the classes actually present
#     in the data instead of a hard-coded 0..3.
#   * The axes are passed explicitly and a legend is drawn; `label=` alone does
#     not make the curves identifiable.
fig = plt.figure(figsize=(15, 40))
price_classes = sorted(train['price_range'].unique())
for position, feature in enumerate(cont_cols, start=1):
    ax = fig.add_subplot(7, 2, position)
    for price_class in price_classes:
        sns.kdeplot(
            x=train.loc[train['price_range'] == price_class, feature],
            label=price_class,
            fill=True,
            ax=ax,
        )
    ax.set_xlabel('')
    ax.set_title(feature)
    ax.legend(title='price_range')

In [None]:
std_scalar = StandardScaler()
train_sc = std_scalar.fit_transform(train.drop('price_range', axis=1))
test_sc = std_scalar.transform(test.drop('id', axis=1))

In [None]:
train_df = pd.DataFrame(np.c_[train_sc,train['price_range'].values], columns=train.columns)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(train_df.drop(columns=['price_range'], axis=1), 
                                                    train_df['price_range'], test_size=0.2, random_state=42)

In [None]:
# Fit a logistic-regression classifier with scikit-learn's default settings.
lr = LogisticRegression()
lr.fit(X=X_train, y=y_train)

In [None]:
# 3-fold cross-validated accuracy on the training split (one score per fold).
cross_val_score(
    estimator=lr,
    X=X_train,
    y=y_train,
    scoring='accuracy',
    cv=3,
)

In [None]:
# Micro-averaged F1 on the rows the model was fitted on.
y_train_pred = lr.predict(X_train)
f1_score(y_true=y_train, y_pred=y_train_pred, average='micro')

In [None]:
# Micro-averaged F1 on the held-out rows.
y_test_pred = lr.predict(X_test)
f1_score(y_true=y_test, y_pred=y_test_pred, average='micro')

In [None]:
# Confusion matrix on the training split (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_train, y_pred=y_train_pred)

In [None]:
# Confusion matrix on the held-out split (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=y_test_pred)

In [None]:
# First five held-out labels, for comparison with the single-row prediction below.
y_test.head(5)

In [None]:
# Raw decision scores for the fourth held-out row; the 3:4 slice keeps the
# input two-dimensional, as the estimator expects.
digit_score = lr.decision_function(X_test.iloc[3:4])

In [None]:
# Show the score array, then the position of its largest entry. That position
# is an index into lr.classes_, not necessarily the class label itself.
print(digit_score, np.argmax(digit_score), sep='\n')

In [None]:
# Class labels known to the fitted model; decision-score columns follow this order.
lr.classes_