In [None]:
!pip install plotly

In [None]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("darkgrid")

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

import plotly.express as px
%matplotlib inline

from plotly.offline import iplot

import plotly.graph_objs as go

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

**Đọc dữ liệu từ file**

In [None]:
# Read the training CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/mobile-price-classification/train.csv',
)
df.head(n=5)

# **Phân Tích Dữ Liệu**

**Mối quan hệ giữa:** Ram - Battery Power - Price Range

In [None]:
# Scatter plot of RAM (x) against battery power (y), one trace per price_range class.
# The four traces differ only in class id, legend label and marker colour, so they
# are built from this table instead of four copy-pasted go.Scatter(...) calls.
# Each entry: (price_range value, legend label, marker colour).
price_tiers = [
    (0, "Price Range: Rẻ", 'rgba(240, 136, 200, 0.8)'),
    (1, "Price Range: Trung", 'rgba(255, 128, 2, 0.8)'),
    (2, "Price Range: Cao", 'rgba(0, 240, 170, 0.8)'),
    (3, "Price Range: Xa Xỉ", 'rgba(50, 70, 190, 0.8)'),
]

data = []
for tier, label, colour in price_tiers:
    # Rows belonging to the current price class only.
    tier_rows = df[df.price_range == tier]
    data.append(
        go.Scatter(
            x=tier_rows.ram,
            y=tier_rows.battery_power,
            mode="markers",
            name=label,
            marker=dict(color=colour),
            text=tier_rows.price_range,  # per-point text is the class id
        )
    )

# Fixed-size figure with labelled axes and no zero lines.
layout = dict(
    title='Ram - Battery Power - Price Range',
    xaxis=dict(title='Ram', ticklen=5, zeroline=False),
    yaxis=dict(title='Battery Power', ticklen=5, zeroline=False),
    autosize=False,
    width=700,
    height=450,
)
fig = dict(data=data, layout=layout)

iplot(fig)

**Mối quan hệ giữa:** Ram - Price Range

In [None]:
# Box plot of the RAM distribution, one box per price_range class.
# The four boxes differ only in class id, legend label and colour, so they are
# built from this table instead of four copy-pasted go.Box(...) calls.
# Each entry: (price_range value, legend label, box colour).
price_tiers = [
    (0, 'Price Range: Rẻ', 'rgb(223, 240, 0)'),
    (1, 'Price Range: Trung', 'rgb(10, 110, 220)'),
    (2, 'Price Range: Cao', 'rgb(242, 54, 14)'),
    (3, 'Price Range: Xa Xỉ', 'rgb(60, 200, 135)'),
]

data = [
    go.Box(
        # RAM values of the rows in this price class only.
        y=df.loc[df.price_range == tier, 'ram'],
        name=label,
        marker=dict(color=colour),
    )
    for tier, label, colour in price_tiers
]

# Fixed-size figure with labelled axes and no zero lines.
layout = dict(
    title='Ram - Price Range',
    xaxis=dict(title='Price Range', ticklen=5, zeroline=False),
    yaxis=dict(title='Ram', ticklen=5, zeroline=False),
    autosize=False,
    width=700,
    height=450,
)

fig = dict(data=data, layout=layout)

iplot(fig)

**Mối quan hệ giữa:** Battery Power - Price Range

In [None]:
# Box plot of the battery-power distribution, one box per price_range class.
# The four boxes differ only in class id, legend label and colour, so they are
# built from this table instead of four copy-pasted go.Box(...) calls.
# Each entry: (price_range value, legend label, box colour).
# NOTE(review): class 2 is labelled 'Cao Cấp' here but 'Cao' in the other
# price-range plots of this notebook — confirm which wording is intended.
price_tiers = [
    (0, 'Price Range: Rẻ', 'rgb(223, 240, 0)'),
    (1, 'Price Range: Trung', 'rgb(10, 110, 220)'),
    (2, 'Price Range: Cao Cấp', 'rgb(242, 54, 14)'),
    (3, 'Price Range: Xa Xỉ', 'rgb(60, 200, 135)'),
]

data = [
    go.Box(
        # Battery-power values of the rows in this price class only.
        y=df.loc[df.price_range == tier, 'battery_power'],
        name=label,
        marker=dict(color=colour),
    )
    for tier, label, colour in price_tiers
]

# Fixed-size figure with labelled axes and no zero lines.
layout = dict(
    title='Battery Power - Price Range',
    xaxis=dict(title='Price Range', ticklen=5, zeroline=False),
    yaxis=dict(title='Battery Power', ticklen=5, zeroline=False),
    autosize=False,
    width=700,
    height=450,
)

fig = dict(data=data, layout=layout)

iplot(fig)

**Mối quan hệ giữa:** Phone Height - Price Range

In [None]:
# Histogram of the px_height column, coloured by price_range, at a fixed figure size.
fig = px.histogram(
    df,
    x='px_height',
    color='price_range',
    title="Phone Height - Price Range",
).update_layout(
    xaxis_title_text='px_height',
    yaxis_title_text='Frequency',
    autosize=False,
    width=700,
    height=450,
)
fig

**Mối quan hệ giữa:** Phone Width - Price Range

In [None]:
# Histogram of the px_width column, coloured by price_range, at a fixed figure size.
fig = px.histogram(
    df,
    x='px_width',
    color='price_range',
    title="Phone Width - Price Range",
).update_layout(
    xaxis_title_text='px_width',
    yaxis_title_text='Frequency',
    autosize=False,
    width=700,
    height=450,
)
fig

# **Tiền Xử Lý**

**Phân chia dữ liệu train - test**

In [None]:
# Seed shared by the split below and by the model cell, for reproducible results.
random_state = 42

# Select features and target by column name rather than by position, so the
# split does not silently pick the wrong columns if the CSV layout changes.
# NOTE(review): assumes every column other than `price_range` is a feature
# (equivalent to the previous iloc[:, 0:20] / iloc[:, 20] when the frame has
# 21 columns with the target last) — confirm against the dataset.
target_column = 'price_range'
X = df.drop(columns=[target_column]).values
y = df[target_column].values

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_state
)

**Chuẩn hoá dữ liệu**

In [None]:
# Standardise the features: the scaler is fitted on the training split only and
# the resulting statistics are reused to transform the test split.
sc = StandardScaler()

# Tuple elements are evaluated left to right, so the fit happens before the
# test-set transform.
X_train, X_test = sc.fit_transform(X_train), sc.transform(X_test)

# **Models**

**Hồi quy Logistic**

In [None]:
# Fit a logistic-regression classifier on the training split, predict the test
# split, and print the per-class classification report.
classifier = LogisticRegression(random_state=random_state).fit(X_train, y_train)
y_pred = classifier.predict(X_test)

print(classification_report(y_true=y_test, y_pred=y_pred))

**Ma Trận Lỗi:** Dự đoán và thực tế

In [None]:
# Confusion matrix of the test-split predictions, drawn as an annotated heatmap.
# The label set is taken from the full target vector `y` and passed explicitly,
# so the matrix always has one row/column per class even if some class happens
# to be missing from y_test / y_pred (a hard-coded 4x4 frame would then fail).
class_labels = np.unique(y)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# String tick names, matching the previous "0".."3" axis labels for classes 0-3.
tick_names = [str(label) for label in class_labels]
df1 = pd.DataFrame(cm, index=tick_names, columns=tick_names)

f, ax = plt.subplots(figsize=(6, 6))

sns.heatmap(
    df1,
    annot=True,
    cmap="Greens",
    fmt='.0f',
    ax=ax,
    linewidths=5,
    cbar=False,
    annot_kws={"size": 16},
)

# Rows of the matrix are true labels, columns are predicted labels.
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix", size=12)
plt.xticks(size=12)
plt.yticks(size=12, rotation=0)  # keep the y tick labels horizontal
plt.show()