In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
df = pd.read_csv("train.csv")

In [3]:
df_test = pd.read_csv("test.csv")

In [4]:
df.shape

(2000, 21)

In [5]:
df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [6]:
df.columns

Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'price_range'],
      dtype='object')

In [7]:
column_names = ['Battery_Power', 'Has_Bluetooth', 'Clock_Speed', 'Has_Dual_Sim', 'Front_Camera_Mega_Pixels', 'Has_4G', 
                'Internal_Memory', 'Mobile_Depth', 'Mobile_Weight', 'No_Cores', 'Primary_Camera_Mega_Pixels', 'Pixel_Height', 
                'Pixel_Width', 'RAM', 'Screen_Height', 'Screen_Width', 'Battery_Talk_Time', 'Has_3G', 'Has_Touch_Screen', 
                'Has_WiFi', 'Price_Range']

In [8]:
df.columns = column_names

In [9]:
df.head()

Unnamed: 0,Battery_Power,Has_Bluetooth,Clock_Speed,Has_Dual_Sim,Front_Camera_Mega_Pixels,Has_4G,Internal_Memory,Mobile_Depth,Mobile_Weight,No_Cores,...,Pixel_Height,Pixel_Width,RAM,Screen_Height,Screen_Width,Battery_Talk_Time,Has_3G,Has_Touch_Screen,Has_WiFi,Price_Range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [10]:
df.iloc[245]

Battery_Power                 1373.0
Has_Bluetooth                    1.0
Clock_Speed                      1.0
Has_Dual_Sim                     1.0
Front_Camera_Mega_Pixels         2.0
Has_4G                           0.0
Internal_Memory                 50.0
Mobile_Depth                     0.2
Mobile_Weight                  127.0
No_Cores                         7.0
Primary_Camera_Mega_Pixels      12.0
Pixel_Height                   105.0
Pixel_Width                    673.0
RAM                           3800.0
Screen_Height                    8.0
Screen_Width                     3.0
Battery_Talk_Time               14.0
Has_3G                           1.0
Has_Touch_Screen                 0.0
Has_WiFi                         0.0
Price_Range                      3.0
Name: 245, dtype: float64

In [11]:
df.dtypes

Battery_Power                   int64
Has_Bluetooth                   int64
Clock_Speed                   float64
Has_Dual_Sim                    int64
Front_Camera_Mega_Pixels        int64
Has_4G                          int64
Internal_Memory                 int64
Mobile_Depth                  float64
Mobile_Weight                   int64
No_Cores                        int64
Primary_Camera_Mega_Pixels      int64
Pixel_Height                    int64
Pixel_Width                     int64
RAM                             int64
Screen_Height                   int64
Screen_Width                    int64
Battery_Talk_Time               int64
Has_3G                          int64
Has_Touch_Screen                int64
Has_WiFi                        int64
Price_Range                     int64
dtype: object

In [12]:
con_int = ['Battery_Power', 'Has_Bluetooth', 'Has_Dual_Sim', 'Front_Camera_Mega_Pixels', 'Has_4G', 
                'Internal_Memory', 'Mobile_Weight', 'No_Cores', 'Primary_Camera_Mega_Pixels', 'Pixel_Height', 
                'Pixel_Width', 'RAM', 'Screen_Height', 'Screen_Width', 'Battery_Talk_Time', 'Has_3G', 'Has_Touch_Screen', 
                'Has_WiFi', 'Price_Range']

for i in con_int:
    df[i] = df[i].astype(int)

In [13]:
df.dtypes

Battery_Power                   int32
Has_Bluetooth                   int32
Clock_Speed                   float64
Has_Dual_Sim                    int32
Front_Camera_Mega_Pixels        int32
Has_4G                          int32
Internal_Memory                 int32
Mobile_Depth                  float64
Mobile_Weight                   int32
No_Cores                        int32
Primary_Camera_Mega_Pixels      int32
Pixel_Height                    int32
Pixel_Width                     int32
RAM                             int32
Screen_Height                   int32
Screen_Width                    int32
Battery_Talk_Time               int32
Has_3G                          int32
Has_Touch_Screen                int32
Has_WiFi                        int32
Price_Range                     int32
dtype: object

In [14]:
df.head()

Unnamed: 0,Battery_Power,Has_Bluetooth,Clock_Speed,Has_Dual_Sim,Front_Camera_Mega_Pixels,Has_4G,Internal_Memory,Mobile_Depth,Mobile_Weight,No_Cores,...,Pixel_Height,Pixel_Width,RAM,Screen_Height,Screen_Width,Battery_Talk_Time,Has_3G,Has_Touch_Screen,Has_WiFi,Price_Range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [15]:
df.sample()

Unnamed: 0,Battery_Power,Has_Bluetooth,Clock_Speed,Has_Dual_Sim,Front_Camera_Mega_Pixels,Has_4G,Internal_Memory,Mobile_Depth,Mobile_Weight,No_Cores,...,Pixel_Height,Pixel_Width,RAM,Screen_Height,Screen_Width,Battery_Talk_Time,Has_3G,Has_Touch_Screen,Has_WiFi,Price_Range
570,1572,1,0.8,1,7,1,8,0.8,169,5,...,577,1237,3461,6,3,10,1,1,1,3


In [16]:
df.isnull().sum()

Battery_Power                 0
Has_Bluetooth                 0
Clock_Speed                   0
Has_Dual_Sim                  0
Front_Camera_Mega_Pixels      0
Has_4G                        0
Internal_Memory               0
Mobile_Depth                  0
Mobile_Weight                 0
No_Cores                      0
Primary_Camera_Mega_Pixels    0
Pixel_Height                  0
Pixel_Width                   0
RAM                           0
Screen_Height                 0
Screen_Width                  0
Battery_Talk_Time             0
Has_3G                        0
Has_Touch_Screen              0
Has_WiFi                      0
Price_Range                   0
dtype: int64

In [17]:
# Splitting the data into Features and Target variables:

X=df.iloc[:,:-1]
y=df.iloc[:,-1:]

In [18]:
X.head()

Unnamed: 0,Battery_Power,Has_Bluetooth,Clock_Speed,Has_Dual_Sim,Front_Camera_Mega_Pixels,Has_4G,Internal_Memory,Mobile_Depth,Mobile_Weight,No_Cores,Primary_Camera_Mega_Pixels,Pixel_Height,Pixel_Width,RAM,Screen_Height,Screen_Width,Battery_Talk_Time,Has_3G,Has_Touch_Screen,Has_WiFi
0,842,0,2.2,0,1,0,7,0.6,188,2,2,20,756,2549,9,7,19,0,0,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,6,905,1988,2631,17,3,7,1,1,0
2,563,1,0.5,1,2,1,41,0.9,145,5,6,1263,1716,2603,11,2,9,1,1,0
3,615,1,2.5,0,0,0,10,0.8,131,6,9,1216,1786,2769,16,8,11,1,0,0
4,1821,1,1.2,0,13,1,44,0.6,141,2,14,1208,1212,1411,8,2,15,1,1,0


In [19]:
y.head()

Unnamed: 0,Price_Range
0,1
1,2
2,2
3,2
4,1


In [20]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=150)

In [21]:
dt = DecisionTreeClassifier()

In [22]:
model = dt.fit(X_train,y_train)

In [23]:
y_pred = model.predict(X_test)
y_pred

array([0, 0, 0, 2, 1, 1, 1, 3, 2, 3, 2, 0, 0, 3, 2, 1, 3, 3, 1, 2, 3, 0,
       1, 0, 3, 1, 0, 3, 3, 2, 3, 2, 2, 3, 3, 1, 1, 3, 0, 3, 2, 0, 3, 3,
       0, 0, 0, 1, 2, 1, 0, 2, 1, 3, 2, 1, 1, 2, 1, 1, 3, 1, 0, 1, 2, 1,
       1, 2, 2, 3, 1, 3, 3, 2, 3, 3, 0, 3, 0, 1, 1, 1, 2, 3, 3, 1, 2, 1,
       3, 1, 1, 2, 1, 1, 1, 3, 2, 0, 3, 2, 2, 2, 0, 2, 1, 1, 3, 2, 0, 0,
       2, 3, 1, 2, 0, 1, 1, 0, 3, 0, 2, 3, 0, 0, 3, 0, 3, 1, 3, 2, 1, 3,
       1, 3, 0, 3, 0, 3, 2, 1, 2, 0, 3, 0, 0, 3, 0, 0, 0, 1, 0, 3, 2, 1,
       0, 3, 3, 3, 1, 1, 2, 1, 2, 0, 0, 0, 2, 2, 3, 1, 2, 3, 1, 1, 3, 3,
       3, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 0, 1, 3, 3, 0, 1, 0, 3, 2, 2, 0,
       3, 2, 1, 0, 3, 2, 1, 3, 3, 0, 0, 3, 0, 2, 3, 0, 0, 1, 3, 1, 2, 2,
       0, 2, 3, 3, 3, 3, 3, 0, 1, 2, 1, 1, 0, 3, 3, 3, 0, 2, 2, 3, 0, 3,
       3, 1, 0, 0, 2, 2, 2, 2, 3, 1, 3, 1, 0, 0, 0, 2, 2, 0, 3, 2, 3, 1,
       0, 0, 1, 2, 2, 0, 2, 1, 3, 3, 2, 1, 3, 1, 2, 0, 0, 0, 0, 1, 0, 0,
       2, 2, 1, 3, 3, 2, 3, 3, 2, 2, 0, 3, 0, 0, 3,

In [24]:
print(confusion_matrix(y_test,y_pred))

[[95  7  0  0]
 [ 7 73  6  0]
 [ 0 11 77 14]
 [ 0  0 14 96]]


In [25]:
#df1 = pd.DataFrame({'Actual': y_test,'Predicted': y_pred})

In [26]:
df1 = df

In [27]:
y_test['Predicted_Value'] = y_pred

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y_test['Predicted_Value'] = y_pred


In [28]:
y_test

Unnamed: 0,Price_Range,Predicted_Value
1855,0,0
1361,0,0
1253,0,0
1937,2,2
975,1,1
...,...,...
30,3,3
1993,3,3
1191,3,3
201,1,1


In [29]:
df_test.head()

Unnamed: 0,id,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,...,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
0,1,1043,1,1.8,1,14,0,5,0.1,193,...,16,226,1412,3476,12,7,2,0,1,0
1,2,841,1,0.5,1,4,1,61,0.8,191,...,12,746,857,3895,6,0,7,1,0,0
2,3,1807,1,2.8,0,1,0,27,0.9,186,...,4,1270,1366,2396,17,10,10,0,1,1
3,4,1546,0,0.5,1,18,1,25,0.5,96,...,20,295,1752,3893,10,0,7,1,1,0
4,5,1434,0,1.4,0,11,1,49,0.5,108,...,18,749,810,1773,15,8,7,1,0,1


In [30]:
df_test.drop(columns='id',inplace=True)

In [31]:
column_names

['Battery_Power',
 'Has_Bluetooth',
 'Clock_Speed',
 'Has_Dual_Sim',
 'Front_Camera_Mega_Pixels',
 'Has_4G',
 'Internal_Memory',
 'Mobile_Depth',
 'Mobile_Weight',
 'No_Cores',
 'Primary_Camera_Mega_Pixels',
 'Pixel_Height',
 'Pixel_Width',
 'RAM',
 'Screen_Height',
 'Screen_Width',
 'Battery_Talk_Time',
 'Has_3G',
 'Has_Touch_Screen',
 'Has_WiFi',
 'Price_Range']

In [32]:
column_names_test = ['Battery_Power',
 'Has_Bluetooth',
 'Clock_Speed',
 'Has_Dual_Sim',
 'Front_Camera_Mega_Pixels',
 'Has_4G',
 'Internal_Memory',
 'Mobile_Depth',
 'Mobile_Weight',
 'No_Cores',
 'Primary_Camera_Mega_Pixels',
 'Pixel_Height',
 'Pixel_Width',
 'RAM',
 'Screen_Height',
 'Screen_Width',
 'Battery_Talk_Time',
 'Has_3G',
 'Has_Touch_Screen',
 'Has_WiFi']

In [33]:
df_test.columns = column_names_test

In [34]:
df_test.head()

Unnamed: 0,Battery_Power,Has_Bluetooth,Clock_Speed,Has_Dual_Sim,Front_Camera_Mega_Pixels,Has_4G,Internal_Memory,Mobile_Depth,Mobile_Weight,No_Cores,Primary_Camera_Mega_Pixels,Pixel_Height,Pixel_Width,RAM,Screen_Height,Screen_Width,Battery_Talk_Time,Has_3G,Has_Touch_Screen,Has_WiFi
0,1043,1,1.8,1,14,0,5,0.1,193,3,16,226,1412,3476,12,7,2,0,1,0
1,841,1,0.5,1,4,1,61,0.8,191,5,12,746,857,3895,6,0,7,1,0,0
2,1807,1,2.8,0,1,0,27,0.9,186,3,4,1270,1366,2396,17,10,10,0,1,1
3,1546,0,0.5,1,18,1,25,0.5,96,8,20,295,1752,3893,10,0,7,1,1,0
4,1434,0,1.4,0,11,1,49,0.5,108,6,18,749,810,1773,15,8,7,1,0,1


In [35]:
result = model.predict(df_test)

In [36]:
result

array([3, 3, 3, 3, 1, 3, 3, 1, 3, 0, 3, 3, 0, 1, 2, 0, 2, 0, 3, 2, 0, 2,
       1, 1, 3, 0, 2, 0, 3, 0, 3, 0, 3, 0, 1, 1, 3, 1, 2, 1, 1, 2, 0, 0,
       0, 1, 0, 3, 1, 2, 1, 0, 2, 0, 3, 1, 3, 1, 0, 3, 3, 3, 0, 1, 1, 1,
       1, 3, 1, 1, 1, 2, 2, 3, 3, 0, 2, 0, 2, 3, 1, 3, 3, 1, 3, 0, 3, 1,
       3, 0, 1, 2, 2, 0, 2, 1, 0, 1, 1, 2, 1, 0, 0, 3, 1, 2, 0, 1, 2, 3,
       3, 2, 1, 3, 3, 3, 3, 2, 3, 0, 0, 3, 2, 1, 2, 0, 3, 2, 2, 2, 0, 2,
       1, 1, 3, 0, 1, 0, 3, 2, 1, 2, 1, 3, 2, 3, 3, 2, 2, 3, 2, 3, 0, 0,
       2, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 1, 0, 3, 0, 0, 0, 2, 1, 0, 1,
       1, 0, 1, 2, 1, 0, 0, 1, 2, 2, 2, 1, 0, 0, 0, 0, 1, 3, 2, 1, 2, 2,
       2, 3, 1, 2, 3, 3, 3, 2, 2, 1, 0, 0, 1, 2, 0, 3, 3, 3, 1, 2, 0, 3,
       2, 2, 3, 1, 0, 1, 0, 3, 0, 1, 0, 2, 2, 1, 2, 1, 3, 0, 3, 1, 2, 0,
       0, 2, 1, 2, 2, 3, 1, 1, 3, 0, 0, 2, 3, 3, 1, 3, 1, 1, 3, 2, 1, 2,
       3, 3, 3, 1, 1, 0, 2, 3, 1, 1, 3, 2, 0, 3, 0, 1, 3, 0, 0, 3, 2, 3,
       2, 2, 1, 3, 3, 2, 3, 1, 2, 1, 1, 0, 2, 3, 1,

In [37]:
df_test['Predicted_Value'] = result

In [38]:
df_test.head()

Unnamed: 0,Battery_Power,Has_Bluetooth,Clock_Speed,Has_Dual_Sim,Front_Camera_Mega_Pixels,Has_4G,Internal_Memory,Mobile_Depth,Mobile_Weight,No_Cores,...,Pixel_Height,Pixel_Width,RAM,Screen_Height,Screen_Width,Battery_Talk_Time,Has_3G,Has_Touch_Screen,Has_WiFi,Predicted_Value
0,1043,1,1.8,1,14,0,5,0.1,193,3,...,226,1412,3476,12,7,2,0,1,0,3
1,841,1,0.5,1,4,1,61,0.8,191,5,...,746,857,3895,6,0,7,1,0,0,3
2,1807,1,2.8,0,1,0,27,0.9,186,3,...,1270,1366,2396,17,10,10,0,1,1,3
3,1546,0,0.5,1,18,1,25,0.5,96,8,...,295,1752,3893,10,0,7,1,1,0,3
4,1434,0,1.4,0,11,1,49,0.5,108,6,...,749,810,1773,15,8,7,1,0,1,1


In [39]:
df_test.iloc[3]

Battery_Power                 1546.0
Has_Bluetooth                    0.0
Clock_Speed                      0.5
Has_Dual_Sim                     1.0
Front_Camera_Mega_Pixels        18.0
Has_4G                           1.0
Internal_Memory                 25.0
Mobile_Depth                     0.5
Mobile_Weight                   96.0
No_Cores                         8.0
Primary_Camera_Mega_Pixels      20.0
Pixel_Height                   295.0
Pixel_Width                   1752.0
RAM                           3893.0
Screen_Height                   10.0
Screen_Width                     0.0
Battery_Talk_Time                7.0
Has_3G                           1.0
Has_Touch_Screen                 1.0
Has_WiFi                         0.0
Predicted_Value                  3.0
Name: 3, dtype: float64

In [40]:
print(model.predict([[1540,0,0.5,1,18,1,25,0.5,96,8,20,295,1752,3893,10,0,7,1,1,0]]))

[3]


In [44]:
for i in df.columns:
    print(f"The Minimum Value in Column '{i}' is {df[i].min()}.")
    print(f"The Maximum Value in Column '{i}' is {df[i].max()}.\n")

The Minimum Value in Column 'Battery_Power' is 501.
The Maximum Value in Column 'Battery_Power' is 1998.

The Minimum Value in Column 'Has_Bluetooth' is 0.
The Maximum Value in Column 'Has_Bluetooth' is 1.

The Minimum Value in Column 'Clock_Speed' is 0.5.
The Maximum Value in Column 'Clock_Speed' is 3.0.

The Minimum Value in Column 'Has_Dual_Sim' is 0.
The Maximum Value in Column 'Has_Dual_Sim' is 1.

The Minimum Value in Column 'Front_Camera_Mega_Pixels' is 0.
The Maximum Value in Column 'Front_Camera_Mega_Pixels' is 19.

The Minimum Value in Column 'Has_4G' is 0.
The Maximum Value in Column 'Has_4G' is 1.

The Minimum Value in Column 'Internal_Memory' is 2.
The Maximum Value in Column 'Internal_Memory' is 64.

The Minimum Value in Column 'Mobile_Depth' is 0.1.
The Maximum Value in Column 'Mobile_Depth' is 1.0.

The Minimum Value in Column 'Mobile_Weight' is 80.
The Maximum Value in Column 'Mobile_Weight' is 200.

The Minimum Value in Column 'No_Cores' is 1.
The Maximum Value in Col

In [45]:
df.dtypes

Battery_Power                   int32
Has_Bluetooth                   int32
Clock_Speed                   float64
Has_Dual_Sim                    int32
Front_Camera_Mega_Pixels        int32
Has_4G                          int32
Internal_Memory                 int32
Mobile_Depth                  float64
Mobile_Weight                   int32
No_Cores                        int32
Primary_Camera_Mega_Pixels      int32
Pixel_Height                    int32
Pixel_Width                     int32
RAM                             int32
Screen_Height                   int32
Screen_Width                    int32
Battery_Talk_Time               int32
Has_3G                          int32
Has_Touch_Screen                int32
Has_WiFi                        int32
Price_Range                     int32
dtype: object

In [50]:
float(i) for i in range(float(0.5,3.1))

SyntaxError: invalid syntax (<ipython-input-50-c8edba608f8f>, line 1)