In [1]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

In [3]:
train_df.isnull().sum()

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64

In [4]:
test_df.isnull().sum()

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64

In [5]:
X_train = train_df.iloc[:, :-1]
y_train = train_df.iloc[:, -1]

X_test = test_df.iloc[:, :-1]
y_test = test_df.iloc[:, -1]

In [6]:
def add_columns(dataset):
    dataset.loc[ dataset['mobile_wt'] < 100, 'weigth_mobile'] = 1
    dataset.loc[(dataset['mobile_wt'] >= 100) & (dataset['mobile_wt'] < 130), 'weigth_mobile'] = 2
    dataset.loc[(dataset['mobile_wt'] >= 130) & (dataset['mobile_wt'] < 160), 'weigth_mobile'] = 3
    dataset.loc[(dataset['mobile_wt'] >= 160) & (dataset['mobile_wt'] <= 190), 'weigth_mobile'] = 4
    dataset.loc[ dataset['mobile_wt'] > 190, 'weigth_mobile'] = 5
    
    dataset.loc[ dataset['px_height'] < 300, 'px_height_rate'] = 1
    dataset.loc[(dataset['px_height'] >= 300) & (dataset['px_height'] < 600), 'px_height_rate'] = 2
    dataset.loc[(dataset['px_height'] >= 600) & (dataset['px_height'] < 900), 'px_height_rate'] = 3
    dataset.loc[(dataset['px_height'] >= 900) & (dataset['px_height'] <= 1200), 'px_height_rate'] = 4
    dataset.loc[(dataset['px_height'] >= 1200) & (dataset['px_height'] <= 1500), 'px_height_rate'] = 5
    dataset.loc[(dataset['px_height'] >= 1500) & (dataset['px_height'] <= 1800), 'px_height_rate'] = 6
    dataset.loc[ dataset['px_height'] > 1800, 'px_height_rate'] = 7
    
    dataset.loc[ dataset['px_width'] < 300, 'px_width_rate'] = 1
    dataset.loc[(dataset['px_width'] >= 300) & (dataset['px_width'] < 600), 'px_width_rate'] = 2
    dataset.loc[(dataset['px_width'] >= 600) & (dataset['px_width'] < 900), 'px_width_rate'] = 3
    dataset.loc[(dataset['px_width'] >= 900) & (dataset['px_width'] <= 1200), 'px_width_rate'] = 4
    dataset.loc[(dataset['px_width'] >= 1200) & (dataset['px_width'] <= 1500), 'px_width_rate'] = 5
    dataset.loc[(dataset['px_width'] >= 1500) & (dataset['px_width'] <= 1800), 'px_width_rate'] = 6
    dataset.loc[ dataset['px_width'] > 1800, 'px_width_rate'] = 7
    
#     dataset.loc[ dataset['ram'] < 512, 'ram_rate'] = 1
#     dataset.loc[(dataset['ram'] >= 512) & (dataset['ram'] < 1024), 'ram_rate'] = 2
#     dataset.loc[(dataset['ram'] >= 1024) & (dataset['ram'] < 2048), 'ram_rate'] = 3
#     dataset.loc[ dataset['ram'] >= 2048, 'ram_rate'] = 4
    
    return dataset

In [7]:
X_train = add_columns(X_train)
X_test = add_columns(X_test)

X_train.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,weigth_mobile,px_height_rate,px_width_rate
0,1923,0,0.5,1,7,0,46,0.5,191,1,...,1489,10,9,3,1,1,1,5.0,3.0,6.0
1,633,1,2.2,0,0,1,49,0.1,139,8,...,3560,11,1,16,1,1,1,3.0,2.0,4.0
2,1236,0,0.9,1,2,1,57,0.1,188,1,...,1406,14,12,20,1,0,1,4.0,2.0,3.0
3,781,0,1.1,0,2,0,38,0.4,198,5,...,3508,13,8,5,0,0,1,5.0,2.0,6.0
4,1456,1,0.5,1,7,0,7,0.4,105,5,...,1587,6,5,20,1,0,1,2.0,3.0,4.0


In [8]:
X_train = X_train.values
y_train = y_train.values

X_test = X_test.values
y_test = y_test.values

In [9]:
def Normalize(X, y):
    minmax_scaler = MinMaxScaler()
    stander_scaler = StandardScaler()
    
    X[:, 0] = minmax_scaler.fit_transform(X[:, 0].reshape(-1, 1)).ravel() #battery_power
    X[:, 8] = minmax_scaler.fit_transform(X[:, 8].reshape(-1, 1)).ravel()#mobile_wt
    
    X[:, 11] = minmax_scaler.fit_transform(X[:, 11].reshape(-1, 1)).ravel()#px_height
    X[:, 12] = minmax_scaler.fit_transform(X[:, 12].reshape(-1, 1)).ravel() #px_width
    X[:, 13] = minmax_scaler.fit_transform(X[:, 13].reshape(-1, 1)).ravel()#ram
    
    return X, y

In [10]:
X_train, y_train = Normalize(X_train, y_train)
X_test, y_test = Normalize(X_test, y_test)

In [11]:
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
print(X_train[0])

(1600, 23) (1600,) (400, 23) (400,)
[ 0.9498998   0.          0.5         1.          7.          0.
 46.          0.5         0.925       1.         10.          0.39132653
  0.84045394  0.32914439 10.          9.          3.          1.
  1.          1.          5.          3.          6.        ]


In [12]:
from sklearn.svm import SVC

# clf = SVC(kernel='linear', verbose=True)
clf = SVC(kernel='linear', C = 1000)
clf.fit(X_train, y_train)

SVC(C=1000, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [13]:
y_pred = clf.predict(X_test)

In [14]:
print("Accuracy_score:", accuracy_score(y_test, y_pred))

Accuracy_score: 0.975
