# MLP classification with the Cancer dataset
- 2020.05.15

## 1. Import packages

In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler

## 2. Load dataset

In [4]:
# Read the cancer data from 'cancer.csv' (a path relative to the working directory).
dataset = pd.read_csv('cancer.csv')

# Preview the first rows to check the column names and value ranges.
dataset.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,benign
0,18.0,10.4,123.0,1000.0,0.118,0.278,0.3,0.147,0.242,0.0787,...,17.3,185.0,2020.0,0.162,0.666,0.712,0.265,0.46,0.119,0.0
1,20.6,17.8,133.0,1330.0,0.0847,0.0786,0.0869,0.0702,0.181,0.0567,...,23.4,159.0,1960.0,0.124,0.187,0.242,0.186,0.275,0.089,0.0
2,19.7,21.3,130.0,1200.0,0.11,0.16,0.197,0.128,0.207,0.06,...,25.5,153.0,1710.0,0.144,0.424,0.45,0.243,0.361,0.0876,0.0
3,11.4,20.4,77.6,386.0,0.142,0.284,0.241,0.105,0.26,0.0974,...,26.5,98.9,568.0,0.21,0.866,0.687,0.258,0.664,0.173,0.0
4,20.3,14.3,135.0,1300.0,0.1,0.133,0.198,0.104,0.181,0.0588,...,16.7,152.0,1580.0,0.137,0.205,0.4,0.163,0.236,0.0768,0.0


In [5]:
# Report the (rows, columns) dimensions of the loaded frame.
print("The shape of data :", dataset.shape)

The shape of data : (569, 31)


## 3. Make Dataset

In [6]:
# Split the frame into the feature matrix X and the label vector Y.
# The label column is selected by name instead of by hard-coded position, so
# the split does not silently pick the wrong columns if the CSV layout changes.
TARGET_COLUMN = 'benign'
X = dataset.drop(columns=TARGET_COLUMN)  # every column except the label
Y = dataset[TARGET_COLUMN]               # the 'benign' label column

In [7]:
# Preview the first rows of the feature matrix.
X.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,18.0,10.4,123.0,1000.0,0.118,0.278,0.3,0.147,0.242,0.0787,...,25.4,17.3,185.0,2020.0,0.162,0.666,0.712,0.265,0.46,0.119
1,20.6,17.8,133.0,1330.0,0.0847,0.0786,0.0869,0.0702,0.181,0.0567,...,25.0,23.4,159.0,1960.0,0.124,0.187,0.242,0.186,0.275,0.089
2,19.7,21.3,130.0,1200.0,0.11,0.16,0.197,0.128,0.207,0.06,...,23.6,25.5,153.0,1710.0,0.144,0.424,0.45,0.243,0.361,0.0876
3,11.4,20.4,77.6,386.0,0.142,0.284,0.241,0.105,0.26,0.0974,...,14.9,26.5,98.9,568.0,0.21,0.866,0.687,0.258,0.664,0.173
4,20.3,14.3,135.0,1300.0,0.1,0.133,0.198,0.104,0.181,0.0588,...,22.5,16.7,152.0,1580.0,0.137,0.205,0.4,0.163,0.236,0.0768


In [8]:
# Preview the first label values.
Y.head()

0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
Name: benign, dtype: float64

In [9]:
# Confirm that X and Y have the same number of rows before splitting.
print("The shape of X :", X.shape)
print("The shape of Y :", Y.shape)

The shape of X : (569, 30)
The shape of Y : (569,)


In [10]:
# Hold out part of the data for evaluation. Stratifying on Y keeps the class
# ratio the same in both parts; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    stratify=Y,
    random_state=0,
)

In [11]:
# Check how many samples ended up in the training and test splits.
print(y_train.shape)
print(y_test.shape)

(426,)
(143,)


## 4. MLP

In [85]:
# Baseline model: MLPClassifier with default hyper-parameters; only the seed
# is fixed so that the run is repeatable.
# NOTE(review): the features are passed in unscaled even though MinMaxScaler is
# imported at the top of the notebook - confirm whether scaling is intended here.
mlp = MLPClassifier(random_state=42).fit(x_train, y_train)
mlp  # show the fitted estimator as the cell output

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=200, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=42, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

## 5. Accuracy Analysis

In [86]:
# Show the labels predicted for the held-out rows, then the baseline model's
# accuracy on the training split and on the test split.
print("prediction :", mlp.predict(x_test))
for caption, features, targets in (
    ("train accuracy :", x_train, y_train),
    ("test accuracy :", x_test, y_test),
):
    print(caption, mlp.score(features, targets))

prediction : [1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1.
 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0.
 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 1. 0. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0.
 1. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0. 0. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 0.]
train accuracy : 0.9389671361502347
test accuracy : 0.9300699300699301


## 6. Underfit? (high bias)

In [87]:
# Same configuration as the baseline, but allowing up to 1000 iterations
# instead of 200, to test whether the baseline merely stopped too early.
# max_iter is only an upper bound, so this does not force longer training.
mlp1 = MLPClassifier(max_iter=1000, random_state=42).fit(x_train, y_train)
mlp1  # show the fitted estimator as the cell output

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=1000, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=42, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [88]:
# Show the labels predicted for the held-out rows, then the accuracy of the
# higher-iteration-cap model on the training split and on the test split.
print("prediction :", mlp1.predict(x_test))
for caption, features, targets in (
    ("train accuracy :", x_train, y_train),
    ("test accuracy :", x_test, y_test),
):
    print(caption, mlp1.score(features, targets))

prediction : [1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1.
 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0.
 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 1. 0. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0.
 1. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0. 0. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 0.]
train accuracy : 0.9389671361502347
test accuracy : 0.9300699300699301


In [97]:
# Capacity change: three hidden layers of 100 units each instead of a single
# 100-unit layer, with the iteration cap kept at 200.
mlp2 = MLPClassifier(
    max_iter=200,
    hidden_layer_sizes=[100, 100, 100],
    random_state=42,
).fit(x_train, y_train)
mlp2  # show the fitted estimator as the cell output

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=[100, 100, 100], learning_rate='constant',
              learning_rate_init=0.001, max_iter=200, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=42, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [98]:
# Show the labels predicted for the held-out rows, then the accuracy of the
# three-hidden-layer model on the training split and on the test split.
print("prediction :", mlp2.predict(x_test))
for caption, features, targets in (
    ("train accuracy :", x_train, y_train),
    ("test accuracy :", x_test, y_test),
):
    print(caption, mlp2.score(features, targets))

prediction : [1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 1. 0. 1. 1.
 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 1. 1.
 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0. 1. 1. 0. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0.
 1. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0. 0. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 0.]
train accuracy : 0.9154929577464789
test accuracy : 0.9370629370629371
