# CLASSIFICATION:

# 2. Breast Cancer Dataset

# K-Nearest Neighbor Algorithm

In [1]:
import numpy as np
from sklearn import datasets

In [2]:
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target

In [3]:
len(x)

569

In [4]:
len(y)

569

In [5]:
x_tr=breast_cancer.data[:-100]
y_tr=breast_cancer.target[:-100]
x_te=breast_cancer.data[-100:]
y_te=breast_cancer.data[-100:]

In [6]:
len(x_tr)

469

In [7]:
from sklearn.neighbors import KNeighborsClassifier
knn=KNeighborsClassifier()
knn.fit(x_tr,y_tr)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform')

In [8]:
y_prr=knn.predict(x_te)
y_prr

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1])

In [9]:
y_te

array([[1.162e+01, 1.818e+01, 7.638e+01, ..., 1.416e-01, 2.660e-01,
        9.270e-02],
       [9.667e+00, 1.849e+01, 6.149e+01, ..., 6.560e-02, 3.174e-01,
        8.524e-02],
       [1.204e+01, 2.814e+01, 7.685e+01, ..., 5.547e-02, 2.404e-01,
        6.639e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

# *Performance check-

# 1. Confusion matrix

In [10]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
cnf_matrix=confusion_matrix(y_te,y_prr)
print(cnf_matrix)

ValueError: Classification metrics can't handle a mix of continuous-multioutput and binary targets

In [11]:
acc_sc=accuracy_score(y_te,y_prr)
print(acc_sc)

ValueError: Classification metrics can't handle a mix of continuous-multioutput and binary targets

### Randomization to get better accuracy

In [12]:
from sklearn import datasets
import numpy as np
np.random.seed(0)
indices=np.random.permutation(len(x))
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
x_rt=x[indices[:-100]]
y_rt=y[indices[:-100]]
x_xc=x[indices[-100:]]
y_xc=y[indices[-100:]]
from sklearn.neighbors import KNeighborsClassifier
knn=KNeighborsClassifier()
knn.fit(x_rt,y_rt)
y_hg=knn.predict(x_xc)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
cnf_matrix=confusion_matrix(y_xc,y_hg)
print(cnf_matrix)
acc_sc=accuracy_score(y_xc,y_hg)
print(acc_sc)

[[36  4]
 [ 0 60]]
0.96


### With seed 1

In [13]:
from sklearn import datasets
import numpy as np
np.random.seed(1)
indices=np.random.permutation(len(x))
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
x_rt=x[indices[:-100]]
y_rt=y[indices[:-100]]
x_xc=x[indices[-100:]]
y_xc=y[indices[-100:]]
from sklearn.neighbors import KNeighborsClassifier
knn=KNeighborsClassifier()
knn.fit(x_rt,y_rt)
y_hg=knn.predict(x_xc)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
cnf_matrix=confusion_matrix(y_xc,y_hg)
print(cnf_matrix)
acc_sc=accuracy_score(y_xc,y_hg)
print(acc_sc)

[[35  5]
 [ 2 58]]
0.93


# 2. 10 cross validation method

In [14]:
x_folds=np.array_split(x,10)
y_folds=np.array_split(y,10)
scores=list()   

for k in range(10):
    x_tr=list(x_folds)
    x_te=x_tr.pop(k)           
    x_tr=np.concatenate(x_tr)
    y_tr=list(y_folds)
    y_te=y_tr.pop(k)
    y_tr=np.concatenate(y_tr)
    scores.append(knn.fit(x_tr,y_tr).score(x_te,y_te))
print(scores)

[0.8070175438596491, 0.9298245614035088, 0.9298245614035088, 0.8947368421052632, 0.9824561403508771, 0.9473684210526315, 0.9473684210526315, 0.9473684210526315, 0.9122807017543859, 0.9642857142857143]


# Decision Tree

In [15]:
from sklearn.tree import DecisionTreeClassifier
decision_tree=DecisionTreeClassifier()
from sklearn import datasets

In [16]:
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target

In [17]:
x_tk=breast_cancer.data[:-100]
y_tk=breast_cancer.target[:-100]
x_tl=breast_cancer.data[-100:]
y_tl=breast_cancer.data[-100:]

In [18]:
decision_tree.fit(x_tk,y_tk)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [19]:
y_py=decision_tree.predict(x_tl)
y_py

array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1])

In [20]:
y_tl

array([[1.162e+01, 1.818e+01, 7.638e+01, ..., 1.416e-01, 2.660e-01,
        9.270e-02],
       [9.667e+00, 1.849e+01, 6.149e+01, ..., 6.560e-02, 3.174e-01,
        8.524e-02],
       [1.204e+01, 2.814e+01, 7.685e+01, ..., 5.547e-02, 2.404e-01,
        6.639e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

# *Performance Check-

# 1.Confusion matrix

In [23]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

cnf_matrix=confusion_matrix(y_tl,y_py)
print(cnf_matrix)

ValueError: Classification metrics can't handle a mix of continuous-multioutput and binary targets

In [3]:
acc_sc=accuracy_score(y_tl, y_py)
print(acc_sc)

NameError: name 'accuracy_score' is not defined

### Randomization to get better accuracy

In [21]:
from sklearn import datasets
import numpy as np
np.random.seed(0)
indices=np.random.permutation(len(x))
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
x_vb=x[indices[:-100]]
y_vb=y[indices[:-100]]
x_zb=x[indices[-100:]]
y_zb=y[indices[-100:]]
from sklearn.tree import DecisionTreeClassifier
decision_tree=DecisionTreeClassifier()
decision_tree.fit(x_vb,y_vb)
y_cb=decision_tree.predict(x_zb)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
cnf_matrix=confusion_matrix(y_zb,y_cb)
print(cnf_matrix)
acc_sc=accuracy_score(y_zb,y_cb)
print(acc_sc)

[[39  1]
 [ 4 56]]
0.95


### With seed 1

In [22]:
from sklearn import datasets
import numpy as np
np.random.seed(1)
indices=np.random.permutation(len(x))
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
x_vb=x[indices[:-100]]
y_vb=y[indices[:-100]]
x_zb=x[indices[-100:]]
y_zb=y[indices[-100:]]
from sklearn.tree import DecisionTreeClassifier
decision_tree=DecisionTreeClassifier()
decision_tree.fit(x_vb,y_vb)
y_cb=decision_tree.predict(x_zb)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
cnf_matrix=confusion_matrix(y_zb,y_cb)
print(cnf_matrix)
acc_sc=accuracy_score(y_zb,y_cb)
print(acc_sc)

[[35  5]
 [ 1 59]]
0.94


### With seed 2

In [23]:
from sklearn import datasets
import numpy as np
np.random.seed(2)
indices=np.random.permutation(len(x))
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
x_vb=x[indices[:-100]]
y_vb=y[indices[:-100]]
x_zb=x[indices[-100:]]
y_zb=y[indices[-100:]]
from sklearn.tree import DecisionTreeClassifier
decision_tree=DecisionTreeClassifier()
decision_tree.fit(x_vb,y_vb)
y_cb=decision_tree.predict(x_zb)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
cnf_matrix=confusion_matrix(y_zb,y_cb)
print(cnf_matrix)
acc_sc=accuracy_score(y_zb,y_cb)
print(acc_sc)

[[32  5]
 [ 1 62]]
0.94


# 2. 10 cross validation method

In [25]:
x_folds=np.array_split(x,10)
y_folds=np.array_split(y,10)
scores=list()   

for k in range(10):
    x_tk=list(x_folds)
    x_tl=x_tk.pop(k)           
    x_tk=np.concatenate(x_tk)
    y_tk=list(y_folds)
    y_tl=y_tk.pop(k)
    y_tk=np.concatenate(y_tk)
    scores.append(decision_tree.fit(x_tk,y_tk).score(x_tl,y_tl))
print(scores)

[0.9649122807017544, 0.9122807017543859, 0.8771929824561403, 0.9649122807017544, 0.9122807017543859, 0.9824561403508771, 0.8947368421052632, 0.9473684210526315, 0.9824561403508771, 0.8928571428571429]


# Naive-Baye's Classification

In [24]:
from sklearn.naive_bayes import GaussianNB
naive_bayes=GaussianNB()
from sklearn import datasets

In [25]:
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target

In [26]:
x_to=breast_cancer.data[:-100]
y_to=breast_cancer.target[:-100]
x_tp=breast_cancer.data[-100:]
y_tp=breast_cancer.data[-100:]

In [27]:
naive_bayes.fit(x_to,y_to)

GaussianNB(priors=None, var_smoothing=1e-09)

In [28]:
y_pb=naive_bayes.predict(x_tp)
y_pb

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1])

In [29]:
y_tp

array([[1.162e+01, 1.818e+01, 7.638e+01, ..., 1.416e-01, 2.660e-01,
        9.270e-02],
       [9.667e+00, 1.849e+01, 6.149e+01, ..., 6.560e-02, 3.174e-01,
        8.524e-02],
       [1.204e+01, 2.814e+01, 7.685e+01, ..., 5.547e-02, 2.404e-01,
        6.639e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

# *Performance check

# 1. Confusion matrix

In [32]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

cnf_matrix=confusion_matrix(y_tp,y_pb)
print(cnf_matrix)

ValueError: Classification metrics can't handle a mix of continuous-multioutput and binary targets

In [6]:
acc_sc=accuracy_score(y_tp, y_pb)
print(acc_sc)

NameError: name 'accuracy_score' is not defined

### Randomization to get better accuracy

In [30]:
from sklearn import datasets
import numpy as np
np.random.seed(0)
indices=np.random.permutation(len(x))
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
x_qw=x[indices[:-100]]
y_qw=y[indices[:-100]]
x_rw=x[indices[-100:]]
y_rw=y[indices[-100:]]
from sklearn.naive_bayes import GaussianNB
naive_bayes=GaussianNB()
naive_bayes.fit(x_qw,y_qw)
y_bn=naive_bayes.predict(x_rw)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
cnf_matrix=confusion_matrix(y_rw,y_bn)
print(cnf_matrix)
acc_sc=accuracy_score(y_rw,y_bn)
print(acc_sc)

[[37  3]
 [ 1 59]]
0.96


### With seed 1

In [31]:
from sklearn import datasets
import numpy as np
np.random.seed(1)
indices=np.random.permutation(len(x))
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
x_qw=x[indices[:-100]]
y_qw=y[indices[:-100]]
x_rw=x[indices[-100:]]
y_rw=y[indices[-100:]]
from sklearn.naive_bayes import GaussianNB
naive_bayes=GaussianNB()
naive_bayes.fit(x_qw,y_qw)
y_bn=naive_bayes.predict(x_rw)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
cnf_matrix=confusion_matrix(y_rw,y_bn)
print(cnf_matrix)
acc_sc=accuracy_score(y_rw,y_bn)
print(acc_sc)

[[35  5]
 [ 0 60]]
0.95


# 2. 10 cross validation method

In [32]:
x_folds=np.array_split(x,10)
y_folds=np.array_split(y,10)
scores=list()   

for k in range(10):
    x_to=list(x_folds)
    x_tp=x_to.pop(k)           
    x_to=np.concatenate(x_to)
    y_to=list(y_folds)
    y_tp=y_to.pop(k)
    y_to=np.concatenate(y_to)
    scores.append(naive_bayes.fit(x_to,y_to).score(x_tp,y_tp))
print(scores)

[0.8947368421052632, 0.8596491228070176, 0.9122807017543859, 0.9298245614035088, 0.9473684210526315, 0.9649122807017544, 0.9824561403508771, 0.9649122807017544, 0.9473684210526315, 0.9642857142857143]


# Support Vector Machine

In [33]:
from sklearn import datasets, svm

In [34]:
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target

In [35]:
x_tw=breast_cancer.data[:-100]
y_tw=breast_cancer.target[:-100]
x_r=breast_cancer.data[-100:]
y_r=breast_cancer.target[-100:]

In [36]:
svc=svm.SVC(C=1,kernel='linear')
svc.fit(x_tw[:-100],y_tw[:-100]) 

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [37]:
y_pg=svc.predict(x_r)
y_pg

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,
       1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1])

In [38]:
y_r

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,
       1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1])

# *Performance check

# 1. Confusion matrix

In [39]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

cnf_matrix=confusion_matrix(y_r ,y_pg)
print(cnf_matrix)

[[23  0]
 [ 4 73]]


In [40]:
acc_sc=accuracy_score(y_r, y_pg)
print(acc_sc)

0.96


# 2. 10 cross validation method

In [41]:
x_folds=np.array_split(x,10)
y_folds=np.array_split(y,10)
scores=list()   

for k in range(10):
    x_tw=list(x_folds)
    x_r=x_tw.pop(k)           
    x_tw=np.concatenate(x_tw)
    y_tw=list(y_folds)
    y_r=y_tw.pop(k)
    y_tw=np.concatenate(y_tw)
    scores.append(svc.fit(x_tw,y_tw).score(x_r,y_r))
print(scores)

[0.9122807017543859, 0.9298245614035088, 0.9473684210526315, 0.9649122807017544, 0.9649122807017544, 0.9649122807017544, 0.9824561403508771, 0.9473684210526315, 0.9473684210526315, 0.9642857142857143]


# SVM with Radial Base Function Kernel (RBF)

In [42]:
import numpy as np
from sklearn import datasets,svm

In [43]:
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target

In [44]:
x_trb=breast_cancer.data[:-100]
y_trb=breast_cancer.target[:-100]
x_twe=breast_cancer.data[-100:]
y_twe=breast_cancer.data[-100:]

In [45]:
svc=svm.SVC(C=1,kernel='rbf',gamma="auto")
svc.fit(x_trb[:-100],y_trb[:-100]) 

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [46]:
y_nm=svc.predict(x_twe)
y_nm

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [47]:
y_twe

array([[1.162e+01, 1.818e+01, 7.638e+01, ..., 1.416e-01, 2.660e-01,
        9.270e-02],
       [9.667e+00, 1.849e+01, 6.149e+01, ..., 6.560e-02, 3.174e-01,
        8.524e-02],
       [1.204e+01, 2.814e+01, 7.685e+01, ..., 5.547e-02, 2.404e-01,
        6.639e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

# *Performance check

# 1. Confusion matrix

In [19]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

cnf_matrix=confusion_matrix(y_twe,y_nm)
print(cnf_matrix)

ValueError: Classification metrics can't handle a mix of continuous-multioutput and binary targets

In [20]:
acc_sc=accuracy_score(y_twe, y_nm)
print(acc_sc)

ValueError: Classification metrics can't handle a mix of continuous-multioutput and binary targets

### Randomization to get better accuracy

In [48]:
from sklearn import datasets,svm
import numpy as np
np.random.seed(0)
indices=np.random.permutation(len(x))
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
x_zc=x[indices[:-100]]
y_zc=y[indices[:-100]]
x_hk=x[indices[-100:]]
y_hk=y[indices[-100:]]
svc=svm.SVC(C=1,kernel='rbf')
svc.fit(x_zc,y_zc)
y_we=svc.predict(x_hk)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
cnf_matrix=confusion_matrix(y_hk,y_we)
print(cnf_matrix)
acc_sc=accuracy_score(y_hk,y_we)
print(acc_sc)

[[ 0 40]
 [ 0 60]]
0.6




### With seed 1

In [49]:
from sklearn import datasets,svm
import numpy as np
np.random.seed(1)
indices=np.random.permutation(len(x))
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
x_zc=x[indices[:-100]]
y_zc=y[indices[:-100]]
x_hk=x[indices[-100:]]
y_hk=y[indices[-100:]]
svc=svm.SVC(C=1,kernel='rbf')
svc.fit(x_zc,y_zc)
y_we=svc.predict(x_hk)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
cnf_matrix=confusion_matrix(y_hk,y_we)
print(cnf_matrix)
acc_sc=accuracy_score(y_hk,y_we)
print(acc_sc)

[[ 0 40]
 [ 0 60]]
0.6




### With seed 10

In [50]:
from sklearn import datasets,svm
import numpy as np
np.random.seed(10)
indices=np.random.permutation(len(x))
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
x_zc=x[indices[:-100]]
y_zc=y[indices[:-100]]
x_hk=x[indices[-100:]]
y_hk=y[indices[-100:]]
svc=svm.SVC(C=1,kernel='rbf')
svc.fit(x_zc,y_zc)
y_we=svc.predict(x_hk)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
cnf_matrix=confusion_matrix(y_hk,y_we)
print(cnf_matrix)
acc_sc=accuracy_score(y_hk,y_we)
print(acc_sc)

[[ 0 38]
 [ 0 62]]
0.62




# 2. 10 cross validation method

In [52]:
x_folds=np.array_split(x,10)
y_folds=np.array_split(y,10)
scores=list()   

for k in range(10):
    x_trb=list(x_folds)
    x_twe=x_trb.pop(k)           
    x_trb=np.concatenate(x_trb)
    y_trb=list(y_folds)
    y_twe=y_trb.pop(k)
    y_trb=np.concatenate(y_trb)
    scores.append(svc.fit(x_trb,y_trb).score(x_twe,y_twe))
print(scores)



[0.19298245614035087, 0.6140350877192983, 0.631578947368421, 0.5087719298245614, 0.5087719298245614, 0.7894736842105263, 0.7192982456140351, 0.7719298245614035, 0.7719298245614035, 0.7678571428571429]


In [81]:
from sklearn import datasets
import numpy as np
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
np.random.seed(0)
indices=np.random.permutation(len(x))
x_xbn=x[indices[:-100]]
y_xbn=y[indices[:-100]]
x_xbc=x[indices[-100:]]
y_xbc=y[indices[-100:]]
from sklearn.neural_network import MLPClassifier
mlp=MLPClassifier()
mlp.fit(x_xbn,y_xbn)
y_pr_mlp=mlp.predict(x_xbc)
from sklearn.metrics import confusion_matrix
cnf_matrix=confusion_matrix(y_xbc,y_pr_mlp)
from sklearn.metrics import accuracy_score
print(cnf_matrix)
acc_sc=accuracy_score(y_xbc, y_pr_mlp)
print(acc_sc)        

[[33  7]
 [ 0 60]]
0.93


In [82]:
from sklearn import datasets
import numpy as np
breast_cancer=datasets.load_breast_cancer()
x=breast_cancer.data
y=breast_cancer.target
np.random.seed(0)
indices=np.random.permutation(len(x))
x_xbn=x[indices[:-100]]
y_xbn=y[indices[:-100]]
x_xbc=x[indices[-100:]]
y_xbc=y[indices[-100:]]
from sklearn.linear_model import Perceptron
sp=Perceptron()
sp.fit(x_xbn,y_xbn)
y_pr_sp=sp.predict(x_xbc)
from sklearn.metrics import confusion_matrix
cnf_matrix=confusion_matrix(y_xbc,y_pr_sp)
from sklearn.metrics import accuracy_score
print(cnf_matrix)
acc_sc=accuracy_score(y_xbc,y_pr_sp)
print(acc_sc)

[[30 10]
 [ 1 59]]
0.89


