In [1]:
import numpy as np
from sklearn import datasets
from sklearn.base import ClassifierMixin
from sklearn.neighbors.base import SupervisedIntegerMixin
from sklearn.neighbors.base import NeighborsBase,KNeighborsMixin

In [2]:
# Load the scikit-learn breast-cancer dataset, then pull out the feature
# matrix and the label vector in one step.
breast_cancer = datasets.load_breast_cancer()
breast_cancer_X, breast_cancer_y = breast_cancer.data, breast_cancer.target

In [3]:
# (n_samples, n_features) of the feature matrix; the call form of print
# works on both Python 2 and Python 3.
print(breast_cancer_X.shape)

(569, 30)


In [4]:
# Shape of the label vector, followed by the distinct class labels
# (displayed as the cell's result). The call form of print works on both
# Python 2 and Python 3.
print(breast_cancer_y.shape)
np.unique(breast_cancer_y)

(569,)


array([0, 1])

In [5]:
class Majority(SupervisedIntegerMixin, ClassifierMixin, NeighborsBase, KNeighborsMixin):
    """Baseline classifier that always predicts the most frequent training label.

    Parameters
    ----------
    estimators : object, optional
        Stored on the instance but not used by ``fit`` or ``predict``.
    voting : str, optional
        Stored on the instance but not used by ``fit`` or ``predict``.

    Attributes
    ----------
    result : label or None
        The majority label found by ``fit``; ``None`` until ``fit`` is called.
    """

    def __init__(self,estimators=None,voting='hard'):
        self.voting=voting
        self.result=None
        self.estimators=estimators

    def fit(self,train_X,train_y):
        """Remember the most frequent label in ``train_y``.

        ``train_X`` is accepted for API compatibility and ignored. Ties are
        resolved in favour of the smallest label, because ``np.unique``
        returns sorted labels and ``np.argmax`` picks the first maximum.

        Returns
        -------
        self : Majority
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``train_y`` contains no labels.
        """
        unique_label,count_label = np.unique(train_y,return_counts=True)
        if unique_label.size == 0:
            raise ValueError("Majority.fit needs at least one training label")
        self.result=unique_label[np.argmax(count_label)]
        return self

    def predict(self,X):
        """Return the stored majority label once per row of ``X``.

        Returns
        -------
        numpy.ndarray
            Float array of length ``X.shape[0]`` filled with ``self.result``.

        Raises
        ------
        ValueError
            If the estimator has not been fitted yet.
        """
        if self.result is None:
            raise ValueError("Majority.predict called before fit")
        # One vectorized fill instead of a per-element Python loop; the float
        # dtype matches the np.zeros buffer the loop version wrote into.
        return np.full(np.shape(X)[0], self.result, dtype=float)

In [6]:
# The first 400 rows form the training split; every remaining row is held
# out for testing. The data is sliced in its stored order (no shuffling).
breast_cancer_train_X, breast_cancer_test_X = breast_cancer_X[:400], breast_cancer_X[400:]
breast_cancer_train_y, breast_cancer_test_y = breast_cancer_y[:400], breast_cancer_y[400:]

In [7]:
# Fit the majority-class baseline on the training split and measure its
# accuracy on the held-out rows.
mj = Majority()
mj.fit(breast_cancer_train_X,breast_cancer_train_y)
Y_predict=mj.predict(breast_cancer_test_X)
number_element = breast_cancer_test_X.shape[0]
# Accuracy = matching predictions / number of test rows, counted with one
# vectorized comparison instead of a Python loop.
acc=float(np.sum(Y_predict==breast_cancer_test_y))/float(number_element)
# The call form of print works on both Python 2 and Python 3.
print(acc)

0.769230769231


In [8]:
from sklearn.model_selection import cross_val_score
# Cross-validated scores of the majority-class baseline, using
# cross_val_score's default folds and scoring.
# NOTE(review): despite the name, `knn_score` holds the Majority baseline's
# scores, not a k-NN model's.
knn_score = cross_val_score(estimator=mj, X=breast_cancer_X, y=breast_cancer_y)
knn_score.mean()

0.62742040285899936

In [9]:
# Tally how many samples carry each distinct class label.
unique_label, count_label = np.unique(breast_cancer_y, return_counts=True)

In [10]:
# Per-class sample counts, in the sorted label order returned by np.unique.
# The call form of print works on both Python 2 and Python 3.
print(count_label)

[212 357]


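The dataset contains 357 positive and 212 negative samples, so a sample drawn independently and uniformly at random is positive with probability 357/569 ≈ 63%. Always predicting the majority (positive) class therefore gives about 63% accuracy, which matches the cross-validated score of the majority baseline above.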

In [11]:
from sklearn.linear_model.base import BaseEstimator,LinearClassifierMixin,SparseCoefMixin

In [12]:
def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    The direct formula overflows in ``exp(-z)`` for large negative ``z``.
    Here the exponential is only ever taken of a non-positive number, so it
    stays in ``(0, 1]`` and cannot overflow.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Sigmoid of ``z``: a NumPy scalar for scalar input, otherwise an array
        with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))
    # For z >= 0: 1 / (1 + e^-z).  For z < 0 the equivalent e^z / (1 + e^z).
    out = np.where(z >= 0, 1.0/(1.0+decay), decay/(1.0+decay))
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return out[()] if out.ndim == 0 else out

In [24]:
class LRlinear(BaseEstimator,LinearClassifierMixin,SparseCoefMixin):
    """Binary logistic regression trained by full-batch gradient ascent.

    Relies on the module-level ``sigmoid`` function. Labels are expected to
    be 0/1, since ``predict`` only ever emits 0.0 or 1.0.

    Parameters
    ----------
    alpha : float
        Step size applied to the gradient at every iteration.
    maxiter : int
        Number of gradient steps; there is no early-stopping criterion.

    Attributes
    ----------
    w : numpy.ndarray of shape (n_features + 1, 1) or None
        Learned weights with the bias term in row 0; ``None`` before ``fit``.

    Notes
    -----
    NOTE(review): the gradient is summed over samples (not averaged) and the
    inputs are not rescaled, so with unscaled features the logits may become
    very large -- confirm whether standardizing the data is desired.
    """
    def __init__(self,alpha=0.1,maxiter=10000):
        self.alpha=alpha
        self.maxiter=maxiter
        self.w=None

    @staticmethod
    def _with_bias(X):
        """Return ``X`` with a leading column of ones for the bias weight."""
        X = np.asarray(X, dtype=float)
        return np.hstack((np.ones((X.shape[0],1)),X))

    def fit(self,X,y):
        """Learn the weights from ``X`` (n_samples, n_features) and labels ``y``.

        The bias-augmented design matrix and the column-shaped labels are kept
        on the instance as ``X_train`` / ``y_train``, with their dimensions in
        ``m`` / ``n``.

        Returns
        -------
        self : LRlinear
            The fitted estimator, so calls can be chained.
        """
        self.X_train = self._with_bias(X)
        self.m,self.n = self.X_train.shape
        y = np.asarray(y)
        self.y_train = y.reshape(y.shape[0],1)
        self.w = np.ones((self.n,1))

        # The transpose does not change between iterations; take it once.
        X_t = np.transpose(self.X_train)
        for _ in range(self.maxiter):
            hw = sigmoid(np.dot(self.X_train,self.w))
            # Log-likelihood gradient is X^T (y - h); step uphill along it.
            self.w = self.w+self.alpha*X_t.dot(self.y_train-hw)
        return self

    def predict(self,X):
        """Predict 0.0/1.0 for each row of ``X`` by thresholding at 0.5.

        Returns
        -------
        numpy.ndarray
            Float array of shape (n_samples,) holding 1.0 where the predicted
            probability exceeds 0.5 and 0.0 elsewhere.

        Raises
        ------
        ValueError
            If the estimator has not been fitted yet.
        """
        if self.w is None:
            raise ValueError("LRlinear.predict called before fit")
        hw = sigmoid(np.dot(self._with_bias(X),self.w))
        # Vectorized threshold; flatten the (n, 1) column to shape (n,).
        return (hw.ravel()>0.5).astype(float)

In [27]:
# Fit the hand-written logistic regression on the training split and measure
# its accuracy on the held-out rows.
LR = LRlinear()
LR.fit(breast_cancer_train_X,breast_cancer_train_y)
Y_predict=LR.predict(breast_cancer_test_X)
number_element = breast_cancer_test_X.shape[0]
# Accuracy = matching predictions / number of test rows, counted with one
# vectorized comparison instead of a Python loop.
acc=float(np.sum(Y_predict==breast_cancer_test_y))/float(number_element)
# The call form of print works on both Python 2 and Python 3.
print(acc)

  from ipykernel import kernelapp as app


0.881656804734


In [28]:
# Cross-validated scores of the hand-written logistic regression, using
# cross_val_score's default folds and scoring; the mean is the cell's result.
LR = LRlinear(alpha=0.1)
LR_score = cross_val_score(estimator=LR, X=breast_cancer_X, y=breast_cancer_y)
LR_score.mean()

  from ipykernel import kernelapp as app


0.91562238930659978

In [15]:
from sklearn.linear_model import LogisticRegression
# Reference point: scikit-learn's LogisticRegression with default settings,
# scored with the same cross_val_score defaults; the mean is the cell's result.
LRr = LogisticRegression()
LRr_score = cross_val_score(estimator=LRr, X=breast_cancer_X, y=breast_cancer_y)
LRr_score.mean()

0.94902069989789295