In [120]:
import xlrd
import numpy as np
import matplotlib.pyplot as plt
import operator
import pandas as pd
from sklearn.model_selection import train_test_split
# Figure text settings:
#   font.sans-serif    -> set the font to SimHei
#   axes.unicode_minus -> fixes the garbled rendering of the "-" minus sign in figures
plt.rcParams.update({
    "font.sans-serif": ["SimHei"],
    "axes.unicode_minus": False,
})

class KNN():
    """Brute-force k-nearest-neighbours classifier with majority voting.

    ``fit`` only stores the training data. Each prediction scans every stored
    sample, ranks them by squared Euclidean distance and returns the most
    frequent label among the ``k`` closest ones.
    """

    def __init__(self, k):
        """Store ``k``, the number of neighbours that vote on a prediction."""
        self.k = k

    def fit(self, X, y):
        """Memorise the training set.

        Args:
            X: indexable sequence of samples; ``len(X[0])`` is taken as the
                number of features, so ``X`` must not be empty.
            y: labels, indexed in parallel with ``X``.
        """
        self.N = len(X)
        self.d = len(X[0])
        self.X = X
        self.y = y

    def predict(self, x):
        """Return the majority label among the nearest neighbours of ``x``.

        Raises:
            IndexError: if there are no neighbours to vote (e.g. ``k <= 0``).
        """
        _, neighbors_labels = self.k_neighbors(x)
        votes = {}
        for label in neighbors_labels:
            votes[label] = votes.get(label, 0) + 1
        # sorted() is stable, also with reverse=True, so among equally frequent
        # labels the one met first (i.e. the nearer one) wins the tie.
        ranked = sorted(votes.items(), key=operator.itemgetter(1), reverse=True)
        return ranked[0][0]

    def predicts(self, X):
        """Return a list with the predicted label for every sample in ``X``."""
        return [self.predict(x) for x in X]

    def k_neighbors(self, x):
        """Return ``(neighbors, neighbors_labels)`` for the samples closest to ``x``.

        Both lists are ordered from nearest to farthest. When ``k`` is larger
        than the number of stored samples, all stored samples are returned
        instead of failing with an IndexError.
        """
        distances = [
            (self.X[i], self.y[i], self.distance(x, self.X[i]))
            for i in range(self.N)
        ]
        distances.sort(key=operator.itemgetter(2))
        # Clamp to [0, N]: a plain slice with a negative k would silently drop
        # samples from the end instead of yielding no neighbours.
        count = max(0, min(self.k, self.N))
        nearest = distances[:count]
        neighbors = [sample for sample, _, _ in nearest]
        neighbors_labels = [label for _, label, _ in nearest]
        return neighbors, neighbors_labels

    def distance(self, x, y):
        """Return the squared Euclidean distance over the first ``self.d`` features.

        No square root is taken; the ordering of neighbours is the same as
        for the true Euclidean distance.
        """
        return sum((x[i] - y[i]) ** 2 for i in range(self.d))

def plot_desicion_boundary(X, y, knn, step=0.01):
    """Plot the decision regions of a fitted classifier over the first two features.

    A regular grid covering the range of ``X[:, 0]`` and ``X[:, 1]`` (padded
    by 0.1 on every side) is classified with ``knn.predicts``; the predicted
    labels are drawn as filled contours with the samples scattered on top.
    The figure is saved as ``k=<knn.k>.png`` and then shown.

    Args:
        X: samples convertible to a 2-D array; only columns 0 and 1 are used.
        y: per-sample values used to colour the scatter points.
        knn: object exposing ``predicts(list_of_points)`` and ``k``. The grid
            points passed to it have exactly two coordinates.
        step: spacing of the evaluation grid along both axes. Every grid
            point costs one prediction, so smaller values are slower.
    """
    points = np.asarray(X)
    x_min, x_max = points[:, 0].min() - 0.1, points[:, 0].max() + 0.1
    y_min, y_max = points[:, 1].min() - 0.1, points[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))

    # One [x, y] pair per grid node. The grid and the predictions are no
    # longer printed: they can hold a very large number of entries and only
    # flooded the cell output.
    grid = np.vstack([xx.ravel(), yy.ravel()]).T.tolist()
    Z = np.array(knn.predicts(grid)).reshape(xx.shape)

    f, axarr = plt.subplots(1, 1, figsize=(10, 8))
    axarr.contourf(xx, yy, Z, alpha=0.4)
    axarr.scatter(points[:, 0], points[:, 1], c=y, s=10, edgecolor='k')
    axarr.set_title("KNN (k={})".format(knn.k))
    plt.savefig("k={}.png".format(knn.k))
    plt.show()


In [121]:
class Bagging:
    """Bootstrap-aggregating ensemble that averages its members' predictions."""

    def __init__(self, base_learner="knn", n_learners=10, K=5, random_state=None):
        """Create the ensemble members.

        Args:
            base_learner: if callable, it is called with no arguments once per
                member and must return a fresh object with ``fit(X, y)`` and
                ``predicts(X)``. Any other value (such as the default
                ``"knn"``) builds ``KNN(K)`` members.
            n_learners: number of ensemble members.
            K: neighbour count passed to each ``KNN`` member.
            random_state: optional seed for the bootstrap sampling. When
                ``None`` NumPy's global random state is used.
        """
        if callable(base_learner):
            self.learners = [base_learner() for _ in range(n_learners)]
        else:
            self.learners = [KNN(K) for _ in range(n_learners)]
        self.random_state = random_state

    def fit(self, X, y):
        """Fit every member on its own bootstrap sample of ``(X, y)``.

        Each sample has as many rows as ``X`` and is drawn with replacement.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = len(X)
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)
        for learner in self.learners:
            examples = rng.choice(n_samples, size=n_samples, replace=True)
            learner.fit(X[examples], y[examples])

    def predict(self, X):
        """Return the per-sample mean of the members' predicted labels.

        The result is a float array, so labels must be numeric. No
        thresholding is applied here.
        """
        preds = [learner.predicts(X) for learner in self.learners]
        return np.array(preds).mean(axis=0)

In [122]:
# Feature table; the first CSV column is used as the index.
X = pd.read_csv("X.csv", index_col=0)
# Target: only the "status" column of y.csv is kept.
y = pd.read_csv("y.csv", index_col=0)["status"]
# Row and column counts of the feature table.
m, n = X.shape
# 80 % of the rows go to training; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=1,
)

In [123]:
# Convert the pandas objects to plain NumPy arrays for the hand-written
# learners. np.asarray leaves an existing ndarray unchanged, so this cell can
# be re-run safely (``.values`` raised AttributeError on the second run).
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)
X_test = np.asarray(X_test)
y_test = np.asarray(y_test)

In [124]:

# Baseline: a single 3-nearest-neighbour classifier scored on the test split.
knn = KNN(3)
knn.fit(X_train, y_train)
y_pred = knn.predicts(X_test)
# Fraction of test samples whose predicted label equals the true label.
hits = y_pred == y_test
print("Accurancy:", np.mean(hits))

Accurancy: 0.7619047619047619


In [125]:
# Bagging draws its bootstrap samples through NumPy's global random state;
# seeding it here makes the reported accuracy reproducible across runs.
np.random.seed(1)

# Ensemble of 25 members built with K=3.
knn_bagging = Bagging('knn', 25, 3)
knn_bagging.fit(X_train, y_train)

# predict() returns the mean label over the members; with 0/1 labels a mean
# of at least 0.5 is mapped to class 1, anything lower to class 0.
vote_share = knn_bagging.predict(X_test)
preds = np.where(vote_share >= 0.5, 1.0, 0.0)
print(preds)
print("Accurancy:", np.mean(preds == y_test))

[1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 1.
 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.]
Accurancy: 0.7857142857142857
