## 样本数据 data.csv

![jupyter](data.png)

In [15]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt

In [16]:
def load_data(data_file):
    """Load the training samples from a comma-separated file.

    The first line of the file is skipped as a header and columns 1, 2 and 3
    of every remaining row are read: two input values followed by the label.
    The product of the two inputs is appended as a third feature.

    Args:
        data_file: Path of the CSV file to read.

    Returns:
        A tuple ``(X, Y)``. ``X`` has shape ``(m, 3)`` with the columns
        ``[x1, x2, x1 * x2]``; ``Y`` has shape ``(m, 1)`` and holds the
        values of the label column.
    """
    # The context manager guarantees the handle is closed even if parsing
    # fails; the original left the file open.
    with open(data_file, 'rb') as handle:
        # ndmin=2 keeps the result two-dimensional when the file contains a
        # single data row, so the column slicing below always works.
        data = np.loadtxt(handle, delimiter=',', skiprows=1,
                          usecols=(1, 2, 3), ndmin=2)

    inputs = data[:, 0:2]
    interaction = (inputs[:, 0] * inputs[:, 1]).reshape(-1, 1)
    X = np.hstack((inputs, interaction))
    Y = data[:, 2].reshape(-1, 1)
    return X, Y

In [17]:
def plot_data(X, Y, image):
    """Scatter-plot the samples on the current figure, coloured by label.

    Args:
        X: 2-D array of samples; column 0 is drawn on the horizontal axis
            and column 1 on the vertical axis.
        Y: Labels with one entry per row of ``X`` (for example shape
            ``(m, 1)`` or ``(m,)``). Entries equal to 1 are drawn in red,
            entries equal to 0 in green.
        image: Currently unused; kept so existing callers keep working.
    """
    # Flatten the labels and select rows with boolean masks. Indexing with
    # the tuple returned by np.where on an (m, 1) array also used the column
    # indices (all zeros) as row indices, which drew sample 0 repeatedly in
    # both classes.
    labels = np.ravel(Y)
    positive = labels == 1
    negative = labels == 0

    plt.scatter(X[positive, 0], X[positive, 1], marker='o', c='r')
    plt.scatter(X[negative, 0], X[negative, 1], marker='o', c='g')
    plt.xlabel('grade1')
    plt.ylabel('grade2')
    plt.legend(['positive', 'negative'])

In [18]:
def sigmoid(x):
    """Return the logistic function of ``x``, kept strictly inside (0, 1).

    The result is clamped to ``[1e-10, 1 - 1e-10]`` so that taking the
    logarithm of the value, or of one minus the value, never yields
    ``-inf``.

    Args:
        x: Scalar or array of any shape.

    Returns:
        The element-wise value ``1 / (1 + exp(-x))`` after clamping, with
        the same shape as ``x``.
    """
    eps = 1.0e-10
    # exp(-x) overflows to inf for very negative x; 1 / (1 + inf) is 0 and
    # is clamped below, so the overflow warning carries no information.
    with np.errstate(over='ignore'):
        rs = 1.0 / (1.0 + np.exp(-x))
    # Vectorised clamp: works for any number of dimensions, unlike an
    # element-by-element loop over (i, j) index pairs.
    return np.clip(rs, eps, 1.0 - eps)

In [19]:
def cost(theta, x, y, lam=0.):
    """Return the regularised logistic-regression cost as a scalar.

    Args:
        theta: Parameter vector; reshaped internally into a column.
        x: Design matrix with one row per sample.
        y: Column of labels, one row per sample.
        lam: Regularisation strength. The first parameter is excluded from
            the penalty.

    Returns:
        The cross-entropy averaged over the samples plus
        ``lam / (2 * m)`` times the squared norm of the penalised
        parameters.
    """
    n_samples = x.shape[0]
    weights = np.reshape(theta, (len(theta), 1))

    # Copy of the parameters with the first entry zeroed so that it does
    # not contribute to the penalty term.
    penalised = weights.copy()
    penalised[0][0] = 0.

    hypothesis = sigmoid(x.dot(weights))
    log_likelihood = (y.T.dot(np.log(hypothesis))
                      + (1 - y).T.dot(np.log(1 - hypothesis)))
    penalty = lam / (2 * n_samples) * penalised.T.dot(penalised)

    J = (-1.0 / n_samples) * log_likelihood + penalty
    return J[0][0]

In [20]:
def grad(theta, x, y, lam=0.):
    """Return the gradient of the regularised logistic-regression cost.

    Args:
        theta: Parameter vector; reshaped internally into a column.
        x: Design matrix with one row per sample.
        y: Column of labels, one row per sample.
        lam: Regularisation strength. The first parameter is excluded from
            the penalty.

    Returns:
        A flat 1-D array with one partial derivative per parameter.
    """
    m = x.shape[0]
    theta = np.reshape(theta, (len(theta), 1))

    # Copy of the parameters with the first entry zeroed so that it does
    # not contribute to the penalty term.
    penalised = theta.copy()
    penalised[0][0] = 0.

    # The residual is sigmoid(x . theta) - y. The original subtracted y
    # inside the sigmoid call, which is not the derivative of the cost.
    residual = sigmoid(x.dot(theta)) - y
    gradient = (1.0 / m) * x.T.dot(residual) + (lam / m) * penalised
    return gradient.flatten()

In [21]:
def plot_boundary(theta):
    """Draw the decision boundary of the four-parameter model and save it.

    For each x1 in [-1, 1) the curve is the x2 that solves
    ``theta[0] + theta[1]*x1 + theta[2]*x2 + theta[3]*x1*x2 = 0``.
    The current figure is written to ``boundary01.png`` and then cleared.

    Args:
        theta: Sequence of four model parameters.
    """
    grid = np.arange(-1.0, 1.0, 0.001)
    numerator = theta[0] + theta[1] * grid
    denominator = theta[2] + theta[3] * grid
    boundary = -numerator / denominator

    plt.plot(grid, boundary)
    plt.savefig('boundary01.png')
    plt.clf()

In [22]:
def plot_boundary2(theta):
    """Draw the straight-line decision boundary and save it.

    For each x1 in [-1, 1) the line is the x2 that solves
    ``theta[0] + theta[1]*x1 + theta[2]*x2 = 0``. The current figure is
    written to ``linear.png`` and then cleared.

    Args:
        theta: Sequence whose first three entries are the model parameters.
    """
    grid = np.arange(-1.0, 1.0, 0.001)
    numerator = theta[0] + theta[1] * grid
    line = -numerator / theta[2]

    plt.plot(grid, line)
    plt.savefig('linear.png')
    plt.clf()

In [23]:
def main():
    """Fit the classifier on ``data2.csv`` and plot its decision boundary.

    Loads the samples, scatter-plots them, minimises ``cost`` with
    ``scipy.optimize.fmin_tnc`` from a random starting point, prints the
    fitted parameters and draws the resulting boundary.
    """
    features, labels = load_data('data2.csv')
    plot_data(features, labels, 'data.PNG')

    # Random start: results differ from run to run.
    initial_theta = np.random.randn(4)

    # Prepend a column of ones so the first parameter acts as the intercept.
    intercept = np.ones((features.shape[0], 1))
    design = np.concatenate((intercept, features), axis=1)

    # NOTE(review): with approx_grad=True the optimiser is expected to
    # estimate the gradient numerically instead of calling fprime; confirm
    # against the scipy documentation.
    result = opt.fmin_tnc(cost, initial_theta, fprime=grad,
                          args=(design, labels), approx_grad=True,
                          epsilon=0.001, maxfun=10000)
    theta_final = result[0]
    print(theta_final)
    plot_boundary(theta_final)

In [24]:
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

[160.72388595 423.90947624 416.18208473 346.62835459]


<Figure size 432x288 with 0 Axes>