# 1. Single variable

## Import modules

In [None]:
import sys
sys.path.append('../')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Load data

In [None]:
# Load the WDBC table. The file has no header row, and its first field is
# used as the row index (presumably the sample ID — confirm against the data).
# `delim_whitespace=False` was dropped: it is the default and the argument is
# deprecated in recent pandas releases.
data = pd.read_csv('wdbc.data', header=None, index_col=0)

# Remaining fields: the class label followed by ten base features, each
# reported three times in the order mean -> standard error -> worst.
base_columns = ['label', 'radius', 'texture', 'perimeter', 'area', 'smoothness', 'compactness', 'concavity', 'concave_points', 'symmetry', 'fractal_dimension']
label_columns = ['_mean', '_stderr', '_worst']
columns = [base_columns[0]] + [
    feature + suffix
    for suffix in label_columns
    for feature in base_columns[1:]
]

# Fail with a clear message if the file layout is not the expected one.
if len(columns) != data.shape[1]:
    raise ValueError(
        'wdbc.data has %d non-index columns, expected %d' % (data.shape[1], len(columns))
    )
data.columns = columns

## Plot

In [None]:
# Scatter the two diagnosis classes in the (radius_mean, texture_mean) plane.
fig, ax = plt.subplots()

# Benign samples (label "B")
data0 = data.query('label == "B"')
ax.scatter(data0['radius_mean'], data0['texture_mean'], s=30, color="blue", ec="k", label="Benign")

# Malignant samples (label "M")
data1 = data.query('label == "M"')
ax.scatter(data1['radius_mean'], data1['texture_mean'], color="yellow", marker='*', ec='k', label="Malignant")

# Label the axes so the figure can be read on its own.
ax.set_xlabel('radius_mean')
ax.set_ylabel('texture_mean')
ax.legend()
plt.show()

## 課題1. Compute cost function

### ※YOUR CODE IS HERE

In [None]:
# Single-feature problem: radius_mean as an (m, 1) feature matrix.
X = data[['radius_mean']].values
# Encode the diagnosis as 0 ('B') / 1 ('M'). `map` produces a numeric array
# directly, whereas `replace` relies on deprecated implicit downcasting of the
# string column.
y = data['label'].map({'B': 0, 'M': 1}).values

m, n = X.shape

# Design matrix with a leading column of ones for the intercept term, and the
# labels reshaped to a column vector to match the column-vector theta.
X_ = np.c_[np.ones(m), X]
y_ = y.reshape(m, 1)

# Start from all-zero parameters.
theta = np.zeros((n+1, 1))

# `lrCostFunction` is expected to return the cost and its gradient
# (implementation supplied elsewhere in the exercise).
J, grad = lrCostFunction(X_, y_, theta)
print('Testing cost function: %.2f' % J)

The output of your cost function should look like the following.

```
Testing cost function: 296.07
Testing cost function: 186.72
```

## 課題2. Gradient Descent

### ※YOUR CODE IS HERE

In [None]:
# Hyper-parameters for the gradient-descent run on the raw (un-normalised) feature.
alpha = 0.01
iterations = 1500

# Optimise starting from the current `theta`. `j_hist` is presumably the
# per-iteration cost history; its last entry is reported as the final cost.
theta_min, j_hist = gradientDescent(X_, y_, theta, alpha, iterations)
final_cost = j_hist[-1]

print('Theta min: \n', theta_min)
print('Cost min: %.3f' % final_cost)

### Discussion

- このような結果をもたらした原因について考察しよう
- 例えば今回の計算結果の意味は以下のように解釈できる

In [None]:
def predict(X, theta):
    """Classify each row of ``X`` as 1 or 0 using parameters ``theta``.

    The linear score ``X . theta`` is passed through ``sigmoid`` (defined
    elsewhere), and a sample is labelled 1 only when the result is strictly
    greater than 0.5; otherwise it is labelled 0.

    Returns an integer array with the same shape as ``sigmoid(X . theta)``.
    """
    probabilities = sigmoid(X @ theta)
    is_positive = probabilities > 0.5
    return is_positive.astype(int)
    
# Predict on the training set with the gradient-descent parameters.
y_predict = predict(X_, theta_min)
# The predictions presumably come back as an (m, 1) column (theta_min starts
# as a column vector), so flatten them before comparing with the 1-D labels.
# Indexing with [0] would compare every label against the first prediction only.
n_hits = (y == y_predict.ravel()).sum()
print("Accuracy: %d / %d = %.1f[%%]" % (n_hits, m, n_hits/m*100))

In [None]:
# Class balance of the encoded labels (1 = 'M', 0 = 'B').
n_positive = (y == 1).sum()
n_negative = (y == 0).sum()
print("Positive count: %d" % n_positive)
print("Negative count: %d" % n_negative)

- このモデルがやっていることは「全て陰性である」という判断を下しているだけに過ぎない
- こんなモデルに果たして意味があるのだろうか？

## 課題3. Newton optimization（Optional）

### ※YOUR CODE IS HERE

In [None]:
# The Newton optimiser is called with a flat (1-D) start vector and the 1-D
# labels, so rebuild the design matrix (intercept column + raw feature) and
# the zero start point here.
Xtil = np.c_[np.ones(m), X]
theta = np.zeros(n+1)

theta_min, J_min = newtonOptimize(Xtil, y, theta, max_iter=150)

print('Theta min: \n', theta_min)
print('Cost min: %.3f' % J_min)

In [None]:
# Training-set accuracy of the Newton solution on the raw feature.
y_predict = predict(Xtil, theta_min)
matches = (y == y_predict)
n_hits = matches.sum()
print("Accuracy: %d / %d = %.1f[%%]" % (n_hits, m, n_hits/m*100))

In [None]:
# Re-run the Newton optimiser for a few iterations with hist=True, which is
# expected to return the parameter and cost histories instead of the optimum.
theta_hist, J_hist = newtonOptimize(Xtil, y, theta, max_iter=10, hist=True)

# Split the parameter history into one series per component.
theta0_hist = [t[0] for t in theta_hist]
theta1_hist = [t[1] for t in theta_hist]

fig, ax = plt.subplots()
for hist_label, hist_values in (('theta0', theta0_hist), ('theta1', theta1_hist)):
    # Cost as a function of this parameter's value along the optimisation path.
    ax.plot(hist_values, J_hist, label=hist_label)
    # Annotate every point with its iteration index.
    for i, (value, cost) in enumerate(zip(hist_values, J_hist)):
        ax.text(value, cost, s=str(i), fontsize=10)

# Without labels and a legend the two curves cannot be told apart.
ax.set_xlabel('parameter value')
ax.set_ylabel('cost J')
ax.legend()
plt.show()

### ※END, back to ex2

## 課題4. Gradient descent with feature normalize

In [None]:
# Normalise the feature first, then prepend the intercept column of ones.
X_norm = featureNormalize(X)
intercept = np.ones(m)
X_ = np.c_[intercept, X_norm]

In [None]:
# Repeat gradient descent on the normalised design matrix, restarting from a
# zero column vector with the same step size and iteration budget as before.
m = X.shape[0]
theta = np.zeros((n+1, 1))
alpha = 0.01
iterations = 1500

theta_min, j_hist = gradientDescent(X_, y_, theta, alpha, iterations)
final_cost = j_hist[-1]

print('Theta min: \n', theta_min)
print('Cost min: %.3f' % final_cost)

In [None]:
# Training-set accuracy after feature normalisation; predictions are
# flattened so they line up element-wise with the 1-D labels.
y_predict = predict(X_, theta_min)
flat_predictions = y_predict.ravel()
n_hits = (y == flat_predictions).sum()
print("Accuracy: %d / %d = %.1f[%%]" % (n_hits, m, n_hits/m*100))

- まともになってきたのでは？

## 課題5. Newton Optimize with feature normalize

In [None]:
# Newton optimisation on the normalised feature: design matrix with an
# intercept column, flat zero start vector.
X_norm = featureNormalize(X)
Xtil = np.c_[np.ones(m), X_norm]
theta = np.zeros(n+1)

theta_min, J_min = newtonOptimize(Xtil, y, theta, max_iter=150)

print('Theta min: \n', theta_min)
print('Cost min: %.3f' % J_min)

In [None]:
# Training-set accuracy of the Newton solution on the normalised feature.
y_predict = predict(Xtil, theta_min)
n_hits = (y_predict == y).sum()
hit_rate_percent = n_hits/m*100
print("Accuracy: %d / %d = %.1f[%%]" % (n_hits, m, hit_rate_percent))

- 多少なりとも結果がロバストになってきた？

## 課題6. Advanced Optimize

In [None]:
from scipy.optimize import minimize

def lrCostFunctionOpt(theta, X, y):
    """Average logistic-regression cost, in the argument order SciPy expects.

    Parameters
    ----------
    theta : parameter vector being optimised (first argument, as required by
        ``scipy.optimize.minimize``).
    X : design matrix; the number of rows is taken as the sample count.
    y : 0/1 labels, one per row of ``X``.

    Returns
    -------
    The cross-entropy cost summed over samples and divided by the sample count.

    Notes
    -----
    ``sigmoid`` is defined elsewhere. Its output is clipped away from exactly
    0 and 1 before taking logarithms: a saturated probability would otherwise
    produce ``0 * log(0) = nan`` and make the cost unusable for the optimiser.
    """
    m = X.shape[0]
    eps = np.finfo(float).eps
    h = np.clip(sigmoid(np.dot(X, theta)), eps, 1.0 - eps)
    log_likelihood = np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))
    return -log_likelihood / m

# Flat zero start vector and normalised design matrix with an intercept column.
theta = np.zeros(n+1)
X_norm = featureNormalize(X)
Xtil = np.c_[np.ones(m), X_norm]

# Run SciPy's general-purpose minimiser with its default method and keep only
# the solution vector.
opt_result = minimize(lrCostFunctionOpt, theta, args=(Xtil, y))
theta_min = opt_result.x

In [None]:
# Training-set accuracy of the SciPy-optimised parameters.
y_predict = predict(Xtil, theta_min)
is_hit = (y == y_predict)
n_hits = is_hit.sum()
print("Accuracy: %d / %d = %.1f[%%]" % (n_hits, m, n_hits/m*100))

# 2. Multiple variables

In [None]:
# Two-feature problem: radius_mean and texture_mean as an (m, 2) feature matrix.
X = data[['radius_mean', 'texture_mean']].values
# Encode the diagnosis as 0 ('B') / 1 ('M'). `map` produces a numeric array
# directly, whereas `replace` relies on deprecated implicit downcasting of the
# string column.
y = data['label'].map({'B': 0, 'M': 1}).values

m, n = X.shape

# Design matrix with a leading intercept column, and labels as a column vector.
X_ = np.c_[np.ones(m), X]
y_ = y.reshape(m, 1)

# Start from all-zero parameters (one per feature plus the intercept).
theta = np.zeros((n+1, 1))

## Gradient Descent with multi variables

In [None]:
# Normalise into a new name instead of overwriting `X`: the raw features stay
# available and re-running this cell (or later cells that call
# featureNormalize(X) again) always starts from the same input.
X_norm = featureNormalize(X)
X_ = np.hstack((np.ones((len(X_norm), 1)), X_norm))

# One parameter per feature plus the intercept, rather than a hard-coded 3.
theta = np.zeros((n+1, 1))
iterations = 1500
alpha = 0.01

theta_min, j_hist = gradientDescent(X_, y_, theta, alpha, iterations)
print('Theta min: \n', theta_min)
print('Cost min: %.3f' % j_hist[-1])

In [None]:
# Training-set accuracy of the two-feature gradient-descent model; flatten
# the predictions so they compare element-wise with the 1-D labels.
y_predict = predict(X_, theta_min)
flat_predictions = y_predict.ravel()
n_hits = (y == flat_predictions).sum()
print("Accuracy: %d / %d = %.1f[%%]" % (n_hits, m, n_hits/m*100))

## Newton Optimize with multi variables

In [None]:
# Newton optimisation on the two normalised features: design matrix with an
# intercept column, flat zero start vector.
X_norm = featureNormalize(X)
Xtil = np.c_[np.ones(m), X_norm]
theta = np.zeros(n+1)

theta_min, J_min = newtonOptimize(Xtil, y, theta, max_iter=150)

print('Theta min: \n', theta_min)
print('Cost min: %.3f' % J_min)

In [None]:
# Training-set accuracy of the two-feature Newton solution.
y_predict = predict(Xtil, theta_min)
n_hits = (y_predict == y).sum()
hit_rate_percent = n_hits/m*100
print("Accuracy: %d / %d = %.1f[%%]" % (n_hits, m, hit_rate_percent))

## Advanced Optimize

In [None]:
# Same SciPy-based fit as in the single-variable section, now on two features.
theta = np.zeros(n+1)
X_norm = featureNormalize(X)
Xtil = np.c_[np.ones(m), X_norm]

# Keep only the solution vector from the optimiser result.
opt_result = minimize(lrCostFunctionOpt, theta, args=(Xtil, y))
theta_min = opt_result.x

In [None]:
# Training-set accuracy of the two-feature SciPy-optimised parameters.
y_predict = predict(Xtil, theta_min)
is_hit = (y == y_predict)
n_hits = is_hit.sum()
print("Accuracy: %d / %d = %.1f[%%]" % (n_hits, m, n_hits/m*100))