In [3]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [4]:
def GD(y, tx, initial_w, max_iters, gamma, gradient_func, loss_func):
    """Run full-batch gradient descent for a fixed number of steps.

    Parameters
    ----------
    y, tx : forwarded unchanged to ``gradient_func`` and ``loss_func``.
    initial_w : starting weights (never modified in place).
    max_iters : number of update steps to perform.
    gamma : step size applied to every gradient.
    gradient_func : callable ``(y, tx, w) -> gradient``.
    loss_func : callable ``(y, tx, w) -> loss``.

    Returns
    -------
    (w, loss) : the weights after the last step and ``loss_func`` evaluated
    at those weights.
    """
    weights = initial_w

    for _ in range(max_iters):
        step = gamma * gradient_func(y, tx, weights)
        # Rebind instead of using ``-=`` so that ``initial_w`` stays untouched.
        weights = weights - step

    final_loss = loss_func(y, tx, weights)
    return weights, final_loss

def GD_reg(y, tx, initial_w, max_iters, gamma, gradient_func, loss_func, lambda_):
    """Run full-batch gradient descent with a regularised gradient.

    Identical to plain gradient descent except that ``lambda_`` is passed as a
    fourth argument to ``gradient_func``. Note that ``loss_func`` is called
    *without* ``lambda_``, so the returned loss is whatever ``loss_func``
    computes from ``(y, tx, w)`` alone.

    Parameters
    ----------
    y, tx : forwarded unchanged to ``gradient_func`` and ``loss_func``.
    initial_w : starting weights (never modified in place).
    max_iters : number of update steps to perform.
    gamma : step size applied to every gradient.
    gradient_func : callable ``(y, tx, w, lambda_) -> gradient``.
    loss_func : callable ``(y, tx, w) -> loss``.
    lambda_ : regularisation strength handed to ``gradient_func``.

    Returns
    -------
    (w, loss) : the final weights and ``loss_func`` evaluated at them.
    """
    weights = initial_w

    for _ in range(max_iters):
        step = gamma * gradient_func(y, tx, weights, lambda_)
        weights = weights - step

    final_loss = loss_func(y, tx, weights)
    return weights, final_loss

In [5]:
def get_random_sample(y, tx):
    """Pick one observation uniformly at random.

    A single index in ``[0, len(y))`` is drawn from NumPy's global random
    generator and used to index both ``y`` and ``tx``.

    Returns
    -------
    (y_sample, tx_sample) : ``y[i]`` and ``tx[i]`` for the drawn index ``i``.
    """
    picked = np.random.randint(len(y))
    return y[picked], tx[picked]

def SGD(y, tx, initial_w, max_iters, gamma, gradient_func, loss_func):
    """Run stochastic gradient descent with one random sample per step.

    At every iteration a single ``(y_i, tx_i)`` pair is drawn with
    ``get_random_sample`` and ``gradient_func`` is evaluated on that pair only.
    The returned loss is computed on the *full* ``(y, tx)`` data.

    Parameters
    ----------
    y, tx : data to sample from and to evaluate the final loss on.
    initial_w : starting weights (never modified in place).
    max_iters : number of update steps to perform.
    gamma : step size applied to every gradient.
    gradient_func : callable ``(y_sample, tx_sample, w) -> gradient``.
    loss_func : callable ``(y, tx, w) -> loss``.

    Returns
    -------
    (w, loss) : the final weights and the loss at those weights.
    """
    weights = initial_w

    for _ in range(max_iters):
        sample = get_random_sample(y, tx)
        step = gamma * gradient_func(sample[0], sample[1], weights)
        weights = weights - step

    final_loss = loss_func(y, tx, weights)
    return weights, final_loss

# Least Squares

In [6]:
def MSE(y, tx, w):
    """Mean squared error of the linear model ``tx.dot(w)`` against ``y``.

    Returns the mean of the squared residuals (no 1/2 factor).
    """
    residual = y - tx.dot(w)
    squared = residual ** 2
    return np.mean(squared)

def least_squares_gradient(y, tx, w):
    """Gradient used for least-squares descent: ``-tx.T.dot(e) / y.size``.

    ``e`` is the residual ``y - tx.dot(w)``. The scaling by ``y.size`` makes
    this the gradient of half the mean squared residual.
    """
    residual = y - tx.dot(w)
    n_samples = y.size
    return -tx.T.dot(residual) / n_samples

def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Linear regression fitted by gradient descent.

    Delegates to ``GD`` with ``least_squares_gradient`` as the gradient and
    ``MSE`` as the reported loss.

    Returns
    -------
    (w, loss) : final weights and the MSE at those weights.
    """
    return GD(
        y, tx, initial_w, max_iters, gamma,
        gradient_func=least_squares_gradient,
        loss_func=MSE,
    )

def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Linear regression fitted by stochastic gradient descent.

    Delegates to ``SGD`` with ``least_squares_gradient`` as the gradient and
    ``MSE`` as the reported loss.

    Returns
    -------
    (w, loss) : final weights and the MSE at those weights.
    """
    return SGD(
        y, tx, initial_w, max_iters, gamma,
        gradient_func=least_squares_gradient,
        loss_func=MSE,
    )

def least_squares(y: np.ndarray, tx: np.ndarray):
    """Solve the least-squares problem ``min_w ||y - tx.dot(w)||^2`` via QR.

    ``tx`` is factorised as ``Q R`` and the triangular system
    ``R w = Q.T y`` is solved, which avoids forming ``tx.T tx`` explicitly.

    Parameters
    ----------
    y : target values.
    tx : design matrix; ``np.linalg.solve`` requires the resulting ``R`` to be
        square and non-singular, otherwise NumPy raises ``LinAlgError``.

    Returns
    -------
    (w, loss) : the solution and ``MSE(y, tx, w)``.
    """
    # Annotations use ``np.ndarray`` (the type); ``np.array`` is only a factory.
    Q, R = np.linalg.qr(tx)
    w = np.linalg.solve(R, Q.T.dot(y))
    loss = MSE(y, tx, w)

    return w, loss

In [7]:
def ridge_regression(y: np.ndarray, tx: np.ndarray, lambda_):
    """Ridge regression solved as an augmented least-squares problem via QR.

    The rows ``sqrt(2 * N * lambda_) * I`` are appended to ``tx`` and matching
    zeros to ``y``; ordinary least squares on that augmented system minimises
    ``||y - tx w||^2 + 2 * N * lambda_ * ||w||^2`` with ``N = len(y)``.

    Parameters
    ----------
    y : target values (flattened when the zero targets are appended).
    tx : design matrix with one column per weight.
    lambda_ : non-negative regularisation strength.

    Returns
    -------
    (w, loss) : the ridge solution and the *unpenalised* ``MSE(y, tx, w)``.

    Raises
    ------
    ValueError : if ``lambda_`` is negative (its square root would be NaN and
        silently poison the solution).
    """
    if lambda_ < 0:
        raise ValueError("lambda_ must be non-negative, got {}".format(lambda_))

    n_features = tx.shape[1]
    penalty_rows = np.eye(n_features) * np.sqrt(2 * len(y) * lambda_)
    tx_expanded = np.append(tx, penalty_rows, axis=0)
    y_expanded = np.append(y, np.zeros(n_features))

    Q, R = np.linalg.qr(tx_expanded)
    w = np.linalg.solve(R, Q.T.dot(y_expanded))
    loss = MSE(y, tx, w)

    return w, loss

# Logistic Regression

In [8]:
def sigmoid(t):
    """Numerically stable logistic function ``1 / (1 + exp(-t))``.

    The naive form ``exp(t) / (1 + exp(t))`` overflows to ``inf / inf = nan``
    once ``t`` exceeds roughly 709. Here the value is computed as
    ``exp(-log(1 + exp(-t)))`` with ``np.logaddexp``, which stays finite for
    arbitrarily large positive or negative inputs.

    Parameters
    ----------
    t : scalar or array-like of logits.

    Returns
    -------
    Element-wise sigmoid of ``t``, with values in ``[0, 1]``.
    """
    # log(1 + exp(-t)) == logaddexp(0, -t), evaluated without overflow.
    return np.exp(-np.logaddexp(0.0, np.negative(t)))

def logistic_loss(y, tx, w):
    """Negative log-likelihood of logistic regression, summed over samples.

    Computes ``sum(log(1 + exp(z))) - y.T.dot(z)`` with ``z = tx.dot(w)``.
    ``log(1 + exp(z))`` is evaluated as ``np.logaddexp(0, z)`` so that large
    logits no longer overflow to ``inf``.

    Parameters
    ----------
    y : labels, combined with the logits through ``y.T.dot(z)``.
    tx : design matrix.
    w : weights.

    Returns
    -------
    The loss with singleton dimensions squeezed away (not averaged over samples).
    """
    xtw = tx.dot(w)
    loss = np.sum(np.logaddexp(0, xtw)) - y.T.dot(xtw)
    return np.squeeze(loss)

def logistic_gradient(y, tx, w):
    """Gradient of the summed logistic loss: ``tx.T.dot(sigmoid(tx w) - y)``.

    The result is not divided by the number of samples.
    """
    predicted = sigmoid(tx.dot(w))
    residual = predicted - y
    return tx.T.dot(residual)

def reg_logistic_gradient(y, tx, w, lambda_):
    """Logistic gradient plus the ridge term ``2 * lambda_ * w``.

    The extra term is the gradient of the penalty ``lambda_ * ||w||^2``.
    """
    data_term = logistic_gradient(y, tx, w)
    penalty_term = 2 * lambda_ * w
    return data_term + penalty_term

In [9]:
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """Logistic regression fitted by gradient descent.

    Delegates to ``GD`` with ``logistic_gradient`` and ``logistic_loss``.

    Returns
    -------
    (w, loss) : final weights and the logistic loss at those weights.
    """
    return GD(
        y, tx, initial_w, max_iters, gamma,
        gradient_func=logistic_gradient,
        loss_func=logistic_loss,
    )

def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    """Ridge-regularised logistic regression fitted by gradient descent.

    Delegates to ``GD_reg`` with ``reg_logistic_gradient`` for the updates.
    The reported loss is the plain ``logistic_loss``; it does not include the
    penalty term.

    Returns
    -------
    (w, loss) : final weights and the unpenalised logistic loss.
    """
    return GD_reg(
        y, tx, initial_w, max_iters, gamma,
        gradient_func=reg_logistic_gradient,
        loss_func=logistic_loss,
        lambda_=lambda_,
    )

# Sandbox for testing

In [10]:
# Toy regression problem: two raw features (one per row) observed on ten samples.
x = np.array([[12, 16, 71, 99, 45, 27, 80, 58, 4, 50],
              [35, 78, 73, 3, 55, 43, 56, 98, 32, 40]])
# Standardise each feature to zero mean / unit variance, then put samples on rows.
x = ((x - x.mean(axis=1, keepdims=True)) / x.std(axis=1, keepdims=True)).T
y = np.array([56, 22, 37, 78, 83, 55, 70, 94, 12, 40])
# Design matrix: the standardised features with a leading column of ones.
tx = np.insert(x, 0, 1, axis=1)

# Reference solution from scikit-learn; the implementations above are compared to it.
linreg = LinearRegression().fit(x, y)
initial_w = np.array([0, 0, 0])
w = initial_w

print(np.append(np.array(linreg.intercept_), linreg.coef_))

[54.7        15.08492696  3.55532095]


### Testing least squares GD

In [11]:
# Should approach the scikit-learn reference weights printed above.
least_squares_GD(y, tx, initial_w, max_iters=100, gamma=0.1)

(array([54.69854709, 15.08425522,  3.55477245]), 423.1618830220783)

### Testing least squares SGD

In [12]:
# SGD draws its samples from NumPy's global random generator; seed it so that
# re-running this cell reproduces the same weights. The exact numbers depend on
# the seed, so a stored output from an unseeded run will not match digit for digit.
np.random.seed(0)
least_squares_SGD(y, tx, initial_w, 100000, 0.0001)

(array([54.79272918, 14.99698322,  3.65181066]), 423.1889072789986)

### Testing least squares

In [13]:
# Closed-form solution; expected to match the scikit-learn reference weights.
least_squares(y=y, tx=tx)

(array([54.7       , 15.08492696,  3.55532095]), 423.16188021914616)

### Testing ridge regression

In [14]:
# A small penalty should shrink the weights slightly relative to plain least squares.
ridge_regression(y, tx, lambda_=1e-3)

(array([54.59081836, 15.05403516,  3.54571034]), 423.1747990944212)

### Some other functions

In [None]:
def quantile_normalize(data, q=0.95):
    """Centre each column on its median and scale by a central quantile range.

    The scale of a column is the distance between its ``(1 - q) / 2`` and
    ``1 - (1 - q) / 2`` quantiles, i.e. the width of the central ``q`` mass.

    Columns whose quantile range is exactly zero (e.g. constant features) are
    divided by 1 instead, so they come out as zeros rather than NaN/inf.

    Parameters
    ----------
    data : array whose first axis indexes samples.
    q : fraction of the data covered by the quantile range (default 0.95).

    Returns
    -------
    Array of the same shape as ``data``.
    """
    low = (1 - q) / 2
    high = 1 - low
    # One call computes all three quantiles along the sample axis.
    q_low, median, q_high = np.quantile(data, [low, 0.5, high], axis=0)
    spread = q_high - q_low
    # Guard against 0/0 for features without any spread.
    spread = np.where(spread == 0, 1.0, spread)
    return (data - median) / spread

def mim_max_normalize(data):
    """Min-max scale each column of ``data`` to the range ``[0, 1]``.

    Columns whose minimum equals their maximum (constant features) are divided
    by 1 instead of 0, so they map to zeros rather than NaN.

    Parameters
    ----------
    data : array whose first axis indexes samples.

    Returns
    -------
    Array of the same shape as ``data``.
    """
    col_min = data.min(axis=0)
    col_range = data.max(axis=0) - col_min
    # Guard against 0/0 for features without any spread.
    col_range = np.where(col_range == 0, 1.0, col_range)
    return (data - col_min) / col_range

def z_normalize(data):
    """Standardise each column of ``data`` to zero mean and unit variance.

    Columns with zero standard deviation (constant features) are divided by 1
    instead of 0, so they map to zeros rather than NaN.

    Parameters
    ----------
    data : array whose first axis indexes samples.

    Returns
    -------
    Array of the same shape as ``data``.
    """
    centered = data - data.mean(axis=0)
    scale = data.std(axis=0)
    # Guard against 0/0 for features without any spread.
    scale = np.where(scale == 0, 1.0, scale)
    return centered / scale

def accuracy(y, tx, w):
    """Fraction of samples whose predicted class (0 or 1) equals ``y``.

    A sample is predicted as class 1 when its logistic probability exceeds
    0.5. Since ``sigmoid(z) > 0.5`` exactly when ``z > 0``, the logit
    ``tx.dot(w)`` is thresholded at zero directly. This removes the reference
    to the ``implementation`` module, which is never imported in this
    notebook, and avoids overflow inside the sigmoid for large logits.

    Parameters
    ----------
    y : true labels, compared element-wise with the 0/1 predictions.
    tx : design matrix.
    w : weights.

    Returns
    -------
    Number of matching predictions divided by ``len(y)``.
    """
    pred = np.where(tx.dot(w) > 0, 1, 0)
    correct = np.sum(pred == y)
    return correct / len(y)

def accuracy2(y, tx, w):
    """Fraction of samples whose sign-based prediction equals ``y``.

    A sample is predicted as 1 when ``tx.dot(w)`` is strictly positive and
    as 0 otherwise.
    """
    scores = tx.dot(w)
    predicted = (scores > 0).astype(int)
    n_correct = (predicted == y).sum()
    return n_correct / len(y)

In [None]:
def build_k_indices(y, k_fold, seed=0):
    """Split the row indices of ``y`` into ``k_fold`` random folds of equal size.

    NumPy's global random generator is re-seeded with ``seed`` before the
    indices are shuffled. Each fold holds ``int(len(y) / k_fold)`` indices;
    any leftover indices at the end of the permutation are dropped.

    Returns
    -------
    Array with one row of indices per fold.
    """
    n_samples = y.shape[0]
    fold_size = int(n_samples / k_fold)
    np.random.seed(seed)
    shuffled = np.random.permutation(n_samples)

    folds = []
    for fold in range(k_fold):
        start = fold * fold_size
        folds.append(shuffled[start:start + fold_size])
    return np.array(folds)
def cross_validation(y, x, k_indices, k, lambda_, initial_w=None, max_iters=2000, gamma=0.000003):
    """Train regularised logistic regression on all folds but ``k`` and score it.

    Fold ``k`` of ``k_indices`` is held out as the test set; the remaining
    folds are concatenated into the training set. The model is fitted with
    ``reg_logistic_regression`` as defined in this notebook.

    Parameters
    ----------
    y : labels, indexed by the entries of ``k_indices``.
    x : design matrix, indexed by the entries of ``k_indices``.
    k_indices : 2-D array with one row of sample indices per fold.
    k : index of the fold to hold out.
    lambda_ : regularisation strength.
    initial_w : starting weights; defaults to zeros of length ``x.shape[1]``.
    max_iters : number of gradient-descent steps.
    gamma : gradient-descent step size.

    The optimiser settings used to be read from notebook globals; they are now
    explicit arguments whose defaults match the disabled driver code below.

    Returns
    -------
    (accuracy_tr, accuracy_te, w) : training accuracy, held-out accuracy and
    the fitted weights.
    """
    # Fold k is the test set, every other fold goes into training.
    te_indices = k_indices[k]
    tr_indices = np.delete(k_indices, k, axis=0).reshape(-1)
    y_te, tx_te = y[te_indices], x[te_indices]
    y_tr, tx_tr = y[tr_indices], x[tr_indices]

    if initial_w is None:
        initial_w = np.zeros(x.shape[1])

    # The training loss is not needed here; only the accuracies are reported.
    w, _ = reg_logistic_regression(y_tr, tx_tr, lambda_, initial_w, max_iters, gamma)

    accuracy_te = accuracy(y_te, tx_te, w)
    accuracy_tr = accuracy(y_tr, tx_tr, w)
    return accuracy_tr, accuracy_te, w
# NOTE: the two triple-quoted strings below are disabled code kept as bare
# string expressions; nothing inside them is executed.
# The first string is a cross-validation driver: it sweeps `lambdas`, calls
# cross_validation() for every fold and averages the returned values. Despite
# the `rmse_*_tmp` / `loss_*` names, those values are the accuracies returned
# by cross_validation().
'''
seed = 7
k_fold = 5
lambdas = np.logspace(-4, 0, 30)
initial_w = np.zeros(tx.shape[1])
max_iters = 2000
gamma     = 0.000003
# split data in k fold
k_indices = build_k_indices(y, k_fold, seed)
# define lists to store the loss of training data and test data
accuracy_tr = []
accuracy_te = []
# cross validation
for lambda_ in lambdas:
    rmse_tr_tmp = []
    rmse_te_tmp = []
    for k in range(k_fold):
        loss_tr, loss_te,_ = cross_validation(y, tx, k_indices, k, lambda_)
        rmse_tr_tmp.append(loss_tr)
        rmse_te_tmp.append(loss_te)
    accuracy_tr.append(np.mean(rmse_tr_tmp))
    accuracy_te.append(np.mean(rmse_te_tmp))
'''
# The second string is the matching plot of accuracy against lambda. It uses
# `plt`, which is not imported in the visible cells of this notebook
# (NOTE(review): add the matplotlib import before re-enabling). As the last
# expression of the cell, this string is what Jupyter would echo as the output.
'''plt.semilogx(lambdas, accuracy_tr)
plt.semilogx(lambdas, accuracy_te)

plt.xlabel('lambda')
plt.ylabel('accuracy')'''

In [None]:
def build_interaction_tx(input_data, normalisation_function):
    """Build a design matrix with all pairwise interaction features.

    Steps:
      1. normalise ``input_data`` with ``normalisation_function``;
      2. append the product of every pair of distinct normalised features
         (ordered as (1,0), (2,0), (2,1), (3,0), ...);
      3. normalise the enlarged feature set again;
      4. prepend a column of ones.

    Parameters
    ----------
    input_data : 2-D array with one row per sample and one column per feature.
    normalisation_function : callable mapping a samples-by-features array to an
        array of the same shape.

    Returns
    -------
    ``tx`` with ``1 + n + n * (n - 1) / 2`` columns for ``n`` input features.
    """
    normalised = normalisation_function(input_data)

    n_samples = len(normalised)
    n_features = normalised.shape[1]
    n_total = int(n_features + (n_features - 1) * n_features / 2)

    # Features are stored as rows while the products are being filled in.
    features = np.empty((n_total, n_samples))
    features[:n_features] = normalised.T

    # Strictly-lower-triangular index pairs enumerate (i, j) with j < i in the
    # same order as two nested loops over i and then j.
    first, second = np.tril_indices(n_features, -1)
    features[n_features:] = features[first] * features[second]

    renormalised = normalisation_function(features.T)
    ones_column = np.ones((len(renormalised), 1))
    return np.concatenate((ones_column, renormalised), axis=1)

In [None]:
def GD(y, tx, initial_w, max_iters, gamma, gradient_func, loss_func,
       y_te=None, tx_te=None, log_every=500):
    """Gradient descent that periodically prints classification accuracy.

    NOTE: this definition shadows the plain ``GD`` defined earlier in the
    notebook, so every function that calls ``GD`` picks up the logging once
    this cell has been run.

    Every ``log_every`` iterations (starting at iteration 0) the training
    accuracy is printed, followed by the held-out accuracy when a held-out
    set is available.

    Parameters
    ----------
    y, tx : training data, forwarded to ``gradient_func`` / ``loss_func``.
    initial_w : starting weights (never modified in place).
    max_iters : number of update steps to perform.
    gamma : step size applied to every gradient.
    gradient_func : callable ``(y, tx, w) -> gradient``.
    loss_func : callable ``(y, tx, w) -> loss``.
    y_te, tx_te : optional held-out labels / design matrix used for logging.
        When omitted, notebook-level variables named ``y_te`` and ``tx_te``
        are used if they exist (the previous, implicit behaviour); otherwise
        only the training accuracy is printed instead of raising NameError.
    log_every : logging period in iterations; ``0`` or ``None`` disables it.

    Returns
    -------
    (w, loss) : the final weights and ``loss_func`` evaluated at them.
    """
    if y_te is None or tx_te is None:
        # Backward compatibility with callers that relied on notebook globals.
        y_te = globals().get("y_te")
        tx_te = globals().get("tx_te")
    has_holdout = y_te is not None and tx_te is not None

    w = initial_w

    for i in range(max_iters):
        if log_every and i % log_every == 0:
            train_column = str(accuracy(y, tx, w)).ljust(25, ' ')
            if has_holdout:
                print(train_column, accuracy(y_te, tx_te, w))
            else:
                print(train_column)
        grad = gradient_func(y, tx, w)
        w = w - gamma * grad

    loss = loss_func(y, tx, w)
    return w, loss

def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """Logistic regression fitted with the accuracy-logging ``GD`` above.

    NOTE: this redefines the ``logistic_regression`` declared earlier in the
    notebook. It uses the notebook's own ``logistic_gradient`` and
    ``logistic_loss``; the previous ``implementation.`` prefix referred to a
    module that is never imported here and raised NameError on a fresh kernel.

    Returns
    -------
    (w, loss) : final weights and the logistic loss at those weights.
    """
    w, loss = GD(y, tx, initial_w, max_iters, gamma, logistic_gradient, logistic_loss)
    return w, loss