In [13]:
#Exercise 7
import numpy as np
import matplotlib.pyplot as plt
from plot import *
Iris2D1_train = np.loadtxt("data/Iris2D1_train.txt")
Iris2D1_test = np.loadtxt("data/Iris2D1_test.txt")
Iris2D2_train = np.loadtxt("data/Iris2D2_train.txt")
Iris2D2_test = np.loadtxt("data/Iris2D2_test.txt")
s = False

In [14]:
#7.1
def plot_iris(data, name):
    if "2D1" in name:
        y = "Feature 2"
    else:
        y = "Feature 1"
    
    plt.scatter(data[data[:,2] == 0][:, 0], data[data[:,2] == 0][:, 1], ec = "black", label = "Class 0", zorder = 3)
    plt.scatter(data[data[:,2] == 1][:, 0], data[data[:,2] == 1][:, 1], ec = "black", color = 'red', label = "Class 1", zorder = 3)
    plot_template(title = "Plot of " + name, xlabel = "Feature 0", ylabel = y, equal_axis=False, legend= True, save = s)

In [15]:
plot_iris(Iris2D1_train, "Iris2D1 train")
plot_iris(Iris2D1_test, "Iris2D1 test")
plot_iris(Iris2D2_train, "Iris2D2 train")
plot_iris(Iris2D2_test, "Iris2D2 test")
#the two classes are very distinctly separated in the Iris2D2 datasets while there is a little overlap in the Iris2D1 datasets

<Figure size 432x288 with 0 Axes>

In [6]:
#7.2
def logistic(x):
    out = 1/(1 + np.exp(-x))
    return out

def add_one(X):
    row, col = np.shape(X)
    one_col = np.ones(row)
    X = np.c_[one_col, X]
    return X

def gradient(X, y, w):
    """
    Returns a vector of partial derivaties
    """
    s = -y.T * (w.T @ X.T) # Transpose to get 1*d @ d*N
    theta = logistic(s) # 1*N
    c =  -y * X # N*d
    grad = c.T @ theta.T # Transpose to d*N @ N*1
    return grad

def insample_error(X, y, w):
    """
    Returns a single real value which corresponds to the error
    """
    N = len(X)
    s = -y.T * (w.T @ X.T) # Transpose to get 1*N x 1*d @ d*N
    pyx = np.log(1 + np.exp(s)) # Calculate P(Yn|Xn) likelihood
    error = np.sum(pyx)/N # Calculate sum[P(Yn|Xn)]/N
    return error

def train_log(X, y):
    """
    Perfoms logistic regression training
    Takes in X = N*d array and Y = N*1 array
    Returns an array of weights w = d*1
    """
    X = add_one(X) # Add intercept column
    N, d = np.shape(X)
    w = np.reshape(np.random.randn(d), (d, 1)) #initialize random weights
    error = insample_error(X, y , w)
    learning_rate = 0.01
    iteration = 1
    convergance = 0
    tolerance = 10**-10
    
    while convergance == 0:
        m = gradient(X, y, w)
        w_new = w - (learning_rate * m) # update weight
        new_error = insample_error(X, y, w_new)
        g = np.linalg.norm(m) # convert partial derivate array to single gradient value
        iteration += 1
        
        #check if new error is better
        if new_error < error:
            w = w_new
            error = new_error
            learning_rate *= 1.1
        else:
            learning_rate *=0.9
        
        #check convergance condition
        if g < tolerance:
            #print("Tolerance reached")
            convergance = 1
        elif iteration == 10000:
            #print("Max iterations")
            convergance = 1
    return w
    
def predict_log(X, w):
    X = add_one(X) #Add column for intercept
    pred = logistic(w.T @ X.T).T # h(x) = theta(w.T @ x)
    pred = pred > 0.5 # Convert prediction to a boolean that indicates if it is > 0.5 or not
    pred = np.array(pred, dtype = int) # convert to 0 or 1
    pred = 2*(pred-0.5) # conver to -1 or 1
    return pred

def get_error(true, pred):
    """
    Takes in two N*1 arrays
    Each array consists of -1 or 1
    Returns a single error value
    """
    N = len(true)
    error = abs(true - pred)/2 #convert each value to 0 or 1
    error = np.sum(error)/N
    return error

def split_data(data):
    """
    Saves last column as y. Converts y to -1 or 1
    Saves rest of the columns as x
    """
    x = data[:, :2]
    y = data[:, -1:]
    y = 2*(y-0.5) # convert to -1 or 1
    return x, y

def log_regression(train, test):
    """
    Perform logistic regression on a dataset with y value as last column
    Returns the 0-1 error value, weights, and predicted values
    """
    train_x, train_y = split_data(train)
    test_x, test_y = split_data(test)
    weights = train_log(train_x, train_y)
    pred = predict_log(test_x, weights)
    error = get_error(test_y, pred)
    return error, weights, pred

In [9]:
#7.3
e1, w1, p1 = log_regression(Iris2D1_train, Iris2D1_test)
e2, w2, p2 = log_regression(Iris2D2_train, Iris2D2_test)
#7.4
print(f"Iris2D1 Error: {e1}\nIris2D1 Weights:\n{w1}")
print("\n")
print(f"Iris2D2 Error: {e2}\nIris2D2 Weights:\n{w2}")

Iris2D1 Error: 0.06666666666666667
Iris2D1 Weights:
[[-12.49747315]
 [ -4.63317351]
 [  8.39690487]]


Iris2D2 Error: 0.0
Iris2D2 Weights:
[[-28.86604052]
 [ 12.62804651]
 [-12.67523621]]


In [8]:
#Iris2D1_test[:, -1:]

# Exercise 8
1. The insample error function is defined as:
   $$E_{in} = \frac{1}{N}\sum_{n=1}^{N}ln\left(\frac{1}{\theta(y_n\mathbf{w}^T\mathbf{x}_n)}\right)$$
   The logistic function $\theta$ is defined as:
   $$\theta(s) = \frac{e^s}{1+e^s}$$
   Therefore:
   $$\frac{1}{\theta(s)} = \frac{1+e^s}{e^s}$$
   Rearranging this gives us:
   $$\frac{1}{\theta(s)} = \frac{1}{e^s} + \frac{e^s}{e^s}$$
   $$\frac{1}{\theta(s)} = e^{-s} + 1$$
   $$\frac{1}{\theta(s)} = 1 + e^{-s}$$
   Therefore the insample error function is equivalent to:
   $$E_{in} = \frac{1}{N}\sum_{n=1}^{N}ln(1+ e^{-y_n\mathbf{w}^T\mathbf{x}_n})$$
   The derivative of $E_{in}$ is:
   $$\nabla_{\mathbf{w}}E_{in} = \frac{1}{N}\sum_{n=1}^{N}\frac{1}{1+e^{-y_n\mathbf{w}^T\mathbf{x}_n}}(-y_n\mathbf{x}_ne^{-y_n\mathbf{w}^T\mathbf{x}_n})$$
   Rearranging this gives us:
   $$\nabla_{\mathbf{w}}E_{in} = \frac{1}{N}\sum_{n=1}^{N}-y_n\mathbf{x}_n\frac{e^{-y_n\mathbf{w}^T\mathbf{x}_n}}{1+e^{-y_n\mathbf{w}^T\mathbf{x}_n}}$$
   Since the logistic function $\theta$ is defined as:
   $$\theta(s) = \frac{e^s}{1+e^s}$$
   We can therefore rewrite the derivative as:
   $$\nabla_{\mathbf{w}}E_{in} = \frac{1}{N}\sum_{n=1}^{N}-y_n\mathbf{x}_n\theta(-y_n\mathbf{w}^T\mathbf{x}_n)$$

# weights = train(x, y)
t_x, t_y = split_data(Iris2D1_test)
pred = predict(t_x, weights)
error = error(t_y, pred)
error

In [8]:
Iris2D1_train = np.loadtxt("Iris2D1_train.txt")
x, y = split_data(Iris2D1_train)
d = np.shape(x)[1]
w = np.reshape(np.random.randn(d), (d, 1))
gradient(x, y, w)\
#insample_error(x,y,w)

array([[236.25668179],
       [169.14329519]])

In [119]:
np.random.randn(19)
train_x_2d1, train_y_2d1 = split_data(Iris2D1_train)
#train(train_x_2d1, train_y_2d1)

In [93]:
x = add_one(Iris2D2_train[:, :2])
y = Iris2D2_train[:, -1:]
w = np.reshape(np.random.randn(len(x)), (70, 1))
#s = y @ w.T @ x
#sum(np.log(1 + np.exp(-s)))
#Iris2D2_train

In [121]:
x = train_x_2d1
y = train_y_2d1
w = np.reshape(np.random.randn(np.shape(x)[1]), (np.shape(x)[1], 1))
#logistic(y @ w.T @ x)
y * (w.T @ x.T)

array([[ 6.31352151,  5.70893406,  7.65324077,  7.55041377,  6.45792455,
        -8.82158169,  7.30974203, -9.66684087, -7.41986941,  6.36165586,
        -9.44002782, -7.64668246,  6.61618627, -8.14844291,  7.91432948,
        -7.88735419, -7.82536117,  6.63660325,  7.92818816, -9.2817661 ,
        -8.26512859,  6.65046193,  7.11064634, -7.55041377,  7.28932505,
         6.38207284,  5.80520275, -7.2346324 , -8.31326294,  6.71901327,
         7.17919768,  6.88383329,  7.78378512,  6.16256016,  6.86341631,
        -7.2346324 ,  7.39945242,  6.49220021, -8.21699424,  7.2411907 ,
         6.34779717, -7.92162986,  8.03101516,  7.3440177 , -9.03453606,
        -7.2346324 , -7.38559374,  7.63938208, -7.82536117,  7.55697207,
         7.11064634, -9.04839474,  6.45792455, -8.07989158,  6.70515459,
        -8.86971604, -8.725313  ,  8.04487384, -7.81150249, -6.20413621,
         6.65702024,  7.27546637,  6.67087892, -7.72253417, -9.57057218,
        -7.35131808,  6.71901327,  7.45414507, -8.4

In [189]:
y = Iris2D2_train[:, -1:]
y = 2*(y-0.5)

(w.T @ x.T) * y.T

array([[-6.31352151, -5.70893406, -7.65324077, -7.55041377,  6.45792455,
         8.82158169,  7.30974203,  9.66684087,  7.41986941, -6.36165586,
        -9.44002782,  7.64668246,  6.61618627,  8.14844291,  7.91432948,
         7.88735419, -7.82536117,  6.63660325,  7.92818816,  9.2817661 ,
        -8.26512859, -6.65046193, -7.11064634,  7.55041377,  7.28932505,
         6.38207284, -5.80520275,  7.2346324 , -8.31326294,  6.71901327,
        -7.17919768,  6.88383329,  7.78378512,  6.16256016,  6.86341631,
         7.2346324 , -7.39945242, -6.49220021, -8.21699424, -7.2411907 ,
         6.34779717,  7.92162986,  8.03101516,  7.3440177 , -9.03453606,
        -7.2346324 ,  7.38559374,  7.63938208, -7.82536117,  7.55697207,
         7.11064634,  9.04839474,  6.45792455,  8.07989158,  6.70515459,
        -8.86971604, -8.725313  , -8.04487384, -7.81150249, -6.20413621,
         6.65702024,  7.27546637,  6.67087892, -7.72253417,  9.57057218,
        -7.35131808,  6.71901327, -7.45414507, -8.4