### Pattern Recognization and Machine Learning
#### Exercise 4
### Task 3
#### Implement gradient descent for log-loss.

a) Implement a log-loss minimization algorithm for the loss of Equation.

b) Apply the code for the data log_loss_data X.scv and Y.csv. The data is in CSV format. Load X and y using numpy.loadtxt.

c) Plot the path of w over 100 iterations and check the accuracy

In [1]:
import numpy as np
import matplotlib.pyplot as plt

def log_loss(w, X, y):
    """ 
    Computes the log-loss function at w. The 
    computation uses the data in X with
    corresponding labels in y. 
    """
    
    L = 0 # Accumulate loss terms here.
     
        
    # Process each sample in X:
    for n in range(X.shape[0]):
        L += np.log(1 + np.exp(y[n] * np.dot(w, X[n])))
    
    return L
    
def grad(w, X, y):
    """ 
    Computes the gradient of the log-loss function
    at w. The computation uses the data in X with
    corresponding labels in y. 
    """
        
    G = 0 # Accumulate gradient here.
    
    # Process each sample in X:
    for n in range(X.shape[0]):
        
        numerator =  np.exp(-y[n] * np.dot(w, X[n])) * (-y[n])*X[n]
        denominator = 1 + np.exp(-y[n] * np.dot(w, X[n]))
        
        G += numerator / denominator
    
    return G
    
if __name__ == "__main__":
        
    # Add your code here:
        
    # 1) Load X and y.
    X=np.loadtxt('X.csv',delimiter=",")
    y=np.loadtxt('y.csv')

    # 2) Initialize w at w = np.array([1, -1])
    w = np.array([1, -1])
    
    # 3) Set step_size to a small positive value.
    step_size=0.01

    # 4) Initialize empty lists for storing the path and
    # accuracies: W = []; accuracies = []
    W = []
    accuracies = []
    
    for iteration in range(100):

        # 5) Apply the gradient descent rule.
        G = grad(w,X,y)
        w = w - step_size*G
        

        # 6) Print the current state.
        print ("Iteration %d: w = %s (log-loss = %.2f)" % \
              (iteration, str(w), log_loss(w, X, y)))
        
        # 7) Compute the accuracy (already done for you)
            
        # Predict class 1 probability
        y_prob = 1 / (1 + np.exp(-np.dot(X, w)))
                # Threshold at 0.5 (results are 0 and 1)
        y_pred = (y_prob > 0.5).astype(int)
                # Transform [0,1] coding to [-1,1] coding
        y_pred = 2*y_pred - 1

        accuracy = np.mean(y_pred == y)
        accuracies.append(accuracy)
        
        W.append(w)
    
    # 8) Below is a template for plotting. Feel free to 
    # rewrite if you prefer different style.
    
    W = np.array(W)
    
    plt.figure(figsize = [5,5])
    plt.subplot(211)
    plt.plot(W[:,0], W[:,1], 'ro-')
    plt.xlabel('w$_0$')
    plt.ylabel('w$_1$')
    plt.title('Optimization path')
    
    plt.subplot(212)
    plt.plot(100.0 * np.array(accuracies), linewidth = 2)
    plt.ylabel('Accuracy / %')
    plt.xlabel('Iteration')
    plt.tight_layout()
    plt.savefig("log_loss_minimization.pdf", bbox_inches = "tight")

Iteration 0: w = [-1.61077699  6.98804232] (log-loss = 6400.49)
Iteration 1: w = [-1.30784233  6.78337571] (log-loss = 6149.70)
Iteration 2: w = [-1.01807052  6.57630451] (log-loss = 5900.48)
Iteration 3: w = [-0.74104888  6.36752118] (log-loss = 5653.27)
Iteration 4: w = [-0.47645165  6.15776289] (log-loss = 5408.61)
Iteration 5: w = [-0.22424352  5.94783477] (log-loss = 5167.17)
Iteration 6: w = [0.01507978 5.73864477] (log-loss = 4929.87)
Iteration 7: w = [0.24028557 5.53124896] (log-loss = 4697.92)
Iteration 8: w = [0.44931875 5.32688626] (log-loss = 4472.91)
Iteration 9: w = [0.63947123 5.12696068] (log-loss = 4256.72)
Iteration 10: w = [0.80792106 4.93293232] (log-loss = 4051.25)
Iteration 11: w = [0.9525058  4.74613814] (log-loss = 3858.07)
Iteration 12: w = [1.07227464 4.56764329] (log-loss = 3678.22)
Iteration 13: w = [1.16750141 4.39819999] (log-loss = 3512.14)
Iteration 14: w = [1.23935522 4.23827521] (log-loss = 3359.81)
Iteration 15: w = [1.28961454 4.08806051] (log-loss =

### Task 4
#### Define the network in Keras.


In [2]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
import numpy as np
import os


model= Sequential()

N = 32 # Number of feature maps
w, h = 5, 5 # Conv. window size
model.add(Conv2D(N, (w, h),input_shape=(64, 64, 1),activation = 'relu',padding = 'same')) 
# rectified linear unit-Activation function
model.add(MaxPooling2D(pool_size=(4, 4)))
model.add(Conv2D(N, (w, h),activation = 'relu',padding = 'same')) 
model.add(MaxPooling2D((4,4)))
model.add(Flatten())
model.add(Dense(100, activation = 'relu'))
model.add(Dense(2, activation = 'softmax'))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 64, 64, 32)        832       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 16, 32)        25632     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 4, 4, 32)          0         
_________________________________________________________________
flatten (Flatten)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 100)               51300     
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 2

### Task 5
#### Compile and train the net.

a) Compile the network following the examples of the lecture slides and documentation at http://keras.io/.

b) Train the model with the GTSRB dataset from last week.

Use the following parameters:

Loss: categorical crossentropy (same thing as log loss; see previous
exercises)

Optimizer: stochastic gradient descent

Minibatch size: 32

Number of epochs: 20

Also add the parameter metrics=[’accuracy’] as an argument of model.compile and give the test data to training algorithm.

model.fit(..., validation_data = [X_test, y_test]). 

Then, the optimizer will report the test error every epoch.

In [3]:
import glob
import numpy as np
import os
import matplotlib.pyplot as plt
from simplelbp import local_binary_pattern

def load_data(folder):
    """ 
    Load all images from subdirectories of
    'folder'. The subdirectory name indicates
    the class.
    """
    
    X = []          # Images go here
    y = []          # Class labels go here
    classes = []    # All class names go here
    
    subdirectories = glob.glob(folder + "/*")
    
    # Loop over all folders
    for d in subdirectories:
        
        # Find all files from this folder
        files = glob.glob(d + os.sep + "*.jpg")
        
        # Load all files
        for name in files:
            
            # Load image and parse class name
            img = plt.imread(name)
            class_name = name.split(os.sep)[-2]

            # Convert class names to integer indices:
            if class_name not in classes:
                classes.append(class_name)
            
            class_idx = classes.index(class_name)
            
            X.append(img)
            y.append(class_idx)
    
    # Convert python lists to contiguous numpy arrays
    X = np.array(X)
    y = np.array(y)
    
    return X, y
X, y = load_data(".")

In [4]:

from sklearn.model_selection import train_test_split
X_train, X_test, y_train,  y_test = train_test_split(X, y, test_size=0.20, random_state=50)

# Keras assumes 4D input, but MNIST is lacking color channel.
# -> Add a dummy dimension at the end.
X_train = X_train[..., np.newaxis] / 255.0
X_test = X_test[..., np.newaxis] / 255.0
# Output has to be one-hot-encoded
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)


In [5]:

model.compile(loss='categorical_crossentropy',optimizer='SGD',metrics = ['accuracy'])
model.fit(X_train, y_train, epochs = 20, batch_size=32, validation_data = (X_test, y_test))

Train on 161 samples, validate on 41 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x26b0bd9c808>