# Lab Tasks
0. Determine a paper to read.
1. Follow what is left in the lecture notes.
2. Try to design different data representations by providing an alternative to `compute_simple_feature`. Evaluate your proposal.
3. Try a multi-class linear model (non-perceptron). Check [here](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) for reference.

# Build Perceptron with Scikit-learn and Classify hand-written digits

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import numpy as np
from sklearn.linear_model import Perceptron

# Build a Perceptron using Scikit-Learn Package

In [None]:
# Create a new, not-yet-trained Perceptron classifier with no arguments overridden.
perc = Perceptron()

In [None]:
# In a notebook, a bare expression on the last line of a cell displays the object.
perc

In [None]:
# Five labelled customers. Each *_x holds two feature values (plotted later as
# income and debt) and each *_y holds the matching class label.
known_customer1_x, known_customer1_y = [60, 0], 0
known_customer2_x, known_customer2_y = [100, 30], 0
known_customer3_x, known_customer3_y = [40, 30], 1
known_customer4_x, known_customer4_y = [55, 35], 1
known_customer5_x, known_customer5_y = [45, 5], 0

In [None]:
# Stack the individual customers into the training inputs X (one row per
# customer) and the matching list of labels y, kept in the same order.
X = [known_customer1_x, 
     known_customer2_x, 
     known_customer3_x, 
     known_customer4_x, 
     known_customer5_x]
y = [known_customer1_y, 
     known_customer2_y, 
     known_customer3_y, 
     known_customer4_y, 
     known_customer5_y]

In [None]:
# Show the assembled training inputs and labels.
print (X, y)

In [None]:
# Train the perceptron on the five labelled customers.
perc.fit(X,y)

In [None]:
# Ask the trained model for the labels of two points it has not seen.
print (perc.predict([[0, 100], [120, 0]]))

In [None]:
# Two more unseen points; the predictions are kept so they can be plotted
# together with the training data.
X_test = np.asarray([ [120, 20], [15, 10] ])
pred_test = perc.predict(X_test)

In [None]:
# Convert the Python lists to arrays so that columns can be sliced.
X_np = np.array(X)
y_np = np.array(y)
# Training points, coloured by their ground-truth label.
plt.scatter(X_np[:, 0], X_np[:, 1], c=y_np, cmap='summer')
# Test points, coloured by the label the model predicted for them.
plt.scatter(X_test[:, 0], X_test[:, 1], c=pred_test, cmap='summer', s=40)
plt.xlabel('Income')
plt.ylabel('Debt')
plt.show()

## 1 Load data

Read the raw data. Get X and the ground-truth y. Define a function that converts a sample x into a 16x16 image (for visualisation).

In [None]:
import os
import urllib.request

# Local path where the training data file is cached.
data_filename = "ref/usps.train.txt"

# Download the file only when it is not cached yet.
if not os.path.exists(data_filename):
    # urlretrieve does not create missing parent directories, so make sure
    # the target directory exists first; otherwise the download fails with
    # FileNotFoundError when "ref/" is absent.
    os.makedirs(os.path.dirname(data_filename), exist_ok=True)
    urllib.request.urlretrieve(
        "https://dl.dropboxusercontent.com/s/yow02sbys5tzepe/usps.train.txt",
        data_filename)

In [None]:
# Parse the downloaded text file into a NumPy array (indexed as 2-D below).
train_raw = np.genfromtxt(data_filename)

In [None]:
print (train_raw.shape)
# Let us investigate the first row of the data a bit.
print(train_raw[0][0]) # first value of the row: seems to be the label
print(train_raw[0][1:].reshape((16,16)))# remaining 256 values as a 16x16 grid: seems to be the pixel data

In [None]:
# Keep only the samples whose label (column 0) is 1 or 5.
# Remaining columns => X_raw, each row represents an image of 16x16=256 pixels.
# Column 0          => y, the labels.
i_1 = (train_raw[:, 0] == 1)
i_5 = (train_raw[:, 0] == 5)
# A row is selected when either boolean mask is set.
i_1_5 = np.logical_or(i_1, i_5)
X_raw = train_raw[i_1_5, 1:]
y = train_raw[i_1_5, 0]

print("Shape of data-X ", X_raw.shape)
print("Shape of data-y ", y.shape)

# This function converts a row of 256 pixels to a 16x16 image.
# Search for "numpy reshape" for more information about the function.
def to_img(x):
    """Return ``x`` reshaped into a 16x16 array.

    ``x`` must offer a NumPy-style ``reshape`` method and hold exactly
    256 elements (one flattened image).
    """
    side = 16
    return x.reshape(side, side)

In [None]:
# Display the first selected sample as a greyscale 16x16 image.
plt.imshow(to_img(X_raw[0]), cmap='gray', interpolation='nearest')

## 2 Prepare data

### 2.1 feature extraction

In [None]:
def compute_simple_feature(X_raw):
    """Compute two hand-crafted features for every image row in ``X_raw``.

    For each row the features are:
      1. the mean of all pixel values (overall brightness);
      2. the horizontal asymmetry: the mean absolute difference between
         the 16x16 image and its left-right mirrored copy.

    Returns a NumPy array built from the per-row ``[mean, asymmetry]``
    pairs, in the same order as the rows of ``X_raw``.
    """
    def _pair(pixels):
        image = to_img(pixels)
        mirrored = image[:, ::-1]  # columns reversed = horizontal flip
        asymmetry = np.abs(image - mirrored).mean()
        return [pixels.mean(), asymmetry]

    # A list of 2-element lists becomes a 2D array under np.asarray.
    return np.asarray([_pair(pixels) for pixels in X_raw])

In [None]:
# Replace each raw image row by its two-element feature vector.
X = compute_simple_feature(X_raw)

# If you want to see the data, uncomment the following to plot the first 100 samples:
# plt.scatter(X[:100,0], X[:100, 1], c=y[:100], cmap='summer')

In [None]:
# Check the shape of the feature array (two features were computed per image).
print(X.shape)

## 3 Use scikit-learn to build a perceptron

In [None]:
# As above: calling the Perceptron class creates a new, untrained perceptron object.
perc1 = Perceptron()

In [None]:
# Use the first 100 samples (feature rows and their labels) to train the perceptron.
TRAIN_NUM = 100
perc1.fit(X[:TRAIN_NUM], y[:TRAIN_NUM])

In [None]:
# this function draws the areas of the 2D plane that the model assigns to each class,
# along with the samples, their predictions and their target values
def draw_model_and_samples(X, targets, perc):
    """Plot the model's decision regions together with the samples.

    Parameters
    ----------
    X : 2D array with one row per sample. Columns 0 and 1 are used as the
        plot axes; the model is also evaluated on a two-column grid, so it
        must have been trained on exactly two features.
    targets : ground-truth labels, one per row of ``X``.
    perc : fitted model exposing ``predict``.

    The current matplotlib figure is cleared and redrawn; nothing is
    returned. Target values are shown as big circles and predictions as
    small circles on top, so a prediction is correct when both circles
    have the same colour. The title reports the number of errors.
    """
    preds = perc.predict(X)
    plt.clf()

    # ========
    # Draw the classification boundary, you can skip this block.
    # Pad each axis by 5% of its data range. Scaling the min/max by 1.05
    # would move a positive minimum (or a negative maximum) inwards and
    # leave some samples outside the coloured area.
    grid_axes = []
    for column in (X[:, 0], X[:, 1]):
        lo, hi = column.min(), column.max()
        pad = 0.05 * (hi - lo)
        if pad == 0:
            # All values identical: still give the grid a non-zero extent.
            pad = 0.05
        grid_axes.append(np.linspace(lo - pad, hi + pad, 100))
    xx, yy = np.meshgrid(grid_axes[0], grid_axes[1])
    zz = perc.predict(np.vstack([xx.flatten(), yy.flatten()]).T).reshape(xx.shape)
    plt.contourf(xx, yy, zz, cmap='summer', alpha=0.8)
    # ========

    # Use one colour scale for both scatter plots. Otherwise each call
    # normalises its own values, and a prediction vector containing a
    # single class would be drawn in the wrong colour.
    all_labels = np.concatenate([np.ravel(targets), np.ravel(preds)])
    vmin, vmax = all_labels.min(), all_labels.max()

    # target values are indicated by the colours of the big circles
    plt.scatter(X[:, 0], X[:, 1], c=targets, s=128, cmap='summer',
                vmin=vmin, vmax=vmax)
    # predicted values are indicated by the colours of the small circles
    plt.scatter(X[:, 0], X[:, 1], c=preds, s=36, cmap='summer',
                vmin=vmin, vmax=vmax)
    # Check http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Perceptron.html
    # for more information about the perceptron model as well as visualising data and model
    plt.grid(True)  # grid visibility is a boolean, not the string 'on'
    plt.title("Errors {} / {}".format(np.count_nonzero(preds!=targets), X.shape[0]))

In [None]:
# Show the perceptron's prediction for the samples at indices 50, 616, 700 and 800.
for sample_index in [50, 616, 700, 800]:
    # Figure 1: the raw 16x16 image of the current sample.
    plt.figure(1)
    plt.imshow(to_img(X_raw[sample_index]), cmap='gray', 
               interpolation='nearest')
    # Figure 2: the model's regions with all samples; the current sample is
    # marked with a large red dot at its feature coordinates.
    plt.figure(2)
    draw_model_and_samples(X, y, perc1)
    plt.plot(X[sample_index, 0], X[sample_index, 1], 'ro', ms=12)
    plt.show()
    # Wait for the user to press Enter before showing the next sample.
    _ = input("Enter for next")