# Homework 3 Part 1
The code block below contains the support code required for the homework.

In [21]:
# Implement perceptron, average perceptron, and pegasos
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
import pdb
import itertools
import operator
import functools

# Announce that the support-code cell has started executing.
print("Importing code_for_hw03")

######################################################################
# Plotting

def tidy_plot(xmin, xmax, ymin, ymax, center = False, title = None,
                 xlabel = None, ylabel = None):
    """Create a new white-background figure and return its single, tidied axes.

    xmin, xmax, ymin, ymax: data limits; the axes limits are padded by 0.05
        on every side.
    center: if True, the left and bottom spines are placed at zero and the
        right/top spines are hidden; otherwise the top and right spines are
        hidden and ticks are kept on the bottom and left.
    title, xlabel, ylabel: optional strings applied to the axes when truthy.

    Returns the matplotlib axes object that was created.
    """
    plt.ion()
    plt.figure(facecolor="white")
    ax = plt.subplot()
    if center:
        ax.spines['left'].set_position('zero')
        ax.spines['right'].set_color('none')
        ax.spines['bottom'].set_position('zero')
        ax.spines['top'].set_color('none')
        # Spine.set_smart_bounds was deprecated and later removed from
        # Matplotlib; only call it where it still exists so that
        # center=True keeps working on current releases.
        for side in ('left', 'bottom'):
            set_smart_bounds = getattr(ax.spines[side], 'set_smart_bounds', None)
            if set_smart_bounds is not None:
                set_smart_bounds(True)
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_ticks_position('left')
    else:
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.get_xaxis().tick_bottom()
        ax.get_yaxis().tick_left()
    # Small padding so points lying exactly on a limit stay visible
    eps = .05
    plt.xlim(xmin-eps, xmax+eps)
    plt.ylim(ymin-eps, ymax+eps)
    if title: ax.set_title(title)
    if xlabel: ax.set_xlabel(xlabel)
    if ylabel: ax.set_ylabel(ylabel)
    return ax

def plot_separator(ax, th, th_0):
    """Draw the separator th[0]*x + th[1]*y + th_0 = 0 and its normal on ax.

    ax: axes-like object; its current x/y limits define the visible box.
    th: 2 x 1 array holding the normal vector of the separator.
    th_0: offset, given either as a plain number or as a one-element array
        (e.g. the 1 x 1 array returned by perceptron).

    The separator is clipped to the axes limits. If fewer than two distinct
    crossing points fall inside the box, nothing is drawn and a message is
    printed instead.
    """
    xmin, xmax = ax.get_xlim()
    ymin, ymax = ax.get_ylim()
    eps = 1.0e-6
    # Collapse a one-element array to a scalar; otherwise the candidate
    # points below would be ragged arrays mixing floats and 1 x 1 arrays.
    offset = np.asarray(th_0, dtype=float).item()
    pts = []
    # Crossing of a vertical boundary x = c: y = (-th_0 - c th[0]) / th[1]
    if abs(th[1,0]) > eps:
        pts += [np.array([x, (-offset - x * th[0,0]) / th[1,0]])
                for x in (xmin, xmax)]
    # Crossing of a horizontal boundary y = c: x = (-th_0 - c th[1]) / th[0]
    if abs(th[0,0]) > eps:
        pts += [np.array([(-offset - y * th[1,0]) / th[0,0], y])
                for y in (ymin, ymax)]
    # Keep only crossings inside the box, dropping (near-)duplicates such as
    # a line passing exactly through a corner.
    in_pts = []
    for p in pts:
        inside = (xmin-eps) <= p[0] <= (xmax+eps) and \
                 (ymin-eps) <= p[1] <= (ymax+eps)
        if inside and not any(np.max(np.abs(p - q)) < eps for q in in_pts):
            in_pts.append(p)
    if len(in_pts) >= 2:
        # Plot separator
        vpts = np.vstack(in_pts)
        ax.plot(vpts[:,0], vpts[:,1], 'k-', lw=2)
        # Plot normal: a short segment from the midpoint along th
        vmid = 0.5*(in_pts[0] + in_pts[1])
        scale = np.sum(th*th)**0.5
        dist = max(xmax-xmin, ymax-ymin)
        vnrm = vmid + (dist/10)*(th.T[0]/scale)
        vpts = np.vstack([vmid, vnrm])
        ax.plot(vpts[:,0], vpts[:,1], 'k-', lw=2)
        # Try to keep limits from moving around
        ax.set_xlim((xmin, xmax))
        ax.set_ylim((ymin, ymax))
    else:
        print('Separator not in plot range')

def plot_data(data, labels, ax = None, clear = False,
                  xmin = None, xmax = None, ymin = None, ymax = None):
    """Scatter-plot 2-D points coloured by label and return the axes used.

    data: array whose row 0 holds x coordinates and row 1 holds y coordinates.
    labels: array compared with 0; positive entries are drawn green ('g'),
        the rest red ('r'). Its first row supplies the per-point colours.
    ax: existing axes to draw on. When None, a new axes is created with
        tidy_plot, using the given limits or, for any limit left as None,
        the data range extended by 0.5.
    clear: when an existing ax is given, clear it before drawing (its limits
        are captured first and restored afterwards).

    Returns the axes that was drawn on.
    """
    created = ax is None
    if created:
        if xmin is None: xmin = np.min(data[0, :]) - 0.5
        if xmax is None: xmax = np.max(data[0, :]) + 0.5
        if ymin is None: ymin = np.min(data[1, :]) - 0.5
        if ymax is None: ymax = np.max(data[1, :]) + 0.5
        ax = tidy_plot(xmin, xmax, ymin, ymax)

        x_range = xmax - xmin
        y_range = ymax - ymin
        # Equal aspect only when the two ranges are within a factor of 10;
        # a degenerate (zero) y-range simply skips it instead of dividing by 0.
        if y_range > 0 and .1 < x_range / y_range < 10:
            ax.set_aspect('equal')
    # Capture the limits before a possible clear() so they can be restored
    xlim, ylim = ax.get_xlim(), ax.get_ylim()
    if clear and not created:
        ax.clear()
    # Named point_colors (not colors) to avoid shadowing matplotlib.colors
    point_colors = np.where(labels > 0, 'g', 'r')[0]
    ax.scatter(data[0,:], data[1,:], c = point_colors,
                    marker = 'o', s=50, edgecolors = 'none')
    # Seems to occasionally mess up the limits
    ax.set_xlim(xlim); ax.set_ylim(ylim)
    ax.grid(True, which='both')
    return ax

# Must either specify limits or existing ax
def plot_nonlin_sep(predictor, ax = None, xmin = None , xmax = None,
                        ymin = None, ymax = None, res = 30):
    """Shade the plane according to predictor, sampled on a res x res grid.

    predictor: callable taking (x1, x2) and returning a number; values in
        [-2, 0) are drawn black and values in [0, 2) white.
    ax: existing axes. When None, a new axes is created with tidy_plot and
        all four limits must be supplied.
    xmin, xmax, ymin, ymax: region to sample. When ax is given and xmin is
        None, the current limits of ax are used; otherwise the limits of ax
        are set to the supplied values.
    res: number of sample points along each axis.

    Raises ValueError when neither an axes nor a complete set of limits is
    given.
    """
    if ax is None:
        if any(lim is None for lim in (xmin, xmax, ymin, ymax)):
            raise ValueError(
                'plot_nonlin_sep needs either an existing ax or all of '
                'xmin, xmax, ymin, ymax')
        ax = tidy_plot(xmin, xmax, ymin, ymax)
    elif xmin is None:
        xmin, xmax = ax.get_xlim()
        ymin, ymax = ax.get_ylim()
    else:
        ax.set_xlim((xmin, xmax))
        ax.set_ylim((ymin, ymax))

    # Two-colour map split at 0
    cmap = colors.ListedColormap(['black', 'white'])
    bounds=[-2,0,2]
    norm = colors.BoundaryNorm(bounds, cmap.N)

    # Rows follow increasing x2; flipud below puts the largest x2 on top,
    # matching imshow's default image orientation.
    ima = np.array([[predictor(x1i, x2i)
                         for x1i in np.linspace(xmin, xmax, res)]
                         for x2i in np.linspace(ymin, ymax, res)])
    ax.imshow(np.flipud(ima), interpolation = 'none',
                  extent = [xmin, xmax, ymin, ymax],
                  cmap = cmap, norm = norm)

######################################################################
#   Utilities

# Takes a list of numbers and returns a column vector:  n x 1
def cv(value_list):
    """Return value_list as a column vector (n x 1 for a flat list of n values)."""
    # A column vector is just the transposed row vector
    return rv(value_list).T

# Takes a list of numbers and returns a row vector: 1 x n
def rv(value_list):
    """Return value_list as a row vector (1 x n for a flat list of n values)."""
    # Prepend a length-1 axis to the array built from the values
    return np.array(value_list)[np.newaxis, ...]

# x is dimension d by 1
# th is dimension d by 1
# th0 is a scalar
# return a 1 by 1 matrix
def y(x, th, th0):
    """Evaluate the linear function th^T x + th0 for the column(s) of x."""
    weights_row = np.transpose(th)
    return weights_row.dot(x) + th0

# x is dimension d by 1
# th is dimension d by 1
# th0 is dimension 1 by 1
# return 1 by 1 matrix of +1, 0, -1
def positive(x, th, th0):
    """Return the sign (+1, 0 or -1) of th^T x + th0."""
    activation = y(x, th, th0)
    return np.sign(activation)

# data is dimension d by n
# labels is dimension 1 by n
# th is dimension d by 1
# th0 is dimension 1 by 1
# return a scalar integer: the number of data points that the separator
# (th, th0) classifies correctly.
def score(data, labels, th, th0):
    """Count the entries where the predicted sign equals the label."""
    predictions = positive(data, th, th0)
    return np.sum(predictions == labels)

######################################################################
#   Data Sets

# Return d = 2 by n = 4 data matrix and 1 x n = 4 label matrix
def super_simple_separable_through_origin():
    """Return the 2 x 4 data matrix and 1 x 4 label matrix of this toy set."""
    first_coords = [2, 3, 9, 12]
    second_coords = [5, 1, 6, 5]
    points = np.array([first_coords, second_coords])
    targets = np.array([[1, -1, 1, -1]])
    return points, targets

def super_simple_separable():
    """Return the 2 x 4 data matrix and 1 x 4 label matrix of this toy set."""
    first_coords = [2, 3, 9, 12]
    second_coords = [5, 2, 6, 5]
    points = np.array([first_coords, second_coords])
    targets = np.array([[1, -1, 1, -1]])
    return points, targets

def xor():
    """Return the 2 x 4 data matrix and 1 x 4 label matrix of the XOR set."""
    first_coords = [1, 2, 1, 2]
    second_coords = [1, 2, 2, 1]
    points = np.array([first_coords, second_coords])
    targets = np.array([[1, 1, -1, -1]])
    return points, targets

def xor_more():
    """Return the 2 x 8 data matrix and 1 x 8 label matrix of the larger XOR set."""
    first_coords = [1, 2, 1, 2, 2, 4, 1, 3]
    second_coords = [1, 2, 2, 1, 3, 1, 3, 3]
    points = np.array([first_coords, second_coords])
    targets = np.array([[1, 1, -1, -1, 1, 1, -1, -1]])
    return points, targets

######################################################################
#   Tests for part 2:  features

# Make it take miscellaneous args and pass into learner
def test_linear_classifier_with_features(dataFun, learner, feature_fun,
                             draw = True, refresh = True, pause = True):
    """Train learner on (optionally feature-mapped) data and report the result.

    dataFun: zero-argument callable returning (raw_data, labels).
    learner: callable invoked as learner(data, labels, hook=hook). Its result
        must be a sequence whose first two items are (th, th0); any further
        items (such as the extra values returned by perceptron) are ignored.
    feature_fun: callable mapping a d x n array to a k x n array, or a falsy
        value to train on the raw data.
    draw: when True, plot the raw data and redraw the decision regions from
        a hook passed to the learner.
    refresh: accepted for interface compatibility; not used.
    pause: when True (and draw is True), wait for Enter after each redraw.

    Prints the final score and the learned parameters; returns None.
    """
    raw_data, labels = dataFun()
    # Identity fallback so the drawing hook also works without a feature map
    featurize = feature_fun if feature_fun else (lambda raw: raw)
    data = featurize(raw_data)
    if draw:
        ax = plot_data(raw_data, labels)
        def hook(params):
            (th, th0) = params
            # .item() extracts the single predicted sign; calling int() on a
            # 1 x 1 array directly is deprecated in recent NumPy.
            plot_nonlin_sep(
                lambda x1,x2: int(
                    positive(featurize(cv([x1, x2])), th, th0).item()),
                ax = ax)
            plot_data(raw_data, labels, ax)
            plt.pause(0.05)
            print('th', th.T, 'th0', th0)
            if pause: input('press enter here to continue:')
    else:
        hook = None
    # Learners may return diagnostics after (th, th0) -- perceptron returns
    # four values -- so take the first two instead of unpacking exactly two.
    learned = learner(data, labels, hook = hook)
    th, th0 = learned[0], learned[1]
    if hook: hook((th, th0))
    print("Final score", int(score(data, labels, th, th0)))
    print("Params", np.transpose(th), th0)

def mul(seq):
    """Return the product of the items of seq (1 for an empty sequence)."""
    product = 1
    for factor in seq:
        product = product * factor
    return product

def make_polynomial_feature_fun(order):
    """Return a function mapping raw features to all monomials up to order.

    The returned function takes raw_features of shape d x n (one data point
    per column) and returns a k x n array, where
    k = sum_{i = 0}^order multichoose(d, i). Rows are ordered by degree and,
    within a degree, in the order produced by
    itertools.combinations_with_replacement(range(d), degree); row 0 is the
    constant feature 1.
    """
    def f(raw_features):
        d, n = raw_features.shape
        rows = []   # one length-n row per monomial
        for degree in range(order + 1):
            # The index tuples do not depend on the data point, so each
            # monomial is enumerated once and evaluated for all n columns.
            for index_tuple in itertools.combinations_with_replacement(
                    range(d), degree):
                factors = raw_features[np.asarray(index_tuple, dtype=int), :]
                # The product over zero factors (degree 0) is a row of ones
                rows.append(np.prod(factors, axis=0))
        if not rows:
            # Negative order: no features at all
            return np.zeros((0, n))
        return np.vstack(rows)
    return f

def test_with_features(dataFun, order = 2, draw=True, pause=True):
    """Run the perceptron on dataFun's data with polynomial features of the given order."""
    feature_maker = make_polynomial_feature_fun(order)
    test_linear_classifier_with_features(dataFun, perceptron, feature_maker,
                                         draw=draw, pause=pause)

# Perceptron algorithm with offset.
# data is dimension d by n
# labels is dimension 1 by n
# params is a dict; params['T'] (default 100) is the maximum number of
# passes to make over the data
def perceptron(data, labels, params = None, hook = None):
    """Perceptron algorithm with offset.

    data: d x n array, one data point per column.
    labels: 1 x n array of labels.
    params: optional dict; params['T'] (default 100) is the maximum number
        of passes over the data.
    hook: optional callable invoked as hook((theta, theta_0)) after every
        update.

    Returns (theta, theta_0, dataset_margin, m) where theta is d x 1,
    theta_0 is 1 x 1, m is the number of updates (mistakes) made, and
    dataset_margin is the minimum of 1 and label * predicted sign over the
    last pass, each evaluated before any update at that point. Training
    stops early once a full pass leaves dataset_margin positive, i.e. no
    point was misclassified during that pass.
    """
    # None instead of a shared mutable {} default
    if params is None:
        params = {}
    T = params.get('T', 100)
    (d, n) = data.shape
    m = 0
    theta = np.zeros((d, 1)); theta_0 = np.zeros((1, 1))
    dataset_margin = 1
    for t in range(T):
        dataset_margin = 1
        for i in range(n):
            x = data[:,i:i+1]
            # Named label (not y) so the module-level y() is not shadowed
            label = labels[:,i:i+1]
            # Evaluate the signed agreement once and reuse it for both the
            # running minimum and the mistake test
            agreement = label * positive(x, theta, theta_0)
            dataset_margin = min(dataset_margin, agreement)
            if agreement <= 0.0:
                m += 1
                theta = theta + label * x
                theta_0 = theta_0 + label
                if hook: hook((theta, theta_0))
        if dataset_margin > 0:
            break

    return theta, theta_0, dataset_margin, m




######################################################################
#   Tests for part 2D:  Encoding discrete values

def one_hot_internal(x, k):
    """Reference one-hot encoder: k x 1 column of zeros with a 1 at row x-1."""
    # Fill a flat vector first, then shape it into a column
    encoding = np.zeros(k)
    encoding[x - 1] = 1
    return encoding.reshape((k, 1))

def test_one_hot(sub):
    """Compare sub against one_hot_internal on two fixed cases and print the verdict."""
    cases = ((3, 5), (4, 7))
    # all() stops at the first mismatch, so later cases are not evaluated
    matches = all(one_hot_internal(x, k).tolist() == sub(x, k).tolist()
                  for x, k in cases)
    if matches:
        print("Passed! \n")
    else:
        print("Test Failed")

#-----------------------------------------------------------------------------
# Informational output only: list the names this cell has made available.
print("Imported tidy_plot, plot_separator, plot_data, plot_nonlin_sep, cv, rv, y, positive, score")
print("Datasets: super_simple_separable_through_origin(), super_simple_separable(), xor(), xor_more()")
print("Tests for part 2: test_linear_classifier_with_features, mul, make_polynomial_feature_fun, ")
print("                  test_with_features")
print("Also loaded: perceptron, one_hot_internal, test_one_hot")

######################################################################
#   Example for part 3B) test_with_features()
#test_with_features(super_simple_separable, 2, draw=True, pause=True)


Importing code_for_hw03
Imported tidy_plot, plot_separator, plot_data, plot_nonlin_sep, cv, rv, y, positive, score
Datasets: super_simple_separable_through_origin(), super_simple_separable(), xor(), xor_more()
Tests for part 2: test_linear_classifier_with_features, mul, make_polynomial_feature_fun, 
                  test_with_features
Also loaded: perceptron, one_hot_internal, test_one_hot


# Homework Code
The code blocks below contain the code written for the homework.

In [22]:
# Unscaled data set: the first feature (200 / 800) is orders of magnitude
# larger than the second (0.2 / 0.8).
data = np.array([[200, 800, 200, 800],
             [0.2,  0.2,  0.8,  0.8]])
labels = np.array([[-1, -1, 1, 1]])

# DON'T RUN: TOTAL MISTAKES: 666696
# _, _, _, mistakes = perceptron(data, labels, params={"T": 400000})
# The perceptron call above is left commented out; the mistake count noted
# there is hard-coded below instead of being recomputed.
mistakes = 666696
print("Mistakes made: " + str(mistakes))

Mistakes made: 666696


In [23]:
# Rescale the first feature row by 0.001 and leave the second unchanged (the
# 2 x 1 factor broadcasts across columns). Relies on `data` and `labels`
# still being the arrays defined in the previous cell.
data_1 = data * np.array([[0.001],[1]])
_, _, _, mistakes = perceptron(data_1, labels)

print("Mistakes made: " + str(mistakes))


Mistakes made: 7


In [24]:
# One-dimensional data set (1 x 4) with the raw values used directly as the
# single feature. NOTE: this rebinds the notebook-level `data` and `labels`.
data = np.array([[2, 3,  4,  5]])
labels = np.array([[1, 1, -1, -1]])

# Only the separator is needed here; the margin and mistake count are discarded.
th, th_0, _, _ = perceptron(data, labels)
print("theta: ", th)
print("theta_0: ",th_0)

theta:  [[-2.]]
theta_0:  [[7.]]


In [25]:
def one_hot(x, k):
    """Return the k x 1 one-hot column encoding of the 1-based value x.

    x: integer in 1..k, or an integer array of such values (one_hot_arr
        passes a length-1 array); every listed position is set to 1.
    k: length of the encoding.

    Raises IndexError when any value lies outside 1..k. Without this check
    x <= 0 would silently wrap around through negative indexing and mark
    the wrong row.
    """
    positions = np.asarray(x)
    if np.any(positions < 1) or np.any(positions > k):
        raise IndexError(
            "one_hot value {} is outside the range 1..{}".format(x, k))
    vec = np.zeros((k, 1))
    vec[x-1] = 1
    return vec

# Quick visual check: value 2 encoded in a vector of length 10.
print(one_hot(2, 10))

[[0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]


In [26]:
def one_hot_arr(data, k):
    """One-hot encode every column of data, returning a k x n array.

    data: 2-D array with n columns; each column is passed to one_hot as the
        value(s) to encode (presumably 1 x n integer data -- confirm against
        callers).
    k: length of each encoding.
    """
    _, n = data.shape
    # Each encoding is a k x 1 column; keep it as a flat row while collecting
    encoded_rows = [one_hot(data[:, j], k)[:, 0] for j in range(n)]
    return np.array(encoded_rows).T

In [27]:
# Train on the one-hot encoding (k = 6) of the notebook-level `data`, using
# the notebook-level `labels`; both come from an earlier cell.
one_hot_data = one_hot_arr(data, 6)
th, th_0, _, _ = perceptron(one_hot_data, labels)
print(th, th_0)

[[ 0.]
 [ 2.]
 [ 1.]
 [-2.]
 [-1.]
 [ 0.]] [[0.]]


In [28]:
# Six categories 1..6 with labels +1, +1, -1, -1, +1, +1, one-hot encoded
# before training.
new_data = np.array([[1,2,3,4,5,6]])
new_labels = np.array([[1,1,-1,-1,1,1]])
one_hot_new_data = one_hot_arr(new_data, 6)

# Allow up to 200 passes over the data instead of the default 100.
th, th_0, _, _ = perceptron(one_hot_new_data, new_labels, params={"T": 200})
print(th, th_0)

[[ 1.]
 [ 1.]
 [-2.]
 [-2.]
 [ 1.]
 [ 1.]] [[0.]]


In [29]:
# Count the polynomial features produced for a single 2-D point at each order
deg = [1,10,20,30,40,50]
# The probe point is the same for every order, so build it once
x = np.array([[1,2]]).T
feature_len = []
for d in deg:
    feat_fn = make_polynomial_feature_fun(d)
    # Number of rows of the k x 1 result = number of features
    feature_len.append(feat_fn(x).shape[0])
print("Number of features: ", feature_len)

Number of features:  [3, 66, 231, 496, 861, 1326]


Training for the `super_simple_separable_through_origin` dataset

In [30]:
# Train with the default settings (no params passed) on the raw 2-D data and
# report the separator, the returned margin value and the number of mistakes.
data, labels = super_simple_separable_through_origin()
th, th0, margin_1, mistake_1 = perceptron(data, labels)
print("th: ", th, "th_0: ", th0)
print("margin: ", margin_1, ",mistakes: ", mistake_1)

th:  [[-9.]
 [18.]] th_0:  [[2.]]
margin:  1 ,mistakes:  12


Training for the `super_simple_separable` dataset

In [31]:
# Same procedure as for the through-origin data set, on the second toy set.
data, labels = super_simple_separable()
th, th0, margin_2, mistake_2 = perceptron(data, labels)
print("th: ", th, "th_0: ", th0)
print("margin: ", margin_2, ",mistakes: ", mistake_2)

th:  [[-24.]
 [ 37.]] th_0:  [[-3.]]
margin:  1 ,mistakes:  47


Training for the `xor` dataset

In [38]:
data_xor, labels_xor = xor()
# Use a polynomial feature mapping of degree 2 before training
th, th0, margin_3, mistake_3 = perceptron(make_polynomial_feature_fun(2)(data_xor), labels_xor)
print("th: ", th, "th_0: ", th0)
print("margin: ", margin_3, ",mistakes: ", mistake_3)

th:  [[ 1.]
 [-1.]
 [-1.]
 [-5.]
 [11.]
 [-5.]] th_0:  [[1.]]
margin:  1 ,mistakes:  65


Training for the `xor_more` dataset

In [48]:
data_xor_more, labels_xor_more = xor_more()
# Use a polynomial feature mapping of degree 3 and allow up to 50000 passes
th, th0, margin_4, mistake_4 = perceptron(
    make_polynomial_feature_fun(3)(data_xor_more),
    labels_xor_more,
    params={"T": 50000},
)
print("th: ", th, "th_0: ", th0)
print("margin: ", margin_4, ",mistakes: ", mistake_4)

th:  [[ -78.]
 [  28.]
 [ -39.]
 [  72.]
 [ 248.]
 [ -19.]
 [  76.]
 [-522.]
 [ 476.]
 [-153.]] th_0:  [[-78.]]
margin:  1 ,mistakes:  2202
