In [11]:
import numpy as np
import pandas as pd

In [12]:
# create data set
dataset = np.array([
    [1,1,0,1,1,1],
    [0,0,1,1,0,-1],
    [0,1,1,0,0,1],
    [1,0,0,1,0,-1],
    [1,0,1,0,1,1],
    [1,0,1,1,0,-1]
])

In [13]:
# the basic method, using threshold 0
def basicZeroThresh(dataset,weights, rate=.5):
    # assumes y is last column in row
    for row in dataset:
        data = row[:-1]
        target = row[len(row) - 1]

        a = np.dot(weights, data)

        # if a is positive and y is negative or the other way
        if np.sign(a) != np.sign(target) or np.sign(a) == 0:
            weights = (weights + np.multiply(rate,np.multiply(data,target))).tolist()

    return weights

In [14]:
weights = [0,0,0,0,0]
prevWeights = []
iters = 0

while (prevWeights != weights) and iters < 15:
    prevWeights = weights.copy()
    weights = basicZeroThresh(dataset,weights)
    iters += 1

print(f'iterations: {iters}')
print(weights)

iterations: 2
[0.0, 1.0, 0.0, -0.5, 0.5]


In [15]:
def winnow(dataset,weights,theta):
    for row in dataset:
        # trim off target 
        data = row[:-1]
        
        # put target in variable
        target = row[len(row) - 1]
        
        # get dot product
        a = np.dot(weights, data)
        
        # if all good, just pass
        if (a > theta and np.sign(target) == 1) or (a < theta and np.sign(target) == -1):
            pass
        elif a <= theta and np.sign(target) == 1:
            # adjust weights for features with a 1
            for i,w in enumerate(weights):
                if data[i] == 1:
                    weights[i] = weights[i]*2
        elif a >= theta and np.sign(target) == -1:
            # adjust weights with feature of 1
            for i,w in enumerate(weights):
                if data[i] == 1:
                    weights[i] = weights[i]/2
            
    return weights

In [16]:
weights = [1,1,1,1,1]
prevWeights = []
iters = 0

while (prevWeights != weights) and iters < 15:
    prevWeights = weights.copy()
    weights = winnow(dataset,weights,5)
    iters += 1

print(f'iterations: {iters}')
print(weights)

iterations: 3
[1.0, 8, 2.0, 0.5, 4]


In [17]:
def variableThreshWithBasic(dataset,weights,rate=0.5):
    
    # again assume last column is our target
    for row in dataset:
        # trim off target variable
        data = row[:-1]
        
        # add threshold, initialized to -1
        data = np.append(data, [-1])
        
        # from our row, grab the target, assumed to be last value in row
        target = row[len(row) - 1]
        
        # calculate a
        a = np.dot(weights, data)
        
        if np.sign(a) != np.sign(target) or np.sign(a) == 0:
            weights = (weights + np.multiply(rate,np.multiply(data,target))).tolist()
    
    return weights

In [18]:
# adjust weight vector
weights = [1,1,1,1,1,1]
prevWeights = []
iters = 0

while (prevWeights != weights) and iters < 15:
    prevWeights = weights.copy()
    weights = variableThreshWithBasic(dataset,weights)
    iters += 1

print(f'iterations: {iters}')
print(weights)

iterations: 3
[0.5, 2.0, 0.5, 0.0, 1.5, 1.5]


In [19]:
def variableThreshWithWinnow(dataset,weights):
    #weights.append(theta)
    
    for row in dataset:
        data = row[:-1]
        data = np.append(data, [-1])
        
        target = row[len(row) - 1]
        
        a = np.dot(weights, data)
        
        if (a > 0 and np.sign(target) == 1) or (a <= 0 and np.sign(target) == -1):
            pass
        elif a <= 0 and np.sign(target) == 1:
            for i,w in enumerate(weights):
                if (i+1) == len(weights):
                    weights[i] = weights[i]/2
                else:
                    if data[i] == 1:
                        weights[i] = weights[i]*2
        elif a > 0 and np.sign(target) == -1:
            for i,w in enumerate(weights):
                if (i+1) == len(weights):
                    weights[i] = weights[i]*2
                else:
                    if data[i] == 1:
                        weights[i] = weights[i]/2
    
    return weights

In [20]:
# adjust weight vector
weights = [1,1,1,1,1,1]
prevWeights = []
iters = 0

while (prevWeights != weights) and iters < 15:
    prevWeights = weights.copy()
    weights = variableThreshWithWinnow(dataset,weights)
    iters += 1

print(f'iterations: {iters}')
print(weights)

iterations: 2
[0.5, 2, 1.0, 0.25, 1, 2.0]


# EXERCISE 12.2.1

In [21]:
#### MODIFIED TRAINING SET
dataset = np.array([
    [1,1,0,1,1,1],
    [0,0,1,1,1,-1], # example b includes 'nigeria'
    [0,1,1,0,0,1],
    [1,0,0,1,0,-1],
    [1,0,1,0,1,1],
    [1,0,1,1,0,-1]
])

## EXERCISE 12.2.1 (c: the basic method)

In [22]:
# adjust weight vector
weights = [1,1,1,1,1,1]
prevWeights = []
iters = 0

while (prevWeights != weights) and iters < 15:
    prevWeights = weights.copy()
    weights = variableThreshWithBasic(dataset,weights)
    iters += 1

print(f'iterations: {iters}')
print(weights)

iterations: 3
[0.5, 2.5, 0.5, -1.0, 1.0, 1.5]


## EXERCISE 12.2.1 (d: the winnow method)

In [25]:
# adjust weight vector
weights = [1,1,1,1,1,1]
prevWeights = []
iters = 0

while (prevWeights != weights) and iters < 150:
    prevWeights = weights.copy()
    weights = variableThreshWithWinnow(dataset,weights)
    iters += 1

print(f'iterations: {iters}')
print(weights)

iterations: 108
[33554432.0, 268435456, 1.4901161193847656e-08, 1.7105694144590052e-49, 134217728.0, 134217728.0]


# EXERCISE 12.3.2 (which vectors are support vectors)

In [26]:
# import library
from sklearn import svm

# built sample data set
X = [[3,4,5],[2,7,2],[5,5,5],[1,2,3],[3,3,2],[2,4,1]]
y = [1,1,1,-1,-1,-1]

# create model object
clf = svm.SVC()

# fit sample dataset 
clf.fit(X, y)

# get support vectors
clf.support_vectors_

array([[1., 2., 3.],
       [3., 3., 2.],
       [2., 4., 1.],
       [3., 4., 5.],
       [2., 7., 2.],
       [5., 5., 5.]])

In [27]:
### Appears all 6 of our vectors are support vectors

# EXERCISE 12.4.3: (a)

In [46]:

def find_nearest(data, targets, value):
    array = np.asarray(data)
    idx = (np.abs(array - value)).argmin()
    return targets[idx]

points = np.array([[1],[2],[4],[8],[16],[32]])
targets = np.array([[1],[2],[3],[4],[5],[6]])

value = 28

print(find_nearest(points, targets, value))

[6]


# EXERCISE Exercise 12.4.3: (b)

In [56]:
points = np.array([[1],[2],[4],[8],[16],[32]])
targets = np.array([[1],[2],[3],[4],[5],[6]])

closest_below = max(np.nonzero(points<=28)[0])
closest_above = min(np.nonzero(points>=28)[0])

(closest_above+closest_below)/2


4.5