In [1]:
import homework7_data as data
from collections import defaultdict

In [2]:
class BinaryPerceptron(object):
    """Binary perceptron over sparse feature dictionaries.

    Every example is a ``(features, label)`` pair: ``features`` maps feature
    names to numeric values (a missing feature counts as 0) and ``label`` is
    ``True`` or ``False``.  The learned weights are kept in ``self.weights``
    as a mapping from feature name to weight.
    """

    def __init__(self, examples, iterations):
        """Train the perceptron.

        Args:
            examples: iterable of ``(feature_dict, bool_label)`` pairs.
            iterations: number of full passes to make over ``examples``.
        """
        # Materialise once so that a one-shot iterable (e.g. a generator) can
        # be replayed on every pass instead of being exhausted by the first.
        examples = list(examples)

        # defaultdict(int): a feature that never took part in an update has
        # weight 0, and the container type is the same whether or not any
        # update happened.
        self.weights = weights = defaultdict(int)

        for _ in range(iterations):
            for data_point, true_label in examples:
                if self.predict(data_point) != true_label:
                    # Only the features present in the misclassified instance
                    # change, so update those in place rather than rebuilding
                    # the whole weight vector.
                    # Equality (not identity) keeps labels that merely equal
                    # True, such as the integer 1, on the positive side.
                    if true_label == True:
                        for key, value in data_point.items():
                            weights[key] += value
                    else:
                        for key, value in data_point.items():
                            weights[key] -= value

    def predict(self, x):
        """Return ``True`` iff the weighted sum of ``x``'s features is > 0.

        Args:
            x: feature dictionary; features without a learned weight are
                ignored (they contribute 0).
        """
        weights = self.weights
        # Iterate over the instance's features only; ``.get`` avoids inserting
        # unseen features into the defaultdict as a side effect of predicting.
        score = sum(weights.get(key, 0) * value for key, value in x.items())
        return bool(score > 0)

class MulticlassPerceptron(object):
    """Multiclass perceptron with one sparse weight vector per label.

    Every example is a ``(features, label)`` pair: ``features`` maps feature
    names to numeric values (a missing feature counts as 0) and ``label`` is
    any hashable value.  ``self.labels`` is the set of labels seen in the
    training data and ``self.weights`` maps each label to its weight mapping.

    Ties between equally scored labels are resolved in favour of the label
    that appeared first in the training data, so training and prediction are
    reproducible from run to run (iterating a set of string labels is not,
    because string hashing is randomised per interpreter process).
    """

    def __init__(self, examples, iterations):
        """Train the perceptron.

        Args:
            examples: iterable of ``(feature_dict, label)`` pairs.
            iterations: number of full passes to make over ``examples``.
        """
        # Materialise once: the data is scanned for labels and then replayed
        # on every pass, which would exhaust a one-shot iterable.
        examples = list(examples)

        # Collect the distinct labels in first-seen order.
        ordered_labels = []
        seen = set()
        for _, label in examples:
            if label not in seen:
                seen.add(label)
                ordered_labels.append(label)
        self.labels = seen
        self._ordered_labels = ordered_labels  # deterministic tie-break order

        # First level: label; second level: that label's feature weights.
        self.weights = weights = defaultdict(lambda: defaultdict(int))
        for label in ordered_labels:
            weights[label] = defaultdict(int)

        for _ in range(iterations):
            for data_point, true_label in examples:
                pred_label = self.predict(data_point)
                if pred_label != true_label:
                    promoted = weights[true_label]
                    demoted = weights[pred_label]
                    # Only the instance's own features change: pull the
                    # correct label towards it and push the wrong one away.
                    for key, value in data_point.items():
                        promoted[key] += value
                        demoted[key] -= value

    def predict(self, x):
        """Return the label whose weight vector scores ``x`` highest.

        Args:
            x: feature dictionary; features without a learned weight are
                ignored (they contribute 0).

        Raises:
            ValueError: if the model was built from no examples, so there is
                no label to return.
        """
        if not self._ordered_labels:
            raise ValueError("cannot predict: no labels were seen during training")
        weights = self.weights

        def score(label):
            label_weights = weights[label]
            return sum(label_weights.get(key, 0) * value
                       for key, value in x.items())

        # max() returns the first maximal item, i.e. the earliest-seen label
        # among those tied for the best score.
        return max(self._ordered_labels, key=score)

In [3]:
class IrisClassifier(object):
    """Classifier backed by a MulticlassPerceptron trained for 100 passes.

    Feature sequences are turned into dictionaries keyed by position
    (0, 1, 2, ...) before being handed to the perceptron.
    """

    def __init__(self, data):
        """Train on ``data``, an iterable of ``(features, label)`` pairs."""
        training_set = [
            (dict(enumerate(features)), label) for features, label in data
        ]
        self.p = MulticlassPerceptron(training_set, 100)

    def classify(self, instance):
        """Return the predicted label for one feature sequence."""
        return self.p.predict(dict(enumerate(instance)))

In [4]:
class DigitClassifier(object):
    """Classifier backed by a MulticlassPerceptron trained for 10 passes.

    Each feature sequence is re-keyed by element position so the perceptron
    receives a ``{index: value}`` dictionary.
    """

    def __init__(self, data):
        """Train on ``data``, an iterable of ``(features, label)`` pairs."""
        passes = 10
        labelled_vectors = []
        for features, label in data:
            vector = {position: value for position, value in enumerate(features)}
            labelled_vectors.append((vector, label))
        self.p = MulticlassPerceptron(labelled_vectors, passes)

    def classify(self, instance):
        """Return the predicted label for one feature sequence."""
        vector = {position: value for position, value in enumerate(instance)}
        return self.p.predict(vector)

In [5]:
class BiasClassifier(object):
    """Binary classifier for a single feature plus a constant bias term.

    Wraps a BinaryPerceptron trained for 15 passes on dictionaries of the
    form ``{'feature': value, 'bias': 1}``.
    """

    def __init__(self, data):
        """Train on ``data``, an iterable of ``(feature, label)`` pairs."""
        training_set = [
            ({'feature': value, 'bias': 1}, label) for value, label in data
        ]
        self.p = BinaryPerceptron(training_set, 15)

    def classify(self, instance):
        """Return the predicted boolean label for one feature value."""
        return self.p.predict({'feature': instance, 'bias': 1})

In [6]:
class MysteryClassifier1(object):
    """MulticlassPerceptron (15 passes) over an expanded two-value input.

    The first two components of each point are kept as features 0 and 1,
    the sum of their squares is added as feature 2, and a constant 1 is
    added as feature 3.
    """

    @staticmethod
    def _expand(point):
        """Build the four-entry feature dictionary for ``point``."""
        return {
            0: point[0],
            1: point[1],
            2: point[0]**2 + point[1]**2,  # squared distance from the origin
            3: 1,                          # constant (bias) feature
        }

    def __init__(self, data):
        """Train on ``data``, an iterable of ``(features, label)`` pairs."""
        training_set = [(self._expand(features), label)
                        for features, label in data]
        self.p = MulticlassPerceptron(training_set, 15)

    def classify(self, instance):
        """Return the predicted label for one point."""
        return self.p.predict(self._expand(instance))

In [7]:
class MysteryClassifier2(object):
    """MulticlassPerceptron (10 passes) over an expanded three-value input.

    The first three components of each point become features 0-2, their
    product becomes feature 3, and a constant 1 is added as feature 4.
    """

    def __init__(self, data):
        """Train on ``data``, an iterable of ``(features, label)`` pairs."""
        passes = 10
        training_set = []
        for features, label in data:
            first, second, third = features[0], features[1], features[2]
            expanded = {
                0: first,
                1: second,
                2: third,
                3: first * second * third,  # three-way interaction term
                4: 1,                       # constant (bias) feature
            }
            training_set.append((expanded, label))
        self.p = MulticlassPerceptron(training_set, passes)

    def classify(self, instance):
        """Return the predicted label for one point."""
        first, second, third = instance[0], instance[1], instance[2]
        expanded = {
            0: first,
            1: second,
            2: third,
            3: first * second * third,
            4: 1,
        }
        return self.p.predict(expanded)

In [8]:
# Show the iris examples from homework7_data; per the recorded output, each
# entry is a (feature tuple, label string) pair.
data.iris

[((6.0, 2.2, 4.0, 1.0), 'iris-versicolor'),
 ((6.9, 3.1, 5.4, 2.1), 'iris-virginica'),
 ((5.5, 2.4, 3.7, 1.0), 'iris-versicolor'),
 ((6.3, 2.8, 5.1, 1.5), 'iris-virginica'),
 ((6.8, 3.0, 5.5, 2.1), 'iris-virginica'),
 ((6.3, 2.7, 4.9, 1.8), 'iris-virginica'),
 ((6.3, 3.4, 5.6, 2.4), 'iris-virginica'),
 ((5.9, 3.0, 4.2, 1.5), 'iris-versicolor'),
 ((6.4, 2.9, 4.3, 1.3), 'iris-versicolor'),
 ((5.7, 4.4, 1.5, 0.4), 'iris-setosa'),
 ((6.4, 3.2, 4.5, 1.5), 'iris-versicolor'),
 ((6.9, 3.2, 5.7, 2.3), 'iris-virginica'),
 ((6.1, 2.6, 5.6, 1.4), 'iris-virginica'),
 ((4.6, 3.4, 1.4, 0.3), 'iris-setosa'),
 ((6.5, 3.0, 5.5, 1.8), 'iris-virginica'),
 ((6.9, 3.1, 4.9, 1.5), 'iris-versicolor'),
 ((6.7, 2.5, 5.8, 1.8), 'iris-virginica'),
 ((5.5, 2.3, 4.0, 1.3), 'iris-versicolor'),
 ((7.7, 2.8, 6.7, 2.0), 'iris-virginica'),
 ((5.7, 2.6, 3.5, 1.0), 'iris-versicolor'),
 ((5.8, 2.8, 5.1, 2.4), 'iris-virginica'),
 ((6.3, 2.3, 4.4, 1.3), 'iris-versicolor'),
 ((7.7, 2.6, 6.9, 2.3), 'iris-virginica'),
 ((6.3, 