In [1]:
import numpy as np
import pandas as pd

In [2]:
train = np.loadtxt('pa3train.txt')
test = np.loadtxt('pa3test.txt')
dictionary = pd.read_csv('pa3dictionary.txt', header = None)[0].tolist()
train = pd.DataFrame(train, columns = dictionary + ['Target'])
test = pd.DataFrame(test, columns = dictionary + ['Target'])

In [3]:
train_1_2 = train[(train['Target']==1) | (train['Target']==2)]
test_1_2 = test[(test['Target']==1) | (test['Target']==2)]

In [4]:
X_train = train.iloc[:, :-1]
y_train = train.iloc[:, -1]
X_test = test.iloc[:, :-1]
y_test = test.iloc[:, -1]
X_train_1_2 = train_1_2.iloc[:, :-1]
y_train_1_2 = train_1_2.iloc[:, -1]
X_test_1_2 = test_1_2.iloc[:, :-1]
y_test_1_2 = test_1_2.iloc[:, -1]

In [5]:
class Perceptron():
    def __init__(self, iterations = 1, kind = 'perceptron'):
        self.w = None
        self.classes = None
        self.iterations = iterations
        self.kind = kind
        self.cm = None
        
    def fit(self, X, y):
        self.classes = sorted(np.unique(y))
        c = self.classes[0]
        yi = y.apply(lambda x: 1 if x == c else -1)
        if self.kind == 'perceptron':
            self.w = np.zeros(X.shape[1])
            for _ in range(self.iterations):
                for i in range(X.shape[0]):
                    if yi.iloc[i] * (self.w.dot(X.iloc[i])) <= 0:
                        self.w = self.w + yi.iloc[i] * X.iloc[i]

        elif self.kind == 'voted' or self.kind == 'average':
            cm = 1
            w = np.zeros_like(X.iloc[0])
            w_list = [(w,cm)]

            for _ in range(self.iterations):
                for t in range(X.shape[0]):
                    if yi.iloc[t]*w.dot(X.iloc[t])<=0:
                        w_list.append((w,cm))

                        w = w+yi.iloc[t]*X.iloc[t]
                        cm = 1
                    else:
                        cm +=1

            self.w = w_list
            
                
                        
                    
    def predict(self, X):
        if self.kind == 'perceptron':
            return pd.Series(self.w.dot(X.T), index = X.index)\
                        .apply(lambda x: self.classes[0] if x >= 0 else self.classes[1])
        elif self.kind == 'voted':
            pred = 0
    
            for i in range(len(self.w)):
                w,c = self.w[i]
                pred += c*np.sign(X.dot(w))

            return pd.Series(np.sign(pred), index = X.index)\
                        .apply(lambda x: self.classes[0] if x >= 0 else self.classes[1])
        
        elif self.kind == 'average':
            pred = 0
    
            for i in range(len(self.w)):
                w,c = self.w[i]
                pred += X.dot(c*w)

            return pd.Series(np.sign(pred), index = X.index)\
                        .apply(lambda x: self.classes[0] if x >= 0 else self.classes[1])
            
                
    def score(self, X, y):
        return (self.predict(X) == y.values).mean()

# Question 1

In [6]:
output = pd.DataFrame()
for j in ['perceptron', 'voted', 'average']:
    train_err = []
    test_err = []
    for i in range(1, 5):
        clf = Perceptron(i, j)
        clf.fit(X_train_1_2, y_train_1_2)
        train_err += [1 - clf.score(X_train_1_2, y_train_1_2)]
        test_err += [1 - clf.score(X_test_1_2, y_test_1_2)]
    output[j,'training_error'] = train_err
    output[j,'test_error'] = test_err
output.index = range(1,5)
output.index.name = 'passes'

In [7]:
output

Unnamed: 0_level_0,"(perceptron, training_error)","(perceptron, test_error)","(voted, training_error)","(voted, test_error)","(average, training_error)","(average, test_error)"
passes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,0.038532,0.05305,0.065138,0.087533,0.077064,0.116711
2,0.03578,0.061008,0.038532,0.061008,0.051376,0.082228
3,0.018349,0.045093,0.027523,0.045093,0.034862,0.061008
4,0.016514,0.045093,0.022936,0.045093,0.031193,0.050398


# Question 2

In [8]:
clf = Perceptron(3, 'average')

In [9]:
clf.fit(X_train_1_2, y_train_1_2)

In [10]:
w_avg = np.zeros_like(X_train.iloc[0])
for i in range(len(clf.w)):
    w,c = clf.w[i]
    w_avg+=c*w
w_avg = w_avg.to_frame().reset_index()
w_avg.columns = ['word', 'value']
w_avg.index.name = 'index'

In [12]:
# highest three
print('words with highest coordinates')
display(w_avg.sort_values(by = 'value', ascending = False)[:3])

# lowest three
print('words with lowest coordinates')
display(w_avg.sort_values(by = 'value')[:3])

words with highest coordinates


Unnamed: 0_level_0,word,value
index,Unnamed: 1_level_1,Unnamed: 2_level_1
438,file,391671.0
466,program,238641.0
203,line,144297.0


words with lowest coordinates


Unnamed: 0_level_0,word,value
index,Unnamed: 1_level_1,Unnamed: 2_level_1
78,he,-228772.0
469,team,-124122.0
393,game,-115906.0


In [13]:
clf.classes

[1.0, 2.0]

# Question 3

In [60]:
C_list = []
for i in range(1,7):
    y = y_train.apply(lambda x: x if x == i else 99)
    clf = Perceptron(1, 'perceptron')
    clf.fit(X_train,y)
    C_list += [clf]

In [73]:
pred = np.zeros([y_test.shape[0], 6])
counter = 0
for i in C_list:
    pred[:,counter] = i.predict(X_test)
    counter += 1
np.place(pred, pred == 99, 0)

In [86]:
prediction = pd.DataFrame(pred).apply(lambda x: max(np.unique(x)) if len(np.unique(x)) == 2 else "Don't Know", axis = 1)

In [103]:
def confusion_matrix(y,predicted):
    df = pd.DataFrame({
    'predicted':prediction,
    'actual': y_test
    })
    
    return df.groupby('actual')['predicted'].value_counts(normalize = True).unstack().T.fillna(0)

In [105]:
confused_train = confusion_matrix(y_train,prediction)
display(confused_train)

actual,1.0,2.0,3.0,4.0,5.0,6.0
predicted,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.0,0.718919,0.010417,0.034286,0.021739,0.0,0.0
2.0,0.010811,0.65625,0.034286,0.027174,0.012821,0.018519
3.0,0.0,0.015625,0.371429,0.0,0.0,0.027778
4.0,0.016216,0.005208,0.0,0.684783,0.0,0.0
5.0,0.016216,0.03125,0.074286,0.005435,0.801282,0.12037
6.0,0.005405,0.010417,0.034286,0.0,0.070513,0.5
Don't Know,0.232432,0.270833,0.451429,0.26087,0.115385,0.333333


- a. Based on the confusion matrix above, the perceptron classifier has the highest accuracy for the testing set is class 5

- b. The perceptron classifier has the least accuracy for the testing set is class 3

- c. The classes that perceptron often mistakely classfies is in class 6 as 5