In [1]:
# Multiclass classification using 10C2 = 45 pairwise (one-vs-one) binary classifiers with majority voting

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [3]:
def read_data(filename):
    """Load a headerless CSV of labelled integer samples into NumPy arrays.

    Every non-blank line must have the form ``label,v1,v2,...`` where each
    field parses as an integer. Blank lines (for example a trailing newline
    at the end of the file) are ignored.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    (data, labels) : tuple of numpy.ndarray
        ``data`` has shape ``(num_points, dim_points)`` and ``labels`` has
        shape ``(num_points,)``. Both use NumPy's default float dtype, so the
        integer values read from the file are stored as floats.
        ``dim_points`` is taken from the first row; for a file with no rows
        it falls back to ``28 * 28``.

    Raises
    ------
    ValueError
        If a field is not an integer literal, or if a row does not have the
        same number of features as the first row.
    """
    with open(filename, 'r') as f:
        # Skip blank lines: int('') would otherwise abort the whole load.
        rows = [line.split(',') for line in f if line.strip()]

    num_points = len(rows)
    # Infer the feature count from the file instead of assuming 28x28 inputs;
    # keep 28 * 28 for an empty file so the returned shape stays (0, 784).
    dim_points = len(rows[0]) - 1 if rows else 28 * 28
    data = np.empty((num_points, dim_points))
    labels = np.empty(num_points)

    for ind, num in enumerate(rows):
        if len(num) - 1 != dim_points:
            # Without this check a one-feature row would be silently
            # broadcast across the whole row of ``data``.
            raise ValueError(
                "row %d has %d features, expected %d"
                % (ind, len(num) - 1, dim_points)
            )
        labels[ind] = int(num[0])
        data[ind] = [int(x) for x in num[1:]]

    return (data, labels)

# Read the training split first, then the test split; each call yields a
# (data, labels) pair that is unpacked into its own pair of names.
(train_data, train_labels), (test_data, test_labels) = (
    read_data(csv_name) for csv_name in ("sample_train.csv", "sample_test.csv")
)

In [4]:
# Number of training samples (rows of train_data).
N = train_data.shape[0]

# Containers filled by later cells: the trained binary classifiers and the
# class pairs they correspond to.
binaries = []
pairs = []

In [5]:
# Enumerate every unordered pair of the 10 class labels as (i, j) with i > j,
# which gives 10C2 = 45 pairs in the order (1, 0), (2, 0), (2, 1), ..., (9, 8).
# The list is built in a single assignment rather than by appending to a list
# created in another cell, so re-running this cell cannot accumulate
# duplicate pairs.
pairs = [(i, j) for i in range(10) for j in range(i)]

In [6]:
# Train one binary logistic-regression classifier per class pair.
# For a pair (i, j), samples of class i are labelled +1 and samples of class j
# are labelled -1; samples of every other class are left out of that
# classifier's training set. binaries[k] corresponds to pairs[k].
#
# The list is reset here so that re-running this cell replaces the trained
# classifiers instead of stacking a second copy after the first.
binaries = []
for i, j in pairs:
    # Labels were stored from int(...) values, so direct equality against the
    # integer class ids selects the same rows as a per-element int() compare.
    is_i = train_labels == i
    is_j = train_labels == j
    in_pair = is_i | is_j

    # Boolean-mask indexing keeps the selected rows in their original order.
    datapoints = train_data[in_pair]
    labels = np.where(is_i[in_pair], 1, -1)

    log_reg = LogisticRegression(solver = 'liblinear', max_iter = 100000)
    clf = log_reg.fit(datapoints, labels)
    binaries.append(clf)

In [7]:
# Bookkeeping for the evaluation pass over the test set.
T = len(test_data)   # number of test samples
correct = 0          # running count of correctly classified samples
predictions = []     # predicted class per test sample, in test order

In [8]:
# Classify every test sample by majority vote over the pairwise classifiers.
# binaries[k] is the classifier for pairs[k] = (i, j): a prediction of 1 is a
# vote for class i, anything else is a vote for class j.
assert len(binaries) >= len(pairs), "need one trained classifier per class pair"

# frequencies[s, c] counts the votes that test sample s received for class c.
frequencies = np.zeros((T, 10))
sample_rows = np.arange(T)
for (i, j), clf in zip(pairs, binaries):
    # One batched predict call per classifier instead of one call per
    # (sample, classifier) combination.
    predict = clf.predict(test_data)
    winners = np.where(predict == 1, i, j)
    # Each sample index appears exactly once, so the in-place add is safe.
    frequencies[sample_rows, winners] += 1

# argmax returns the first maximum, so ties go to the lowest class index.
predicted = np.argmax(frequencies, axis=1)

# Results are rebuilt from scratch here rather than accumulated into values
# initialised in another cell, so re-running this cell cannot double-count.
correct = int(np.count_nonzero(predicted == test_labels))
predictions = list(predicted)

In [9]:
# Share of test samples classified correctly, reported as a percentage.
fraction_correct = correct / T
accuracy = str(fraction_correct * 100)
print("Accuracy of Pairwise Binary Classifiers with Majority Voting:", accuracy, "%")

# Confusion matrix of true test labels (first argument) against predictions.
print("Confusion Matrix:")
mat = metrics.confusion_matrix(test_labels, predictions)
print(mat)

Accuracy of Pairwise Binary Classifiers with Majority Voting: 90.10000000000001 %
Confusion Matrix:
[[ 98   0   0   0   0   1   1   0   0   0]
 [  0 100   0   0   0   0   0   0   0   0]
 [  0   1  91   0   1   1   0   2   2   2]
 [  0   0   2  91   0   4   1   1   1   0]
 [  0   0   0   0  94   0   2   0   1   3]
 [  1   1   0   3   0  86   1   0   7   1]
 [  3   0   4   0   0   1  92   0   0   0]
 [  0   2   6   0   2   0   0  83   0   7]
 [  1   0   2   5   2   3   0   3  82   2]
 [  0   1   0   2   5   0   0   7   1  84]]


In [None]:
# Thank You ^_^