In [1]:
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import PolynomialFeatures
import numpy as np
from math import exp
import io
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import KFold

In [2]:
filename = "data_banknote_authentication.txt"
r = io.open(filename, encoding='utf8').readlines()
X = []
Y = []
for i in r:
    x = i.split(',')
    for j in range(0,len(x)-1):
        try:
            x[j] = float(x[j])
        except ValueError:
            x[j] = 0.0
    X.append(map(float,x[0:len(x)-1]))
    Y.append(float(x[-1]))

In [3]:
X = np.array(X)
Y = np.array(Y)
p = np.random.permutation(len(X))
X = X[p]
Y = Y[p]

In [4]:
def ycap(Theta,x):
    #sigmoid = []
    #for i in range(len(x)):
    v1 = 1 + exp(-np.dot(Theta.transpose(),x))
    sigmoid = 1 / v1
        #sigmoid.append(v2)
    return sigmoid

In [5]:
def decent(y,x,maxiterations = 100):
    eta = 0.0001
    v = 0
    iterations = 0
    m,n = np.shape(x)
    #Theta = [0.01, 0.01, 0.01, 0.01]
    Theta =  np.ones(n)
    #v=0
    while(iterations<maxiterations):
        sumv =0
        for i in range(x.shape[0]):
            hypo = ycap(Theta,x[i]) #using ycap(); hypothesis
            v = hypo - y[i]
            #print x[i]
            sumv += (v*x[i])
            #sumv = (v*x[i])
        newTheta = Theta - (eta * sumv)
        iterations+=1
        Theta = newTheta
    return Theta

In [6]:
def classify(theta,x):
    if ycap(theta,x) > 0.5:
        return 1
    else:
        return 0

In [7]:
def classifyAll(Y,X,X1):
    classifier_predict =[]
    ntheta = decent(Y,X)
    for i in range(0,X1.shape[0]):
        classifier_predict.append(classify(ntheta,X1[i]))
    print classifier_predict
    return classifier_predict

#classifier_predict =[]
#ntheta = decent(Y,X)
#for i in range(len(Y)):
#    classifier_predict.append(classify(ntheta,X[i]))
#print classifier_predict

In [8]:
def accuracy(cm):
    return np.trace(cm)*1.0/np.sum(cm)

In [9]:
def precision(cm):
    prec = []
    for i in range(len(cm)):
        for j in range(len(cm)):
            if i == j:
                prec.append(cm[i][j]*1.0/np.sum(cm[i][j],axis=0))
    return prec

In [10]:
def recall(cm):
    rec = []
    for i in range(len(cm)):
        for j in range(len(cm)):
            if i == j:
                rec.append(cm[i][j]*1.0/np.sum(cm[i][j]))
    return rec

In [11]:
def FM(prec,rec):
    f1 = []
    for i,j in zip(prec,rec):
        f1.append(2.0*(i*j)/(i+j))
    return f1

In [12]:
def confusionmatrix(y,classifier_pred):
    clabels = [0,1]
    cm_fold = []
    for i in clabels:
        tmp =[0]*len(clabels)
        for j in range(len(y)):
            #print j
            if y[j] == i and y[j] == classifier_pred[j]:
                tmp[clabels.index(i)] += 1
            elif y[j] == i and y[j] != classifier_pred[j]:
                tmp[clabels.index(classifier_pred[j])] += 1
        cm_fold.append(tmp)
    return cm_fold

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

labels = classifyAll(y_train,X_train,X_test)

#classes = np.unique(Y)

cm = confusionmatrix(y_test,labels)

#cm = find_confusion_matrix(y_test,labels)

acc = accuracy(cm)

print cm

print acc

[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

labels = classifyAll(y_train,X_train,X_test)

cm = confusionmatrix(y_test,labels)


acc = accuracy(cm)
prec = precision(cm)
rec = recall(cm)
fmeasure = FM(prec,rec)

print cm
print acc
print prec
print rec
print fmeasure

[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 

In [15]:
classifier_predict =[]
ntheta = decent(Y,X)
for i in range(len(Y)):
    classifier_predict.append(classify(ntheta,X[i]))
#print classifier_predict

cmo = confusionmatrix(Y,classifier_predict)

#cmo = find_confusion_matrix(y_test,labels)
acc1 = accuracy(cmo)

print cmo

print acc1

[[762, 0], [61, 549]]
0.955539358601


In [16]:
classifier_predict2 =[]
ntheta2 = decent(Y,X)
for i in range(len(Y)):
    classifier_predict2.append(classify(ntheta2,X[i]))
#print classifier_predict

cmoo = confusionmatrix(Y,classifier_predict2)

#cmo = find_confusion_matrix(y_test,labels)
acc10 = accuracy(cmoo)
prec1 = precision(cmoo)
rec1 = recall(cmoo)
fmeasure1 = FM(prec1,rec1)

print cmoo

print acc10
print prec1
print rec1
print fmeasure1

[[762, 0], [61, 549]]
0.955539358601
[1.0, 1.0]
[1.0, 1.0]
[1.0, 1.0]


In [17]:
#Polynomial Features
poly = PolynomialFeatures(2)
newX =poly.fit_transform(X)

In [18]:
X_train, X_test, y_train, y_test = train_test_split(newX, Y, test_size=0.33, random_state=42)

newlabels = classifyAll(y_train,X_train,X_test)

cmn = confusionmatrix(y_test,newlabels)


acc2 = accuracy(cmn)
prec2 = precision(cmn)
rec2= recall(cmn)
fmeasure2 = FM(prec2,rec2)

print cmn
print acc2
print prec2
print rec2
print fmeasure2

[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 

In [19]:
kf = KFold(len(Y), n_folds=10, shuffle = False)

In [20]:
for train_index, test_index in kf:
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]
    
    labels = classifyAll(Y_train,X_train,X_test)

    cm = confusionmatrix(Y_test,labels)


    acc = accuracy(cm)
    prec = precision(cm)
    rec = recall(cm)
    fmeasure = FM(prec,rec)

    print cm
    print acc
    print prec
    print rec
    print fmeasure

[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0]
[[79, 0], [6, 53]]
0.95652173913
[1.0, 1.0]
[1.0, 1.0]
[1.0, 1.0]
[1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]
[[78, 0], [4, 56]]
0.971014492754
[1.0, 1.0]
[1.0, 1.0]
[1.0, 1.0]
[1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 