In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [46]:
import os

path = os.listdir('./Kaggle/')
classes = {'yes':1, 'no':0}


In [47]:
def pca_manually(img):
    M = np.mean(img.T, axis=1)

    # Sustracting the mean columnwise
    C = img - M

    # Calculating the covariance matrix
    V = np.cov(C.T)

    # Computing the eigenvalues and eigenvectors of covarince matrix
    values, vectors = np.linalg.eig(V)

    p = np.size(vectors, axis =1)

    # Sorting the eigen values in ascending order
    idx = np.argsort(values)
    idx = idx[::-1]

    # Sorting eigen vectors
    vectors = vectors[:,idx]
    values = values[idx]

    # PCs used for reconstruction (can be varied)
    num_PC = 55

    # Cutting the PCs
    if num_PC <p or num_PC >0:
        vectors = vectors[:, range(num_PC)]

    # Reconstructing the image with PCs
    score = np.dot(vectors.T, C)
    constructed_img = np.dot(vectors, score) + M
    constructed_img = np.uint8(np.absolute(constructed_img))
    return constructed_img

In [48]:
import cv2
X = []
Y = []
for cls in classes:
    pth = './Kaggle/'+cls
    for j in os.listdir(pth):
        img = cv2.imread(pth+'/' + j)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.resize(img, (64,64))
        img_pca = pca_manually(img)
        X.append(img_pca)
        Y.append(classes[cls])
        

In [49]:
np.unique(Y)

array([0, 1])

In [50]:
X = np.array(X)
Y = np.array(Y)

In [51]:
pd.Series(Y).value_counts()

1    1500
0    1500
dtype: int64

In [52]:
X_updated = X.reshape(len(X),-1)
X_updated.shape

(3000, 4096)

In [53]:
xtrain, xtest, ytrain, ytest = train_test_split(X_updated, Y, random_state=10, test_size=0.20)

In [54]:
xtrain.shape, xtest.shape

((2400, 4096), (600, 4096))

In [55]:
print(xtrain.max(), xtrain.min())
print(xtest.max(), xtest.min())
xtrain =xtrain/255
xtest = xtest/255
print(xtrain.max(), xtrain.min())
print(xtest.max(), xtest.min())

255 0
255 0
1.0 0.0
1.0 0.0


In [56]:
from sklearn.svm import SVC

from sklearn import metrics

In [57]:
sv =  SVC(C=9.0,kernel='rbf')
sv.fit(xtrain, ytrain)

SVC(C=9.0)

In [58]:
print("Training Score:", sv.score(xtrain, ytrain))
print("Testing Score:", sv.score(xtest, ytest))
print('Accuracy',metrics.accuracy_score(ytest,sv.predict(xtest)))
print('Precision',metrics.precision_score(ytest,sv.predict(xtest)))
print('Recall',metrics.recall_score(ytest,sv.predict(xtest)))

Training Score: 1.0
Testing Score: 0.975
Accuracy 0.975
Precision 0.9749216300940439
Recall 0.9779874213836478


In [59]:
import joblib
joblib.dump(sv,'svm_pca.pkl')

['svm_pca.pkl']

In [30]:
sv =  SVC(C=9.0,kernel='linear')
sv.fit(xtrain, ytrain)

SVC(C=9.0, kernel='linear')

In [34]:
print("Training Score:", sv.score(xtrain, ytrain))
print("Testing Score:", sv.score(xtest, ytest))
print('Accuracy',metrics.accuracy_score(ytest,sv.predict(xtest)))
print('Precision',metrics.precision_score(ytest,sv.predict(xtest)))
print('Recall',metrics.recall_score(ytest,sv.predict(xtest)))

Training Score: 1.0
Testing Score: 0.9422222222222222
Accuracy 0.9422222222222222
Precision 0.9701149425287356
Recall 0.9154013015184381


In [35]:
sv =  SVC(C=9.0,kernel='poly')
sv.fit(xtrain, ytrain)

SVC(C=9.0, kernel='poly')

In [36]:
print("Training Score:", sv.score(xtrain, ytrain))
print("Testing Score:", sv.score(xtest, ytest))
print('Accuracy',metrics.accuracy_score(ytest,sv.predict(xtest)))
print('Precision',metrics.precision_score(ytest,sv.predict(xtest)))
print('Recall',metrics.recall_score(ytest,sv.predict(xtest)))

Training Score: 0.9995238095238095
Testing Score: 0.9266666666666666
Accuracy 0.9266666666666666
Precision 0.9625292740046838
Recall 0.8915401301518439
