# Exercise 4
### Anton Wiehe & Angelie Kraftelie

In [3]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

# Test image: load a single file to check that the data can be read.
# The context manager closes the underlying file once the pixels are copied.
with Image.open("positives/" + 'p01.png') as im_frame:
    np_frame = np.array(im_frame.getdata())


def _load_images(folder, prefix, count, num_pixels=576, num_channels=3):
    """Load `count` numbered PNG images from `folder` into one float array.

    Files are expected to be named `<prefix>01.png`, `<prefix>02.png`, ...
    (numbering starts at 1, zero-padded to two digits).

    Returns an array of shape (count, num_pixels, num_channels).
    """
    images = np.zeros((count, num_pixels, num_channels))
    for i in range(count):
        path = folder + prefix + str(i + 1).zfill(2) + '.png'
        # Close each file as soon as its pixel data has been copied.
        with Image.open(path) as frame:
            images[i] = np.array(frame.getdata())
    return images


# Load images (30, 576, 3) == (num_images, num_pixels, rgb)
n = 30
positives = _load_images("positives/", 'p', n)
negatives = _load_images("negatives/", 'n', n)

In [4]:
red_pos = positives[:, :, 0]
green_pos = positives[:, :, 1]
blue_pos = positives[:, :, 2]

red_mean_pos = np.mean(red_pos, axis=1)
blue_mean_pos = np.mean(green_pos, axis=1)
green_mean_pos = np.mean(blue_pos, axis=1)

In [5]:
red_neg = negatives[:, :, 0]
green_neg = negatives[:, :, 1]
blue_neg = negatives[:, :, 2]

red_mean_neg = np.mean(red_neg, axis=1)
blue_mean_neg = np.mean(green_neg, axis=1)
green_mean_neg = np.mean(blue_neg, axis=1)

In [6]:
red_std_pos = np.std(red_pos, axis=1)
blue_std_pos = np.std(green_pos, axis=1)
green_std_pos = np.std(blue_pos, axis=1)

In [7]:
red_std_neg = np.std(red_neg, axis=1)
blue_std_neg = np.std(green_neg, axis=1)
green_std_neg = np.std(blue_neg, axis=1)


max_five_pixels_red_pos = np.mean(np.sort(red_pos, axis=1)[:, :5], axis=1)
max_five_pixels_blue_pos = np.mean(np.sort(blue_pos, axis=1)[:, :5], axis=1)
max_five_pixels_green_pos = np.mean(np.sort(green_pos, axis=1)[:, :5], axis=1)

max_five_pixels_red_neg = np.mean(np.sort(red_neg, axis=1)[:, :5], axis=1)
max_five_pixels_blue_neg = np.mean(np.sort(blue_neg, axis=1)[:, :5], axis=1)
max_five_pixels_green_neg = np.mean(np.sort(green_neg, axis=1)[:, :5], axis=1)

In [8]:
# Build one feature matrix per class: one row per image, one column per
# feature, in the order std (red, blue, green) followed by
# max_five_pixels (red, blue, green).
pos_features = np.column_stack((
    red_std_pos,
    blue_std_pos,
    green_std_pos,
    max_five_pixels_red_pos,
    max_five_pixels_blue_pos,
    max_five_pixels_green_pos,
))
neg_features = np.column_stack((
    red_std_neg,
    blue_std_neg,
    green_std_neg,
    max_five_pixels_red_neg,
    max_five_pixels_blue_neg,
    max_five_pixels_green_neg,
))
num_features = pos_features.shape[1]

In [9]:
def split_test_train(features_0, features_1, test_split_percentage=0.1):
    """Randomly hold out a test set from two classes of samples.

    Parameters:
        features_0: array of samples belonging to class 0, one row per sample.
        features_1: array of samples belonging to class 1, one row per sample.
        test_split_percentage: fraction of all samples moved to the test set.

    Returns:
        (features_0, features_1, test_features, test_labels) where the first
        two are the remaining training samples per class, `test_features`
        holds the held-out rows and `test_labels` is a boolean array that is
        True for rows taken from `features_1`.
    """
    n_0 = len(features_0)
    total = n_0 + len(features_1)
    number_of_tests = int(total * test_split_percentage)

    # Draw WITHOUT replacement so no sample appears twice in the test set and
    # exactly `number_of_tests` rows are removed from the training data.
    test_idxs = np.random.choice(total, number_of_tests, replace=False)
    both = np.concatenate((features_0, features_1), axis=0)
    test_features = both[test_idxs, :]
    # Indices at or beyond the size of class 0 refer to class-1 samples.
    test_labels = (test_idxs >= n_0)

    # Translate the global indices back into per-class row indices.
    idxs_0 = test_idxs[test_idxs < n_0]
    idxs_1 = test_idxs[test_idxs >= n_0] - n_0

    features_0 = np.delete(features_0, idxs_0, axis=0)
    features_1 = np.delete(features_1, idxs_1, axis=0)

    return features_0, features_1, test_features, test_labels

In [10]:
# Shuffle two lists in same order
def shuffle(a,b):
    """Return `a` and `b` reordered by one shared random permutation.

    Both inputs must have the same length and support indexing with an
    integer index array.
    """
    assert len(a) == len(b)
    order = np.arange(len(a))
    # In-place shuffle of the index vector; both inputs are then reordered
    # with the same indices so corresponding entries stay paired.
    np.random.shuffle(order)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [11]:
def k_fold(features_0, features_1, k=10):
    """Randomly partition two classes of samples into k equally sized folds.

    Parameters:
        features_0: 2-D array of class-0 samples, one row per sample.
        features_1: 2-D array of class-1 samples, one row per sample.
        k: number of folds.

    Returns:
        (splits, splits_labels) with shapes (k, samples_per_split,
        num_features) and (k, samples_per_split). Labels are 0.0 for rows
        from `features_0` and 1.0 for rows from `features_1`. When the total
        number of samples is not divisible by k, the remainder is left out.

    Raises:
        ValueError: if k is smaller than 1 or larger than the sample count.
    """
    both_features = np.concatenate((features_0, features_1), axis=0)
    both_labels = np.concatenate(
        (np.zeros(len(features_0)), np.ones(len(features_1))), axis=0)

    total = len(both_features)
    if k < 1 or k > total:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (total, k))
    samples_per_split = total // k

    # A single permutation guarantees every sample lands in at most one fold
    # and never twice in the same fold. Drawing indices with replacement per
    # fold would duplicate some samples and silently skip others.
    order = np.random.permutation(total)[:k * samples_per_split]
    fold_idxs = order.reshape(k, samples_per_split)

    splits = np.asarray(both_features[fold_idxs], dtype=float)
    splits_labels = both_labels[fold_idxs]

    return splits, splits_labels
    

In [12]:
def evaluate_k_fold(splits, split_labels):
    """Run k-fold cross-validation and return the averaged accuracies.

    Each fold is held out once as the test set while all remaining folds
    form the training set; the classifier is trained and scored through
    `train_and_test`.

    Parameters:
        splits: array of shape (k, samples_per_split, num_features).
        split_labels: array of shape (k, samples_per_split) with labels 0/1.

    Returns:
        (mean train accuracy, mean test accuracy) over all folds.
    """
    train_scores = []
    test_scores = []

    for held_out in range(len(splits)):
        # All folds except the held-out one make up the training data.
        train_folds = np.delete(splits, held_out, axis=0)
        train_fold_labels = np.delete(split_labels, held_out, axis=0)

        # Collapse the fold axis: one row per training sample.
        flat_features = train_folds.reshape(-1, train_folds.shape[-1])
        flat_labels = train_fold_labels.reshape(-1)

        class_0 = flat_features[flat_labels == 0]
        class_1 = flat_features[flat_labels == 1]

        # Score on the held-out fold first, then on the training data itself.
        test_scores.append(
            train_and_test(class_0, class_1,
                           splits[held_out], split_labels[held_out]))
        train_scores.append(
            train_and_test(class_0, class_1, flat_features, flat_labels))

    return np.mean(train_scores), np.mean(test_scores)
        

In [70]:
def train_and_test(features_0, features_1, test_features, test_labels,
                   kernel="sigmoid", C=5, gamma=0.001):
    """Fit an SVM on two classes and return its accuracy on a test set.

    Parameters:
        features_0: training samples of class 0, one row per sample.
        features_1: training samples of class 1, one row per sample.
        test_features: samples to classify, one row per sample.
        test_labels: expected labels (0/1 or False/True) for `test_features`.
        kernel, C, gamma: passed to `svm.SVC`; the defaults reproduce the
            previously hard-coded configuration, so different kernels can be
            compared without editing this function.

    Returns:
        Fraction of `test_features` whose prediction equals its label.
    """
    clf = svm.SVC(kernel=kernel, C=C, gamma=gamma)

    X = np.concatenate((features_0, features_1))
    y = np.concatenate((np.zeros(len(features_0)), np.ones(len(features_1))))

    # NOTE: features are fed to the SVM unscaled.
    clf.fit(X, y)

    predictions = clf.predict(test_features)
    n_correct = np.sum(predictions == test_labels)

    return n_correct / len(test_features)
    

In [71]:
# Earlier experiments, kept for reference (single split / training on all data):
#folds, labels = k_fold(pos_features, neg_features, 1)
#folds
#train, test = evaluate_k_fold(folds, labels)
#print("Overall train - test accuracy: ", train, test)
#train_and_test(pos_features, neg_features, np.concatenate((pos_features, neg_features)), np.concatenate((np.zeros(30), np.ones(30))))

# 10-fold cross-validation. The first argument is labelled 0 and the second 1
# by k_fold, so positives carry label 0 here.
splits, labels = k_fold(pos_features, neg_features, 10)

# Bare expression so the notebook displays the returned tuple:
# (mean train accuracy, mean test accuracy).
evaluate_k_fold(splits, labels)

(0.5203703703703704, 0.4499999999999999)

rbf: 0.983
sigmoid: 0.0
poly: 0.883
linear: 0.983