In [None]:
import numpy as np
import pandas as pd

# Load the label arrays written by the preprocessing step (train first, then test).
train_labels, test_labels = (
    np.load(label_path)
    for label_path in (
        'preprocessed_files/train_labels.npy',
        'preprocessed_files/test_labels.npy',
    )
)

# Load the four pickled image tables in the same order as before:
# untouched train/test, then touched train/test.
# NOTE: unpickling runs arbitrary code, so only read pickles produced locally.
(
    train_images_untouched,
    test_images_untouched,
    train_images_touched,
    test_images_touched,
) = (
    pd.read_pickle(image_path)
    for image_path in (
        'preprocessed_files/train_images_untouched.pkl',
        'preprocessed_files/test_images_untouched.pkl',
        'preprocessed_files/train_images_touched.pkl',
        'preprocessed_files/test_images_touched.pkl',
    )
)

In [None]:
def get_probs_labelProbs(df, labels=None, n_classes=10, on_value=255.0):
    """Estimate per-class pixel-on frequencies and class frequencies.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per sample, one column per pixel.
    labels : array-like, optional
        Class label of each row of ``df``, in row order. When omitted, the
        notebook-level ``train_labels`` is used, so existing one-argument
        calls keep working.
    n_classes : int, optional
        Classes are taken to be the integers ``0 .. n_classes - 1``.
    on_value : float, optional
        Pixel value that counts as "on".

    Returns
    -------
    probs : list of list of float
        ``probs[c][j]`` is the fraction of class-``c`` rows whose ``j``-th
        column equals ``on_value``. A class with no rows gets all zeros.
    label_probs : list of float
        ``label_probs[c]`` is the fraction of rows of ``df`` labelled ``c``.

    Raises
    ------
    ValueError
        If ``labels`` does not have one entry per row of ``df``.
    """
    if labels is None:
        # Backward-compatible default: the original always used this global.
        labels = train_labels
    labels = np.asarray(labels)
    if len(labels) != len(df):
        raise ValueError(
            "labels has {0} entries but df has {1} rows".format(len(labels), len(df))
        )

    # Compare every pixel once up front instead of re-filtering the frame
    # for each (class, pixel) pair.
    pixels_on = df.to_numpy() == on_value
    n_pixels = pixels_on.shape[1]

    probs = []
    label_probs = []
    for label in range(n_classes):
        in_class = labels == label
        n_in_class = int(in_class.sum())
        label_probs.append(n_in_class / len(df))
        if n_in_class == 0:
            # No examples of this class: avoid dividing by zero.
            probs.append([0.0] * n_pixels)
        else:
            on_counts = pixels_on[in_class].sum(axis=0)
            probs.append((on_counts / n_in_class).tolist())
    return probs, label_probs

In [None]:
# Fit the pixel and label probabilities once per image variant.
touched_probs, touched_label_probs = get_probs_labelProbs(df=train_images_touched)
untouched_probs, untouched_label_probs = get_probs_labelProbs(df=train_images_untouched)

In [None]:
def bernoulli(x, prob):
    """Return ``prob`` when ``x`` equals 255.0, otherwise ``1 - prob``."""
    return prob if x == 255.0 else 1-prob

In [None]:
def classifier(x, probs, label_probs, on_value=255.0, eps=1e-10):
    """Pick the class with the highest naive-Bayes log-posterior for one sample.

    Parameters
    ----------
    x : array-like
        Pixel values of one sample. Values are read by position, so a
        pandas Series row works whatever its index labels are.
    probs : array-like, shape (n_classes, n_pixels)
        ``probs[c][j]`` is the probability that pixel ``j`` equals
        ``on_value`` for class ``c``.
    label_probs : array-like, shape (n_classes,)
        Prior probability of each class.
    on_value : float, optional
        Pixel value that counts as "on".
    eps : float, optional
        Lower bound applied to each per-pixel likelihood before taking the
        logarithm.

    Returns
    -------
    numpy.intp
        Index of the class with the largest log-posterior.

    Raises
    ------
    ValueError
        If ``probs`` is not shaped ``(len(label_probs), len(x))``.
    """
    pixel_on = np.asarray(x) == on_value
    on_probs = np.asarray(probs, dtype=float)
    priors = np.asarray(label_probs, dtype=float)
    if on_probs.shape != (priors.size, pixel_on.size):
        raise ValueError(
            "probs has shape {0}, expected ({1}, {2})".format(
                on_probs.shape, priors.size, pixel_on.size
            )
        )

    # Bernoulli likelihood of every pixel under every class.
    likelihoods = np.where(pixel_on, on_probs, 1.0 - on_probs)

    # A zero likelihood used to be skipped, which scored an "impossible"
    # pixel as if it had probability 1. Clipping to eps keeps the log finite
    # while still penalising that class heavily.
    log_likelihoods = np.log(np.clip(likelihoods, eps, 1.0))

    # A class with zero prior ends up at -inf and can never win the argmax.
    with np.errstate(divide='ignore'):
        log_priors = np.log(priors)

    return np.argmax(log_priors + log_likelihoods.sum(axis=1))

In [None]:
# Classify each row of the untouched training images; print the row's index
# label whenever it is a multiple of 1000 as a progress indicator.
train_untouched_predictions = []
for row_label, pixel_row in train_images_untouched.iterrows():
    predicted = classifier(pixel_row, untouched_probs, untouched_label_probs)
    train_untouched_predictions.append(predicted)
    if row_label % 1000 == 0:
        print(row_label)

In [None]:
# Classify each row of the touched training images; print the row's index
# label whenever it is a multiple of 1000 as a progress indicator.
train_touched_predictions = []
for row_label, pixel_row in train_images_touched.iterrows():
    predicted = classifier(pixel_row, touched_probs, touched_label_probs)
    train_touched_predictions.append(predicted)
    if row_label % 1000 == 0:
        print(row_label)

In [None]:
# Classify each row of the untouched test images; print the row's index
# label whenever it is a multiple of 1000 as a progress indicator.
test_untouched_predictions = []
for row_label, pixel_row in test_images_untouched.iterrows():
    predicted = classifier(pixel_row, untouched_probs, untouched_label_probs)
    test_untouched_predictions.append(predicted)
    if row_label % 1000 == 0:
        print(row_label)

In [None]:
# Classify each row of the touched test images; print the row's index
# label whenever it is a multiple of 1000 as a progress indicator.
test_touched_predictions = []
for row_label, pixel_row in test_images_touched.iterrows():
    predicted = classifier(pixel_row, touched_probs, touched_label_probs)
    test_touched_predictions.append(predicted)
    if row_label % 1000 == 0:
        print(row_label)

In [None]:
# Training-set accuracy: fraction of predictions that equal the label at the
# same position in train_labels.
untouched_right = sum(
    int(prediction == label)
    for prediction, label in zip(train_untouched_predictions, train_labels)
)
touched_right = sum(
    int(prediction == label)
    for prediction, label in zip(train_touched_predictions, train_labels)
)

# Divide by the prediction lists built above; the previous names
# (`untouched_predictions` / `touched_predictions`) are never defined in this
# notebook and raised NameError on a fresh kernel.
train_untouched_accuracy = untouched_right / len(train_untouched_predictions)
train_touched_accuracy = touched_right / len(train_touched_predictions)
print(train_untouched_accuracy)
print(train_touched_accuracy)

In [None]:
# Test-set accuracy: fraction of predictions that equal the label at the
# same position in test_labels.
untouched_right = sum(
    int(prediction == label)
    for prediction, label in zip(test_untouched_predictions, test_labels)
)
touched_right = sum(
    int(prediction == label)
    for prediction, label in zip(test_touched_predictions, test_labels)
)

# Divide by the test prediction lists; the previous names
# (`untouched_predictions` / `touched_predictions`) are never defined in this
# notebook and raised NameError on a fresh kernel.
test_untouched_accuracy = untouched_right / len(test_untouched_predictions)
test_touched_accuracy = touched_right / len(test_touched_predictions)
print(test_untouched_accuracy)
print(test_touched_accuracy)