In [None]:
import numpy as np
from scipy.stats import norm
from scipy.stats import bootstrap
from numpy.random import multivariate_normal as mvn
import pandas as pd
import json


In [99]:
class tagClass:
    """Class to store each tag coordinate data.

    Parameters
    ----------
    data : dict
        Annotation record. The keys read are "x", "y", "fn" and "label".
        A leading "40data/" is stripped from "fn".
    tClass
        Identifier of the group this tag was read from (stored unchanged and
        returned by getFileClass()).
    """
    def __init__(self, data: dict, tClass):
        self.x = data.get("x")
        self.y = data.get("y")
        # Raises AttributeError when "fn" is missing, because .get() yields None.
        self.fn = data.get("fn").removeprefix("40data/")
        self.label = data.get("label")
        # Crop window is computed once, from the centre coordinates above.
        self.tagX, self.tagX2, self.tagY, self.tagY2 = self.getSnip()
        self.tagClass = tClass

    # Get functions
    def getX(self):
        """Return the tag centre x coordinate."""
        return self.x

    def getY(self):
        """Return the tag centre y coordinate."""
        return self.y

    def getFn(self):
        """Return the photo file name with the "40data/" prefix removed."""
        return self.fn

    def getLabel(self):
        """Return the label stored in the annotation record."""
        return self.label

    def getFileClass(self):
        """Return the group identifier passed to the constructor."""
        # Must be the instance attribute: the bare name `tagClass` is the
        # class object itself, not the value stored for this tag.
        return self.tagClass

    # Returns coordinates for 16 x 16 crop around tag centre
    def getSnip(self):
        """Return (x0, x1, y0, y1) of a 16 x 16 window around the tag centre.

        The top-left corner is moved down by one when it is not a multiple of
        two, so for integer centres the window starts on an even column/row.
        NOTE(review): for non-integer centres the modulo test is always true
        and int() then truncates, so the start is not guaranteed to be even -
        confirm that the annotation coordinates are integers.
        """
        s = 16
        newX = self.x-(s/2)
        if newX%2 != 0:
            newX -= 1
        newY = self.y-(s/2)
        if newY%2 != 0:
            newY -= 1
        return int(newX), int(newX+s), int(newY), int(newY+s)

    def getSnipCoords(self):
        """Return the crop window computed at construction time."""
        return self.tagX, self.tagX2, self.tagY, self.tagY2

In [101]:
def getTags(rawData, nType):
    """Build one tagClass object per annotation record.

    For every key in `nType` (in order), each record in `rawData[key]` is
    copied into a dict and wrapped in a tagClass tagged with that key.
    Returns the objects as a flat list.
    """
    return [
        tagClass(dict(record), group)
        for group in nType
        for record in rawData[group]
    ]

def getPhoto(tag: tagClass):
    """Load and return the 'img' entry of the photo file this tag refers to.

    The file is looked up under "leon_bee_photos_3rdMarch2023/cam5/" using
    tag.getFn(). It is read with allow_pickle=True, which can unpickle
    arbitrary objects, so only trusted files should be loaded this way.
    """
    path = "leon_bee_photos_3rdMarch2023/cam5/" + tag.getFn()
    return np.load(path, allow_pickle=True)['img']

def getSnipPlot(tag):
    """Return the crop of this tag's photo as a float32 array.

    The window is whatever tag.getSnipCoords() reports (x0, x1, y0, y1); rows
    are sliced with the y range and columns with the x range. The photo is
    obtained through getPhoto() so the file location and loading options are
    defined in a single place.
    """
    tagX, tagX2, tagY, tagY2 = tag.getSnipCoords()
    photo = getPhoto(tag)
    return photo[tagY:tagY2, tagX:tagX2].astype(np.float32)

def getBayer(x, y):
    """Return the Bayer filter colour ("R", "G" or "B") at a coordinate.

    Both coordinates even -> "R"; both odd -> "B"; one even and one odd -> "G".
    The result is therefore the same if x and y are swapped.
    """
    x_even = x % 2 == 0
    y_even = y % 2 == 0
    if x_even and y_even:
        return "R"
    if not x_even and not y_even:
        return "B"
    # Exactly one coordinate is odd.
    return "G"
        
def getPixels(t: tuple, photo):
    """Returns RGB values for this tag as ratio.

    Parameters
    ----------
    t : tuple
        Crop window as (x0, x1, y0, y1); the end coordinates are exclusive.
    photo
        Image indexed as photo[row, column], i.e. photo[y, x].

    Returns
    -------
    tuple
        (red, green, blue) fractions of the weighted total. Green is weighted
        by 0.5 (getBayer labels two of the four parity positions as green).

    Raises
    ------
    ZeroDivisionError
        If the weighted total is zero (empty window or all-zero pixels).
    """
    tagX, tagX2, tagY, tagY2 = t
    red = 0
    green = 0
    blue = 0
    for py in range(tagY, tagY2):
        for px in range(tagX, tagX2):
            # getBayer takes (x, y); pass column first, then row.
            col = getBayer(px, py)
            # int() so sums are Python integers rather than fixed-width values.
            value = int(photo[py, px])
            if col == 'R':
                red += value
            elif col == 'G':
                green += value
            else:
                blue += value
    weightedGreen = 0.5 * green
    totalSum = red + weightedGreen + blue
    return red/totalSum, weightedGreen/totalSum, blue/totalSum


In [104]:
# Group keys of raw20['0'] whose tags are loaded from the annotation file.
groups20 = ['545', '547', '549', '551', '553', '557', '559', '561', '563', '565', '567', '569', '571', '573', '575', '577', '579', '581', '583', '585', '587', '591', '593', '595', '599', '601', '605', '607', '609', '611', '615', '617', '619', '621', '623', '625', '627', '629', '631', '645']

# Read the annotations with a context manager so the file handle is closed.
with open("leon_bee_photos_3rdMarch2023/bee_track40_20m.json") as annotationFile:
    raw20 = json.load(annotationFile)
tags20 = getTags(raw20['0'], groups20)

# Collect one row per tag and build the frame once, instead of enlarging a
# DataFrame row by row.
rows20 = []
for tag in tags20:
    # Get data for each tag in data
    photo = getPhoto(tag)
    redVal, greenVal, blueVal = getPixels(tag.getSnipCoords(), photo)
    # Label is the integer that follows the "gridTag" prefix.
    rows20.append([int(tag.getLabel().removeprefix("gridTag")), redVal, greenVal, blueVal, tag])
allTags20 = pd.DataFrame(rows20, columns=["Label", "Red", "Green", "Blue", "Tag"])


In [221]:
import pickle


def _load_pickle(path):
    """Return the object pickled in `path`, closing the file afterwards."""
    # NOTE: pickle.load can execute arbitrary code while loading; only use it
    # on files produced by a trusted source.
    with open(path, 'rb') as pickled:
        return pickle.load(pickled)


entropy = _load_pickle('entropy')

r_samples = _load_pickle('rsamples')
g_samples = _load_pickle('gsamples')
b_samples = _load_pickle('bsamples')

tagsX = _load_pickle('mean_full_tags')
tagsHessInv = _load_pickle('hess_full_tags')

In [89]:
def calculate_mean_hess(tags, hess, num_tags=40, num_parts=4, p=10):
    """Fuse groups of Gaussian estimates into one mean and covariance each.

    For every tag and every part, the `p` consecutive entries
    `part*p ... part*p + p - 1` are combined as a product of Gaussians.
    Only elements 2:5 of each parameter vector, and the matching 2:5 x 2:5
    block of each matrix, are used.

    Parameters
    ----------
    tags
        tags[tag][i] is the estimated parameter vector (used as the mean).
    hess
        hess[tag][i] is the matrix whose 2:5 x 2:5 block is inverted to obtain
        the covariance, i.e. the block is treated as a precision matrix.
    num_tags, num_parts, p : int
        Number of tags, parts per tag and entries per part (p >= 1).

    Returns
    -------
    (means, covs)
        Two lists with num_tags * num_parts entries, ordered tag by tag and
        part by part.

    Raises
    ------
    numpy.linalg.LinAlgError
        If a summed precision block is singular.
    """
    means = []
    covs = []

    for tag in range(num_tags):
        for part in range(num_parts):
            entries = range(part * p, (part + 1) * p)

            # Inverse Hessian can be used as covariance of the Gaussian, so the
            # Hessian block itself is the precision; precisions of a product
            # of Gaussians add up.
            precision = sum(hess[tag][i][2:5, 2:5] for i in entries)
            # Each mean is weighted by its OWN precision. Accumulating these
            # terms directly avoids reusing an already-updated covariance when
            # weighting the running mean.
            weighted = sum(
                hess[tag][i][2:5, 2:5] @ tags[tag][i][2:5] for i in entries
            )

            cov = np.linalg.inv(precision)
            covs.append(cov)
            means.append(cov @ weighted)

    return means, covs

# Fuse the loaded estimates into one (mean, covariance) pair per tag part.
# NOTE(review): the second argument is named tagsHessInv, yet
# calculate_mean_hess inverts each matrix block to obtain a covariance -
# confirm whether 'hess_full_tags' holds Hessians or inverse Hessians.
means, covs = calculate_mean_hess(tagsX, tagsHessInv)

In [None]:
# Pair up the i-th red, green and blue entries into one [r, g, b] sample.
samples = [
    [r_samples[idx], g_samples[idx], b_samples[idx]]
    for idx in range(len(r_samples))
]

(array([    3,     8,    13, ..., 99992, 99998, 99999]),)

In [None]:
def bootstraping(sample, confidence_level=0.9, n_resamples=1000, random_state=None):
    """Bootstrap a confidence interval for the mean of `sample`.

    Parameters
    ----------
    sample
        One-sample data passed to scipy.stats.bootstrap as `(sample,)`.
    confidence_level : float
        Confidence level of the interval (default 0.9).
    n_resamples : int
        Number of bootstrap resamples (default 1000).
    random_state
        Optional seed or generator forwarded to scipy.stats.bootstrap so the
        interval is reproducible. When None, resampling is left unseeded.

    Returns
    -------
    The result object returned by scipy.stats.bootstrap.
    """
    options = {"confidence_level": confidence_level, "n_resamples": n_resamples}
    # Only forward the seed when one was given, so the default call is
    # unchanged.
    if random_state is not None:
        options["random_state"] = random_state
    return bootstrap((sample,), np.mean, **options)

In [215]:
# One float class id per sample: ids 0..39, each repeated four times in a row.
# Assumes `samples` is ordered the same way (four consecutive entries per
# tag) - TODO confirm against how the sample files were written.
labels = np.repeat(np.arange(40, dtype=float), 4)

# Every fourth entry (offset 0) is held out for testing.
y_test = labels[::4].copy()
X_test = np.array(samples[::4])

# The remaining offsets 1, 2 and 3 form the training set, offset by offset.
train_offsets = (1, 2, 3)
y_train = np.concatenate([labels[offset::4] for offset in train_offsets])
X_train = np.concatenate([samples[offset::4] for offset in train_offsets])

In [216]:
# Flatten every sample to a single feature row: (n_samples, n_features).
n_samples_train, n_samples_test = X_train.shape[0], X_test.shape[0]

X_train_reshaped = np.reshape(X_train, (n_samples_train, -1))
X_test_reshaped = np.reshape(X_test, (n_samples_test, -1))

In [217]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, mean_squared_error
def scores(modelPred, actual, name):
    """Print a set of evaluation metrics for predictions and return accuracy.

    Prints `name`, then accuracy, macro-averaged F1, precision and recall,
    and the mean squared error between `actual` and `modelPred`.
    Precision is computed with zero_division=np.nan.
    """
    print(name)
    accuracy = accuracy_score(actual, modelPred)
    print("Accuracy: ", accuracy)

    # (printed label, metric function, extra keyword arguments)
    macro_metrics = (
        ("F1: ", f1_score, {}),
        ("Precision: ", precision_score, {"zero_division": np.nan}),
        ("Recall: ", recall_score, {}),
    )
    for label, metric, extra in macro_metrics:
        print(label, metric(actual, modelPred, average='macro', **extra))

    print("MSE: ", mean_squared_error(actual, modelPred), "\n")
    return accuracy

In [220]:
from sklearn.neighbors import KNeighborsClassifier

# Evaluate k-nearest-neighbour classifiers for k = 1 .. 7 on the held-out set.
for i in range(1, 8):
    knn = KNeighborsClassifier(n_neighbors=i).fit(X_train_reshaped, y_train)
    y_pred = knn.predict(X_test_reshaped)
    scores(y_pred, y_test, f"KNN - {i}N")

KNN - 1N
Accuracy:  0.825
F1:  0.7666666666666666
Precision:  0.8939393939393939
Recall:  0.825
MSE:  55.675 

KNN - 2N
Accuracy:  0.725
F1:  0.6375
Precision:  0.82183908045977
Recall:  0.725
MSE:  67.925 

KNN - 3N
Accuracy:  0.75
F1:  0.6708333333333333
Precision:  0.8444444444444444
Recall:  0.75
MSE:  66.1 

KNN - 4N
Accuracy:  0.7
F1:  0.6125
Precision:  0.7873563218390806
Recall:  0.7
MSE:  88.625 

KNN - 5N
Accuracy:  0.675
F1:  0.5791666666666666
Precision:  0.761904761904762
Recall:  0.675
MSE:  59.2 

KNN - 6N
Accuracy:  0.625
F1:  0.5166666666666666
Precision:  0.7179487179487178
Recall:  0.625
MSE:  112.075 

KNN - 7N
Accuracy:  0.6
F1:  0.5
Precision:  0.6987179487179487
Recall:  0.6
MSE:  98.175 

