# Assignment 1

In [46]:
import numpy as np
import time
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the two data sets used in this notebook.
# Iris ships with scikit-learn; ionosphere is read from a local CSV file
# whose columns 0-33 are used as features and column 34 as the label.
iris = load_iris()
ion_X = np.genfromtxt(
    "ionosphere.txt",
    delimiter=",",
    usecols=np.arange(34),
)
ion_y = np.genfromtxt(
    "ionosphere.txt",
    delimiter=",",
    usecols=34,
    dtype="int",
)

# Split each data set into training and test parts; the fixed random_state
# makes the shuffling reproducible between runs.
(iris_X_train, iris_X_test,
 iris_y_train, iris_y_test) = train_test_split(iris.data, iris.target, random_state=2408)
(ion_X_train, ion_X_test,
 ion_y_train, ion_y_test) = train_test_split(ion_X, ion_y, random_state=2408)


def computeEuclideanNorm(vector: np.ndarray) -> float:
    """Return the Euclidean (L2) norm of ``vector``.

    The norm is the square root of the sum of the squared components.
    The reduction is done with vectorised NumPy calls instead of a Python
    index loop, and the builtin ``sum`` is no longer shadowed by a local.

    Args:
        vector: Array (or array-like) of numeric components. Inputs with more
            than one dimension are reduced over all of their elements.

    Returns:
        The norm as a NumPy floating-point scalar; ``0.0`` for empty input.
    """
    components = np.asarray(vector)
    return np.sqrt(np.sum(np.square(components)))

def computeMinimum(a: np.ndarray):
    """Find the smallest entry of ``a`` and the position where it first occurs.

    Returns a ``(value, index)`` pair. When no entry is strictly smaller than
    infinity (for example when ``a`` is empty) both members of the pair are
    ``np.inf``.
    """
    best_value, best_position = np.inf, np.inf

    for position in range(a.size):
        candidate = a[position]
        # Strict comparison: on ties the earliest position is kept.
        if candidate < best_value:
            best_value, best_position = candidate, position

    return best_value, best_position

def calculateNNs(sample_X: np.ndarray, sample_y: np.ndarray, X_training_set: np.ndarray, y_training_set: np.ndarray):
    """Find the nearest same-class and nearest different-class training points.

    Every row of ``X_training_set`` is compared with ``sample_X`` using
    ``calculateEuclideanDistance``; the row's label in ``y_training_set``
    decides whether it competes for the "same class" or the "different class"
    slot.

    Returns:
        ``(nn_dist_same, nn_index_same, nn_dist_diff, nn_index_diff)``.
        A distance/index pair stays at ``np.inf`` when no training point of
        that kind exists.
    """
    same_best, same_at = np.inf, np.inf
    diff_best, diff_at = np.inf, np.inf

    for position, candidate in enumerate(X_training_set):
        distance = calculateEuclideanDistance(sample_X, candidate)
        if y_training_set[position] == sample_y:
            # Strict comparison keeps the earliest point on ties.
            if distance < same_best:
                same_best, same_at = distance, position
        elif distance < diff_best:
            diff_best, diff_at = distance, position

    return same_best, same_at, diff_best, diff_at

def calculateEuclideanDistance(v1: np.ndarray, v2: np.ndarray) -> float:
    """Return the Euclidean distance between the points ``v1`` and ``v2``.

    The distance is the Euclidean norm of the element-wise difference
    ``v1 - v2``.
    """
    return computeEuclideanNorm(np.subtract(v1, v2))

def computeDistances(sample: np.ndarray, training_set: np.ndarray) -> np.ndarray:
    """Return the distance from ``sample`` to each entry of ``training_set``.

    The result has one element per training entry, in the same order.
    """
    distances = np.zeros(len(training_set))
    for position, point in enumerate(training_set):
        distances[position] = calculateEuclideanDistance(sample, point)

    return distances

def calculateConformityScores(sample_X: np.ndarray, sample_y: np.ndarray, X_training_set, y_training_set):
    """Compute nearest-neighbour conformity scores for a sample and the training set.

    The sample (with its postulated label ``sample_y``) is prepended to the
    training data. For every point of this augmented set the score is

        distance to nearest point of a different class
        ----------------------------------------------
        distance to nearest point of the same class

    where the nearest neighbours are searched among all *other* points of the
    augmented set.

    Division by zero is resolved as follows:
      * same-class distance 0 and different-class distance 0 -> score 0
      * same-class distance 0 and different-class distance > 0 -> score inf

    Args:
        sample_X: Feature vector of the sample being scored.
        sample_y: Label assumed for the sample.
        X_training_set: Training feature rows.
        y_training_set: Training labels, aligned with ``X_training_set``.

    Returns:
        Array of scores, one per point of the augmented set. Index 0 belongs
        to the supplied sample; indices 1.. follow the training-set order.
    """
    X_aug = np.concatenate((sample_X.reshape(1, -1), X_training_set), axis=0)
    y_aug = np.concatenate(([sample_y], y_training_set))  # the sample is the FIRST entry
    scores = np.zeros(len(X_aug))

    for i in range(len(y_aug)):
        # Leave point i out so that it cannot be its own nearest neighbour.
        X_rest = np.delete(X_aug, i, axis=0)
        y_rest = np.delete(y_aug, i)
        nn_dist_same, _, nn_dist_diff, _ = calculateNNs(X_aug[i], y_aug[i], X_rest, y_rest)

        if nn_dist_same == 0:
            # The 0/0 case must stay 0; previously it was overwritten by inf
            # because the inf assignment was not in an else branch.
            scores[i] = 0 if nn_dist_diff == 0 else np.inf
        else:
            scores[i] = nn_dist_diff / nn_dist_same

    return scores

def calculatePValue(scores):
    """Return the p-value of the test sample from a sequence of conformity scores.

    ``scores[0]`` is taken as the test sample's score. The p-value is the
    fraction of all scores (the test sample's own score included) that are
    less than or equal to it.
    """
    reference = scores[0]
    # The reference score itself is part of the count, so the result is never 0.
    not_larger = sum(1 for value in scores if value <= reference)
    return not_larger / len(scores)

def calculateNearestNeighbour(sample: np.ndarray, X_training_set: np.ndarray, y_training_set: np.ndarray):
    """Locate the training point closest to ``sample``, report it, and return it.

    Prints the nearest training row, its label and its distance, then returns
    the pair ``(distance, index)``.
    """
    all_distances = computeDistances(sample, X_training_set)
    nearest_distance, nearest_index = computeMinimum(all_distances)
    print("Nearest sample:", X_training_set[nearest_index], "Class:", y_training_set[nearest_index], "\nDistance:", nearest_distance)
    return nearest_distance, nearest_index

# Score every iris test sample against the iris training set and report
# its conformity scores and p-value, timing the whole run.
start = time.time()

banner = '=' * 70
for sample, label in zip(iris_X_test, iris_y_test):
    print(banner)
    print("Testing sample:", sample, "Class:", label)
    print(banner)
    scores = calculateConformityScores(sample, label, iris_X_train, iris_y_train)
    print("Conformity Scores:", scores)
    print(calculatePValue(scores))


print("\nCompleted in", time.time() - start, "seconds")

Testing sample: [4.4 2.9 1.4 0.2] Class: 0
Conformity Scores: [15.41103501  7.10130773  4.50924975  0.70710678  5.47722558  5.49331144
  3.0394235   4.41588043 23.4520788   1.60727513  7.5854111  10.30372101
  2.64575131 23.13006701  1.14017543  5.09901951  2.67101635  5.04267503
  6.81693393  1.68325082  3.45669176 21.30727575  2.88675135  8.06225775
 22.18107301  3.7859389   9.5289034   3.51188458  1.23091491  5.19615242
  3.24037035  7.67112468  0.52704628  9.88264472  7.95822426  1.8973666
 15.18222645 16.20185175  1.26929552  0.80659929  1.96638416 20.90454496
  1.94935887  2.17944947 11.40175425  5.38516481  2.5819889   6.5345237
 14.19506957  2.56347978  2.47991935  4.58257569 16.34013464  2.9519969
  3.21455025 14.50861813  6.33333333 15.77973384  1.53896753  5.35652011
  1.47196014  6.8556546   2.26384628  8.30662386  7.78620575  7.74596669
  1.60727513  2.7202941   2.26778684  5.4379618   3.49284984  3.
  2.92326094  1.84390889  2.86744176  1.69774938  0.96362411  1.51657509
