# Assignment 1

In [70]:
import numpy as np
import time
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Iris is provided by scikit-learn; ionosphere is read from a local
# comma-separated file (columns 0-33 are read as features, column 34 as the
# integer class label).
iris = load_iris()
ion_X = np.genfromtxt("ionosphere.txt", delimiter=",", usecols=np.arange(34))
ion_y = np.genfromtxt("ionosphere.txt", delimiter=",", usecols=34, dtype="int")

# Both datasets are split with the same fixed seed so that runs are repeatable.
iris_split = train_test_split(iris['data'], iris['target'], random_state=2408)
iris_X_train, iris_X_test, iris_y_train, iris_y_test = iris_split

ion_split = train_test_split(ion_X, ion_y, random_state=2408)
ion_X_train, ion_X_test, ion_y_train, ion_y_test = ion_split


def computeEuclideanNorm(vector: np.ndarray) -> float:
    """Return the Euclidean (L2) norm of a vector.

    The norm is the square root of the sum of the squared components.

    Args:
        vector: Array (or array-like) of numbers. Input with more than one
            dimension is flattened first. An empty vector has norm 0.

    Returns:
        The norm as a NumPy float.
    """
    # Convert to float up front: squaring integer components in their own
    # dtype can silently wrap around for large values.
    components = np.asarray(vector, dtype=float).ravel()
    # One dot product replaces the per-element Python loop, which was slow and
    # used an accumulator that shadowed the built-in ``sum``.
    return np.sqrt(np.dot(components, components))


def calculateEuclideanDistance(v1: np.ndarray, v2: np.ndarray) -> float:
    """Return the Euclidean distance between the points ``v1`` and ``v2``.

    The distance is the Euclidean norm of the component-wise difference
    ``v1 - v2``.
    """
    return computeEuclideanNorm(np.subtract(v1, v2))


def computeDistances(sample: np.ndarray, training_set: np.ndarray) -> np.ndarray:
    """Return the Euclidean distance from ``sample`` to every training point.

    Args:
        sample: Feature vector of the query point, shape ``(n_features,)``.
        training_set: Points to measure against, one per row, shape
            ``(n_points, n_features)``.

    Returns:
        Float array of length ``n_points``; element ``i`` is the distance
        between ``sample`` and ``training_set[i]``. The array is empty when
        there are no training points.
    """
    points = np.asarray(training_set, dtype=float)
    if len(points) == 0:
        # Nothing to measure against. This also covers a plain empty list,
        # which would not broadcast against ``sample`` below.
        return np.zeros(0)

    # Broadcasting subtracts the sample from every row at once, so the whole
    # computation runs inside NumPy instead of nested Python loops.
    offsets = points - np.asarray(sample, dtype=float)
    return np.sqrt(np.sum(offsets * offsets, axis=1))


def computeMinimum(a: np.ndarray):
    """Find the smallest element of ``a`` with a single linear scan.

    Returns a ``(value, index)`` pair for the first occurrence of the minimum.
    If no element is smaller than infinity (for example when ``a`` is empty),
    both entries of the pair are ``np.inf``.
    """
    best_value, best_index = np.inf, np.inf
    for position, value in enumerate(a):
        # Strict comparison: a later element equal to the current best does
        # not replace it.
        if value < best_value:
            best_value, best_index = value, position

    return best_value, best_index


def _nearestInMask(distances: np.ndarray, mask: np.ndarray):
    """Return ``(distance, index)`` of the smallest usable distance selected by ``mask``."""
    # ``distances < np.inf`` is False for both infinite and NaN entries, so
    # only distances that can actually win the comparison remain.
    candidates = np.flatnonzero(mask & (distances < np.inf))
    if candidates.size == 0:
        return np.inf, np.inf
    # argmin reports the first minimum, so ties go to the lowest index.
    winner = candidates[np.argmin(distances[candidates])]
    return distances[winner], int(winner)


def calculateNNs(sample_X: np.ndarray, sample_y: np.ndarray, X_training_set: np.ndarray, y_training_set: np.ndarray):
    """Find the nearest same-class and nearest other-class training points.

    Distances are Euclidean and are computed for all training points in one
    vectorized pass rather than one Python-level call per point.

    Args:
        sample_X: Feature vector of the query sample, shape ``(n_features,)``.
        sample_y: Class label of the query sample.
        X_training_set: Training features, one point per row.
        y_training_set: Training labels, aligned with ``X_training_set``.

    Returns:
        A tuple ``(nn_dist_same, nn_index_same, nn_dist_diff, nn_index_diff)``:
        the distance to and row index of the nearest training point whose
        label equals ``sample_y``, followed by the same for the nearest point
        with a different label. Ties are resolved in favour of the lowest
        index. When a group has no usable point, its distance and index are
        both ``np.inf``.
    """
    points = np.asarray(X_training_set, dtype=float)
    if len(points) == 0:
        return np.inf, np.inf, np.inf, np.inf

    # Broadcasting subtracts the sample from every training row at once.
    offsets = points - np.asarray(sample_X, dtype=float)
    distances = np.sqrt(np.sum(offsets * offsets, axis=1))
    same_class = np.asarray(y_training_set) == sample_y

    nn_dist_same, nn_index_same = _nearestInMask(distances, same_class)
    nn_dist_diff, nn_index_diff = _nearestInMask(distances, ~same_class)

    return nn_dist_same, nn_index_same, nn_dist_diff, nn_index_diff


def calculateConformityScores(sample_X: np.ndarray, sample_y: np.ndarray, X_training_set, y_training_set):
    """Compute a conformity score for the sample and for every training point.

    The sample is placed in front of the training data, so the returned array
    has one more entry than the training set and entry 0 belongs to the sample.
    Each point is scored against all the other points of this augmented set:

        score = distance to nearest point of a different class
                / distance to nearest point of the same class

    When the same-class distance is zero the score is 0 if the other-class
    distance is zero as well, and infinity otherwise.
    """
    features = np.concatenate((sample_X.reshape(1, -1), X_training_set), axis=0)
    # The extra sample is the FIRST element of the augmented set.
    labels = np.concatenate(([sample_y], y_training_set))
    scores = np.zeros(len(features))

    for idx in range(len(labels)):
        # Leave the current point out so it cannot be its own neighbour.
        remaining_X = np.delete(features, idx, axis=0)
        remaining_y = np.delete(labels, idx)
        dist_same, _, dist_diff, _ = calculateNNs(features[idx], labels[idx], remaining_X, remaining_y)

        if dist_same != 0:
            scores[idx] = dist_diff / dist_same
        elif dist_diff != 0:
            scores[idx] = np.inf
        else:
            scores[idx] = 0

    return scores


def calculatePValue(scores):
    """Compute the p-value of the sample whose conformity score is ``scores[0]``.

    The result is the number of remaining scores that do not exceed the
    sample's score, plus one for the sample itself, divided by the total
    number of scores.
    """
    own_score = scores[0]
    not_larger = sum(1 for other in scores[1:] if other <= own_score)
    return (not_larger + 1) / len(scores)



start = time.time()

# Small hand-made 2-D example; the loop below does not use it.
train_X = np.array([[0,3], [2,2],[3,3],[-1,1],[-1,-1],[0,1]])
train_y = np.array([1,1,1,-1,-1,-1])
test_X = np.array([0,0])
test_y = np.array(1)

# For every ionosphere test sample, print the sample and its label, then the
# p-value obtained from the conformity scores against the training split.
separator = '='*70
for sample, label in zip(ion_X_test, ion_y_test):
    print(separator)
    print("Testing sample:", sample, "Class:", label)
    print(separator)
    scores = calculateConformityScores(sample, label, ion_X_train, ion_y_train)
    print("P-value:", calculatePValue(scores))


elapsed = time.time() - start
print("\nCompleted in", elapsed, "seconds")

Testing sample: [ 0.     0.    -1.    -1.     0.     0.    -1.     1.     1.    -0.375
  0.     0.     0.     0.     0.     0.     1.    -1.    -1.    -1.
  1.    -1.     0.     0.     1.    -1.    -1.     1.    -1.    -1.
  0.     0.    -1.     1.   ] Class: -1
P-value: 0.25757575757575757
Testing sample: [ 1.       0.       1.       0.09802  1.       0.25101  0.9839   0.33044
  0.80365  0.5302   0.74977  0.60297  0.56937  0.71942  0.55311  0.74079
  0.29452  0.82193  0.21137  0.79777  0.09709  0.82162 -0.01734  0.7987
 -0.15144  0.75596 -0.22839  0.69187 -0.31713  0.60948 -0.40291  0.54522
 -0.42815  0.44534] Class: 1
P-value: 0.7916666666666666
Testing sample: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.] Class: -1
P-value: 0.42424242424242425
Testing sample: [ 1.       0.       1.      -0.18829  0.93035 -0.36156 -0.10868 -0.93597
  1.      -0.04549  0.50874 -0.67743  0.34432 -0.69707 -0.51685 -0.97515
  0.05499 -0.62237  0.

KeyboardInterrupt: 