# Implementing the K Nearest Neighbours algorithm for a general K.

In [560]:
import numpy as np
import math
import time
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def getDistances(X_test,X_train):
    distances = []
    """Function to calculate Euclidian distance between the test data point and training data point"""
    for j in range(X_test.shape[0]):
        for i in range(X_train.shape[0]):    
            distances.append(math.sqrt(sum(((X_test[j]-X_train[i])**2))))
    distances = np.array(distances).reshape(X_test.shape[0],X_train.shape[0])# each row stores disnces to corresponding test set.eg. row0, contains distance between test set1 and 112 training data
    return distances
def getNeighborsIndex(distances,X_test,X_train,num_neighbors=1):
    """Function to find the indexes of the neighbors""" 
    neighbors_idx = np.array([])
    for n in range(distances.shape[0]):    
        neighbors_idx = np.append(neighbors_idx , np.argpartition(distances[n], num_neighbors))
    neighbors_idx = neighbors_idx.reshape(X_test.shape[0],X_train.shape[0])#first 'num_neighbors' columns of the neighbors_idx contains the index of the nearest neighbors in training point
    kneighbors_idx = neighbors_idx[:,:num_neighbors].astype(int)
    return kneighbors_idx

def predictLabel(kneighbors_idx,num_neighbors=1):
    """Function to predict the labels"""
    yhat= np.array([])
    for m in range(kneighbors_idx.shape[0]):
        neighbors_label = []
        for p in range(num_neighbors):
            idx =  kneighbors_idx[m][p]#accessing neighbors index from kneighbors_idx array
            neighbors_label.append(y_train[idx])#accessing the label of the neighbors from the ytrain

        vote_dict = {q:neighbors_label.count(q) for q in neighbors_label}    
        yhat = np.append(yhat,max(vote_dict, key=vote_dict.get))
        yhat = yhat.astype(int)
    return yhat

X = np.genfromtxt("iris_data.txt")
y = np.genfromtxt("iris_target.txt")
k = int(input("Enter the value of number of neighbors: "))
X_train,X_test,y_train, y_test = train_test_split(X, y, random_state = 709)
distances = getDistances(X_test,X_train)
kneighbors_idx = getNeighborsIndex(distances,X_test,X_train, num_neighbors = k)
yhat=predictLabel(kneighbors_idx,num_neighbors = k)
print("Test Error rate is: ",np.mean(yhat!=y_test))       


Enter the value of number of neighbors: 3
Test Error rate is:  0.05263157894736842
Execution time:  4.343353748321533 seconds


# iris.txt and K = 1

In [551]:
import numpy as np
import math
import time
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def getDistances(X_test,X_train):
    distances = []
    """Function to calculate Euclidian distance between the test data point and training data point"""
    for j in range(X_test.shape[0]):
        for i in range(X_train.shape[0]):    
            distances.append(math.sqrt(sum(((X_test[j]-X_train[i])**2))))
    distances = np.array(distances).reshape(X_test.shape[0],X_train.shape[0])# each row stores disnces to corresponding test set.eg. row0, contains distance between test set1 and 112 training data
    return distances
def getNeighborsIndex(distances,X_test,X_train):
    """Function to find the indexes of the neighbors""" 
    neighbors_idx = np.array([])
    for n in range(distances.shape[0]):    
        neighbors_idx = np.append(neighbors_idx , np.argpartition(distances[n], num_neighbors))
    neighbors_idx = neighbors_idx.reshape(X_test.shape[0],X_train.shape[0])#first 'num_neighbors' columns of the neighbors_idx contains the index of the nearest neighbors in training point
    kneighbors_idx = neighbors_idx[:,:num_neighbors].astype(int)
    return kneighbors_idx

def predictLabel(kneighbors_idx,num_neighbors=1):
    """Function to predict the labels"""
    yhat= np.array([])
    for m in range(kneighbors_idx.shape[0]):
        neighbors_label = []
        for p in range(num_neighbors):
            idx =  kneighbors_idx[m][p]#accessing neighbors index from kneighbors_idx array
            neighbors_label.append(y_train[idx])#accessing the label of the neighbors from the ytrain

        vote_dict = {q:neighbors_label.count(q) for q in neighbors_label}    
        yhat = np.append(yhat,max(vote_dict, key=vote_dict.get))
        yhat = yhat.astype(int)
    return yhat

X = np.genfromtxt("iris_data.txt")
y = np.genfromtxt("iris_target.txt")
start = time.time()
k=1
X_train,X_test,y_train, y_test = train_test_split(X, y, random_state = 709)
distances = getDistances(X_test,X_train)
kneighbors_idx = getNeighborsIndex(distances,X_test,X_train)
yhat=predictLabel(kneighbors_idx,num_neighbors=k)
print("Test Error rate is: ",np.mean(yhat!=y_test))
print("Execution time: ", time.time()-start, "seconds")       
    

Test Error rate is:  0.07894736842105263
Execution time:  0.023937463760375977 seconds


# Validating iris.txt and K = 1

In [552]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
import time
start = time.time()
k=1
X = np.genfromtxt("iris_data.txt")
y = np.genfromtxt("iris_target.txt")
X_train,X_test,y_train, y_test = train_test_split(X, y, random_state = 709)
knn = KNeighborsClassifier(n_neighbors = 1)
knn.fit(X_train, y_train)
knn.predict(X_test)
score = knn.score(X_test, y_test)
print("Test Error rate is : ",(1-score))
print("Execution time: ", time.time()-start, "seconds")

Test Error rate is :  0.07894736842105265
Execution time:  0.015957117080688477 seconds


# ionosphere.txt and K = 1

In [524]:
import numpy as np
import math
import time
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def getDistances(X_test,X_train):
    distances = []
    """Function to calculate Euclidian distance between the test data point and training data point"""
    for j in range(X_test.shape[0]):
        for i in range(X_train.shape[0]):    
            distances.append(math.sqrt(sum(((X_test[j]-X_train[i])**2))))
    distances = np.array(distances).reshape(X_test.shape[0],X_train.shape[0])# each row stores disnces to corresponding test set.eg. row0, contains distance between test set1 and 112 training data
    return distances
def getNeighborsIndex(distances,X_test,X_train):
    # finding nearest neighbors
    neighbors_idx = np.array([])
    for n in range(distances.shape[0]):    
        neighbors_idx = np.append(neighbors_idx , np.argpartition(distances[n], num_neighbors))
    neighbors_idx = neighbors_idx.reshape(X_test.shape[0],X_train.shape[0])#first 'num_neighbors' columns of the neighbors_idx contains the index of the nearest neighbors in training point
    kneighbors_idx = neighbors_idx[:,:num_neighbors].astype(int)
    return kneighbors_idx
    
def predictLabel(kneighbors_idx,num_neighbors=1):
    #prediction:
    yhat= np.array([])
    for m in range(kneighbors_idx.shape[0]):
        neighbors_label = []
        for p in range(num_neighbors):
            idx =  kneighbors_idx[m][p]#accessing neighbors index from kneighbors_idx array
            neighbors_label.append(y_train[idx])#accessing the label of the neighbors from the ytrain

        vote_dict = {q:neighbors_label.count(q) for q in neighbors_label}    
        yhat = np.append(yhat,max(vote_dict, key=vote_dict.get))
        yhat = yhat.astype(int)
    return yhat

start = time.time()
X = np.genfromtxt("ionosphere.txt", delimiter = ",", usecols = np.arange(34))
y = np.genfromtxt("ionosphere.txt", delimiter = ",", usecols = 34, dtype = int)
X_train,X_test,y_train, y_test = train_test_split(X, y, random_state = 709)
distances = getDistances(X_test,X_train)
kneighbors_idx = getNeighborsIndex(distances,X_test,X_train)
yhat = predictLabel(kneighbors_idx,num_neighbors=1)
print("Test Error rate is: ",np.mean(yhat!=y_test))
print("Execution time: ", time.time()-start, "seconds")
        
    

0.8977272727272727
0.3924741744995117


# Validating ionosphere.txt and K = 1

In [547]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
import time
start = time.time()
X = np.genfromtxt("ionosphere.txt", delimiter = ",", usecols = np.arange(34))
y = np.genfromtxt("ionosphere.txt", delimiter = ",", usecols = 34, dtype = int)
knn = KNeighborsClassifier(n_neighbors = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 709)
knn.fit(X_train, y_train)
knn.predict(X_test)
print("Test Error rate is : ",(1-score))
print("Execution time: ", time.time()-start, "seconds")

0.8977272727272727
0.0249326229095459


# iris.txt and K = 3

In [553]:
import numpy as np
import math
import time
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def getDistances(X_test,X_train):
    distances = []
    """Function to calculate Euclidian distance between the test data point and training data point"""
    for j in range(X_test.shape[0]):
        for i in range(X_train.shape[0]):    
            distances.append(math.sqrt(sum(((X_test[j]-X_train[i])**2))))
    distances = np.array(distances).reshape(X_test.shape[0],X_train.shape[0])# each row stores disnces to corresponding test set.eg. row0, contains distance between test set1 and 112 training data
    return distances
def getNeighborsIndex(distances,X_test,X_train,num_neighbors=1):
    """Function to find the indexes of the neighbors""" 
    neighbors_idx = np.array([])
    for n in range(distances.shape[0]):    
        neighbors_idx = np.append(neighbors_idx , np.argpartition(distances[n], num_neighbors))
    neighbors_idx = neighbors_idx.reshape(X_test.shape[0],X_train.shape[0])#first 'num_neighbors' columns of the neighbors_idx contains the index of the nearest neighbors in training point
    kneighbors_idx = neighbors_idx[:,:num_neighbors].astype(int)
    return kneighbors_idx

def predictLabel(kneighbors_idx,num_neighbors=1):
    """Function to predict the labels"""
    yhat= np.array([])
    for m in range(kneighbors_idx.shape[0]):
        neighbors_label = []
        for p in range(num_neighbors):
            idx =  kneighbors_idx[m][p]#accessing neighbors index from kneighbors_idx array
            neighbors_label.append(y_train[idx])#accessing the label of the neighbors from the ytrain

        vote_dict = {q:neighbors_label.count(q) for q in neighbors_label}    
        yhat = np.append(yhat,max(vote_dict, key=vote_dict.get))
        yhat = yhat.astype(int)
    return yhat

start = time.time()
k=3
X = np.genfromtxt("iris_data.txt")
y = np.genfromtxt("iris_target.txt")
X_train,X_test,y_train, y_test = train_test_split(X, y, random_state = 709)
distances = getDistances(X_test,X_train)
kneighbors_idx = getNeighborsIndex(distances,X_test,X_train,num_neighbors=k)
yhat=predictLabel(kneighbors_idx,num_neighbors=k)
print("Test Error rate is: ",np.mean(yhat!=y_test))
print("Execution time: ", time.time()-start, "seconds")

Test Error rate is:  0.05263157894736842
Execution time:  0.026899099349975586 seconds


# Validating iris.txt and K = 3

In [554]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
import time
start = time.time()
knn = KNeighborsClassifier(n_neighbors = 3)
X = np.genfromtxt("iris_data.txt")
y = np.genfromtxt("iris_target.txt")
X_train,X_test,y_train, y_test = train_test_split(X, y, random_state = 709)
knn.fit(X_train, y_train)
knn.predict(X_test)
score = knn.score(X_test, y_test)
print("Test Error rate is : ",(1-score))
print("Execution time: ", time.time()-start, "seconds")

Test Error rate is :  0.052631578947368474
Execution time:  0.008975982666015625 seconds


# ionosphere.txt and K = 3

In [543]:
import numpy as np
import math
import time
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def getDistances(X_test,X_train):
    distances = []
    """Function to calculate Euclidian distance between the test data point and training data point"""
    for j in range(X_test.shape[0]):
        for i in range(X_train.shape[0]):    
            distances.append(math.sqrt(sum(((X_test[j]-X_train[i])**2))))
    distances = np.array(distances).reshape(X_test.shape[0],X_train.shape[0])# each row stores disnces to corresponding test set.eg. row0, contains distance between test set1 and 112 training data
    return distances
def getNeighborsIndex(distances,X_test,X_train,num_neighbors):
    # finding nearest neighbors
    neighbors_idx = np.array([])
    for n in range(distances.shape[0]):    
        neighbors_idx = np.append(neighbors_idx , np.argpartition(distances[n], num_neighbors))
    neighbors_idx = neighbors_idx.reshape(X_test.shape[0],X_train.shape[0])#first 'num_neighbors' columns of the neighbors_idx contains the index of the nearest neighbors in training point
    kneighbors_idx = neighbors_idx[:,:num_neighbors].astype(int)
    return kneighbors_idx
    
def predictLabel(kneighbors_idx,num_neighbors=1):
    #prediction:
    yhat= np.array([])
    for m in range(kneighbors_idx.shape[0]):
        neighbors_label = []
        for p in range(num_neighbors):
            idx =  kneighbors_idx[m][p]#accessing neighbors index from kneighbors_idx array
            neighbors_label.append(y_train[idx])#accessing the label of the neighbors from the ytrain
        vote_dict = {q:neighbors_label.count(q) for q in neighbors_label}    
        yhat = np.append(yhat,max(vote_dict, key=vote_dict.get))
        yhat = yhat.astype(int)
    return yhat

start = time.time()
X = np.genfromtxt("ionosphere.txt", delimiter = ",", usecols = np.arange(34))
y = np.genfromtxt("ionosphere.txt", delimiter = ",", usecols = 34, dtype = int)
X_train,X_test,y_train, y_test = train_test_split(X, y, random_state = 709)
k = 3
distances = getDistances(X_test,X_train)
kneighbors_idx = getNeighborsIndex(distances,X_test,X_train,num_neighbors=k)
yhat = predictLabel(kneighbors_idx,num_neighbors=k)
print("Test Error rate is: ",np.mean(yhat!=y_test))
print("Execution time: ", time.time()-start, "seconds")

Test Error rate is:  0.10227272727272728
Execution time:  0.24436068534851074 seconds


# Validating ionosphere.txt and K = 1

In [544]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
import time
start = time.time()
X = np.genfromtxt("ionosphere.txt", delimiter = ",", usecols = np.arange(34))
y = np.genfromtxt("ionosphere.txt", delimiter = ",", usecols = 34, dtype = int)
knn = KNeighborsClassifier(n_neighbors = 3)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 709)
knn.fit(X_train, y_train)
knn.predict(X_test)
score = knn.score(X_test, y_test)
print("Test Error rate is : ",(1-score))
print("Execution time: ", time.time()-start, "seconds")

Test Error rate is :  0.10227272727272729
Execution time:  0.034946441650390625 seconds


# The average false p-value for the Nearest Neighbour conformal pre-dictor applied to iris.txt

In [566]:
#Conformal Predictors#Transductive classifier
import numpy as np
import math
import time
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris = load_iris()

def getConformityMeausres(conformity_measure='ratio_of_distances',min_oppositeclass_neighbor_distance=1, min_sameclass_neighbor_distance=1, nearest_neighbor_distance=1):
    try:
        if conformity_measure == 'ratio_of_distances':
            CM = min_oppositeclass_neighbor_distance/min_sameclass_neighbor_distance             
            
        if conformity_measure == 'oppositeclass_distance':
            CM = min_oppositeclass_neighbor_distance
        if conformity_measure == 'sameclass_distance':
            CM = 1/min_sameclass_neighbor_distance
        if conformity_measure == 'nearest_neighbor_distance':
            CM = nearest_neighbor_distance
    except:
        print('Error in calculating conformity measure for {} th training data'.format(i)+str(p))
        print(min_oppositeclass_neighbor_distance)
        print(min_sameclass_neighbor_distance)
    return CM

X = np.genfromtxt("iris_data.txt")
y = np.genfromtxt("iris_target.txt")
X_train,X_test,y_train, y_test = train_test_split(X, y, random_state = 709)

#X_train = np.array([[0,3],[2,2],[3,3],[-1,1],[-1,-1],[0,1]])
#y_train = np.array([1,1,1,-1,-1,-1])
#X_test = np.array([[0,0]])
#y_test = np.array([1])
start = time.time()
labels = sorted(list(set(y_train)))
conformity_measure = 'ratio_of_distances'
alpha=[]

X_fit = (X_train-X_train[:,None])**2
X_distances = np.sqrt(X_fit.sum(axis=-1))#all diagonal elements are zero.
for i in range(X_distances.shape[0]):
    sameclass_distances = []
    oppositeclass_distances = []
    min_sameclass_neighbor_distance = 0
    min_oppositeclass_neighbor_distance = 0
    nearest_neighbor_distance = 0
    sameclass_index = np.array(np.where(y_train == y_train[i]))
    oppositeclass_index = list(set(np.arange(X_distances.shape[0]).flat)-set(sameclass_index.flat))
    sameclass_index=list(sameclass_index.flat)
    if (conformity_measure == 'ratio_of_distances') or (conformity_measure == 'oppositeclass_distance'):
        for j in oppositeclass_index:
            oppositeclass_distances.append(X_distances[i][j])
        min_oppositeclass_neighbor_distance =oppositeclass_distances[np.argmin(oppositeclass_distances)]
   
    if (conformity_measure == 'ratio_of_distances') or (conformity_measure == 'sameclass_distance' ):
        for k in sameclass_index:
            sameclass_distances.append(X_distances[i][k])
        min_sameclass_neighbor_distance = sameclass_distances[np.argpartition(sameclass_distances,2)[1]]#s 0 is the first min .we igore it and take second smallest
    if (conformity_measure == 'nearest_neighbor_distance'):
        nearest_neighbor_distance = X_distances[i,np.argpartition(X_distances[i],2)[1]]
    alpha.append(getConformityMeausres(conformity_measure='ratio_of_distances',min_oppositeclass_neighbor_distance=min_oppositeclass_neighbor_distance, min_sameclass_neighbor_distance=min_sameclass_neighbor_distance))


#************************************prediction part*****************************************
p_values = []
for p in range(X_test.shape[0]):
    test_distances=[]
    for q in range(X_train.shape[0]):
        test_distances.append(math.sqrt(sum(((X_test[p]-X_train[q])**2))))
    for test_label in labels:
        test_sameclass_distances = []
        test_oppositeclass_distances = []
        test_min_sameclass_neighbor_distance = 0
        test_min_oppositeclass_neighbor_distance = 0
        test_nearest_neighbor_distance = 0
        test_sameclass_index = np.array(np.where(y_train == test_label))
        test_oppositeclass_index = list(set(np.arange(len(test_distances)).flat)-set(test_sameclass_index.flat))
        test_sameclass_index=list(test_sameclass_index.flat)
        if (conformity_measure == 'ratio_of_distances') or (conformity_measure == 'oppositeclass_distance'):
            for r in test_oppositeclass_index:
                test_oppositeclass_distances.append(test_distances[r])
            test_min_oppositeclass_neighbor_distance = test_oppositeclass_distances[np.argmin(test_oppositeclass_distances)]
        if (conformity_measure == 'ratio_of_distances') or (conformity_measure == 'sameclass_distance' ):
            for s in test_sameclass_index:
                test_sameclass_distances.append(test_distances[s])
            test_min_sameclass_neighbor_distance = test_sameclass_distances[np.argmin(test_sameclass_distances)]
        if conformity_measure == 'nearest_neighbor_distance':
            test_nearest_neighbor_distance = test_distances[i,np.argmin(test_distances[p])]
        if int(test_min_sameclass_neighbor_distance) == 0:
            test_alpha = math.inf
        else:
            test_alpha=getConformityMeausres(conformity_measure='ratio_of_distances',min_oppositeclass_neighbor_distance=test_min_oppositeclass_neighbor_distance, min_sameclass_neighbor_distance=test_min_sameclass_neighbor_distance)
        alpha.append(test_alpha)
        sorted_alpha = list(set(sorted(alpha)))#removing the duplicates to find the ranks
        rank_test_alpha = sorted_alpha.index(test_alpha)+1
        p_values.append(rank_test_alpha/(X_train.shape[0]+1))
        alpha.pop()
        
p_values = np.array(p_values).reshape(X_test.shape[0],len(labels))

#***********False P value Calculation***************
false_pvalues = np.zeros((X_test.shape[0],len(labels)-1))
for u in range(p_values.shape[0]):   
        for w in range(len(labels)):
            if y_test[u] == labels[w]:                
                false_pvalues[u]=np.delete(p_values[u],w)
                break
average_false_pvalue = (sum(sum(false_pvalues)))/false_pvalues.size
print("Average False p-value for iris.txt is: ",average_false_pvalue)
print("Execution time: ", time.time()-start, "seconds")


Average False p-value for iris.txt is:  0.6326269212855147
Execution time:  0.054965972900390625 seconds


  if sys.path[0] == '':


# the average false p-value for the Nearest Neighbour conformal pre-dictor applied to ionosphere.txt.

In [3]:
#Conformal Predictors#Transductive classifier
import numpy as np
import math
import time
from sklearn.model_selection import train_test_split

def getConformityMeausres(conformity_measure='ratio_of_distances',min_oppositeclass_neighbor_distance=1, min_sameclass_neighbor_distance=1, nearest_neighbor_distance=1):
    try:
        if conformity_measure == 'ratio_of_distances':
            CM = min_oppositeclass_neighbor_distance/min_sameclass_neighbor_distance             
            
        if conformity_measure == 'oppositeclass_distance':
            CM = min_oppositeclass_neighbor_distance
        if conformity_measure == 'sameclass_distance':
            CM = 1/min_sameclass_neighbor_distance
        if conformity_measure == 'nearest_neighbor_distance':
            CM = nearest_neighbor_distance
    except:
        print('Error in calculating conformity measure for {} th training data'.format(i)+str(p))
        print(min_oppositeclass_neighbor_distance)
        print(min_sameclass_neighbor_distance)
    return CM

X = np.genfromtxt("ionosphere.txt", delimiter = ",", usecols = np.arange(34))
y = np.genfromtxt("ionosphere.txt", delimiter = ",", usecols = 34, dtype = int)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 709)

#X_train = np.array([[0,3],[2,2],[3,3],[-1,1],[-1,-1],[0,1]])
#y_train = np.array([1,1,1,-1,-1,-1])
#X_test = np.array([[0,0]])
#y_test = np.array([1])
start = time.time()

labels = sorted(list(set(y_train)))
conformity_measure = 'ratio_of_distances'
alpha=[]

X_fit = (X_train-X_train[:,None])**2
X_distances = np.sqrt(X_fit.sum(axis=-1))#all diagonal elements are zero.
for i in range(X_distances.shape[0]):
    sameclass_distances = []
    oppositeclass_distances = []
    min_sameclass_neighbor_distance = 0
    min_oppositeclass_neighbor_distance = 0
    nearest_neighbor_distance = 0
    sameclass_index = np.array(np.where(y_train == y_train[i]))
    oppositeclass_index = list(set(np.arange(X_distances.shape[0]).flat)-set(sameclass_index.flat))
    sameclass_index=list(sameclass_index.flat)
    if (conformity_measure == 'ratio_of_distances') or (conformity_measure == 'oppositeclass_distance'):
        for j in oppositeclass_index:
            oppositeclass_distances.append(X_distances[i][j])
        min_oppositeclass_neighbor_distance =oppositeclass_distances[np.argmin(oppositeclass_distances)]
   
    if (conformity_measure == 'ratio_of_distances') or (conformity_measure == 'sameclass_distance' ):
        for k in sameclass_index:
            sameclass_distances.append(X_distances[i][k])
        min_sameclass_neighbor_distance = sameclass_distances[np.argpartition(sameclass_distances,2)[1]]#s 0 is the first min .we igore it and take second smallest
    if (conformity_measure == 'nearest_neighbor_distance'):
        nearest_neighbor_distance = X_distances[i,np.argpartition(X_distances[i],2)[1]]
    alpha.append(getConformityMeausres(conformity_measure='ratio_of_distances',min_oppositeclass_neighbor_distance=min_oppositeclass_neighbor_distance, min_sameclass_neighbor_distance=min_sameclass_neighbor_distance))


#************************************prediction part*****************************************
p_values = []
for p in range(X_test.shape[0]):
    test_distances=[]
    for q in range(X_train.shape[0]):
        test_distances.append(math.sqrt(sum(((X_test[p]-X_train[q])**2))))
    for test_label in labels:
        test_sameclass_distances = []
        test_oppositeclass_distances = []
        test_min_sameclass_neighbor_distance = 0
        test_min_oppositeclass_neighbor_distance = 0
        test_nearest_neighbor_distance = 0
        test_sameclass_index = np.array(np.where(y_train == test_label))
        test_oppositeclass_index = list(set(np.arange(len(test_distances)).flat)-set(test_sameclass_index.flat))
        test_sameclass_index=list(test_sameclass_index.flat)
        if (conformity_measure == 'ratio_of_distances') or (conformity_measure == 'oppositeclass_distance'):
            for r in test_oppositeclass_index:
                test_oppositeclass_distances.append(test_distances[r])
            test_min_oppositeclass_neighbor_distance = test_oppositeclass_distances[np.argmin(test_oppositeclass_distances)]
        if (conformity_measure == 'ratio_of_distances') or (conformity_measure == 'sameclass_distance' ):
            for s in test_sameclass_index:
                test_sameclass_distances.append(test_distances[s])
            test_min_sameclass_neighbor_distance = test_sameclass_distances[np.argmin(test_sameclass_distances)]
        if conformity_measure == 'nearest_neighbor_distance':
            test_nearest_neighbor_distance = test_distances[i,np.argmin(test_distances[p])]
        if int(test_min_sameclass_neighbor_distance) == 0:
            test_alpha = math.inf
        else:
            test_alpha=getConformityMeausres(conformity_measure='ratio_of_distances',min_oppositeclass_neighbor_distance=test_min_oppositeclass_neighbor_distance, min_sameclass_neighbor_distance=test_min_sameclass_neighbor_distance)
        alpha.append(test_alpha)
        sorted_alpha = list(set(sorted(alpha)))#removing the duplicates to find the ranks
        rank_test_alpha = sorted_alpha.index(test_alpha)+1
        p_values.append(rank_test_alpha/(X_train.shape[0]+1))
        alpha.pop()
        
p_values = np.array(p_values).reshape(X_test.shape[0],len(labels))

#***********False P value Calculation***************
false_pvalues = np.zeros((X_test.shape[0],len(labels)-1))
for u in range(p_values.shape[0]):   
        for w in range(len(labels)):
            if y_test[u] == labels[w]:                
                false_pvalues[u]=np.delete(p_values[u],w)
                break
average_false_pvalue = (sum(sum(false_pvalues)))/false_pvalues.size
print("Average False p-value for ionosphere.txt is: ",average_false_pvalue)
print("Execution time: ", time.time()-start, "seconds")

Average False p-value for ionosphere.txt is:  0.47753099173553726
Execution time:  0.7199456691741943 seconds


##Experiments with different conformity measures
##lines 78 to 89 and the function getConformityMeausres handles diffrent conformity measures.Give the value of conformity measure to be used to the variable 'conformity_measure' at the begining of the script

##Justifying your convention for 0/0
Did not encountered a 0/0 situation.Instead faced a x/0 situation , when calculating conformity measure for a test data. Ie when minimum distance to same class becomes zero. That means X_train and X_test are same. Which shows that X+-test should have a maximum conformity measure.So I assigned math.inf as confmity measure. When sorted the test alha will have the highest rank.


##Implementing the K Nearest Neighbours algorithm for a general K.
The first cell of the this jupiter notebook involves the implementation of kNN algorithm for general K. Which asks the user to input the value of K

