In [3]:
import numpy as np

In [4]:
def read(training_path='regression/trainingData.csv',
         test_path='regression/testData.csv'):
    '''
    Load the training and test data sets from comma-separated files.

    :param training_path: source of the training data; anything accepted by
        np.genfromtxt (a file path or a file-like object). Defaults to the
        path that was previously hard-coded.
    :param test_path: source of the test data, same conventions as above.
    :return: tuple (trainingData, testData) of arrays as parsed by
        np.genfromtxt with delimiter=','
    '''
    trainingData = np.genfromtxt(training_path, delimiter=',')
    testData = np.genfromtxt(test_path, delimiter=',')

    return trainingData, testData

In [5]:
def calculate_distances(traininginstance, queryinstance):
    '''
    Euclidean distance from one query instance to every training instance.

    :param traininginstance: 2-D array of training features, one row per instance
    :param queryinstance: feature vector of the query; it is broadcast against
        the rows of traininginstance
    :return: 1-D array holding one distance per training row
    --------------------------------------------------------------
    Called from inside predict().
    '''
    offsets = queryinstance - traininginstance
    # Summing along axis=1 collapses the feature dimension, one value per row.
    squared_norms = np.square(offsets).sum(axis=1)
    return np.sqrt(squared_norms)

In [6]:
def predict(traininginstance, queryinstance, trainingData, k):
    '''
    Predict the target of one query instance with three kNN variants.

    :param traininginstance: feature columns of the training rows (2-D array)
    :param queryinstance: feature vector of the query instance
    :param trainingData: full training array; its last column is read as the
        target value of each training row
    :param k: number of nearest neighbours to use
    :return: tuple (simple mean, inverse-distance-weighted mean,
        squared-inverse-distance-weighted mean) of the neighbours' targets

    Steps:
    a) compute the distance from the query to every training row via
       calculate_distances()
    b) pick the k rows with the smallest distance
    c) combine the target values of those k rows, unweighted and weighted
       by 1/d and 1/d**2

    If one or more of the k neighbours lie at distance 0 from the query, the
    weights 1/d would be infinite and the weighted results would become nan.
    In that case both weighted predictions are the mean target of the
    zero-distance neighbours (they fully determine the prediction).
    '''
    c_distance = calculate_distances(traininginstance, queryinstance)

    # Indices of the k closest training rows; computed once and reused.
    nearest = np.argsort(c_distance)[0:k]
    neighbour_targets = trainingData[nearest, -1]
    neighbour_distances = c_distance[nearest]

    # Simple KNN
    predict_Simple_KNN = np.mean(neighbour_targets)

    # Guard against division by zero for exact matches.
    exact_match = neighbour_distances == 0
    if np.any(exact_match):
        exact_prediction = np.mean(neighbour_targets[exact_match])
        return predict_Simple_KNN, exact_prediction, exact_prediction

    # Inverse distance
    inverse_weights = 1 / neighbour_distances
    predict_Inverse_Distance_Weighted = (
        np.sum(neighbour_targets * inverse_weights) / np.sum(inverse_weights))

    # Square inverse distance weighted
    squared_weights = np.square(inverse_weights)
    predict_Square_Inverse_Distance_Weighted = (
        np.sum(neighbour_targets * squared_weights) / np.sum(squared_weights))

    return predict_Simple_KNN, predict_Inverse_Distance_Weighted, predict_Square_Inverse_Distance_Weighted

In [7]:
def calculate_r2(targetData, predictedData):
    '''
    Compute the R2 score (coefficient of determination).

    :param targetData: actual target values (array-like)
    :param predictedData: predicted values, same length as targetData (array-like)
    :return: 1 - SS_res / SS_tot, where SS_res is the sum of squared
        prediction errors and SS_tot is the sum of squared deviations of the
        targets from their mean

    When all targets are equal SS_tot is 0 and the ratio is undefined; in that
    case 1.0 is returned for a perfect prediction and 0.0 otherwise, instead
    of nan / -inf. Empty input yields nan.
    '''
    targets = np.asarray(targetData, dtype=float)
    predictions = np.asarray(predictedData, dtype=float)

    if targets.size == 0:
        return float('nan')

    sum_square = np.sum(np.square(predictions - targets))
    sum_square_mean = np.sum(np.square(np.mean(targets) - targets))

    # Constant targets: avoid dividing by zero.
    if sum_square_mean == 0:
        return 1.0 if sum_square == 0 else 0.0

    return 1 - sum_square / sum_square_mean

In [8]:
if __name__ == '__main__':
    # Number of nearest neighbours used for every prediction.
    k = 3

    # One list per kNN variant, filled with one prediction per test row.
    predictionSimpleKNN = []
    predictionInverseDistanceWeighted = []
    predictionSquareInverseDistanceWeighted = []

    # Load the training and test data files.
    trainingData, testData = read()
    len_test_data = len(testData)

    # Predict each test row from its features (every column except the last).
    for p in range(len_test_data):
        (predict_Simple_KNN,
         predict_Inverse_Distance_Weighted,
         predict_Square_Inverse_Distance_Weighted) = predict(
            trainingData[:, 0:-1], testData[p, 0:-1], trainingData, k)

        predictionSimpleKNN.append(predict_Simple_KNN)
        predictionInverseDistanceWeighted.append(predict_Inverse_Distance_Weighted)
        predictionSquareInverseDistanceWeighted.append(predict_Square_Inverse_Distance_Weighted)

    # Score every variant against the actual targets (last test column).
    actualTargets = testData[:, -1]
    R2_score_Simple_KNN = calculate_r2(
        actualTargets, np.array(predictionSimpleKNN))
    R2_score_Inverse_Distance_Weighted = calculate_r2(
        actualTargets, np.array(predictionInverseDistanceWeighted))
    R2_score_Square_Inverse_Distance_Weighted = calculate_r2(
        actualTargets, np.array(predictionSquareInverseDistanceWeighted))

    for label, score in (
            ("R2 Score for Simple KNN = ", R2_score_Simple_KNN),
            ("R2 Score for Inverse Distance Weighted = ", R2_score_Inverse_Distance_Weighted),
            ("R2 Score for Square Inverse Distance Weighted = ", R2_score_Square_Inverse_Distance_Weighted)):
        print(label, score)
    

R2 Score for Simple KNN =  0.8165934427865968
R2 Score for Inverse Distance Weighted =  0.8185732982178427
R2 Score for Square Inverse Distance Weighted =  0.8197170982577303
