In [None]:
# Creating a nearest neighbors algorithm to predict the label of the point

class KNearestNeighbors(object):
    """Brute-force k-nearest-neighbours classifier for two labels per sample.

    Label arrays (``Y_train`` / ``Y_test``) are two-dimensional; column 2 is
    treated as the floor label and column 3 as the building label.  The
    remaining columns are ignored by this class.
    """

    # Weights of the floor and building accuracies in the combined score.
    FLOOR_WEIGHT = 0.67
    BUILDING_WEIGHT = 0.33

    def points_distance(self, data, sample):
        """Return the Euclidean distance from ``sample`` to every row of ``data``."""
        return np.sqrt(np.sum((data - sample) ** 2, axis=1))

    def fit(self, X_train, Y_train):
        """Store the training features and labels (k-NN has no training step)."""
        self.X_train = X_train
        self.Y_train = Y_train

    @staticmethod
    def nearest_neighbors(self, sample, k):
        """Find the ``k`` training rows closest to ``sample``.

        NOTE: this is a static method that takes the fitted instance
        explicitly as its first argument, i.e. it is called as
        ``knn.nearest_neighbors(knn, sample, k)``.  The signature is kept
        that way so existing callers continue to work.

        Returns:
            A float array of shape ``(min(k, n_train), 2)`` ordered by
            increasing distance; column 0 is the distance and column 1 is the
            row index into the training data (stored as a float).
        """
        distances = np.asarray(self.points_distance(self.X_train, sample))
        order = np.argsort(distances)[:k]
        return np.column_stack((distances[order], order))

    @staticmethod
    def _majority_label(labels):
        """Return the most frequent value in ``labels`` (smallest value on ties)."""
        values, counts = np.unique(labels, return_counts=True)
        return values[np.argmax(counts)]

    def predict(self, sample, k):
        """Predict ``[floor, building]`` for one sample by majority vote.

        Args:
            sample: 1-D feature vector.
            k: number of neighbours that vote; must be at least 1.
        """
        # The builtin ``int`` is used because the ``np.int`` alias no longer
        # exists in current NumPy releases.
        neighbor_rows = self.nearest_neighbors(self, sample, k)[:, 1].astype(int)
        pred_floor = self._majority_label(self.Y_train[neighbor_rows, 2])
        pred_building = self._majority_label(self.Y_train[neighbor_rows, 3])
        return [pred_floor, pred_building]

    def _weighted_accuracy(self, X_test, Y_test, k):
        """Return the weighted floor/building accuracy on a test set for one ``k``."""
        preds = np.asarray([self.predict(sample, k) for sample in X_test])
        n_samples = Y_test.shape[0]
        floor_accuracy = np.sum(preds[:, 0] == Y_test[:, 2]) / n_samples
        building_accuracy = np.sum(preds[:, 1] == Y_test[:, 3]) / n_samples
        return (floor_accuracy * self.FLOOR_WEIGHT
                + building_accuracy * self.BUILDING_WEIGHT)

    def score(self, X_test, Y_test, knn):
        """Calculate the accuracy of the algorithm against a validation set.

        Args:
            X_test: 2-D array of test features, one row per sample.
            Y_test: 2-D array of true labels (columns 2 and 3 are compared).
            knn: a single ``k`` or a list/tuple of ``k`` values to evaluate.

        Returns:
            A 1-D array with one weighted accuracy
            (0.67 * floor + 0.33 * building) per ``k``; a single ``k`` gives
            a one-element array.
        """
        k_values = knn if isinstance(knn, (list, tuple)) else [knn]
        return np.asarray(
            [self._weighted_accuracy(X_test, Y_test, k) for k in k_values]
        )

In [None]:
# Creating a clustered nearest neighbors algorithm to predict the label of the point

class clusteredKNN(object):
    """k-NN classifier that searches only the k-means cluster nearest a sample.

    ``fit`` partitions the training rows with k-means; ``predict_knn`` then
    runs ``KNearestNeighbors`` on the rows of the cluster whose centroid is
    closest to the query sample.  Label arrays are two-dimensional; column 2
    is scored as the floor label and column 3 as the building label.
    """

    def find_distances(self, sample, data):
        """Return the Euclidean distance from ``sample`` to every row of ``data``."""
        return np.sqrt(np.sum((data - sample) ** 2, axis=1))

    def fit(self, X_train, Y_train, k, max_iter=300):
        """Cluster the training data into ``k`` groups with k-means.

        Initial centroids are ``k`` distinct training rows chosen with
        ``random.sample``.  Assignment and centroid updates alternate until
        the assignments stop changing or ``max_iter`` passes have run.

        Args:
            X_train: 2-D array of training features, one row per sample.
            Y_train: 2-D array of training labels aligned with ``X_train``.
            k: number of clusters, between 1 and the number of training rows.
            max_iter: upper bound on the number of assignment passes
                (at least one pass is always performed).

        Raises:
            ValueError: if ``k`` is smaller than 1, or (from
                ``random.sample``) larger than the number of training rows.
        """
        if k < 1:
            raise ValueError("k must be at least 1")
        # Work in floating point so centroid means are not truncated when the
        # training features have an integer dtype.
        data = np.asarray(X_train, dtype=float)
        seeds = random.sample(range(data.shape[0]), k)
        centroids = data[seeds, :]  # fancy indexing copies; X_train is untouched
        clusters = None
        for _ in range(max(1, max_iter)):
            # One row of distances per centroid -> shape (k, n_samples).
            distances = np.stack(
                [self.find_distances(centroid, data) for centroid in centroids]
            )
            new_clusters = np.argmin(distances, axis=0)
            if clusters is not None and np.array_equal(new_clusters, clusters):
                break  # assignments are stable, so the centroids are too
            clusters = new_clusters
            for i in range(k):
                members = data[clusters == i]
                # A cluster that lost all its points keeps its previous
                # centroid instead of becoming NaN.
                if members.shape[0]:
                    centroids[i] = members.mean(axis=0)
        self.X_train = X_train
        self.Y_train = Y_train
        # Cluster ids are stored as floats, the dtype find_clusters() has
        # always handed to callers.
        self.clusters = clusters.astype(float)
        self.centroids = centroids

    def find_clusters(self):
        """Return the cluster id assigned to each training row by ``fit``."""
        return self.clusters

    def find_centroids(self):
        """Return the ``(k, n_features)`` array of centroids computed by ``fit``."""
        return self.centroids

    def predict_knn(self, sample, k):
        """Predict ``[floor, building]`` for a sample using k-NN in its cluster.

        The sample is delegated to the cluster with the nearest centroid and
        ``KNearestNeighbors`` votes among that cluster's training rows.  If
        the cluster has no training rows, a two-row placeholder training set
        (features all 100, labels all 0) is used, so the prediction is
        ``[0, 0]``.
        """
        delegate = np.argmin(self.find_distances(sample, self.centroids))
        members = self.clusters == delegate
        X = self.X_train[members]
        Y = self.Y_train[members]
        if Y.size == 0:
            X = np.full((2, self.X_train.shape[1]), 100)
            # Sized from the label array so the label columns read by the
            # k-NN vote exist regardless of the number of features.
            Y = np.full((2, self.Y_train.shape[1]), 0)
        knn = KNearestNeighbors()
        knn.fit(X, Y)
        return knn.predict(sample, k)

    def kmeans_knn_score(self, X_test, Y_test, k):
        """Return the weighted accuracy (0.67 * floor + 0.33 * building).

        Args:
            X_test: 2-D array of test features, one row per sample.
            Y_test: 2-D array of true labels (columns 2 and 3 are compared).
            k: number of neighbours used for every prediction.
        """
        np_preds = np.asarray([self.predict_knn(sample, k) for sample in X_test])
        n_samples = Y_test.shape[0]
        floor_accuracy = np.sum(np_preds[:, 0] == Y_test[:, 2]) / n_samples
        building_accuracy = np.sum(np_preds[:, 1] == Y_test[:, 3]) / n_samples
        return floor_accuracy * 0.67 + building_accuracy * 0.33


In [None]:
# Creating linear regression algorithm to predict the label of the point

class LinearRegression(object):
    """Least-squares linear models (no intercept term) for two labels.

    Label arrays are two-dimensional; column 2 is fitted as the floor label
    and column 3 as the building label.  Predictions are rounded to the
    nearest integer after adding a caller-supplied offset ``alpha``.
    """

    def points_distance(self, data, sample):
        """Return the Euclidean distance from ``sample`` to every row of ``data``."""
        return np.sqrt(np.sum((data - sample) ** 2, axis=1))

    def fit(self, X_train, Y_train):
        """Fit the floor and building coefficient vectors by least squares.

        The Moore-Penrose pseudo-inverse is used rather than inverting
        ``X.T @ X`` directly: it gives the same coefficients when the
        features are full rank and the minimum-norm solution (instead of a
        ``LinAlgError``) when columns are constant or collinear.

        Args:
            X_train: 2-D array of training features, one row per sample.
            Y_train: 2-D array of training labels aligned with ``X_train``.
        """
        self.X_train = X_train
        self.Y_train_floor = Y_train[:, 2]
        self.Y_train_building = Y_train[:, 3]
        pseudo_inverse = np.linalg.pinv(X_train)  # computed once, used twice
        self.b_floor = pseudo_inverse.dot(self.Y_train_floor)
        self.b_building = pseudo_inverse.dot(self.Y_train_building)

    def predict(self, X_test, alpha):
        """Predict rounded floor and building labels for every test row.

        Args:
            X_test: 2-D array of test features, one row per sample.
            alpha: scalar offset added to each raw prediction before rounding.

        Returns:
            A tuple ``(y_floor, y_building)`` of two lists, each holding one
            rounded prediction per test row.
        """
        X_test = np.asarray(X_test)
        y_floor = np.rint(X_test.dot(self.b_floor) + alpha)
        y_building = np.rint(X_test.dot(self.b_building) + alpha)
        return (list(y_floor), list(y_building))

    def score(self, X_test, Y_test, alpha):
        """Calculate the weighted accuracy for one or more ``alpha`` offsets.

        Args:
            X_test: 2-D array of test features, one row per sample.
            Y_test: 2-D array of true labels (columns 2 and 3 are compared).
            alpha: a single offset or an iterable of offsets to evaluate.

        Returns:
            A 1-D array with one weighted accuracy
            (0.67 * floor + 0.33 * building) per offset.
        """
        n_samples = Y_test.shape[0]
        accuracies = []
        for offset in np.atleast_1d(alpha):
            y_floor, y_building = self.predict(X_test, offset)
            floor_accuracy = np.sum(np.asarray(y_floor) == Y_test[:, 2]) / n_samples
            building_accuracy = np.sum(np.asarray(y_building) == Y_test[:, 3]) / n_samples
            accuracies.append(floor_accuracy * 0.67 + building_accuracy * 0.33)
        return np.asarray(accuracies)
