In [3]:
import csv
import numpy

In [25]:
train_data = numpy.loadtxt("data/galaxies_train.csv", delimiter=",", skiprows=1)
test_data = numpy.loadtxt("data/galaxies_test.csv", delimiter=",", skiprows=1)

X_train = train_data[:,1:]
t_train = train_data[:,0]
X_test = test_data[:,1:]
t_test = test_data[:,0]
print("Number of training instances: %i" % X_train.shape[0])
print("Number of test instances: %i" % X_test.shape[0])
print("Number of attributes: %i" % X_train.shape[1])


Number of training instances: 500
Number of test instances: 500
Number of attributes: 10
[0.50876446 0.62521107 0.66252301 0.619162   0.71985599 0.49442102
 0.51949223 0.55780273 0.48060094 0.60915503]


### NOTE: You are supposed to use this strucuture, i.e., 
# the pre-defined functions and variables. If you 
# have difficulties to keep this structure, you ARE 
# ALLOWED to adapt/change the code structure slightly!
# You might also want to add additional functions or
# variables.

class NearestNeighborRegressor:
    
    def __init__(self, n_neighbors=1, dist_measure="euclidean", dist_matrix=None):
        """
        Initializes the model.
        
        Parameters
        ----------
        n_neighbors : The number of nearest neigbhors (default 1)
        dist_measure : The distance measure used (default "euclidean")
        dist_matrix : The distance matrix if needed (default "None")
        """
        
        self.n_neighbors = n_neighbors
        self.dist_measure = dist_measure
        self.dist_matrix = dist_matrix
    
    def fit(self, X, t):
        """
        Fits the nearest neighbor regression model.

        Parameters
        ----------
        X : Array of shape [n_samples, n_features]
        t : Array of length n_samples
        """ 
        
        self.X_train = X
        self.t_train = t
    
    def predict(self, X):
        """
        Computes predictions for a new set of points.

        Parameters
        ----------
        X : Array of shape [n_samples, n_features]

        Returns
        -------
        predictions : Array of length n_samples
        """         
        self.fit(X_train, t_train)
        predictions = []
        for i in range(len(X)):
            closest = (self.distance(X[i]))
            closest = numpy.argsort(closest)
            predictions.append(numpy.mean(self.t_train[closest[:self.n_neighbors]]))
        print(predictions)
        predictions = numpy.array(predictions)
        return predictions
    def distance(self, X_row):
        distance = []
        for i in range(len(self.X_train)):
            distance.append(self.euclDist(X_row, self.X_train[i]))
        return distance
    def euclDist(self,X,Y):
        return numpy.sqrt(numpy.dot((X-Y).T,(X-Y)))
    #TODO: 
        
    def rmse(self, t, tp):
        return numpy.sqrt(numpy.mean((t-tp)**2))   
          
        
        

In [217]:
reg = NearestNeighborRegressor(n_neighbors=3)

pred = reg.predict(X_test)
print(reg.rmse(t_test, pred))

[1.2057945333333333, 1.7062795999999996, 1.3931373333333334, 1.2947684666666666, 1.2247829333333333, 1.1351229, 1.3202671, 1.9155443333333333, 1.2308658, 1.4627713333333332, 1.8977570000000001, 1.4503520333333333, 4.5524206666666664, 1.8520873333333334, 1.198041, 1.5714430000000001, 1.1585854, 0.63464305, 3.9322866666666663, 0.8943876, 1.768069, 1.7244113333333335, 1.6284356666666664, 1.4586396666666666, 1.7390833333333333, 1.1585854, 1.1539769333333334, 1.2777227, 1.2233667333333333, 1.0358142666666665, 1.1586277999999999, 1.73816, 1.7907665333333334, 3.9880303666666665, 1.4443870333333333, 1.4452473333333333, 1.6787433666666667, 1.912782, 1.2987636666666666, 1.8231243333333333, 1.6028720000000003, 1.3654753666666668, 1.2523396, 1.763371, 1.7549456666666667, 2.268326666666667, 3.0841333333333334, 1.0630873666666667, 3.0282016666666665, 1.2559252666666667, 1.6242223333333332, 1.9085233333333331, 0.7892165666666666, 0.9597696666666667, 0.20953376666666665, 0.6995908333333335, 1.16789356