In [1]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from scipy.spatial import distance


class KNN():
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only memorises the training data; every prediction scans the whole
    training set and takes a majority vote over the labels of the closest
    training samples.
    """

    def fit(self, X_train, Y_train):
        """Store the training samples and their labels.

        No feature statistics (mean / standard deviation) are computed, so the
        features are used exactly as they are passed in.

        Args:
            X_train: 2-D array-like, one row per training sample.
            Y_train: 1-D array-like holding one label per row of ``X_train``.

        Raises:
            ValueError: if ``X_train`` and ``Y_train`` have different lengths.
        """
        X_train = np.asarray(X_train)
        Y_train = np.asarray(Y_train)
        if len(X_train) != len(Y_train):
            raise ValueError(
                "X_train and Y_train must have the same number of samples "
                "(got %d and %d)" % (len(X_train), len(Y_train)))
        self.X_train = X_train
        self.Y_train = Y_train

    def predict(self, X_test, num_neighbors):
        """Predict a label for every row of ``X_test``.

        Args:
            X_test: iterable of feature rows.
            num_neighbors: number of nearest training samples that vote.

        Returns:
            numpy array with one predicted label per row of ``X_test``.
        """
        return np.array([
            self.predict_classification(row, num_neighbors) for row in X_test
        ])

    def predict_classification(self, test_row, num_neighbors):
        """Return the most frequent label among the nearest training samples.

        Raises:
            ValueError: if ``num_neighbors`` is smaller than 1.
            IndexError: if ``num_neighbors`` exceeds the training-set size.
        """
        nearest = self._nearest_indices(test_row, num_neighbors)
        if len(nearest) == 0:
            raise ValueError("num_neighbors must be at least 1")
        # Vote with the stored training labels of the neighbours, not with
        # values derived from their feature columns.
        labels = [self.Y_train[i] for i in nearest]
        # `labels` is ordered nearest-first and max() keeps the first maximal
        # element, so a tied vote goes to the label of the closer neighbour.
        return max(labels, key=labels.count)

    def get_neighbors(self, test_row, num_neighbors):
        """Return the ``num_neighbors`` training rows closest to ``test_row``.

        The rows are ordered from nearest to farthest.

        Raises:
            IndexError: if ``num_neighbors`` exceeds the training-set size.
        """
        return [
            self.X_train[i]
            for i in self._nearest_indices(test_row, num_neighbors)
        ]

    def _nearest_indices(self, test_row, num_neighbors):
        """Return indices of the closest training rows, nearest first."""
        train = np.asarray(self.X_train, dtype=float)
        if num_neighbors > len(train):
            # Asking for more neighbours than there are training rows is
            # reported as IndexError.
            raise IndexError(
                "num_neighbors (%d) exceeds the number of training samples "
                "(%d)" % (num_neighbors, len(train)))
        diffs = train - np.asarray(test_row, dtype=float)
        dists = np.sqrt((diffs ** 2).sum(axis=1))
        # A stable sort keeps equidistant rows in their training-set order.
        order = np.argsort(dists, kind="stable")
        # A non-positive request selects no neighbours at all.
        return order[:max(num_neighbors, 0)]


# Load the iris dataset: feature matrix and class targets.
iris = datasets.load_iris()
X, Y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.20, random_state=0)

# Fit the hand-written KNN and predict the held-out samples with
# k = n_neighbors.
n_neighbors = 5
classifier = KNN()
classifier.fit(X_train, Y_train)
y_pred = classifier.predict(X_test, n_neighbors)

# Compare the predictions against the held-out targets.
print(confusion_matrix(Y_test, y_pred))
print(classification_report(Y_test, y_pred))

[[11  0  0]
 [ 0 12  1]
 [ 0  0  6]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      0.92      0.96        13
           2       0.86      1.00      0.92         6

    accuracy                           0.97        30
   macro avg       0.95      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30



In [2]:
# Comparison with scikit-learn's built-in KNeighborsClassifier. Reuses the
# X_train / X_test / Y_train / Y_test already defined in the notebook namespace.
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# fit() returns the fitted estimator, so construction and fitting can be chained.
classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)
y_pred = classifier.predict(X_test)

print(confusion_matrix(Y_test, y_pred))
print(classification_report(Y_test, y_pred))

[[11  0  0]
 [ 0 12  1]
 [ 0  0  6]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      0.92      0.96        13
           2       0.86      1.00      0.92         6

    accuracy                           0.97        30
   macro avg       0.95      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30



In [3]:
import numpy as np
import sklearn.datasets as datasets
import sklearn.metrics as metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# StandardScaler is used below; importing it here keeps this cell runnable
# without relying on an earlier cell's imports.
from sklearn.preprocessing import StandardScaler

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn -- confirm the pinned version.
house = datasets.load_boston()

X = house.data
Y = house.target

# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=1337)

# Scale the features: the scaler is fitted on the training split only and the
# same transformation is then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

lr = LinearRegression()
lr.fit(X_train, Y_train)

y_pred = lr.predict(X_test)

# Compute the MSE once and derive the RMSE from it.
mse = metrics.mean_squared_error(Y_test, y_pred)
print('Mean Absolute Error:', metrics.mean_absolute_error(Y_test, y_pred))
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))

Mean Absolute Error: 3.1651685365005955
Mean Squared Error: 23.50056214741596
Root Mean Squared Error: 4.847737838148424


In [4]:
# using statsmodels.api
import statsmodels.api as sm