In [1]:
# necessary imports
import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt

%matplotlib inline 

In [2]:
from sklearn import (datasets,
                     metrics,
                     model_selection as skms,
                     naive_bayes,
                     linear_model,
                     neighbors)

# dataset for classification
iris = datasets.load_iris()

# train-test split
(iris_train_ftrs, iris_test_ftrs, 
 iris_train_tgt, iris_test_tgt) = skms.train_test_split(iris.data,
                                                        iris.target, 
                                                        test_size=.90,
                                                        random_state=42) 

# dataset for regression
diabetes = datasets.load_diabetes()

# train-test split
tts =  skms.train_test_split(diabetes.data,
                             diabetes.target, 
                             test_size=.25)
(diabetes_train, diabetes_test, 
 diabetes_train_tgt, diabetes_test_tgt) = tts

In [13]:
import memory_profiler

def msr_mem(go, args):
    base = memory_profiler.memory_usage()
    mu = memory_profiler.memory_usage((go, args),
                                       max_usage=True)
    print("{:<3}: ~{:.4f} MiB".format(go.__name__, mu-base))
    
def split_data(dataset):
    split = skms.train_test_split(dataset.data,
                                  dataset.target,
                                  test_size=.25)
    return split[:-1] # don't need test tgt

# Classification

In [4]:
# define some models
models = {'3-NN': neighbors.KNeighborsClassifier(n_neighbors=3),
          '5-NN': neighbors.KNeighborsClassifier(n_neighbors=5),
          'NB'  : naive_bayes.GaussianNB()}

# in turn, fit-predict with those models
for name, model in models.items():
    fit = model.fit(iris_train_ftrs, 
                    iris_train_tgt)
    predictions = fit.predict(iris_test_ftrs)
    
    score = metrics.accuracy_score(iris_test_tgt, predictions)
    print("{:>4s}: {:0.2f}".format(name,score))

3-NN: 0.96
5-NN: 0.61
  NB: 0.81


In [5]:
%%timeit -r1

knn   = neighbors.KNeighborsClassifier(n_neighbors=3)
fit   = knn.fit(iris_train_ftrs, iris_train_tgt)
preds = fit.predict(iris_test_ftrs)

metrics.accuracy_score(iris_test_tgt, preds)

3.96 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 100 loops each)


In [6]:
nb = naive_bayes.GaussianNB()
fit = nb.fit(iris_train_ftrs, iris_train_tgt)
%timeit -r1 preds = fit.predict(iris_test_ftrs)

knn   = neighbors.KNeighborsClassifier(n_neighbors=3)
fit = knn.fit(iris_train_ftrs, iris_train_tgt)
%timeit -r1 preds = fit.predict(iris_test_ftrs)

134 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 10000 loops each)
3.52 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 100 loops each)


In [7]:
def nb_go(train_ftrs, test_ftrs, train_tgt):
    nb    = naive_bayes.GaussianNB()
    fit   = nb.fit(train_ftrs, train_tgt)
    preds = fit.predict(test_ftrs)

def knn3_go(train_ftrs, test_ftrs, train_tgt):
    nb    = neighbors.KNeighborsClassifier(n_neighbors=3)
    fit   = nb.fit(train_ftrs, train_tgt)
    preds = fit.predict(test_ftrs)

def knn5_go(train_ftrs, test_ftrs, train_tgt):
    nb    = neighbors.KNeighborsClassifier(n_neighbors=5)
    fit   = nb.fit(train_ftrs, train_tgt)
    preds = fit.predict(test_ftrs)

In [14]:
sd = split_data(datasets.load_iris())
msr_mem(nb_go, sd)
msr_mem(knn3_go, sd)
msr_mem(knn5_go, sd)

TypeError: unsupported operand type(s) for -: 'float' and 'list'

# Regression

In [9]:
# stand alone code
from sklearn import (datasets, 
                     linear_model, 
                     model_selection as skms,
                     metrics,
                     neighbors)

# dataset
diabetes = datasets.load_diabetes()

# tts
tts =  skms.train_test_split(diabetes.data,
                             diabetes.target, 
                             test_size=.25)
(diabetes_train, diabetes_test, 
 diabetes_train_tgt, diabetes_test_tgt) = tts

# define some models
models = {'3-NN': neighbors.KNeighborsRegressor(n_neighbors=3),
          '5-NN': neighbors.KNeighborsRegressor(n_neighbors=5),
          '10-NN': neighbors.KNeighborsRegressor(n_neighbors=10),
          '20-NN': neighbors.KNeighborsRegressor(n_neighbors=20),
          'linreg' : linear_model.LinearRegression()}

for name, model in models.items():
    fit   = model.fit(diabetes_train, diabetes_train_tgt)
    preds = fit.predict(diabetes_test)
    score = np.sqrt(metrics.mean_squared_error(diabetes_test_tgt, 
                                               preds))
    print("{:>6s} : {:0.2f}".format(name,score))

  3-NN : 64.05
  5-NN : 63.78
 10-NN : 60.10
 20-NN : 58.37
linreg : 57.94


In [10]:
def knn_go(train_ftrs, test_ftrs, train_tgt):
    knn = neighbors.KNeighborsRegressor(n_neighbors=3)
    fit   = knn.fit(train_ftrs, train_tgt)
    preds = fit.predict(test_ftrs)

def lr_go(train_ftrs, test_ftrs, train_tgt):
    linreg = linear_model.LinearRegression()
    fit   = linreg.fit(train_ftrs, train_tgt)
    preds = fit.predict(test_ftrs)