# Setup

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt

%matplotlib inline 

In [2]:
from sklearn import (datasets,
                     metrics,
                     model_selection as skms,
                     naive_bayes,
                     neighbors)

In [3]:
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)

# Standalone Learning Evaluation

In [1]:
# Self-contained evaluation script: it could be saved as its own .py file,
# which is why some of the notebook's imports are repeated here.
from sklearn import (datasets,
                     metrics,
                     model_selection as skms,
                     naive_bayes,
                     neighbors)

# load the iris dataset
iris = datasets.load_iris()

# hold out 90% of the examples for testing; random_state pins the split
(iris_train_ftrs, iris_test_ftrs,
 iris_train_tgt, iris_test_tgt) = skms.train_test_split(
    iris.data, iris.target, test_size=.90, random_state=42)

# candidate models, keyed by the label printed in the report
models = dict([
    ('3-NN', neighbors.KNeighborsClassifier(n_neighbors=3)),
    ('5-NN', neighbors.KNeighborsClassifier(n_neighbors=5)),
    ('NB', naive_bayes.GaussianNB()),
])

# train each model on the training rows, predict the held-out rows,
# and report the accuracy of those predictions
for name, model in models.items():
    fit = model.fit(iris_train_ftrs, iris_train_tgt)
    predictions = fit.predict(iris_test_ftrs)

    score = metrics.accuracy_score(y_true=iris_test_tgt,
                                   y_pred=predictions)
    print("{:>4s}: {:0.2f}".format(name, score))

3-NN: 0.96
5-NN: 0.61
  NB: 0.81


# Timing

In [4]:
# time loading the iris dataset, using a single repeat (-r1)
%timeit -r1 datasets.load_iris()

1.52 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1000 loops each)


In [5]:
%%timeit -r1 -n1
# time one train-test split (one run, one loop) holding out 25% of the rows
# NOTE(review): names assigned inside a %%timeit body are local to the timed
# statement, so this presumably does NOT replace the split made in the
# standalone-evaluation cell; later cells would still see that one -- confirm
(iris_train_ftrs, iris_test_ftrs, 
 iris_train_tgt,  iris_test_tgt) = skms.train_test_split(iris.data,
                                                         iris.target, 
                                                         test_size=.25)

2.83 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [6]:
%%timeit -r1

# one timed pass: build, train, predict with, and score Gaussian naive Bayes
trained_nb = naive_bayes.GaussianNB().fit(iris_train_ftrs, iris_train_tgt)
nb_preds = trained_nb.predict(iris_test_ftrs)

metrics.accuracy_score(iris_test_tgt, nb_preds)

1.51 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1000 loops each)


In [7]:
%%timeit -r1

# one timed pass: build, train, predict with, and score a 3-nearest-neighbors model
trained_knn = neighbors.KNeighborsClassifier(n_neighbors=3).fit(iris_train_ftrs,
                                                                iris_train_tgt)
knn_preds = trained_knn.predict(iris_test_ftrs)

metrics.accuracy_score(iris_test_tgt, knn_preds)

7.74 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 100 loops each)


In [8]:
# Time only the training step of each model (one repeat each).
# The estimators are constructed outside the timed statement so that
# construction cost is not included in the measurement.
nb = naive_bayes.GaussianNB()
%timeit -r1 fit = nb.fit(iris_train_ftrs, iris_train_tgt)

knn   = neighbors.KNeighborsClassifier(n_neighbors=3)
%timeit -r1 fit = knn.fit(iris_train_ftrs, iris_train_tgt)

760 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1000 loops each)
468 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1000 loops each)


In [9]:
# Time only the prediction step of each model (one repeat each).
# Each model is built and fit before the timed statement, so only
# predict() on the test features is measured.
nb = naive_bayes.GaussianNB()
fit = nb.fit(iris_train_ftrs, iris_train_tgt)
%timeit -r1 preds = fit.predict(iris_test_ftrs)

knn   = neighbors.KNeighborsClassifier(n_neighbors=3)
fit = knn.fit(iris_train_ftrs, iris_train_tgt)
%timeit -r1 preds = fit.predict(iris_test_ftrs)

341 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1000 loops each)
8.47 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 100 loops each)


# Memory

In [10]:
# load the memory_profiler IPython extension (the following cells use its %%memit magic)
%load_ext memory_profiler

In [11]:
%%memit
# measure memory for a full build + train + predict pass with Gaussian naive Bayes
nb = naive_bayes.GaussianNB()
fit = nb.fit(iris_train_ftrs, iris_train_tgt)
preds = fit.predict(iris_test_ftrs)

peak memory: 91.11 MiB, increment: 0.79 MiB


In [12]:
%%memit
# measure memory for a full build + train + predict pass with 3-NN, the same
# kNN setting the timing cells use, so the measurements are comparable
knn = neighbors.KNeighborsClassifier(n_neighbors=3)
fit = knn.fit(iris_train_ftrs, iris_train_tgt)
preds = fit.predict(iris_test_ftrs)

peak memory: 91.49 MiB, increment: 0.31 MiB


In [15]:
import memory_profiler

def nb_go(train_ftrs, test_ftrs, train_tgt):
    """Fit Gaussian naive Bayes on the training data, then predict the test features.

    The predictions are not returned (the function returns None); it is
    passed as the workload to msr_mem.
    """
    trained = naive_bayes.GaussianNB().fit(train_ftrs, train_tgt)
    predicted = trained.predict(test_ftrs)
    
def split_data(dataset):
    """Train-test split a dataset (25% test) and drop the test targets.

    dataset must expose .data and .target. Returns the list
    [train_ftrs, test_ftrs, train_tgt].
    """
    train_ftrs, test_ftrs, train_tgt, _unused_test_tgt = skms.train_test_split(
        dataset.data, dataset.target, test_size=.25)
    # the test targets are left out on purpose: they are not needed here
    return [train_ftrs, test_ftrs, train_tgt]

def msr_mem(go, args):
    """Print roughly how much memory calling go(*args) adds over the baseline.

    Parameters
    ----------
    go : callable
        Function to run under memory_profiler.
    args : sequence
        Positional arguments passed to go.

    Prints "<function name>: ~<MiB> MiB", where the value is the peak usage
    recorded while go runs minus the usage sampled just before it starts.
    """
    # current memory use of this process, sampled once before the run
    base = memory_profiler.memory_usage()[0]
    peak = memory_profiler.memory_usage((go, args), max_usage=True)
    # Depending on the memory_profiler release, max_usage=True yields either
    # a one-element list or a bare float; accept both instead of assuming
    # the result can be indexed.
    if isinstance(peak, (list, tuple)):
        peak = peak[0]
    print("{:<3}: ~{:.4f} MiB".format(go.__name__, peak - base))

# split a freshly loaded iris dataset, then report the memory used by the
# naive Bayes fit/predict run on that split
sd = split_data(datasets.load_iris())
msr_mem(nb_go, sd)

nb_go: ~0.0000 MiB


# Scripts

In [18]:
# print the source of the kNN memory-profiling script
!cat scripts/knn_memtest.py

import sys
import memory_profiler
from sklearn import (datasets,
                     model_selection as skms,
                     neighbors)

@memory_profiler.profile(precision=4)
def knn_memtest(train, train_tgt, test):
    knn   = neighbors.KNeighborsClassifier(n_neighbors=3)
    fit   = knn.fit(train, train_tgt)
    preds = fit.predict(test)

if __name__ == "__main__":
    iris = datasets.load_iris()
    tts = skms.train_test_split(iris.data,
                                iris.target,
                               test_size=.25)
    (iris_train_ftrs, iris_test_ftrs,
     iris_train_tgt,  iris_test_tgt) = tts
    tup = (iris_train_ftrs, iris_train_tgt, iris_test_ftrs)
    knn_memtest(*tup)


In [19]:
# run the script in a separate Python process; it prints a line-by-line memory report
!python scripts/knn_memtest.py

Filename: scripts/knn_memtest.py

Line #    Mem usage    Increment   Line Contents
     7  78.4453 MiB  78.4453 MiB   @memory_profiler.profile(precision=4)
     8                             def knn_memtest(train, train_tgt, test):
     9  78.4453 MiB   0.0000 MiB       knn   = neighbors.KNeighborsClassifier(n_neighbors=3)
    10  78.6172 MiB   0.1719 MiB       fit   = knn.fit(train, train_tgt)
    11  78.6953 MiB   0.0781 MiB       preds = fit.predict(test)




In [20]:
# run perf_01.py with the "mem" and then the "time" argument for "nb"
# (presumably naive Bayes, given the nb_go lines it prints -- script source not shown here)
!python scripts/perf_01.py mem nb
!python scripts/perf_01.py time nb

nb_go: ~0.1680 MiB
nb_go : ~0.1164 sec


In [21]:
# run perf_01.py with the "mem" and then the "time" argument for "knn"
# (presumably k-nearest neighbors, given the knn_go lines it prints -- script source not shown here)
!python scripts/perf_01.py mem knn
!python scripts/perf_01.py time knn

knn_go: ~0.3984 MiB
knn_go: ~0.3039 sec
