In [1]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
from sklearn import neighbors
from scipy import stats
%matplotlib inline

music = pd.DataFrame()
music['duration'] = [184, 134, 243, 186, 122, 197, 294, 382, 102, 264, 
                     205, 110, 307, 110, 397, 153, 190, 192, 210, 403,
                     164, 198, 204, 253, 234, 190, 182, 401, 376, 102]
music['loudness'] = [18, 34, 43, 36, 22, 9, 29, 22, 10, 24, 
                     20, 10, 17, 51, 7, 13, 19, 12, 21, 22,
                     16, 18, 4, 23, 34, 19, 14, 11, 37, 42]
music['bpm'] = [ 105, 90, 78, 75, 120, 110, 80, 100, 105, 60,
                  70, 105, 95, 70, 90, 105, 70, 75, 102, 100,
                  100, 95, 90, 80, 90, 80, 100, 105, 70, 65]

## Model 1

In [2]:
# Build our model.
knn = neighbors.KNeighborsRegressor(n_neighbors=10)
X = pd.DataFrame(music.loudness)
Y = music.bpm
knn.fit(X, Y)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=10, p=2,
          weights='uniform')

## Model 2

In [3]:
# Same as Model 1 but this time with weights.
knn_w = neighbors.KNeighborsRegressor(n_neighbors=10, weights='distance')
X1 = pd.DataFrame(music.loudness)
Y1 = music.bpm
knn_w.fit(X1, Y1)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=10, p=2,
          weights='distance')

## Model 3

In [4]:
# Same as Model 2 but with duration included
knn_d = neighbors.KNeighborsRegressor(n_neighbors=10, weights='distance')
X2 = music[['loudness', 'duration']]
Y2 = music.bpm
knn_d.fit(X2, Y2)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=10, p=2,
          weights='distance')

## Model 4

In [5]:
# Same as Model 1 but with imput based on z-scores
knn_z = neighbors.KNeighborsRegressor(n_neighbors=10, weights='distance')
X3 = pd.DataFrame({
    'loudness': stats.zscore(music.loudness)
})
Y3 = music.bpm
knn_z.fit(X3, Y3)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=10, p=2,
          weights='distance')

## Model 5

In [6]:
# Same as Model 3 but with imput based on z-scores
knn_z2 = neighbors.KNeighborsRegressor(n_neighbors=10, weights='distance')
X4 = pd.DataFrame({
    'loudness': stats.zscore(music.loudness),
    'duration': stats.zscore(music.duration)
})
Y4 = music.bpm
knn_z2.fit(X4, Y4)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=10, p=2,
          weights='distance')

## Model 6

In [8]:
# Same as Model 3 but with k = 5
knn_d5 = neighbors.KNeighborsRegressor(n_neighbors=5, weights='distance')
X5 = music[['loudness', 'duration']]
Y5 = music.bpm
knn_d.fit(X5, Y5)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=10, p=2,
          weights='distance')

## Model 7

In [17]:
# Same as Model 3 but with k = 4
knn_d4 = neighbors.KNeighborsRegressor(n_neighbors=4, weights='distance')
X6 = music[['loudness', 'duration']]
Y6 = music.bpm
knn_d.fit(X6, Y6)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=10, p=2,
          weights='distance')

## Model 8

In [18]:
# Same as Model 3 but with k = 3
knn_d3 = neighbors.KNeighborsRegressor(n_neighbors=3, weights='distance')
X7 = music[['loudness', 'duration']]
Y7 = music.bpm
knn_d3.fit(X7, Y7)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=3, p=2,
          weights='distance')

## Model 9

In [20]:
# Same as Model 3 but with k = 1
knn_d1 = neighbors.KNeighborsRegressor(n_neighbors=1, weights='distance')
X8 = music[['loudness', 'duration']]
Y8 = music.bpm
knn_d1.fit(X8, Y8)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=None, n_neighbors=1, p=2,
          weights='distance')

In [21]:
from sklearn.model_selection import cross_val_score
score = cross_val_score(knn, X, Y, cv=5)
print("Model 1 Accuracy: %0.2f (+/- %0.2f)" % (score.mean(), score.std() * 2))
score_w = cross_val_score(knn_w, X1, Y1, cv=5)
print("Model 2 Accuracy: %0.2f (+/- %0.2f)" % (score_w.mean(), score_w.std() * 2))
score_d = cross_val_score(knn_d, X2, Y2, cv=5)
print("Model 3 Accuracy: %0.2f (+/- %0.2f)" % (score_d.mean(), score_d.std() * 2))
score_z = cross_val_score(knn_z, X3, Y3, cv=5)
print("Model 4 Accuracy: %0.2f (+/- %0.2f)" % (score_z.mean(), score_z.std() * 2))
score_z2 = cross_val_score(knn_z2, X4, Y4, cv=5)
print("Model 5 Accuracy: %0.2f (+/- %0.2f)" % (score_z2.mean(), score_z2.std() * 2))
score_d5 = cross_val_score(knn_d5, X5, Y5, cv=5)
print("Model 6 Accuracy: %0.2f (+/- %0.2f)" % (score_d5.mean(), score_d5.std() * 2))
score_d4 = cross_val_score(knn_d4, X6, Y6, cv=5)
print("Model 7 Accuracy: %0.2f (+/- %0.2f)" % (score_d4.mean(), score_d4.std() * 2))
score_d3 = cross_val_score(knn_d3, X7, Y7, cv=5)
print("Model 8 Accuracy: %0.2f (+/- %0.2f)" % (score_d3.mean(), score_d3.std() * 2))
score_d1 = cross_val_score(knn_d1, X8, Y8, cv=5)
print("Model 9 Accuracy: %0.2f (+/- %0.2f)" % (score_d1.mean(), score_d1.std() * 2))

Model 1 Accuracy: -0.18 (+/- 0.66)
Model 2 Accuracy: 0.11 (+/- 0.94)
Model 3 Accuracy: -0.26 (+/- 0.74)
Model 4 Accuracy: 0.12 (+/- 0.91)
Model 5 Accuracy: -0.11 (+/- 1.05)
Model 6 Accuracy: -0.51 (+/- 1.24)
Model 7 Accuracy: -0.63 (+/- 1.64)
Model 8 Accuracy: -0.59 (+/- 1.75)
Model 9 Accuracy: -1.54 (+/- 4.67)
