In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
from pyimagesearch.preprocessing.simplepreprocessor import SimplePreprocessor
from pyimagesearch.datasets.simpledatasetloader import SimpleDatasetLoader

In [3]:
import os

In [4]:
dataset_path = "/Users/laksh/Dropbox/workspace/pyimagesearch/ImageNet Bundle/datasets/animals/"
image_paths = []

In [5]:
for path, subdirs, files in os.walk(dataset_path):
    for animal in subdirs:
        animal_path = os.path.join(path, animal)
        image_paths += [os.path.join(animal_path, image) for image in os.listdir(animal_path)]

In [6]:
sp = SimplePreprocessor(32, 32)

In [7]:
sdl = SimpleDatasetLoader(preprocessors=[sp])

In [8]:
(data, labels) = sdl.load(image_paths, verbose=500)

[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000


In [10]:
data.shape

(3000, 32, 32, 3)

In [11]:
labels.shape

(3000,)

In [12]:
data = data.reshape((data.shape[0], 3072))
data.shape

(3000, 3072)

In [15]:
print(f"[INFO] features matrix: {data.nbytes / (1024 * 1000.0)}MB")

[INFO] features matrix: 9.0MB


In [18]:
le = LabelEncoder()

In [20]:
labels = le.fit_transform(labels)

In [32]:
le.classes_

array(['cats', 'dogs', 'panda'], dtype='<U5')

In [27]:
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.25, random_state=42)

In [28]:
model = KNeighborsClassifier()

In [29]:
model.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform')

In [34]:
print(classification_report(y_test, model.predict(X_test), target_names=le.classes_))

              precision    recall  f1-score   support

        cats       0.40      0.60      0.48       249
        dogs       0.46      0.52      0.49       262
       panda       0.89      0.30      0.45       239

   micro avg       0.48      0.48      0.48       750
   macro avg       0.58      0.47      0.47       750
weighted avg       0.58      0.48      0.47       750

