In [1]:
import os
import pickle
import warnings

import cv2
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB

In [2]:
def load_data_from_direction(root):
    """Collect image paths under ``root``, grouped by the class in the file name.

    A file belongs to a class when the part of its name before the first
    ``.`` is exactly ``'dog'`` or ``'cat'`` (e.g. ``dog.12.jpg``). Entries
    with any other prefix, and entries that are not regular files, are ignored.

    Args:
        root: directory whose direct children are scanned (not recursive).

    Returns:
        dict with the keys ``'dog'`` and ``'cat'``, each mapping to a list of
        paths (``root`` joined with the file name) in sorted file-name order.
    """
    data = {}
    for name in ['dog', 'cat']:
        data[name] = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # resulting lists (and everything pickled from them) reproducible.
    for filename in sorted(os.listdir(root)):
        path = os.path.join(root, filename)
        # A sub-directory called "dog" or "cat" would otherwise be collected
        # as if it were an image.
        if not os.path.isfile(path):
            continue
        name_animal = filename.split('.')[0]
        if name_animal in data:
            data[name_animal].append(path)
    return data
    
def save_training_data(data, feature_path = "features.pkl", label_path="labels.pkl", image_size=(64, 64)):
    """Convert image files into flattened pixel rows and pickle them with labels.

    Every readable image is loaded with ``cv2.imread``, resized to
    ``image_size`` and reshaped to a single row of shape ``(1, -1)``.
    Images listed under the key ``'dog'`` get label ``1``; images under any
    other key get label ``0``.

    Args:
        data: mapping of class name -> iterable of image file paths.
        feature_path: file the list of feature arrays is pickled to.
        label_path: file the list of integer labels is pickled to.
        image_size: size passed to ``cv2.resize``; the default keeps the
            original 64x64 behaviour.

    Files that ``cv2.imread`` cannot decode are skipped with a
    ``RuntimeWarning`` instead of crashing inside ``cv2.resize``.
    """
    features = []
    labels = []
    for label, filenames in data.items():
        # Compare by value: `is` tests object identity and only appeared to
        # work because short string literals happen to be interned.
        target = 1 if label == 'dog' else 0
        for path in filenames:
            image = cv2.imread(path)
            if image is None:
                # cv2.imread reports a missing/corrupt file by returning None.
                warnings.warn(
                    "Skipping unreadable image: {}".format(path),
                    RuntimeWarning,
                )
                continue
            image = cv2.resize(image, image_size)
            features.append(np.reshape(image, (1, -1)))
            labels.append(target)
    with open(feature_path, 'wb') as f:
        pickle.dump(features, f)
    with open(label_path, 'wb') as f:
        pickle.dump(labels, f)

    print('Load finished!!')
        


In [3]:
# Index the training images by class, then write the feature/label pickles
# (default output files: features.pkl and labels.pkl).
root = "data/train"
data = load_data_from_direction(root=root)
save_training_data(data=data)

Load finished!!


In [4]:
def load_training_data(feature_path = 'features.pkl', label_path = 'labels.pkl'):
    """Read the pickled features and labels back from disk.

    Args:
        feature_path: pickle file holding the features.
        label_path: pickle file holding the labels.

    Returns:
        Tuple ``(features, labels)`` exactly as they were unpickled.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    unpickled = []
    for pickle_path in (feature_path, label_path):
        with open(pickle_path, 'rb') as handle:
            unpickled.append(pickle.load(handle))
    features, labels = unpickled
    return features, labels


In [5]:
# Read the feature arrays and labels back from the default pickle files
# (features.pkl / labels.pkl) that save_training_data wrote.
features, labels = load_training_data()

In [6]:
# Sanity check: each stored sample is one flattened row, i.e. shape (1, n_pixels).
print(features[0].shape)

(1, 12288)


In [7]:
# Stack the per-image (1, n_pixels) rows into one (n_samples, n_pixels) matrix.
# An explicit reshape is used instead of a bare np.squeeze, which would also
# drop the sample axis when there is exactly one image; the reshape is also a
# no-op if this cell is re-run on an already 2-D array.
features, labels = np.asarray(features), np.asarray(labels)
features = features.reshape(len(features), -1)

In [8]:
# Append the labels as a final column so that features and labels are shuffled
# and split together as one array.
label_column = labels.reshape(-1, 1)
training_data = np.hstack((features, label_column))


In [9]:
# A fixed random_state makes the 80/20 split, and therefore the reported
# accuracy, reproducible across kernel restarts.
train, test = train_test_split(
    training_data,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)
# The last column holds the label; every column before it is pixel data.
X_train, y_train = train[:, :-1], train[:, -1]
X_test, y_test = test[:, :-1], test[:, -1]

In [10]:
# Fit a multinomial naive Bayes model on the flattened pixel values.
# (MultinomialNB is imported in the import cell at the top of the notebook.)
classifier = MultinomialNB()
classifier.fit(X_train, y_train)
# Persist the fitted model so it can be reloaded without retraining.
with open("model.pkl", "wb") as f:
    pickle.dump(classifier, f)

In [11]:
# Restore the fitted classifier from disk.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open("model.pkl", "rb") as model_file:
    classifier = pickle.load(model_file)

In [12]:
# Score the model on the held-out split: fraction of test labels predicted correctly.
prediction = classifier.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=prediction)
print(accuracy)

0.5572139303482587


## Note: only a small dataset was used (1505 dog images and 1505 cat images), and only 80% of it went into training, so the prediction accuracy is fairly low.