In [18]:
import os
import cv2
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

In [38]:
def load_data_from_directory(root):
    images_name = os.listdir(root)
    data={}
    for image_name in images_name:
        name=image_name[0:3]
        if name not in data:
            data[name]=[]
        if len(data[name]) < 2000:
            data[name].append(os.path.join(root, image_name))
            
    return data

def save_training_data(data, feature_path = "features.pkl", label_path="labels.pkl"):
    features = []
    labels = []
    for label, images_path in data.items():
        for path in images_path:
            image = cv2.imread(path)
            image = cv2.resize(image, (100, 100)) 
            image = np.reshape(image, (1, -1))
            features.append(image)
            labels.append(label)
    with open(feature_path, "wb") as f:
        pickle.dump(features, f)
    
    with open(label_path, "wb") as f:
        pickle.dump(labels, f)
    
    print("Save finished!!!")
    
root = "train/"
data = load_data_from_directory(root)
save_training_data(data)

Save finished!!!


In [39]:
def load_training_data(feature_path = "features.pkl", label_path="labels.pkl"):
    with open(feature_path, "rb") as f:
        features = pickle.load(f)
    
    with open(label_path, "rb") as f:
        labels = pickle.load(f)
    
    return np.squeeze(np.asarray(features)), np.asarray(labels)

features, labels = load_training_data()
onehot = OneHotEncoder()
labels=onehot.fit_transform(labels.reshape(-1,1)).toarray()
training_data = np.concatenate((features, labels), axis=1)
print("Load finisned!!")

Load finisned!!


In [40]:
train, test = train_test_split(training_data, test_size=0.2, shuffle=True)
X_train, y_train = train[:, :-1], train[:, -1]
X_test, y_test = test[:, :-1], test[:, -1]

In [41]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)
with open("model.pkl", "wb") as f:
    pickle.dump(classifier, f)

In [42]:
with open("model.pkl", "rb") as f:
    classifier = pickle.load(f)

In [43]:
prediction = classifier.predict(X_test)
accuracy = accuracy_score(y_test, prediction)
print(accuracy)

1.0
