In [2]:

from random import shuffle
import os
from PIL import Image
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import sklearn as sk
import matplotlib
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc 
import math

  from ._conv import register_converters as _register_converters


In [3]:
import cv2

# Load Data

In [4]:
"""Dataset & Document Paths"""
# Root folder that holds every dataset used by this notebook. The default is
# the original author's local drive; set the DATASET_ROOT environment variable
# to run the notebook on another machine without editing the cell.
dataset_root = os.environ.get("DATASET_ROOT", "D:/1 Project Model/Dataset").rstrip("/\\")

# Layout ("map" vs. "other") classification splits.
train_data_dir = dataset_root + "/Layout/map&other/train"
test_data_dir = dataset_root + "/Layout/map&other/test"

In [5]:
"""Read and Preprocess dataset"""
def _read_labeled_images(data_dir, split_name):
    """Load every file in ``data_dir`` as an image scaled by 1/255.

    The class label of a file is the integer that precedes the first
    underscore in its name (``"3_foo.png"`` -> ``3``).

    Args:
        data_dir: Directory whose entries are all labeled image files.
        split_name: Word inserted into the progress message
            (``"training"`` or ``"testing"``).

    Returns:
        Tuple ``(fpaths, datas, labels)``: list of file paths, ``np.ndarray``
        built from the per-image arrays, and ``np.ndarray`` of integer labels,
        all in the same (sorted-by-file-name) order.

    Raises:
        ValueError: If a file name does not start with an integer prefix.
    """
    fpaths, datas, labels = [], [], []

    # os.listdir() yields entries in arbitrary order; sorting makes the sample
    # order reproducible across runs and machines.
    for fname in sorted(os.listdir(data_dir)):
        fpath = os.path.join(data_dir, fname)
        label = int(fname.split("_")[0])

        # No augmentation: the pixels are only rescaled. The context manager
        # releases the underlying file handle as soon as the pixels are copied.
        with Image.open(fpath) as image:
            data = np.array(image) / 255.0

        fpaths.append(fpath)
        datas.append(data)
        labels.append(label)

    datas = np.array(datas)
    labels = np.array(labels)

    print("shape of {} datas: {}\tshape of labels: {}".format(split_name, datas.shape, labels.shape))
    return fpaths, datas, labels


def read_train_data(data_dir):
    """Read the training dataset.

    Args:
        data_dir: Directory containing the training images, named
            ``<label>_<anything>``.

    Returns:
        ``(fpaths, datas, labels)`` as described in ``_read_labeled_images``.
    """
    return _read_labeled_images(data_dir, "training")


def read_test_data(data_dir):
    """Read the testing dataset.

    Args:
        data_dir: Directory containing the testing images, named
            ``<label>_<anything>``.

    Returns:
        ``(fpaths, datas, labels)`` as described in ``_read_labeled_images``.
    """
    return _read_labeled_images(data_dir, "testing")

In [6]:
"""Load datasets"""
# Each reader returns (file paths, image array, label array) for its split.
fpaths, datas, labels = read_train_data(train_data_dir)
test_paths, test_data, test_label = read_test_data(test_data_dir)

# Sample counts = length of the leading axis of each image array.
num_train_examples, num_test_examples = datas.shape[0], test_data.shape[0]

shape of training datas: (650, 256, 256, 3)	shape of labels: (650,)
shape of testing datas: (60, 256, 256, 3)	shape of labels: (60,)


# Extract Feature

## Advanced Features (standardized color histogram + bias column)

In [27]:
from features import *

num_color_bins = 10  # Number of bins in the color histogram
feature_fns = [lambda img: color_histogram_hsv(img, nbin=num_color_bins)]
X_train_feats = extract_features(datas, feature_fns, verbose=True)
X_test_feats = extract_features(test_data, feature_fns)

# Preprocessing: subtract the mean feature. The statistics come from the
# training split only and are then applied to both splits.
mean_feat = np.mean(X_train_feats, axis=0, keepdims=True)
X_train_feats -= mean_feat
X_test_feats -= mean_feat

# Preprocessing: divide by the standard deviation so that each feature has
# roughly the same scale.
std_feat = np.std(X_train_feats, axis=0, keepdims=True)
# A feature that is constant over the whole training split has std == 0;
# dividing by it would put NaN (0/0) in the training matrix and inf/NaN in the
# test matrix. Use 1 for those columns so they simply stay centered.
std_feat[std_feat == 0] = 1.0
X_train_feats /= std_feat
X_test_feats /= std_feat

# Preprocessing: append a constant column of ones as the bias dimension.
X_train_feats = np.hstack([X_train_feats, np.ones((X_train_feats.shape[0], 1))])
X_test_feats = np.hstack([X_test_feats, np.ones((X_test_feats.shape[0], 1))])

## Basic Features (raw color histogram, no normalization)

In [7]:
from features import *

# Raw color-histogram features: no mean/std normalization and no bias column.
# NOTE: this assigns X_train_feats / X_test_feats, replacing whatever an
# earlier feature cell stored under those names.
num_color_bins = 10  # passed to color_histogram_hsv as `nbin`
feature_fns = [
    lambda img: color_histogram_hsv(img, nbin=num_color_bins),
]
X_train_feats, X_test_feats = (
    extract_features(datas, feature_fns, verbose=True),
    extract_features(test_data, feature_fns),
)

# SVM

In [28]:
from linear_classifier import LinearSVM

# Hyper-parameters for the linear SVM (single values despite the plural names).
# NOTE(review): the execution counts suggest this cell was run right after the
# "Advanced Feature" cell, i.e. on standardized features with a bias column -
# confirm before re-running top to bottom.
learning_rates = 1e-7
regularization_strengths = 4000000

# NOTE(review): if LinearSVM.train draws random minibatches or initial weights,
# the reported accuracy will vary between runs unless a seed is set - verify.
svm = LinearSVM()
loss_hist = svm.train(
    X_train_feats,
    labels,
    learning_rates,
    regularization_strengths,
    num_iters=6000,
)

# Accuracy = fraction of samples whose predicted label matches the true one.
y_train_pred = svm.predict(X_train_feats)
train_accuracy = np.mean(labels == y_train_pred)

y_test_pred = svm.predict(X_test_feats)
test_accuracy = np.mean(test_label == y_test_pred)

# Only the test accuracy is reported; train_accuracy is kept for inspection.
print(test_accuracy)

0.9333333333333333


# Decision Tree

In [251]:
"""Dataset & Document Paths"""
# Root folder that holds every dataset used by this notebook. The default is
# the original author's local drive; set the DATASET_ROOT environment variable
# to run the notebook on another machine without editing the cell.
dataset_root = os.environ.get("DATASET_ROOT", "D:/1 Project Model/Dataset").rstrip("/\\")

# Platform classification splits (the stray double slash before "test" in the
# original path has been removed).
train_data_dir = dataset_root + "/Platform/train_2000"
test_data_dir = dataset_root + "/Platform/test"

In [252]:
"""Load datasets"""
# Reload both splits from the directories currently stored in train_data_dir /
# test_data_dir; this replaces the arrays loaded earlier under the same names.
fpaths, datas, labels = read_train_data(train_data_dir)
test_paths, test_data, test_label = read_test_data(test_data_dir)

# Sample counts = length of the leading axis of each image array.
num_train_examples, num_test_examples = datas.shape[0], test_data.shape[0]

shape of training datas: (2000, 256, 256, 3)	shape of labels: (2000,)
shape of testing datas: (100, 256, 256, 3)	shape of labels: (100,)


In [20]:
from features import *

# Raw color-histogram features for the currently loaded splits
# (no normalization, no bias column).
num_color_bins = 10  # passed to color_histogram_hsv as `nbin`
feature_fns = [
    lambda img: color_histogram_hsv(img, nbin=num_color_bins),
]
X_train_feats, X_test_feats = (
    extract_features(datas, feature_fns, verbose=True),
    extract_features(test_data, feature_fns),
)

In [8]:
from sklearn.tree import DecisionTreeClassifier

# Fix the seed: scikit-learn's tree builder randomly permutes the candidate
# features at each split, so equally good splits can be chosen differently
# from run to run (and the accuracy below can change) unless random_state is set.
dtc = DecisionTreeClassifier(random_state=0)
dtc.fit(X_train_feats, labels)

# Per-sample predictions are kept for later inspection; score() reports the
# mean accuracy on the test split.
y_predict = dtc.predict(X_test_feats)
# The printed label "准确度" means "accuracy".
print("准确度:", dtc.score(X_test_feats, test_label))

准确度: 0.55


In [None]:
# 