In [13]:
import os
import sys
import glob
import numpy as np
from skimage import io
from sklearn import datasets

IMAGE_SIZE=40
COLOR_BYTE=3
CATEGORY_NUM=6
prepath="./learning_sample/06/data/"

def load_handimage(path):
    path=os.path.join(prepath,path)
    files=glob.glob(os.path.join(path,'*/*.png'))
    # 処理を早くするため構造だけ定義した空の配列を用意する
    images=np.ndarray((len(files),IMAGE_SIZE,IMAGE_SIZE,COLOR_BYTE),dtype=np.uint8)
    labels=np.ndarray(len(files),dtype=np.int)
    
    # 画像（40*40 RBG）格納→images　画像名をラベルにする→labels
    for i,file in enumerate(files):
        image=io.imread(file)
        images[i]=image
        
        # 入れ子のフォルダ名（スラッシュ区切り）をsplitして一番最後の要素
        label=os.path.split(os.path.dirname(file))[-1]
        labels[i]=int(label)
    
    # ファイル数の分だけ分割したフラットなデータにする
    flat_data=images.reshape((-1, IMAGE_SIZE**2*COLOR_BYTE))
    images=flat_data.view()  # view()共同管理　これをする意味はわからない
    
    # scikit-learnのBunchクラスにして返す
    return datasets.base.Bunch(
                    data=flat_data, # imagesと共同データ　別の名前になってる
                    target=labels.astype(np.int),  # imagesに対する正解ラベル
                    target_names=np.arange(CATEGORY_NUM),  # 0,1,2,3,4,5
                    images=images,
                    DESCR=None
                    )
    

In [25]:
from sklearn import svm,metrics
paths_for_test=["m01","m03"]
paths_for_train=["m02","m04","m05","m06","m04c"]

print('test',paths_for_test)
print('train',paths_for_train)

data=[]
label=[]
for i in range(len(paths_for_train)):
    path=paths_for_train[i]
    d=load_handimage(path)  # Bunchデータを格納
    data.append(d.data)
    label.append(d.target)

# 最外部の囲みを排除し、同列データ化する
train_data=np.concatenate(data)
train_label=np.concatenate(label)

classifier=svm.LinearSVC()
classifier.fit(train_data,train_label)

for path in paths_for_test:
    d=load_handimage(path)
    
    predicted=classifier.predict(d.data)
    
    print(": : : %s : : :"% path)
    print("Accuracy:\n%s"% metrics.accuracy_score(d.target,predicted))
    print("Classification report:\n%s\n"% metrics.classification_report(d.target,predicted))

test ['m01', 'm03']
train ['m02', 'm04', 'm05', 'm06', 'm04c']
: : : m01 : : :
Accuracy:
0.5533333333333333
Classification report:
             precision    recall  f1-score   support

          0       0.80      0.98      0.88       100
          1       0.37      0.48      0.42       100
          2       0.24      0.28      0.26       100
          3       0.60      0.36      0.45       100
          4       0.76      0.22      0.34       100
          5       0.71      1.00      0.83       100

avg / total       0.58      0.55      0.53       600


: : : m03 : : :
Accuracy:
0.4816666666666667
Classification report:
             precision    recall  f1-score   support

          0       0.93      0.27      0.42       100
          1       0.63      0.39      0.48       100
          2       0.26      0.76      0.39       100
          3       0.47      0.28      0.35       100
          4       0.63      0.19      0.29       100
          5       0.76      1.00      0.86       100



クロスバリデーション

In [32]:
datalist=["m01", "m02", "m03", "m04", "m05", "m06", "m07", "m08", "m09", "m10", 
                    "m11", "m12", "m13", "m14", "m15", "m16"]
lst=[]
for paths_ in datalist:
    lst.append([d for d in datalist if d!=paths_])
    
for i,paths_for_train in enumerate(lst):
    paths_for_test=[datalist[i]]
    print("paths_for_test: ",paths_for_test)
    print("paths_for_train: ",paths_for_train)
    data=[]
    label=[]
    for j in range(len(paths_for_train)):
        path=paths_for_train[j]
        d=load_handimage(path)
        data.append(d.data)
        label.append(d.target)
    train_data=np.concatenate(data)
    train_label=np.concatenate(label)
    
    classifier=svm.LinearSVC()
    
    classifier.fit(train_data,train_label)
    
    for path in paths_for_test:
        d=load_handimage(path)
        predicted=classifier.predict(d.data)
        print(": : %s : :"% path)
        print("Accuracy:\n%s"% metrics.accuracy_score(d.target,predicted))

paths_for_test:  ['m01']
paths_for_train:  ['m02', 'm03', 'm04', 'm05', 'm06', 'm07', 'm08', 'm09', 'm10', 'm11', 'm12', 'm13', 'm14', 'm15', 'm16']
: : m01 : :
Accuracy:
0.695
paths_for_test:  ['m02']
paths_for_train:  ['m01', 'm03', 'm04', 'm05', 'm06', 'm07', 'm08', 'm09', 'm10', 'm11', 'm12', 'm13', 'm14', 'm15', 'm16']
: : m02 : :
Accuracy:
0.6916666666666667
paths_for_test:  ['m03']
paths_for_train:  ['m01', 'm02', 'm04', 'm05', 'm06', 'm07', 'm08', 'm09', 'm10', 'm11', 'm12', 'm13', 'm14', 'm15', 'm16']
: : m03 : :
Accuracy:
0.715
paths_for_test:  ['m04']
paths_for_train:  ['m01', 'm02', 'm03', 'm05', 'm06', 'm07', 'm08', 'm09', 'm10', 'm11', 'm12', 'm13', 'm14', 'm15', 'm16']
: : m04 : :
Accuracy:
0.35333333333333333
paths_for_test:  ['m05']
paths_for_train:  ['m01', 'm02', 'm03', 'm04', 'm06', 'm07', 'm08', 'm09', 'm10', 'm11', 'm12', 'm13', 'm14', 'm15', 'm16']
: : m05 : :
Accuracy:
0.635
paths_for_test:  ['m06']
paths_for_train:  ['m01', 'm02', 'm03', 'm04', 'm05', 'm07', 'm

In [33]:
from sklearn import svm, metrics
from sklearn.model_selection import cross_val_score

data = []
label = []
for i in range(len(paths_for_train)):
    path = paths_for_train[i]
    d = load_handimage(path)
    data.append(d.data)
    label.append(d.target)
train_data = np.concatenate(data)
train_label = np.concatenate(label)

model = svm.LinearSVC()

score = cross_val_score(model, train_data, train_label,cv=16)

print(score)

for i,sc in enumerate(score):
    print("m{:02}: {:f}".format(i+1,sc))

[0.66843972 0.70390071 0.75531915 0.53546099 0.7393617  0.83333333
 0.7535461  0.7322695  0.68439716 0.55141844 0.53191489 0.78191489
 0.6953405  0.7311828  0.66487455 0.64695341]
m01: 0.668440
m02: 0.703901
m03: 0.755319
m04: 0.535461
m05: 0.739362
m06: 0.833333
m07: 0.753546
m08: 0.732270
m09: 0.684397
m10: 0.551418
m11: 0.531915
m12: 0.781915
m13: 0.695341
m14: 0.731183
m15: 0.664875
m16: 0.646953


In [34]:
# TP FN FP TN の行列を返す
metrics.confusion_matrix(d.target,predicted)

array([[ 67,  24,   4,   4,   1,   0],
       [ 35,  48,  11,   5,   0,   1],
       [  0,  15,  10,  52,   3,  20],
       [  0,   5,  26,  69,   0,   0],
       [  0,   0,   0,  38,  61,   1],
       [  0,   0,   0,   0,   0, 100]], dtype=int64)

In [35]:
from sklearn.model_selection import cross_val_score
# closs_val_scoreはclassifierとトレーニング用データ、テスト用データを指定してその制度を割りだせる
# estimator 推定者
cross_val_score(estimator,X,y=None,groups=None,scoring=None,cv=None,
               n_jobs=1,verbose=1,fit_params=None,pre_dispatch='2*n_jobs')
#scoring スコアのつけ方　accuracyやaverage_precision,f1など
#cv crossvalidation(交差検証)の略　データのsplit方法の指定

In [37]:
import numpy as np
a=np.array([1,2,3])
b=a.copy()
print(a,b)
a[0]=0
print(a,b)

[1 2 3] [1 2 3]
[0 2 3] [1 2 3]


In [38]:
import numpy as np
a=np.array([1,2,3])
b=a.view()
print(a,b)
a[0]=0
print(a,b)

[1 2 3] [1 2 3]
[0 2 3] [0 2 3]
