In [1]:
# p.271 11.7 CRFを用いたアルファベット列の識別
import string
import numpy as np
from pystruct.datasets import load_letters
from pystruct.models import ChainCRF
from pystruct.learners import FrankWolfeSSVM

In [2]:
class CRFModel(object):
    """Small convenience wrapper around a pystruct ``ChainCRF`` model trained
    with the ``FrankWolfeSSVM`` learner.

    The wrapped learner is stored on ``self.clf``; the methods below simply
    delegate to it.
    """

    def __init__(self, c_val=1.0, max_iter=50):
        """Create the underlying learner.

        Args:
            c_val: Value forwarded to ``FrankWolfeSSVM`` as ``C``.
            max_iter: Value forwarded to ``FrankWolfeSSVM`` as ``max_iter``.
                Defaults to 50, the value that was previously hard-coded.
        """
        self.clf = FrankWolfeSSVM(model=ChainCRF(),
                                  C=c_val, max_iter=max_iter)

    @staticmethod
    def _as_object_array(sequences):
        """Pack ``sequences`` into a 1-D object array, one element per item.

        ``np.array(list_of_variable_length_sequences)`` is deprecated since
        NumPy 1.19 and raises ``ValueError`` from NumPy 1.24 on, so the
        container is built explicitly and filled element by element.
        """
        items = list(sequences)
        packed = np.empty(len(items), dtype=object)
        for position, item in enumerate(items):
            packed[position] = item
        return packed

    def load_data(self):
        """Load the dataset returned by ``load_letters()``.

        Returns:
            tuple: ``(X, y, folds)`` where ``X`` and ``y`` are 1-D object
            arrays built from the ``'data'`` and ``'labels'`` entries (one
            element per sequence) and ``folds`` is the ``'folds'`` entry as a
            NumPy array, so that ``folds == 1`` yields a boolean mask.
        """
        alphabets = load_letters()
        X = self._as_object_array(alphabets['data'])
        y = self._as_object_array(alphabets['labels'])
        folds = np.asarray(alphabets['folds'])
        return X, y, folds

    def train(self, X_train, y_train):
        """Fit the wrapped learner on ``X_train`` / ``y_train``."""
        self.clf.fit(X_train, y_train)

    def evaluate(self, X_test, y_test):
        """Return the wrapped learner's ``score`` for ``X_test`` / ``y_test``."""
        return self.clf.score(X_test, y_test)

    def classify(self, input_data):
        """Predict labels for ``input_data`` and return the first prediction.

        ``input_data`` is handed to ``predict`` unchanged, so callers pass a
        list containing the single sequence they want labelled.
        """
        return self.clf.predict(input_data)[0]

In [3]:
def convert_to_letters(indices):
    """Translate integer label indices into a space-separated letter string.

    Index 0 maps to 'a', 1 to 'b', and so on through the lowercase ASCII
    alphabet.

    Args:
        indices: Integer indices (list or array) into the lowercase alphabet.

    Returns:
        str: The selected letters joined by single spaces.
    """
    # Lookup table holding every lowercase ASCII letter in order.
    letter_table = np.array(list(string.ascii_lowercase))

    # Select the letter for each index and join them with single spaces.
    return ' '.join(letter_table.take(indices))

In [4]:
# Build the CRF wrapper with c_val set to 1.0.
crf = CRFModel(c_val=1.0)

In [5]:
# Load the sequences, their labels and the fold assignment of each sequence.
X, y, folds = crf.load_data()

# Fold 1 is used for training; every other fold is held out for testing.
train_mask, test_mask = folds == 1, folds != 1
X_train, y_train = X[train_mask], y[train_mask]
X_test, y_test = X[test_mask], y[test_mask]

print('Training the CRF model...')
crf.train(X_train, y_train)

# Report the score on the held-out folds as a percentage with two decimals.
score = crf.evaluate(X_test, y_test)
score_percent = round(score*100, 2)
print('Accuracy score =', str(score_percent) + '%')

Training the CRF model...
Accuracy score = 77.96%


In [6]:
# Spot-check the model on every 200th test sequence, starting at index 3000.
indices = range(3000, len(y_test), 200)
for index in indices:
    true_labels, features = y_test[index], X_test[index]
    print("\nOriginal =", convert_to_letters(true_labels))
    # classify() takes a list of sequences and returns the first prediction.
    predicted = crf.classify([features])
    print("Predicted =", convert_to_letters(predicted))


Original = r o j e c t i o n s
Predicted = r o j e c t i o n g

Original = u f f
Predicted = u f r

Original = k i i n g
Predicted = k i i n g

Original = e c o m p r e s s
Predicted = e c o m e r t i g

Original = u z z
Predicted = v e x

Original = p o i l i n g
Predicted = a n i t i n g

Original = u i z z i c a l l y
Predicted = u z z z i c a l l y

Original = o m p a r a t i v e l y
Predicted = o m p a r a t i v e l y

Original = a b u l o u s l y
Predicted = a b u l o u a l y

Original = o r m a l i z a t i o n
Predicted = o r m a l i s a t i o n

Original = a k e
Predicted = a k a

Original = a f e t e r i a
Predicted = a t e t e r i a

Original = o b b l e
Predicted = o b b l e

Original = h a d o w
Predicted = h a b o w

Original = n d u s t r i a l i z e d
Predicted = n d u s q r i a l y l e d

Original = y m p a t h e t i c a l l y
Predicted = y m p n s h e t i c a l l y
