# Building a TensorFlow Version of the Readability Measure

## Import Data and take `READABILITY_SCORES` and `POS_DENSITY` features

In [1]:
# Make the project's data directory importable (path is relative to this notebook).
import sys
sys.path.append('../../data')
from corpus import load_corpus
# Load the 'weebit' corpus; the result is indexed by string keys below.
data = load_corpus('weebit')

# Train/test splits. Labels are read twice: once under 'y_*' and once under
# 'y_*_onehot' (presumably class labels and their one-hot encoding - confirm in `corpus`).
X_train = data['X_train']
y_train = data['y_train']
y_train_onehot = data['y_train_onehot']
X_test = data['X_test']
y_test = data['y_test']
y_test_onehot = data['y_test_onehot']

# Restrict the feature matrices to two feature groups: readability scores and POS density.
FEATURES_NAMES = data['FEATURES_NAMES']
features = FEATURES_NAMES['READABILITY_SCORES'] + FEATURES_NAMES['POS_DENSITY']

# Column selection; X_train / X_test are rebound to the reduced frames.
X_train = X_train[features]
X_test = X_test[features]

Using TensorFlow backend.


## Evaluation Functions

In [2]:
import numpy as np
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.model_selection import cross_val_score

def threshold_socre(y_true, y_pred):
    """Return the fraction of predictions at most one class away from the truth.

    Both inputs are converted to NumPy arrays first, so plain lists work and
    pandas objects are compared positionally (never aligned on their index).

    Parameters
    ----------
    y_true : array-like of numeric class labels.
    y_pred : array-like of numeric class labels, same length as ``y_true``.

    Returns
    -------
    float
        Share of positions where ``|y_true - y_pred| <= 1``.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.sum(np.abs(y_true - y_pred) <= 1) / len(y_true)

def calc_scores(y_true, y_pred_probs):
    """Score class-probability predictions via their expected class index.

    Each row of ``y_pred_probs`` is turned into a single predicted class by
    taking the probability-weighted mean of the class indices, rounding it and
    clipping it to the valid index range. The number of classes is taken from
    the width of ``y_pred_probs`` rather than being fixed.

    Parameters
    ----------
    y_true : array-like of class labels (compared against indices 0..n_classes-1).
    y_pred_probs : array-like of shape (n_samples, n_classes) with per-class
        probabilities.

    Returns
    -------
    dict
        ``{'accuracy': ..., 'threshold': ...}`` - exact-match accuracy and the
        within-one-class score from ``threshold_socre``.
    """
    y_pred_probs = np.asarray(y_pred_probs)
    n_classes = y_pred_probs.shape[1]

    # Expected class index under the predicted distribution.
    y_pred_avg = (y_pred_probs * np.arange(n_classes)).sum(axis=1)
    # Round to the nearest class and keep it inside [0, n_classes - 1].
    y_pred_avg_classes = y_pred_avg.round().clip(0, n_classes - 1).astype(int)
    return {'accuracy': accuracy_score(y_true, y_pred_avg_classes),
            'threshold': threshold_socre(y_true, y_pred_avg_classes)}

# sklearn scorer objects wrapping the two metrics, for use as `scoring=` arguments.
accuracy_scorer = make_scorer(accuracy_score)      # exact-match accuracy
threshold_scorer = make_scorer(threshold_socre)    # within-one-class score

def calc_scores_with_cv(model, X, y, cv=5):
    """Cross-validate ``model`` and return the mean accuracy and threshold scores.

    Both metrics are computed in a single cross-validation pass, so every fold
    is fitted once and the two scores come from the same fitted models.

    Parameters
    ----------
    model : estimator accepted by scikit-learn's cross-validation helpers.
    X : feature matrix.
    y : target labels.
    cv : cross-validation strategy passed through to scikit-learn (default 5).

    Returns
    -------
    dict
        ``{'accuracy': mean fold accuracy, 'threshold': mean fold threshold score}``.
    """
    # Local import: the notebook's import cell only brings in cross_val_score.
    from sklearn.model_selection import cross_validate

    cv_results = cross_validate(model, X, y, cv=cv,
                                scoring={'accuracy': accuracy_scorer,
                                         'threshold': threshold_scorer})
    # cross_validate reports each scorer under the key 'test_<name>'.
    return {'accuracy': np.mean(cv_results['test_accuracy']),
            'threshold': np.mean(cv_results['test_threshold'])}


## Build a Softmax Layer for the SVM Probabilities Calculation

In [3]:
from sklearn.svm import SVC

# RBF-kernel SVM exposing pairwise ('ovo') decision values; probability=True
# is what makes predict_proba available further down.
model_ovo = SVC(kernel='rbf', C=1, probability=True, decision_function_shape='ovo')

# Cross-validated scores on the training split (the helper fits the model itself).
print('CV:', calc_scores_with_cv(model_ovo, X_train, y_train))

# Fit on the full training split for the test-set evaluation below.
model_ovo.fit(X_train, y_train)

# NOTE(review): y_pred is not read again in this cell.
y_pred = model_ovo.predict(X_test)
y_pred_probs = model_ovo.predict_proba(X_test)
# 'Test' scores the rounded probability-weighted mean class (see calc_scores),
# while the last line reports the classifier's own score() for comparison.
print('Test:', calc_scores(y_test, y_pred_probs))
print('Test - as normal classifier accuracy:', model_ovo.score(X_test, y_test))

CV: {'threshold': 0.9068099410863921, 'accuracy': 0.6990836779451828}
Test: {'threshold': 0.929945054945055, 'accuracy': 0.6167582417582418}
Test - as normal classifier accuracy: 0.7060439560439561


In [4]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier


def create_prob_calculator_ovo(n_classes=5):
    """Build and compile a single-layer softmax model over one-vs-one SVM outputs.

    The model maps the pairwise decision values of a one-vs-one classifier
    (one value per unordered class pair) to ``n_classes`` class probabilities.

    Parameters
    ----------
    n_classes : int, default 5
        Number of target classes. The input width is derived from it as
        ``n_classes * (n_classes - 1) // 2`` (10 for the default of 5).

    Returns
    -------
    keras.models.Sequential
        Compiled model (rmsprop optimizer, categorical cross-entropy loss,
        accuracy metric).
    """
    # One decision value per unordered pair of classes.
    n_pairs = n_classes * (n_classes - 1) // 2

    prob_calculator_ovo = Sequential()
    prob_calculator_ovo.add(Dense(n_classes, input_dim=n_pairs, activation='softmax'))

    prob_calculator_ovo.compile(optimizer='rmsprop',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])

    return prob_calculator_ovo


# Wrap the Keras model builder in a scikit-learn style classifier so it can be
# passed to the cross-validation helper.
prob_calculator_ovo = KerasClassifier(build_fn=create_prob_calculator_ovo,
                                      epochs=10, 
                                      verbose=0)


# Inputs of the softmax layer: decision values of the already fitted SVM.
X_dist_ovo_train = model_ovo.decision_function(X_train)
X_dist_ovo_test = model_ovo.decision_function(X_test)

# NOTE(review): X_dist_ovo_train comes from an SVM that was fitted on all of
# X_train, so the folds used here are not independent of that fit; this CV
# score is presumably optimistic compared to the test score - confirm.
print('CV:', calc_scores_with_cv(prob_calculator_ovo, X_dist_ovo_train, y_train))


# Final fit uses the one-hot labels (the CV call above is given y_train).
prob_calculator_ovo.fit(X_dist_ovo_train, y_train_onehot)

# Rebinds y_pred_probs to the softmax layer's probabilities.
y_pred_probs = prob_calculator_ovo.predict_proba(X_dist_ovo_test)
print('Test:', calc_scores(y_test, y_pred_probs))

CV: {'threshold': 0.9360375226975648, 'accuracy': 0.779224112946632}
Test: {'threshold': 0.9285714285714286, 'accuracy': 0.6126373626373627}


## Extract Softmax Layer Weights

In [5]:
# The fitted Keras model has a single Dense layer, so get_weights() unpacks
# into exactly two arrays: its kernel and its bias.
W_prob_array, b_prob_array = prob_calculator_ovo.model.get_weights()

## NumPy Version (with Testing)

In [6]:
# it is easier to extract the support vectors and dual coefficients with OneVsOneClassifier
from sklearn.multiclass import OneVsOneClassifier

# Same SVC settings as model_ovo (without probability estimates), wrapped so
# that the fitted binary classifiers are reachable through `estimators_`.
model_ovo_cls = OneVsOneClassifier( SVC(kernel='rbf', C=1))

model_ovo_cls.fit(X_train, y_train)

# Sanity check against the accuracy printed for model_ovo earlier.
print('Test Accuracy:', model_ovo_cls.score(X_test, y_test)) # should be ~0.706

Test Accuracy: 0.7087912087912088


In [7]:
from numpy.linalg import norm

def rbf(x1, x2, gamma):
    """Row-wise RBF kernel: ``exp(-gamma * ||x1 - x2||**2)``.

    ``x1`` and ``x2`` are subtracted with NumPy broadcasting and the Euclidean
    norm is taken along axis 1, giving one kernel value per row.
    """
    distances = norm(x1 - x2, axis=1)
    exponent = -gamma * distances ** 2
    return np.exp(exponent)

def decision_funcion(single_model, x):
    """Evaluate a fitted binary RBF SVM on one sample using NumPy only.

    Computes ``dual_coef_ @ rbf(support_vectors_, x) + intercept_``.

    Parameters
    ----------
    single_model : fitted estimator exposing ``dual_coef_``, ``support_vectors_``,
        ``intercept_`` and ``_gamma`` (a private attribute, read as-is).
    x : one sample as a 1-D array-like (NumPy array, list or pandas Series).

    Returns
    -------
    numpy.ndarray
        The decision value, in the shape produced by
        ``dual_coef_ @ kernel_values + intercept_``.
    """
    # Convert first: adding an axis with x[None, :] directly on a pandas Series
    # is multi-dimensional Series indexing, which newer pandas versions reject.
    x_row = np.asarray(x)[None, :]
    kernel_values = rbf(single_model.support_vectors_, x_row, single_model._gamma)
    return (single_model.dual_coef_ @ kernel_values) + single_model.intercept_

def generate_X_kernel_transformed(model, X):
    """Compute the negated pairwise decision values of ``model`` for every row of ``X``.

    Parameters
    ----------
    model : object with an ``estimators_`` sequence of fitted binary models
        accepted by ``decision_funcion``.
    X : 2-D array-like (e.g. DataFrame or ndarray), one sample per row.

    Returns
    -------
    numpy.ndarray
        Array with one row per sample of ``X`` and one column per estimator,
        holding the negated decision values. The negation presumably matches
        the sign convention of the decision values the softmax layer was
        trained on - confirm.
    """
    # Iterate the rows of the argument itself (not a notebook-level frame), as
    # plain arrays so the result does not depend on pandas row objects.
    rows = np.asarray(X)
    kernel_transformed_X = [
        [decision_funcion(single_model, row)[0] for single_model in model.estimators_]
        for row in rows
    ]
    return -np.array(kernel_transformed_X)

# Self-test: for every pairwise estimator and every test row, the NumPy
# re-implementation must agree with the estimator's own decision_function
# (assert_almost_equal raises on the first mismatch).
for estimator in model_ovo_cls.estimators_:
    for _, x in X_test.iterrows():
        np.testing.assert_almost_equal(decision_funcion(estimator, x), estimator.decision_function([x]))

In [8]:
# Run the NumPy decision values through the trained softmax layer.
X_kernel_transformed_test = generate_X_kernel_transformed(model_ovo_cls, X_test)
y_pred_probs = prob_calculator_ovo.predict_proba(X_kernel_transformed_test)
# Probability-weighted mean class index; kept as a reference value for the
# TensorFlow assertions later in the notebook.
y_pred_avg = (y_pred_probs * np.arange(5)).sum(axis=1)
# Last expression: displayed as the cell output.
calc_scores(y_test, y_pred_probs)

{'accuracy': 0.6126373626373627, 'threshold': 0.9285714285714286}

## TensorFlow Version (with Testing)

In [9]:
import tensorflow as tf

In [10]:
# Dedicated graph so that only the readability-score ops end up in it.
g = tf.Graph()

# Building
with g.as_default():

    # Freeze each pairwise SVM's parameters into float32 graph constants.
    parameters = []
    for index, estimator in enumerate(model_ovo_cls.estimators_):
        name_format = 'estimator_' + str(index)
        parameters.append({
        # squeeze() drops the size-1 axes of dual_coef_ / intercept_.
        'dual_coef': tf.constant(estimator.dual_coef_.squeeze(), name=name_format+'_dual_coef', dtype=tf.float32),
        'gamma': tf.constant(estimator._gamma, name=name_format+'_gamma', dtype=tf.float32),
        'intercept': tf.constant(estimator.intercept_.squeeze(), name=name_format+'_intercept', dtype=tf.float32),
        'sv': tf.constant(estimator.support_vectors_, name=name_format+'_sv', dtype=tf.float32),
    })
    
    
    def tf_rbf(x1, x2, gamma):
        """TensorFlow counterpart of `rbf`: exp(-gamma * ||x1 - x2||**2), norm over axis 1."""
        return tf.exp(-gamma * tf.norm(x1-x2, axis=1)**2)

    
    def tf_decision_funcion(single_parameters, x):
        """TensorFlow counterpart of `decision_funcion` for one set of frozen SVM parameters."""
        with tf.name_scope('rbf'):
            # x[None, :] adds a leading axis so x broadcasts against the support vectors.
            rbf_dist = tf_rbf(single_parameters['sv'], x[None, :], single_parameters['gamma'])
        return (tf.tensordot(single_parameters['dual_coef'], rbf_dist, axes=1)
                + single_parameters['intercept'])
    
    
    with tf.name_scope('redability_score') as scope:

        # Softmax-layer weights from the Keras model; the kernel is transposed
        # so that tensordot below contracts it with the vector of SVM values.
        W_prob = tf.constant(W_prob_array.T, tf.float32, name='W_prob')
        b_prob = tf.constant(b_prob_array, tf.float32, name='b_prob')

        # Single sample as a 1-D vector of length 27 - presumably the number of
        # selected feature columns; confirm against X_train.
        x = tf.placeholder(tf.float32, 27, name='input_features')

        # One negated decision value per pairwise SVM (same sign flip as in
        # generate_X_kernel_transformed).
        svm_vals = []
        for index, single_parameters in enumerate(parameters):
            with tf.name_scope('decision_funcion_' + str(index)) as scope:
                svm_vals.append(-tf_decision_funcion(single_parameters, x))

        svm_vals_tensor = tf.convert_to_tensor(svm_vals, name='svm_vals')

        with tf.name_scope('softmax_logits') as scope:
            logits = tf.tensordot(W_prob, svm_vals_tensor, 1) + b_prob
            probs = tf.nn.softmax(logits)

        with tf.name_scope('mean') as scope:
            # Probability-weighted mean of the class indices 0..4.
            readbility_score = tf.reduce_sum(tf.multiply(probs, np.arange(5)))

In [11]:
with g.as_default():

    sess = tf.Session()

    # Initializing
    init = tf.global_variables_initializer()
    sess.run(init)


    # Testing
    np.testing.assert_allclose(sess.run([svm_vals], feed_dict={x: X_test.iloc[200]})[0],
                                      X_kernel_transformed_test[200], rtol=1e-4)

    np.testing.assert_allclose(sess.run([probs], feed_dict={x: X_test.iloc[200]})[0],
                                      y_pred_probs[200], rtol=1e-4)

    np.testing.assert_allclose(sess.run([readbility_score], feed_dict={x: X_test.iloc[200]})[0],
                                      y_pred_avg[200], rtol=1e-4)


    # Saving
    !rm -rf readability_score_tensorflow

    LOGDIR='readability_score_tensorflow'
    train_writer = tf.summary.FileWriter(LOGDIR)
    train_writer.add_graph(g)

    tf.saved_model.simple_save(sess,
                LOGDIR + '/simple',
                inputs={'x': x},
                outputs={'readbility_score': readbility_score})

    !cp -rf ./readability_score_tensorflow/* /cache/tensorboard-logdir/

INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: b'readability_score_tensorflow/simple/saved_model.pb'
