this script is an attempt to classify good and bad translations from RusLTC on bilingual vectorised representations of lempos (no stopwords) in sources and targets

supposedly in this set up the algorithm captures accuracy of the translation rather than (morphosyntactic) fluency

Ideas to try out after the failure of the initial naive approach that also lacked in theoretical justification (concatenated ST and TT, represented with respective bilingual embeddings and padded to the max text length treated as instances)

1. go for decontextualised sentences, not texts: get sentences with annotated content errors from bad translations and error-free sentences from good translations for classes
2. represent texts as a sequence of vectorised sentences: assign extra weights on badly translated sentences in bad translations (based on error stats normalised per word)
3. two inputs to the LSTM: pass aligned sources and targets separately, measure similarity between the outputs of the two biLSTMs and use these similarities as input to the Dense Output layer (542 similarities are weighted to get 542 prediction outcomes)

In [24]:
## this is supposed to ensure reproducible results at each run of the script
from numpy.random import seed
seed(42)
from tensorflow import set_random_seed
set_random_seed(42)

import io ### https://docs.python.org/3/library/io.html
import sys,os
import pandas as pd
import time
import random

import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import seaborn as sns

from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_validate
import gensim
from collections import OrderedDict

# TensorFlow and tf.keras
import logging

import tensorflow as tf
# from tensorflow import keras

from keras.callbacks import TensorBoard, EarlyStopping
from keras import backend, preprocessing
from keras.layers import Dense, Input, LSTM, Bidirectional, Lambda, concatenate
from keras.models import Model
from keras.models import load_model as load_keras_model
from keras.layers import Embedding
from keras.utils.vis_utils import plot_model
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
import pydot
from keras.utils import to_categorical
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support

print(tf.__version__)

## to avoid reloading kernel after changes to imported modules
# import importlib
# import HTQ_functions as mm
# importlib.reload(mm)

## import the functions from the helper scripts
# from acc_functions import 


%matplotlib inline
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

1.12.0


In [2]:
def preparedata(directory):
    ourdic = []
    print('Collecting data from the files...')
    for subdir in os.listdir(directory):
        files = [f for f in os.listdir(os.path.join(directory, subdir)) if f.endswith('.conllu')]
        for f in files:
            rowdic = {'doc': f.strip(), 'group': subdir}
            doc = open(os.path.join(directory, subdir, f))
            text = doc.read().strip() #.replace('\n', ' ')
            doc.close()
            rowdic['text'] = text
            ourdic.append(rowdic)
    ourdic = pd.DataFrame(ourdic)
    return ourdic

In [3]:
## preprosessing from https://github.com/akutuzov/webvectors/blob/master/preprocessing/rus_preprocessing_udpipe.py
datadir = '/home/masha/accuracy/data/lempos/'  # The path to where subdirectories whith text files are
df = preparedata(datadir)

Collecting data from the files...


**=====transform the df to align each target with its source: doc, src, tgt, label=====**

In [4]:
targets_df = df[~(df['group']=='source')]
print(targets_df.shape)
sources_df = df[df['group']=='source']
sources_df = sources_df.sort_values('doc')
sources_df.head()

(542, 3)


Unnamed: 0,doc,group,text
608,EN_1_101.conllu,source,pussy_PROPN riot_PROPN putin_ADJ v_NOUN punk_N...
580,EN_1_102.conllu,source,anti_PROPN drug_PROPN agency_PROPN seeks_PROPN...
576,EN_1_114.conllu,source,product_NOUN placement_NOUN on_ADV broadway_VE...
557,EN_1_116.conllu,source,d_PROPN 'oh_PROPN german_PROPN broadcaster_PRO...
615,EN_1_121.conllu,source,how_ADV be_AUX you_PRON social_PROPN security_...


In [5]:
aligned = targets_df.copy()
aligned.insert(loc=2, column='source', value=None)
aligned.columns = ['doc', 'group', 'source', 'target']

aligned.head()

Unnamed: 0,doc,group,source,target
0,RU_1_244_35.conllu,bad,,весь_DET правда_NOUN о_ADP гмый_PROPN сторонни...
1,RU_1_271_4.conllu,bad,,не_PART посещать_VERB 'д_NOUN странный_ADJ реб...
2,RU_1_150_2.conllu,bad,,последний_ADJ книга_NOUN роджер::пенроуз_PROPN...
3,RU_1_244_1.conllu,bad,,весь_DET правда_NOUN о_ADP генетически_ADV мод...
4,RU_1_274_41.conllu,bad,,газета_PROPN нью-йорк::таймз_PROPN xx_NUM октя...


In [6]:
aligned['temp'] = aligned['doc'].replace(to_replace='RU',value=r'EN', regex=True)
aligned['temp'] = aligned['temp'].replace(to_replace='_\d+\.conllu',value=r'.conllu', regex=True)
aligned = aligned.sort_values('temp')
aligned.head()

Unnamed: 0,doc,group,source,target,temp
254,RU_1_101_9.conllu,good,,pussy_PROPN riot_PROPN путин_PROPN против_ADP ...,EN_1_101.conllu
62,RU_1_101_3.conllu,bad,,пусси::райот_PROPN путин_PROPN против_ADP панк...,EN_1_101.conllu
11,RU_1_101_8.conllu,bad,,пусси::райот_PROPN судебный_ADJ разбирательств...,EN_1_101.conllu
471,RU_1_102_6.conllu,good,,служба_NOUN по_ADP борьба_NOUN с_ADP наркотик_...,EN_1_102.conllu
119,RU_1_102_15.conllu,bad,,управление_PROPN по_ADP борьба_NOUN с_ADP нарк...,EN_1_102.conllu


In [7]:
sfns = aligned['temp'].tolist()
for i in sfns:
## two ways of putting the value from one column (text) in one df on the value in the other column (temp filename) into the specified column (source) of the other df, following the list respecting the order of the cells 
#     aligned.loc[aligned.temp == i, 'source'] = sources_df[sources_df['doc']=='EN_1_101.conllu']['text'].values[0]
    aligned.loc[aligned.temp == i, 'source'] = sources_df.loc[sources_df['doc'] == i, 'text'].item()
aligned.head()

Unnamed: 0,doc,group,source,target,temp
254,RU_1_101_9.conllu,good,pussy_PROPN riot_PROPN putin_ADJ v_NOUN punk_N...,pussy_PROPN riot_PROPN путин_PROPN против_ADP ...,EN_1_101.conllu
62,RU_1_101_3.conllu,bad,pussy_PROPN riot_PROPN putin_ADJ v_NOUN punk_N...,пусси::райот_PROPN путин_PROPN против_ADP панк...,EN_1_101.conllu
11,RU_1_101_8.conllu,bad,pussy_PROPN riot_PROPN putin_ADJ v_NOUN punk_N...,пусси::райот_PROPN судебный_ADJ разбирательств...,EN_1_101.conllu
471,RU_1_102_6.conllu,good,anti_PROPN drug_PROPN agency_PROPN seeks_PROPN...,служба_NOUN по_ADP борьба_NOUN с_ADP наркотик_...,EN_1_102.conllu
119,RU_1_102_15.conllu,bad,anti_PROPN drug_PROPN agency_PROPN seeks_PROPN...,управление_PROPN по_ADP борьба_NOUN с_ADP нарк...,EN_1_102.conllu


In [8]:
ru_xtrain0 = aligned['target']
en_xtrain0 = aligned['source']

yy_train = aligned['group'].tolist()

classes = sorted(list(set(aligned['group'].tolist())))
num_classes = len(classes)
logger.info('Number of classes: %d' % num_classes)
print('===========================')
print('Distribution of classes in the dataset:')
print(aligned.groupby('group').count())
print('===========================')

2019-10-11 01:00:31,948 : INFO : Number of classes: 2


Distribution of classes in the dataset:
       doc  source  target  temp
group                           
bad    213     213     213   213
good   329     329     329   329


In [9]:
## Have a look at a random document 
doc = random.choice(range(20))
# print(type(ru_xtrain0[doc]))
print('Random target doc:', ru_xtrain0[doc].split()[:20])
## or at the first text to avoid an extra variable
print('\nFirst target doc:', ru_xtrain0[0].split()[:20])

Random target doc: ['ученый_PROPN', 'против_ADP', 'нобелевский_ADJ', 'премия_NOUN', 'нобелевский_ADJ', 'премия_NOUN', 'по_ADP', 'психология_NOUN', 'медицина_NOUN', 'физик_NOUN', 'или_CCONJ', 'химия_NOUN', 'это_PRON', 'самый_ADJ', 'авторитетный_ADJ', 'и_CCONJ', 'почетный_ADJ', 'награда_NOUN', 'в_ADP', 'область_NOUN']

First target doc: ['весь_DET', 'правда_NOUN', 'о_ADP', 'гмый_PROPN', 'сторонник_NOUN', 'генномодифицировать_ADJ', 'зерно_NOUN', 'настаивать_VERB', 'что_SCONJ', 'это_PRON', 'единственный_ADJ', 'способ_NOUN', 'справляться_VERB', 'с_ADP', 'то_PRON', 'что_SCONJ', 'прокармливать_VERB', 'густонаселенный_ADJ', 'мир_NOUN', 'критика_NOUN']


**=====a neural model with an biLSTM layer=======** 

that is going to look at every word in ST+TT represented by corresponding bilingual embeddings of size 300

In [10]:
### concatenate embeddings externally and use the new embeddings file in this function (mind the necessity to update the first line and delete the first line of the appended file)
src_path = '/home/masha/MUSE/rig1_res/vectors-en.vec'
tgt_path = '/home/masha/MUSE/rig1_res/vectors-ru.vec'
def load_embeddings(embeddings_file):
    # Определяем формат модели по её расширению:
    if embeddings_file.endswith('.bin.gz') or embeddings_file.endswith('.bin'):  # Бинарный формат word2vec
        emb_model = gensim.models.KeyedVectors.load_word2vec_format(
            embeddings_file, binary=True, unicode_errors='replace')
    elif embeddings_file.endswith('.txt.gz') or embeddings_file.endswith('.txt') \
            or embeddings_file.endswith('.vec.gz') or embeddings_file.endswith('.vec'):  # Текстовый формат word2vec
        emb_model = gensim.models.KeyedVectors.load_word2vec_format(
            embeddings_file, binary=False, unicode_errors='ignore') ##unicode_errors='replace'
    else:  # Нативный формат Gensim?
        emb_model = gensim.models.KeyedVectors.load(embeddings_file)
    emb_model.init_sims(replace=True)  # normalise vectors just in case
    return emb_model

en_embeddings = load_embeddings(src_path)
ru_embeddings = load_embeddings(tgt_path)

logger.info('Loading word vectors')

## an alternative (and leaner) way to build model vocabulary
# ext_vocab = external_embeddings.index2entity
# ext_word_index = {}
#     for nr, word in enumerate(ext_vocab):
#         ext_word_index[word] = nr

en_voc = en_embeddings.vocab
ru_voc = ru_embeddings.vocab
print(type(en_voc))
print(len(en_voc))
print(list(en_voc.keys())[0:5])
print(list(ru_voc.keys())[0:5])

2019-10-11 01:00:32,081 : INFO : loading projection weights from /home/masha/MUSE/rig1_res/vectors-en.vec
2019-10-11 01:01:27,606 : INFO : loaded (296630, 300) matrix from /home/masha/MUSE/rig1_res/vectors-en.vec
2019-10-11 01:01:27,607 : INFO : precomputing L2-norms of word weight vectors
2019-10-11 01:01:29,588 : INFO : loading projection weights from /home/masha/MUSE/rig1_res/vectors-ru.vec
2019-10-11 01:02:17,243 : INFO : loaded (248978, 300) matrix from /home/masha/MUSE/rig1_res/vectors-ru.vec
2019-10-11 01:02:17,244 : INFO : precomputing L2-norms of word weight vectors
2019-10-11 01:02:18,960 : INFO : Loading word vectors


<class 'dict'>
296630
['Hou_PROPN', 'Aeronautica_PROPN', 'imagination_NOUN', 'Vélez::Sársfield_PROPN', 'shotcrete_NOUN']
['андрей::голубев_PROPN', 'заслуживать_VERB', 'sag_PROPN', 'женькин_ADJ', 'huey_PROPN']


In [11]:
# Convert class labels into indices
y_train0 = [classes.index(i) for i in yy_train]
print(y_train0[:5])

[1, 0, 0, 1, 0]


In [12]:
## Convert indices to categorical values to use with (binary_ or categorical_)crossentropy loss
y_train = to_categorical(y_train0, num_classes)
print('We have a binary classification (see number of columns):', y_train.shape)

We have a binary classification (see number of columns): (542, 2)


In [13]:
def get_index(word, vocabulory=None):
    ## get word index in the models vocabulary
    if word in vocabulory:
        return vocabulory[word].index
    ## set index to OOV items like putin_ADJ to 0
    else:
        return 0

In [14]:
print('First source text:', aligned['source'][0].split()[:20])
print('\nThe first of the aligned translations:', aligned['target'][0].split()[:20])

First source text: ['the_DET', 'truth_NOUN', 'about_ADP', 'genetically_ADV', 'modify_ADJ', 'food_NOUN', 'proponent_NOUN', 'of_ADP', 'genetically_ADV', 'modify_VERB', 'crops_NOUN', 'say_VERB', 'the_DET', 'technology_NOUN', 'be_AUX', 'the_DET', 'only_ADJ', 'way_NOUN', 'to_PART', 'feed_VERB']

The first of the aligned translations: ['весь_DET', 'правда_NOUN', 'о_ADP', 'гмый_PROPN', 'сторонник_NOUN', 'генномодифицировать_ADJ', 'зерно_NOUN', 'настаивать_VERB', 'что_SCONJ', 'это_PRON', 'единственный_ADJ', 'способ_NOUN', 'справляться_VERB', 'с_ADP', 'то_PRON', 'что_SCONJ', 'прокармливать_VERB', 'густонаселенный_ADJ', 'мир_NOUN', 'критика_NOUN']


In [15]:
##convert them to embedding indices in a joint biling embedding file
entrain0 = [[get_index(w,en_voc) for w in text.split()] for text in aligned['source']]
rutrain0 = [[get_index(w,ru_voc) for w in text.split()] for text in aligned['target']]

## here we are getting rid of OOV items:
entrain = [[id for id in text if id != 0] for text in entrain0]
rutrain = [[id for id in text if id != 0] for text in rutrain0]

print('First source text:', entrain0[0][:20])
print('First source text with no OOV (tot num of enOOV=123146! ruOOV=71558):', entrain[0][:20])

First source text: [0, 0, 0, 0, 3814, 0, 87, 0, 948, 0, 0, 0, 0, 163, 0, 5402, 9, 0, 807, 69]
First source text with no OOV (tot num of enOOV=123146! ruOOV=71558): [3814, 87, 948, 163, 5402, 9, 807, 69, 1215, 1545, 12455, 96613, 82718, 153, 90, 278, 1455, 4675, 402, 31]


In [16]:
logger.info('Average SOURCE text length: %s tokens'
                % "{0:.1f}".format(np.mean(list(map(len, entrain))), 1)) #aligned['source'].str.split()
logger.info('Average TARGET text length: %s tokens'
                % "{0:.1f}".format(np.mean(list(map(len, rutrain))), 1))
logger.info('Maximum SOURCE text length: %s tokens'
                % "{0:.1f}".format(np.max(list(map(len, entrain))), 1))
logger.info('Maximum TARGET text length: %s tokens'
                % "{0:.1f}".format(np.max(list(map(len, rutrain))), 1))

2019-10-11 01:02:19,230 : INFO : Average SOURCE text length: 198.5 tokens
2019-10-11 01:02:19,232 : INFO : Average TARGET text length: 257.5 tokens
2019-10-11 01:02:19,233 : INFO : Maximum SOURCE text length: 403.0 tokens
2019-10-11 01:02:19,234 : INFO : Maximum TARGET text length: 685.0 tokens


In [17]:
# Padding: make all texts lengths equal to absolute max in the data by filling in zeros
max_seq_length = 685  
vectorized_en = preprocessing.sequence.pad_sequences(
    entrain, maxlen=max_seq_length, truncating='post', padding='post')
vectorized_ru = preprocessing.sequence.pad_sequences(
    rutrain, maxlen=max_seq_length, truncating='post', padding='post')
print('The translation turned into %s ...,\n its length is %s' % (vectorized_ru[0][:5], len(vectorized_ru[0]))) #, 
print('Data parameters:', vectorized_ru.shape)

The translation turned into [38362 26358  5961 18779  1532] ...,
 its length is 685
Data parameters: (542, 685)


In [18]:
print(type(vectorized_en))
print(vectorized_en[:5])
print(len(vectorized_en))
print(len(vectorized_ru))

<class 'numpy.ndarray'>
[[3814   87  948 ...    0    0    0]
 [3814   87  948 ...    0    0    0]
 [3814   87  948 ...    0    0    0]
 [ 885 1089  698 ...    0    0    0]
 [ 885 1089  698 ...    0    0    0]]
542
542


**====BUILDING the model====**

In [19]:
## see https://github.com/keras-team/keras/issues/5541
## https://gist.github.com/GKarmakar/3aa0c643ddb0688a9bfc44b43b84edd8

## K.exp = math_ops.exp(x)???
## https://www.tensorflow.org/api_docs/python/tf/math/exp: Computes exponential of x element-wise: y= e^x
## about e=2.7182818 (Euler's number) https://www.mathsisfun.com/numbers/e-eulers-number.html

import keras.backend as K
## a negative exponent can be expressed as its positive reciprocal: 5^-3 = 1/5^3, so the result of my function is either pos or neg, but always smaller than -1:1
def exponent_neg_manhattan_distance(source, target):
    """ Helper function for the similarity estimate of the LSTMs outputs"""
    return K.exp(-K.sum(K.abs(source - target), axis=1, keepdims=True))


def exponent_neg_euclidean_distance(source, target):
    """ Helper function for the similarity estimate of the LSTMs outputs"""
    ## K.exp() is a constant e to the power of -x, where x is the distance; the purpose of it is to put the result into the range of -1;1
    return K.exp(-K.sqrt(K.sum(K.square(source - target), axis=1, keepdims=True)))

def exponent_neg_cosine_distance(source, target):
    """ Helper function for the similarity estimate of the LSTMs outputs"""

    return K.exp(K.dot(source, target) / (K.sqrt(K.dot(source, source)) * K.sqrt(K.dot(target,target))), axis=-1, keepdims=True)

In [25]:
def createmodel(loss='binary_crossentropy'): # 'binary_crossentropy', 'categorical_crossentropy'
    
    ## encoder
    en_input = Input(shape=(max_seq_length,), name='ST_wsequences')
    ## decoder
    ru_input = Input(shape=(max_seq_length,), name='TT_wsequences')
    
    ## represent words in texts with their embeddings (what happens to OOVwords with index=0)
    en_vectors = en_embeddings.vectors
    ru_vectors = ru_embeddings.vectors
    print(ru_vectors.shape)
    ## Input shape: 2D tensor with shape: (batch_size, sequence_length)
    ## Output shape: 3D tensor with shape: (batch_size, sequence_length, output_dim)
    en_emb = Embedding(input_dim=en_vectors.shape[0], output_dim=en_vectors.shape[1], weights=[en_vectors],
                    input_length=max_seq_length, trainable=False, name='en_embeddings')(en_input)
    ru_emb = Embedding(input_dim=ru_vectors.shape[0], output_dim=ru_vectors.shape[1], weights=[ru_vectors],
                input_length=max_seq_length, trainable=False, name='ru_embeddings')(ru_input)
    # units is a hyperparameter; the output of the lstm layer
    # this is where the model would process the input sequence in both directions
    
    # To share a layer across different inputs, simply instantiate the layer once, then call it on as many inputs as you want
    ## # When we reuse the same layer instance, the weights of the layer are also being reused
    ## (it is effectively *the same* layer)
    shared_lstm = LSTM(units=128, name="biLSTM") ##Bidirectional(
    
    en_x = shared_lstm(en_emb)  
    ru_x = shared_lstm(ru_emb)
    
    # Calculates the pair-wise distance between ST and TT; the output should be length 542
#     distance = Lambda(function=lambda x: exponent_neg_manhattan_distance(x[0], x[1]),
#                                output_shape=lambda x: (x[0][0], 1))([en_x, ru_x])
#     distance = Lambda(function=lambda x: exponent_neg_euclidean_distance(x[0], x[1]),
#                                output_shape=lambda x: (x[0][0], 1))([en_x, ru_x])
    
#     print(type(distance))
#     print(distance.shape)
    
    ## instead of similarity, use concatenation of the text vectors output by LSTMs:
    merged = concatenate([en_x, ru_x], axis=-1)

    output = Dense(num_classes, activation='softmax', name='Output')(merged) # distance # this is the output layer which predicts the labels

    ## compile the model
    model = Model(inputs=[en_input, ru_input], outputs=output)
    model.compile(loss=loss, optimizer='adam', metrics=['accuracy'])

    # print the model architecture
    print(model.summary())

    return model

In [26]:
model0 = createmodel()

(248978, 300)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
ST_wsequences (InputLayer)      (None, 685)          0                                            
__________________________________________________________________________________________________
TT_wsequences (InputLayer)      (None, 685)          0                                            
__________________________________________________________________________________________________
en_embeddings (Embedding)       (None, 685, 300)     88989000    ST_wsequences[0][0]              
__________________________________________________________________________________________________
ru_embeddings (Embedding)       (None, 685, 300)     74693400    TT_wsequences[0][0]              
_______________________________________________________________________________________________

In [None]:
# Train the model on our data
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

## compute the class weights to pass to the scoring function which generates the update weights (values)
class_weights = class_weight.compute_class_weight('balanced',
                                                 np.unique(y_train0),
                                                 y_train0)
## convert class_weights to dict
class_weight_dict = dict(enumerate(class_weights))
print('class weights calculated from the distribution:',class_weight_dict)

earlystopping = EarlyStopping(monitor='val_acc', min_delta=0.0001, patience=5, verbose=1, mode='max')
# val_split = 0.1 

start = time.time()
## batch_size: number of samples per gradient update.
## verbosity: 0 = silent, 1 = progress bar, 2 = one line per epoch.
## validation_split: evaluate the loss and any model metrics on this data at the end of each epoch
## shuffle: whether to shuffle the training data before each epoch
## class_weight: "pay more attention" to samples from an under-represented class; https://datascience.stackexchange.com/questions/13490/how-to-set-class-weights-for-imbalanced-classes-in-keras
## {0: 2., 1: 1.} "treat every instance of class 0 (bad) as 2 instances of class 0" > the loss becomes a weighted average

## make sure that the testset held out for testing each epoch and evaluating the model has a proportional distribution of each class samples
enX_train, enX_test, Y_train, Y_test = train_test_split(vectorized_en, y_train,
                                                    stratify=y_train, 
                                                    test_size=0.1, random_state=42)
print(Y_test[:5])
ruX_train, ruX_test, Y_train0, Y_test0 = train_test_split(vectorized_ru, y_train,
                                                    stratify=y_train, 
                                                    test_size=0.1, random_state=42)
# print('test that Ys are the same': Y_test, Y_test)
## iterating on the data in batches of 4 samples
history = model0.fit([enX_train,ruX_train], Y_train, epochs=10, verbose=1, validation_data=([enX_test,ruX_test],Y_test),
         batch_size=4, shuffle=False, class_weight=class_weight_dict,callbacks=[earlystopping]) ## class_weight=class_weight_dict, class_weight=class_weight_dict, validation_split=val_split

end = time.time()
training_time = int(end - start)
logger.info('Trained in %s minutes' % str(round(training_time/60)))

class weights calculated from the distribution: {0: 1.272300469483568, 1: 0.8237082066869301}
[[0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]]
Train on 487 samples, validate on 55 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
104/487 [=====>........................] - ETA: 27s - loss: 0.6746 - acc: 0.6635

In [32]:
# Evaluate the model:
print(model0.metrics_names)
score = model0.evaluate([enX_test,ruX_test],Y_test, verbose=1)
logger.info('Loss function value: %s' % "{0:.4f}".format(score[0]))
logger.info('Accuracy on the test set: %s' % "{0:.4f}".format(score[1]))

['loss', 'acc']


2019-10-11 01:27:12,387 : INFO : Loss function value: 0.6730
2019-10-11 01:27:12,388 : INFO : Accuracy on the test set: 0.6000


In [33]:
# Используем функцию из sklearn чтобы посчитать F1 по каждому классу:
predictions = model0.predict([enX_test,ruX_test])
predictions = np.around(predictions)  # проецируем предсказания модели в бинарный диапазон {0, 1}

In [34]:
# Конвертируем предсказания обратно из чисел в текстовые метки классов
y_test_real = [classes[np.argmax(pred)] for pred in Y_test]
predictions = [classes[np.argmax(pred)] for pred in predictions]

logger.info('Classification results on the test set:')
print(classification_report(y_test_real, predictions))
print("Confusion matrix\n", confusion_matrix(y_test_real, predictions))

fscore = precision_recall_fscore_support(y_test_real, predictions, average='macro')[2]
logger.info('Macro-F1 on the test set: %s' % "{0:.4f}".format(fscore))

2019-10-11 01:27:14,665 : INFO : Classification results on the test set:
  'precision', 'predicted', average, warn_for)
2019-10-11 01:27:14,676 : INFO : Macro-F1 on the test set: 0.3750


              precision    recall  f1-score   support

         bad       0.00      0.00      0.00        22
        good       0.60      1.00      0.75        33

   micro avg       0.60      0.60      0.60        55
   macro avg       0.30      0.50      0.37        55
weighted avg       0.36      0.60      0.45        55

Confusion matrix
 [[ 0 22]
 [ 0 33]]
