In [None]:
# Optional one-time setup: uncomment to install tensorflow_hub into this kernel's environment.
# import sys
# !{sys.executable} -m pip install tensorflow_hub

In [None]:
import os

import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
from sklearn import preprocessing
import keras
import numpy as np

In [None]:
# Load the training split: a tab-separated, UTF-8 file whose first column
# is used as the DataFrame index.
train_data = pd.read_csv(
    'train_data.csv',
    index_col=0,
    encoding='utf-8',
    sep='\t',
)

In [None]:
# Model inputs come from the 'textbody' column, targets from 'hyperpartisan';
# both are taken as plain arrays.
x_train, y_train = (
    train_data[column].values for column in ('textbody', 'hyperpartisan')
)

In [None]:
# Fit the label encoder on the training labels (fit() returns the encoder itself).
le = preprocessing.LabelEncoder().fit(y_train)
le  # echo the fitted encoder as the cell output

In [None]:
def encode(le, labels):
    """One-hot encode ``labels`` using the fitted label encoder ``le``.

    Args:
        le: fitted encoder exposing ``transform`` and ``classes_``.
        labels: sequence of raw label values known to ``le``.

    Returns:
        One-hot array with one row per label and one column per class
        known to ``le``.
    """
    enc = le.transform(labels)
    # Pin the width to the encoder's class count. Left to infer it,
    # to_categorical uses max(enc) + 1, so a label set that happens to miss
    # the highest class would come out narrower than the model's output.
    return keras.utils.to_categorical(enc, num_classes=len(le.classes_))

def decode(le, one_hot):
    """Map one-hot (or per-class score) rows back to original label values.

    Args:
        le: encoder exposing ``inverse_transform``.
        one_hot: 2-D array-like, one row per sample and one column per class.

    Returns:
        Whatever ``le.inverse_transform`` yields for the index of the
        largest entry in each row.
    """
    class_ids = np.asarray(one_hot).argmax(axis=1)
    labels = le.inverse_transform(class_ids)
    return labels

In [None]:
# One-hot encode the training targets with the fitted label encoder.
y_train_encoded = encode(le=le, labels=y_train)

In [None]:
from keras.layers import Input, Lambda, Dense, Dropout
from keras.models import Model
import keras.backend as K
from keras.regularizers import l2

In [None]:
# get the universal sentence encoder
# The module is loaded from its TF Hub URL; `embed` is the callable that
# UniversalEmbedding applies to the string inputs.
url = "https://tfhub.dev/google/universal-sentence-encoder-large/3"
embed = hub.Module(url)

In [None]:
def UniversalEmbedding(x):
    """Embed a batch of strings with the hub module held in ``embed``.

    Args:
        x: string tensor from the model's ``Input(shape=(1,))`` layer,
            i.e. one string per row with a trailing length-1 axis.

    Returns:
        The output of ``embed`` for the flattened batch of strings.
    """
    # Flatten to a 1-D batch of strings. A bare tf.squeeze() would also drop
    # the batch axis when a batch holds exactly one row (e.g. the last batch
    # of a predict() call), handing the module a scalar instead of a vector.
    sentences = tf.reshape(tf.cast(x, tf.string), [-1])
    return embed(sentences)

# create model
# Pipeline: raw string -> sentence embedding (512 features) -> dropout ->
# 256-unit ReLU layer -> dropout -> 2-way softmax.
input_text = Input(shape=(1,), dtype=tf.string)
embedding = Lambda(UniversalEmbedding, output_shape=(512, ))(input_text)
dropout_dense1 = Dropout(0.3)(embedding)
dense = Dense(256, activation='relu', activity_regularizer=l2(0.01))(dropout_dense1)
# The second dropout must consume `dense` (not `dropout_dense1`); otherwise
# the 256-unit hidden layer is never connected to the output.
# NOTE: weight files saved from a model without the hidden layer in the
# graph will not load into this architecture.
dropout_dense2 = Dropout(0.3)(dense)
pred = Dense(2, activation='softmax')(dropout_dense2)
model = Model(inputs=[input_text], outputs=pred)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# run training
weights_path = './model_hyperp.h5'
with tf.Session() as session:
    K.set_session(session)
    # Initialise variables and tables first; weights loaded afterwards then
    # overwrite the fresh initial values rather than being reset by them.
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())
    # Resume from an earlier checkpoint only when one exists, so the very
    # first run (no weights file yet) trains from scratch instead of failing.
    if os.path.isfile(weights_path):
        model.load_weights(weights_path)
    history = model.fit(x_train, y_train_encoded, epochs=2, batch_size=32, shuffle=True, verbose=1)
    model.save_weights(weights_path)

In [None]:
# Load the held-out split: a tab-separated, UTF-8 file whose first column
# is used as the DataFrame index.
test_data = pd.read_csv(
    'test_data.csv',
    index_col=0,
    encoding='utf-8',
    sep='\t',
)

In [None]:
# Model inputs come from the 'textbody' column, targets from 'hyperpartisan';
# both are taken as plain arrays.
x_test, y_test = (
    test_data[column].values for column in ('textbody', 'hyperpartisan')
)

In [None]:
# One-hot encode the test targets with the encoder fitted on the training labels.
y_test_encoded = encode(le=le, labels=y_test)

In [None]:
# run validation
# A new session is opened for inference, so variables and tables are
# initialised again before the weights saved by the training cell are read
# back from './model_hyperp.h5'. The order matters: initialising after
# load_weights would overwrite the loaded values.
with tf.Session() as session:
    K.set_session(session)
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())
    model.load_weights('./model_hyperp.h5')  
    # `predicts` holds one row of softmax outputs per test sample.
    predicts = model.predict(x_test, batch_size=512)

In [None]:
from sklearn import metrics

# Classification report (precision, recall, f1): turn both the one-hot
# targets and the predicted score rows back into label values first.
y_preds = decode(le, predicts)
y_test2 = decode(le, y_test_encoded)

print(metrics.classification_report(y_true=y_test2, y_pred=y_preds))

In [None]:
# Overall accuracy on the test split, shown as the cell output.
metrics.accuracy_score(y_true=y_test2, y_pred=y_preds)