In [1]:
# go to project root
import os

os.chdir("..")

In [2]:
# imports
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import numpy as np
import tensorflow as tf

import json

from IPython.display import display
import plotly
import plotly.graph_objs as go
import cufflinks as cf
cf.go_offline()

from jalef.statistics import evaluate_result
from jalef.plots import plot_confusion_matrix, enable_plotly_in_cell

from jalef.preprocessing import BertPreprocessor
from jalef.layers import Bert

In [4]:
# set gpu vram usage limit
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5
sess = tf.Session(config=config)


def initialize_vars(sess):
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    tf.keras.backend.set_session(sess)
    
initialize_vars(sess)

In [17]:
# set random seeds
np.random.seed(1234)
tf.set_random_seed(1234)

# constatnts
MAX_SEQ_LEN = 128
MIN_SEQ_LEN = int(MAX_SEQ_LEN / 2)
DATASET_PATH = "data/coursera_dataset_top50/"
TEST_NAME = "dataset=coursera_embedding=bert_intents=50"
PRETRAINED_MODEL_PATH = "./models/bert/"
# modify if model changed to other H-XXXX!!!!
OUTPUT_SIZE = 1024

# read the lookup table
lut = pd.read_csv(DATASET_PATH + "lut.csv", names=["Intent"], index_col=0, header=0)
reverse_lut = pd.read_csv(DATASET_PATH + "lut.csv", names=["Label"], index_col=1, header=0)

print("Please choose from the following categories when giving a test sentence:")
display(lut)

Please choose from the following categories when giving a test sentence:


Unnamed: 0,Intent
0,linear-circuits-dcanalysis
1,dsp
2,happiness
3,crypto
4,genetics-evolution
5,calculus1
6,organizational-analysis
7,statistical-reasoning-1
8,modern-world-2
9,algorithms-divide-conquer


In [18]:
# load preprocessor
preprocessor = BertPreprocessor(max_sequence_length=MAX_SEQ_LEN, pretrained_model_path=PRETRAINED_MODEL_PATH)

# create model from saved configs and load best weights
with tf.keras.utils.CustomObjectScope({'Bert': Bert}):
    with open("logs/configs/" + TEST_NAME + "_configs.json") as file:
        json_str = json.load(file)
        model = tf.keras.models.model_from_json(json_str)
model.load_weights("logs/weights/" + TEST_NAME + "_weights.hdf5")

# Random saentences (out-of-sample, not even in test set)

Stocks rallied sharply on Tuesday after the U.S. Trade Representative office said that some types of goods imported to the U.S. from China would be exempt from new tariffs recently announced by President Trump, while tariffs on some other Chinese products would be delayed until year end. Widely seen as a concession by Trump in an otherwise intensifying trade conflict between the two countries, the move was welcomed by investors as a significant step towards de-escalation.

The Romans only began to achieve significant originality in architecture around the beginning of the Imperial period, after they had combined aspects of their original Etruscan architecture with others taken from Greece, including most elements of the style we now call classical architecture. They moved from trabeated construction mostly based on columns and lintels to one based on massive walls, punctuated by arches, and later domes, both of which greatly developed under the Romans.

The atmospheric proportions of hydrogen and helium are close to the theoretical composition of the primordial solar nebula. Neon in the upper atmosphere only consists of 20 parts per million by mass, which is about a tenth as abundant as in the Sun. Helium is also depleted to about 80% of the Sun's helium composition. This depletion is a result of precipitation of these elements into the interior of the planet.

In [21]:
sentence = input("Test sentence: ")
print()

input_ = preprocessor.transform([sentence])

preds = model.predict(x=[input_[0], input_[1], input_[2]])

print("The result: {}\n".format(lut.at[np.argmax(preds[0]), "Intent"]))

for pred, intent in zip(preds[0], lut.values):
    print("{}: {:.2f}%".format(str(intent), pred))

Test sentence: The atmospheric proportions of hydrogen and helium are close to the theoretical composition of the primordial solar nebula. Neon in the upper atmosphere only consists of 20 parts per million by mass, which is about a tenth as abundant as in the Sun. Helium is also depleted to about 80% of the Sun's helium composition. This depletion is a result of precipitation of these elements into the interior of the planet.

The result: astro

['linear-circuits-dcanalysis']: 0.00%
['dsp']: 0.00%
['happiness']: 0.00%
['crypto']: 0.00%
['genetics-evolution']: 0.00%
['calculus1']: 0.00%
['organizational-analysis']: 0.00%
['statistical-reasoning-1']: 0.00%
['modern-world-2']: 0.00%
['algorithms-divide-conquer']: 0.00%
['algorithms-part2']: 0.00%
['comparch']: 0.00%
['money-banking']: 0.00%
['nand2tetris2']: 0.00%
['fe-exam']: 0.00%
['social-economic-networks']: 0.00%
['sciwrite']: 0.00%
['advanced-neurobiology1']: 0.00%
['chemistry-1']: 0.00%
['roman-architecture']: 0.00%
['neurobiology'