In [1]:
# Mount Google Drive when running on Colab. The `google.colab` package only
# exists inside the Colab runtime, so on any other kernel the mount is skipped
# instead of aborting the notebook at its very first cell.
try:
    from google.colab import drive
except ImportError:
    # Not on Colab: keep the name defined so later references fail clearly.
    drive = None
else:
    drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import numpy as np
import nltk

# Fetch the NLTK resources used by this notebook. The perceptron tagger backs
# nltk.pos_tag_sents; punkt and wordnet are presumably needed by the project's
# dataset code (TODO confirm against Datasets.py).
for nltk_resource in ('punkt', 'averaged_perceptron_tagger', 'wordnet'):
    nltk.download(nltk_resource)

from tensorflow import keras

from Datasets import SequenceDataset
from models import SequenceModel
from utility import top_k_metric, calculating_class_weights, get_weighted_loss

[nltk_data] Downloading package punkt to /Users/synch/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/synch/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /Users/synch/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [5]:
# The training dataset is built first; its input/target tokenizers are then
# handed to the validation and test datasets so all three share them.
train_ds = SequenceDataset(mode="train", tag_func=nltk.pos_tag_sents)

shared_ds_kwargs = dict(
    tag_func=nltk.pos_tag_sents,
    input_tokenizer=train_ds.input_tokenizer,
    target_tokenizer=train_ds.target_tokenizer,
)
val_ds = SequenceDataset(mode="valid", **shared_ds_kwargs)
test_ds = SequenceDataset(mode="test", **shared_ds_kwargs)

In [6]:
# Extract (features, labels) for each split using the "tfidf" data type.
# NOTE(review): training uses whole_dialog=False while validation and test use
# whole_dialog=True -- presumably intentional; confirm against SequenceDataset.
feature_kwargs = {"data_type": "tfidf"}
train_data, train_labels = train_ds.get_data_target(whole_dialog=False, **feature_kwargs)
val_data, val_labels = val_ds.get_data_target(whole_dialog=True, **feature_kwargs)
test_data, test_labels = test_ds.get_data_target(whole_dialog=True, **feature_kwargs)

finished loading corpus
finished loading descriptions
finished extracting contexts
finished extracting targets (OOCs)
finished loading corpus
finished loading descriptions
finished extracting contexts
finished extracting targets (OOCs)
finished loading corpus
finished loading descriptions
finished extracting contexts
finished extracting targets (OOCs)


## Model

In [8]:
# Derive class weights from the training labels; the result is handed to
# get_weighted_loss() when the model is compiled.
class_weights = calculating_class_weights(train_labels)

In [14]:
# Feed-forward classifier: three 1024-unit ReLU hidden layers followed by one
# output unit per label column of `train_labels`.
model = keras.Sequential([
    keras.layers.Dense(1024, activation="relu"),
    keras.layers.Dense(1024, activation="relu"),
    keras.layers.Dense(1024, activation="relu"),
    # Sigmoid rather than ReLU on the output layer: ReLU emits unbounded,
    # non-probabilistic scores, whereas the Precision/Recall metrics below are
    # defined over per-label scores in [0, 1]. The weighted loss presumably
    # wraps a binary cross-entropy that also expects probabilities
    # (TODO confirm in utility.get_weighted_loss).
    keras.layers.Dense(train_labels.shape[-1], activation="sigmoid"),
])

# Precision/recall are computed over the 5 highest-scoring labels per sample.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-5),
              loss=get_weighted_loss(class_weights),
              metrics=[
                  keras.metrics.Precision(name="precision", top_k=5),
                  keras.metrics.Recall(name="recall", top_k=5),
              ])

In [15]:
# Train for 3 epochs in mini-batches of 32, evaluating on the validation split
# after every epoch.
model.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=3,
    validation_data=(val_data, val_labels),
)

Train on 8614 samples, validate on 1062 samples
Epoch 1/3

KeyboardInterrupt: 

## Evaluation

In [None]:
# Score every split with the trained network. `model` is a keras.Sequential,
# whose inference method is `predict`; it has no `infer` method, so the
# previous calls raised AttributeError.
train_pred = model.predict(train_data)
val_pred = model.predict(val_data)
test_pred = model.predict(test_data)

In [None]:
# Training split: print the first three values returned by top_k_metric
# (labelled precision, recall and f1) for every cutoff k from 1 to 10.
for i in range(1, 11):
    metric = top_k_metric(train_pred, train_labels, i)
    report = f"top {i}: \nprecision: {metric[0]}\nrecall: {metric[1]}\nf1: {metric[2]}"
    print(report)

In [None]:
# Validation split: print the first three values returned by top_k_metric
# (labelled precision, recall and f1) for every cutoff k from 1 to 10.
for i in range(1, 11):
    metric = top_k_metric(val_pred, val_labels, i)
    report = f"top {i}: \nprecision: {metric[0]}\nrecall: {metric[1]}\nf1: {metric[2]}"
    print(report)

In [None]:
# Test split: print the first three values returned by top_k_metric
# (labelled precision, recall and f1) for every cutoff k from 1 to 10.
for i in range(1, 11):
    metric = top_k_metric(test_pred, test_labels, i)
    report = f"top {i}: \nprecision: {metric[0]}\nrecall: {metric[1]}\nf1: {metric[2]}"
    print(report)