In [3]:
from rasa_nlu.training_data  import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer
from rasa_nlu import config
from joblib import load, dump 
from rasa_nlu.model import Metadata, Interpreter

In [11]:
train_data = load_data('./data/nlu.md')

In [12]:
trainer = Trainer(config.load('config_spacy.yaml'))

In [13]:
# trainer.train(train_data)

  self.MIN_EXAMPLES_PER_INTENT))
  self.MIN_EXAMPLES_PER_INTENT))


Fitting 2 folds for each of 6 candidates, totalling 12 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    0.1s finished


<rasa_nlu.model.Interpreter at 0x10b365b50>

In [14]:
model_directory = trainer.persist('./projects/')

In [15]:
import spacy
nlp = spacy.load('en')

In [22]:
json_reviews_by_sent = load('../json_review_dataframe_by_sent')
informative_df = json_reviews_by_sent[(json_reviews_by_sent['classification'] == 'informative')]
interpreter = Interpreter.load(model_directory)

In [183]:
intentions = dict()
for i in range(1000):
    try:
        classification = interpreter.parse(text=informative_df['review_body'][i])
        intent = classification['intent']['name']
        if intent not in intentions:
            intentions[intent] = []
        intentions[intent].append(classification['text'])
    except KeyError:
        pass

In [213]:
intentions_raw_text = dict()
for i, v in intentions.items():
    intentions_raw_text[i] = ' '.join(v)
dump(intentions_raw_text,'intentions_raw_text', compress=True)
dump(intentions, 'intentions', compress=True)

['intentions']

In [192]:
keys = list(intentions.keys())
processed_text = intentions_raw_text[keys[0]]

In [193]:
# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(processed_text) - maxlen, step):
    sentences.append(processed_text[i: i + maxlen])
    next_chars.append(processed_text[i + maxlen])
print('nb sequences:', len(sentences))

nb sequences: 11576


In [6]:
keys = list(intentions.keys())
intentions_raw_text = load('intentions_raw_text')
intentions = load('intentions')
from collections import Counter
def get_vocab(lst):
    vocabcount = Counter(w for txt in lst for w in txt.split())
    vocab = map(lambda x: x[0], sorted(vocabcount.items(), key=lambda x: -x[1]))
    return vocab, vocabcount

In [8]:
v, vocab_ = get_vocab(intentions_raw_text[keys[0]])

# SUMMARIZATION COMPONENT 

In [202]:
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
import numpy as np
import random
import sys
import io
import requests
import re

In [203]:
print('corpus length:', len(processed_text))
chars = sorted(list(set(processed_text)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

corpus length: 34767
total chars: 80


In [204]:
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...


In [205]:
x.shape

(11576, 40, 80)

In [206]:
y.shape

(11576, 80)

In [207]:
# build the model: a single LSTM
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars), activation='softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

Build model...


In [208]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               107008    
_________________________________________________________________
dense (Dense)                (None, 80)                10320     
Total params: 117,328
Trainable params: 117,328
Non-trainable params: 0
_________________________________________________________________


In [209]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [210]:
def on_epoch_end(epoch, _):
    # Function invoked at end of each epoch. Prints generated text.
    print("****************************************************************************")
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(processed_text) - maxlen - 1)
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('----- temperature:', temperature)

        generated = ''
        sentence = processed_text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [211]:
# Ignore useless W0819 warnings generated by TensorFlow 2.0.  Hopefully can remove this ignore in the future.
# See https://github.com/tensorflow/tensorflow/issues/31308
import logging, os
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Fit the model
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])

Train on 11576 samples
Epoch 1/60
----- Generating text after Epoch: 0
----- temperature: 0.2
----- Generating with seed: "stead of finding an external app to so s"
stead of finding an external app to so sore the the the the the the the tho the the the pothe the the the pong the the the the the the the the the the the the the the tho the the the the the the the tho the the the the the the the tho the the the the the the the the the the the the the the the bothe the the the the the tho the the the tho lo the the the who pong ooplo pong the the the the the the pothe the the the some the the the the t
----- temperature: 0.5
----- Generating with seed: "stead of finding an external app to so s"
stead of finding an external app to so soree toccapp hond tot bothe soter the lopg boont of jof the blithe at the the long jo long ind ant alld oot oule ood the app tho she loce the the care ionithe thot ther the ing tho uome pimem it some pole ap ate vh me bong, toctor retoollthe iove the bos the t

 there been problems loading the page. She pixes te bro gos an oFtre haviow ah of of phople ag, seod mus's im No it winc a videos al your to on B0 nowhar bl ng a disk thw 2s ...A! My whell the bo:w. ald it showsh the app fult all but fon ca look burate the app losts, is but my uctions. Soakt Ever bar star a tol ad the back brom of As'm grout are sien sabfe say. OIm han if I can of in a cam of everytha sopalat of the make ore, a comment 
----- temperature: 1.2
----- Generating with seed: " there been problems loading the page. S"
 there been problems loading the page. Somm of tre a apsane bur shaps problem. The euting to froad but thes gee have ifbe to lakk, it someo the triep Ihs wishoy wheck I notly . The labker No 3s sange uppiIs, clock everyy, meseans and youlrrect ookver tup go donuven I sayd comnonl, app. you's upsage I ry I han on home posssaufiwallabll, thorewapl.!...!FWs's been your vidioso,ebut of no Snopbacicnotion videts In the cepe a picturos. Ofe g
Epoch 8/60
----- Generat

s delivered to me an hour ago. Though nater to open for most paye the same the lay ut so I usay even is a posts is masning. I've read the pagks that is got it dods make that worke make my friends and it won't seed the app on to reed is a disler. I say to go to start corntry on app to comple on the same I'm with to click is that add with the becous the app on to my problem. I stolly have my playlist, attrag with to catcourd and it alboun
----- temperature: 0.5
----- Generating with seed: "s delivered to me an hour ago. Though na"
s delivered to me an hour ago. Though nate

KeyboardInterrupt: 

In [None]:
# Abstractive Summarization Training - Trained online for better resource usage 
# https://colab.research.google.com/drive/1XOfx9gLHhKlKLjgoJuD4jpjWrG5vyDPC     