In [None]:
!pip install fse

In [None]:
!pip install pytorch-nlp

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

import re
import string 
import collections
import fse
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torchnlp.metrics import get_moses_multi_bleu

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(1)

# Any results you write to the current directory are saved as output.

# Data Pre-Processing

In [None]:
#yelp
root = "../input/style-transfer-dataset/yelp/"

d_pos_path = root+"sentiment.train.1"
d_neg_path = root+"sentiment.train.0"

# One sentence per line; each file is read into a single-column DataFrame.
# (The original cell repeated the three statements below verbatim, parsing both
# corpora twice for the same result.)
# NOTE(review): recent pandas releases may reject sep="\n" - confirm against the
# pandas version installed in the kernel.
#Problem: Data imbalance? -> decoder might be biased towards positive
d_pos = pd.read_csv(d_pos_path, sep="\n", header=None)#.iloc[:,0]
d_neg = pd.read_csv(d_neg_path, sep="\n", header=None)#.iloc[:,0]
d_both = pd.concat((d_pos, d_neg), ignore_index=True)

In [None]:
d_labels = pd.DataFrame(np.concatenate( ( np.ones((d_pos.size, 1)), np.zeros((d_neg.size, 1)) ) ))

In [None]:
d_all = pd.concat((d_pos, d_neg), ignore_index=True)
d_all['labels'] = d_labels
d_all.columns = ['text', 'labels']

In [None]:
cols = d_all.columns.tolist()
cols = cols[-1:] + cols[:-1]
d_all = d_all[cols]

In [None]:
d_pos=d_pos.iloc[:,0]
d_neg=d_neg.iloc[:,0]
d_both=d_both.iloc[:,0]

In [None]:
# Each reference line is "<source sentence>\t<human rewrite>". The rewrite is
# re-tokenised so that punctuation marks become separate, space-delimited tokens.
# The files are opened in `with` blocks so the handles are closed after reading
# (the original left both files open).
d_pos_ref_path = root+"reference.1"
d_pos_ref = []
with open(d_pos_ref_path, 'r') as d_pos_ref_file:
    for line in d_pos_ref_file:
        pair = line.split("\t")
        pair[1] = ' '.join(re.findall(r"\w+|[^\w\s]", pair[1], re.UNICODE))  #split punctuation
        d_pos_ref.append(pair)

d_neg_ref_path = root+"reference.0"
d_neg_ref = []
with open(d_neg_ref_path, 'r') as d_neg_ref_file:
    for line in d_neg_ref_file:
        pair = line.split("\t")
        pair[1] = ' '.join(re.findall(r"\w+|[^\w\s]", pair[1], re.UNICODE))  #split punctuation
        d_neg_ref.append(pair)

# Components

### Content & Attributes Separation

In [None]:
#Parameters:
param_smooth = 1
param_threshold = 15
param_span = 4

param_backoff_limit = 3

In [None]:
#ngram has punctuation
def has_punctuation(ngram):
    """Return True when at least one token of ``ngram`` is found in ``string.punctuation``.

    The check is a substring test of each token against the punctuation string,
    so an empty token also counts as a hit.
    """
    hits = [token in string.punctuation for token in ngram]
    return any(hits)

def generate_ngrams(lines, min_length=1, max_length=param_span):
    """List every punctuation-free n-gram found in ``lines``.

    Args:
        lines: iterable of whitespace-tokenisable strings.
        min_length: smallest n-gram size to emit.
        max_length: largest n-gram size to emit (also the sliding-window size).

    Returns:
        dict mapping each length in ``[min_length, max_length]`` to a list of
        word tuples, in the order they were encountered.

    The sliding window is shared across lines (it is not reset between them).
    """
    lengths = range(min_length, max_length + 1)
    ngrams = {length: [] for length in lengths}
    queue = collections.deque(maxlen=max_length)

    def add_queue():
        # Emit the n-grams that start at the head of the current window.
        current = tuple(queue)
        for length in lengths:
            if len(current) >= length and not has_punctuation(current[:length]):
                ngrams[length].append(current[:length])

    for line in lines:
        words = line.split()
        # A line with fewer words than the window can never fill it, so lower
        # the emission threshold by the shortfall. The original measured
        # len(lines) (the number of lines) here, which for a single sentence
        # dropped the threshold to 1 and re-emitted the leading n-grams on
        # every appended word.
        short_by = max(0, max_length - len(words))
        for word in words:
            queue.append(word)
            if len(queue) >= max_length - short_by:
                add_queue()

    # Drain the window so n-grams starting at the trailing words are emitted too.
    while len(queue) > min_length:
        queue.popleft()
        add_queue()
    return ngrams

# Adapted from https://gist.github.com/benhoyt/dfafeab26d7c02a52ed17b6229f0cb52, with a fix for lines that have fewer words than the n-gram window.
def count_ngrams(lines, min_length=1, max_length=param_span):
    """Iterate through given lines iterator (file object or list of
    lines) and return n-gram frequencies. The return value is a dict
    mapping the length of the n-gram to a collections.Counter
    object of n-gram tuple and number of times that n-gram occurred.
    Returned dict includes n-grams of length min_length to max_length.

    N-grams containing a punctuation token are not counted. The sliding
    window is shared across lines (it is not reset between them).
    """
    lengths = range(min_length, max_length + 1)
    ngrams = {length: collections.Counter() for length in lengths}
    queue = collections.deque(maxlen=max_length)

    # Helper function to add n-grams at start of current queue to dict
    def add_queue():
        current = tuple(queue)
        for length in lengths:
            if len(current) >= length and not has_punctuation(current[:length]):
                ngrams[length][current[:length]] += 1

    # Loop through all lines and words and add n-grams to dict
    for line in lines:
        words = line.split()
        # Lower the threshold for lines shorter than the window. The original
        # used len(lines) (the number of lines), which over-counted the leading
        # n-grams whenever fewer than max_length lines were supplied.
        short_by = max(0, max_length - len(words))
        for word in words:
            queue.append(word)
            if len(queue) >= max_length - short_by:
                add_queue()

    # Make sure we get the n-grams at the tail end of the queue
    while len(queue) > min_length:
        queue.popleft()
        add_queue()

    return ngrams

In [None]:
#Generate ngram counts for d_pos & d_neg
d_pos_ngrams_counts = count_ngrams(d_pos.tolist())
d_neg_ngrams_counts = count_ngrams(d_neg.tolist())

def get_counts(list1, counted_ngrams):
    """Look up corpus frequencies for every n-gram occurring in ``list1``.

    Args:
        list1: list of sentences whose n-grams are generated with generate_ngrams.
        counted_ngrams: dict ``{length: Counter}`` as produced by count_ngrams.

    Returns:
        Object-dtype array with one ``[count, ngram_tuple]`` row per n-gram,
        longest n-grams first; an empty 1-D array when there are no n-grams.
    """
    list1_ngrams = generate_ngrams(list1)
    counts = [
        [counted_ngrams[length][ngram], ngram]
        for length in range(param_span, 0, -1)
        for ngram in list1_ngrams[length]
    ]
    # Rows pair an int with a variable-length tuple. Newer NumPy refuses to build
    # such ragged data implicitly, so request an object array explicitly.
    return np.array(counts, dtype=object)

In [None]:
#these are methods that will become useful when extracting attribute markers
#why do we need all this? well... that's like 5 hours of debugging...
def flatten(foo):
    """Return the leaves of the nested iterable ``foo`` as a flat list."""
    return [*_flatten(foo)]

def _flatten(foo):
    for x in foo:
        if isinstance(x, collections.Iterable) and not isinstance(x, str):
            for y in _flatten(x):
                yield y
        else:
            yield x

def array_to_string(a):
    """Flatten the nested string container ``a`` and join its leaves with single spaces."""
    leaves = flatten(a)
    return ' '.join(leaves)

def is_in_string_array(elements, original): #deprecated, does not take into account sequence order
    """Return True if any word of ``elements`` also appears among the words of ``original``.

    Both arguments are flattened and whitespace-split first; word order is ignored.
    """
    candidate_words = array_to_string(elements).split()
    known_words = array_to_string(original).split()
    membership = np.isin(candidate_words, known_words)
    return membership.any()

def insert_string(string, inserted_string, index):
    """Return ``string`` with ``inserted_string`` spliced in before position ``index``."""
    head, tail = string[:index], string[index:]
    return head + inserted_string + tail

# modified from https://stackoverflow.com/questions/41752946/replacing-a-character-from-a-certain-index
def replace_string(s, newstring, index, nofail=False):
    """Overwrite part of ``s`` with ``newstring`` starting at ``index``.

    Args:
        s: original string.
        newstring: text written over ``s[index:index + len(newstring)]``.
        index: start position of the overwrite.
        nofail: when True, an out-of-range index does not raise; a negative
            index prepends ``newstring`` and an index past the end appends it.

    Returns:
        The resulting string.

    Raises:
        ValueError: if ``index`` is outside ``range(len(s))`` and ``nofail`` is False.
    """
    # raise an error if index is outside of the string
    if not nofail and index not in range(len(s)):
        # Format the index: the original concatenated str + int here, which
        # raised TypeError instead of the intended ValueError.
        raise ValueError("index outside given string. index:{}".format(index))

    # if not erroring, but the index is still not in the correct range..
    if index < 0:  # add it to the beginning
        return newstring + s
    if index > len(s):  # add it to the end
        return s + newstring

    # insert the new string between "slices" of the original
    return s[:index] + newstring + s[index + len(newstring):]

In [None]:
def get_attribute_markers(s, style_src):
    """Extract the n-grams of ``s`` that are strongly associated with its style.

    An n-gram's salience is the smoothed ratio of its count in the corpus of the
    source style to its count in the other corpus. N-grams whose salience exceeds
    ``param_threshold`` are kept, longest first, skipping any n-gram that shares
    a word with an already selected marker.

    Args:
        s: the sentence to analyse.
        style_src: truthy when ``s`` is positive, falsy when it is negative.

    Returns:
        list of marker strings (words joined by single spaces); may be empty.
    """
    sentence = [s]

    # Both tables list the same n-grams in the same order; only the counts differ.
    # (The original built the positive table twice.)
    pos_table = get_counts(sentence, d_pos_ngrams_counts)
    neg_table = get_counts(sentence, d_neg_ngrams_counts)
    if len(pos_table) == 0:
        return []

    ngrams = pos_table[:, 1]
    pos_counts = pos_table[:, 0]
    neg_counts = neg_table[:, 0]

    if style_src:
        importances = (pos_counts + param_smooth) / (neg_counts + param_smooth)
    else:
        importances = (neg_counts + param_smooth) / (pos_counts + param_smooth)

    a = []
    for importance, ngram in zip(importances, ngrams):
        if importance > param_threshold and not is_in_string_array(ngram, a):
            a.append(' '.join(ngram))
    return a

In [None]:
def separate(sentence, style_src):
    """Split ``sentence`` into content and attribute markers.

    Returns a dict with:
        'c': the sentence with every marker occurrence blanked out by spaces,
        'a': the markers, ordered by where they were found,
        'i': the character index at which each marker was found (str.find result),
        's': the untouched input sentence.
    When there are no markers, 'a' and 'i' are empty lists.
    """
    markers = get_attribute_markers(sentence, style_src)
    content = sentence
    positions = []

    for marker in markers:
        positions.append(content.find(marker))
        # blank the marker out with spaces so later indexes stay valid
        content = content.replace(marker, " " * len(marker))

    if not markers:
        return {'c': content, 'a': [], 'i': [], 's': sentence}

    ordered = sorted(zip(positions, markers))
    positions, markers = zip(*ordered)
    return {'c': content, 'a': markers, 'i': positions, 's': sentence}

def get_c(sentence, style):
    """Return the content of ``sentence`` (markers removed) with runs of spaces collapsed to one."""
    content = separate(sentence, style)['c']
    return re.sub(' +', ' ', content)

def get_a(sentence, style):
    """Return the attribute markers of ``sentence`` joined by spaces, or "" when there are none."""
    markers = separate(sentence, style)['a']
    return ' '.join(markers) if len(markers) > 0 else ""

### TF-IDF Distance

In [None]:
# ===== TF-IDF Weighted Word Overlap ===== #
# docs pre-processing
docs = d_both.tolist()

# creating dict_idf = {word: idf}
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf_vectorizer=TfidfVectorizer(use_idf=True, stop_words=None)
tfidf_vectorizer_vectors=tfidf_vectorizer.fit_transform(docs)
# get_feature_names() no longer exists in newer scikit-learn releases;
# prefer get_feature_names_out() and fall back for older installs.
if hasattr(tfidf_vectorizer, "get_feature_names_out"):
    _tfidf_vocabulary = tfidf_vectorizer.get_feature_names_out()
else:
    _tfidf_vocabulary = tfidf_vectorizer.get_feature_names()
dict_idf = dict(zip(_tfidf_vocabulary, tfidf_vectorizer.idf_))

def get_overlap(a, b):
    """Return the multiset intersection of the whitespace-separated words of ``a`` and ``b``.

    The result is a collections.Counter mapping each shared word to the smaller
    of its two occurrence counts.
    """
    words_a, words_b = a.split(), b.split()
    return collections.Counter(words_a) & collections.Counter(words_b)

def get_weighted_overlap(a, b):
    """Score how much ``a`` and ``b`` overlap, weighting each shared word by TF-IDF.

    For every shared word the contribution is
    ``shared_count * (count_in_a * idf)``; words missing from ``dict_idf`` use
    an idf of 1. Term frequency is left un-normalised.
    """
    shared = get_overlap(a, b)
    tf_in_a = collections.Counter(a.split())

    score = 0
    for word, shared_count in shared.items():
        idf = dict_idf.get(word)
        if idf is None:
            # word never seen by the vectorizer: fall back to a neutral weight
            idf = 1
        score += shared_count * (tf_in_a[word] * idf)
    return score

def get_closest_sentence_tfidf(sentence, style_src):
    """Retrieve the sentence of the opposite style most similar to ``sentence``.

    Similarity is get_weighted_overlap. The whole opposite corpus (d_neg when
    ``style_src`` is truthy, otherwise d_pos) is scanned once per pass. A pass is
    repeated, skipping sentences picked by earlier passes, until the pick has at
    least as many attribute markers as ``sentence`` or ``param_backoff_limit``
    passes have run.

    Returns:
        The last picked sentence. If ``sentence`` has no attribute markers the
        loop body never runs and "" is returned.
    """
    opposite_dataset = d_neg if style_src else d_pos

    highest_overlap = 0
    closest_sentence = ""
    
    # number of markers the retrieved sentence should be able to supply
    min_attribute_markers=len(get_attribute_markers(sentence, style_src))
    num_markers = 0
    
    previous_sentences = []
    backoff_count = 0
    while(num_markers < min_attribute_markers and backoff_count < param_backoff_limit):
        # full scan of the opposite corpus for the best not-yet-tried sentence
        for sentence_b in opposite_dataset:
            weighted_overlap = get_weighted_overlap(sentence, sentence_b)
            if weighted_overlap > highest_overlap and sentence_b not in previous_sentences:
                highest_overlap = weighted_overlap
                closest_sentence = sentence_b
        # reset the running maximum so the next pass can select a lower-scoring sentence
        highest_overlap = 0 
        backoff_count += 1
        # if no sentence scored above 0 in this pass, closest_sentence keeps its previous value
        previous_sentences.append(closest_sentence)
        num_markers = len(get_attribute_markers(closest_sentence, not style_src))
    
    return closest_sentence

In [None]:
# Retrieve using tfidf
def retrieve(sentence, style_src):
    """Find the closest opposite-style sentence and return it split into content and markers.

    The retrieved sentence is separated using the opposite of ``style_src``.
    """
    nearest = get_closest_sentence_tfidf(sentence, style_src)
    return separate(nearest, not style_src)

### Template Based

In [None]:
def insert_multi(s, indexes):
    """Experimental multi-position substitution on ``s``.

    NOTE(review): this looks like an unfinished experiment - ``indexes`` is never
    read, the replacement table is hard-coded, and the computed string is
    assigned to a local and not returned (the function returns None).
    """
    # hard-coded replacement table; note the second entry spells its first key 'being'
    d = {
        'w1': {'begin':'0', 'end':'3', 'w':'BIG'},
        'w2': {'being':'7', 'end':'7', 'w':'BARKED'}
    }
    
    
    # Builds a regex alternation from the characters of s at each entry's 'end'
    # index, replaces every match with "{}", then fills the placeholders with the
    # 'w' values ordered by the trailing digit of their keys.
    final_s = re.sub('|'.join('\{}'.format(s[int(b['end'])]) for _, b in d.items()), "{}", s).format(*[c['w'] for _, c in sorted(d.items(), key=lambda x:int(x[0][-1]))])

In [None]:
def TemplateBased(sentence, style_src):
    """Style-transfer baseline: put retrieved target markers into the source's marker slots.

    The source sentence is split into content plus marker positions, the closest
    opposite-style sentence supplies the replacement markers, and each
    replacement is inserted at the position its source counterpart occupied.

    Args:
        sentence: source sentence.
        style_src: truthy for a positive source sentence, falsy for a negative one.

    Returns:
        The rewritten sentence with whitespace normalised to single spaces.
    """
    source = separate(sentence, style_src)
    slots = source['i']

    target_markers = retrieve(sentence, style_src)['a']

    # When the target supplies fewer markers than there are slots, pad by
    # repeating its first marker (a non-positive shortfall pads nothing).
    shortfall = len(slots) - len(target_markers)
    if len(target_markers) > 0:
        target_markers += (target_markers[0],) * shortfall

    output = source['c']

    # Insert from the last slot to the first so earlier indexes are not shifted
    # by the text already inserted; the final split/join absorbs length differences.
    for slot in reversed(range(len(slots))):
        if slot < len(target_markers):
            output = insert_string(output, target_markers[slot] + " ", slots[slot])

    return ' '.join(output.split())

In [None]:
sentence = "we got down and we got some really slow and lazy service ."
style_src = 0
TemplateBased(sentence, style_src)

# Model

### DeleteAndRetrieve

In [None]:
# Resources: https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html
# Had to modify & adapt most of the code in the tutorial since this isn't translation & data preprocessing is different

SOS_token = 0
EOS_token = 1

class Lang:
    """Vocabulary: bidirectional word/index mapping with occurrence counts.

    Indexes 0 and 1 are reserved for the "SOS" and "EOS" markers.
    """

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # SOS and EOS are already counted

    def addSentence(self, sentence):
        """Register every token of ``sentence`` (split on single spaces)."""
        for token in sentence.split(' '):
            self.addWord(token)

    def addWord(self, word):
        """Assign the next free index to an unseen word, or bump a known word's count."""
        if word in self.word2index:
            self.word2count[word] += 1
            return

        new_index = self.n_words
        self.word2index[word] = new_index
        self.word2count[word] = 1
        self.index2word[new_index] = word
        self.n_words = new_index + 1

In [None]:
def get_total_overlap(a, b):
    """Return how many words ``a`` and ``b`` share, counting repeated words."""
    return sum(get_overlap(a, b).values())

In [None]:
noise_chance = 0.1

def _extract_pairs(sentences, style):
    """Build ``[content, attributes, sentence]`` triples plus the list of attribute strings."""
    pairs = []
    attributes = []
    for sentence in sentences:
        c = get_c(sentence, style)
        a = get_a(sentence, style)
        attributes.append(a)
        pairs.append([c, a, sentence])
    return pairs, attributes


def _is_near_miss(real_a, candidate):
    """True when ``candidate`` (word list) equals ``real_a`` with exactly one word removed or added."""
    overlap = get_total_overlap(' '.join(real_a), ' '.join(candidate))
    if overlap <= 0:
        return False
    length_diff = len(real_a) - len(candidate)
    one_removed = overlap == len(real_a) - 1 and length_diff == 1
    one_added = overlap == len(real_a) and length_diff == -1
    return one_removed or one_added


def _add_attribute_noise(pairs, attribute_pool):
    """With probability ``noise_chance``, swap a pair's attributes for a near-miss from the pool.

    Mutates ``pairs`` in place. Pairs without attributes are left untouched.
    """
    for pair in pairs:
        if random.random() >= noise_chance:
            continue

        real_a = pair[1].split()
        if len(real_a) == 0:
            continue

        for a in attribute_pool:
            if len(a) == 0:
                continue
            a = a.split()
            if _is_near_miss(real_a, a):
                real_a = a
                break
        pair[1] = ' '.join(real_a)


def prepareData():
    """Build the DeleteAndRetrieve training triples and both vocabularies.

    Every sentence of d_pos / d_neg becomes ``[content, attributes, sentence]``.
    A fraction of the triples get their attributes replaced by a same-style
    attribute string differing by one word (noise). The positive and negative
    noise passes share one helper instead of two copy-pasted loops.

    Returns:
        (input_lang, output_lang, pairs) where ``pairs`` is the NumPy array
        obtained by concatenating the positive and negative triples.
    """
    input_lang = Lang("input")
    output_lang = Lang("output")

    pairs_pos, d_pos_a = _extract_pairs(d_pos, 1)
    pairs_neg, d_neg_a = _extract_pairs(d_neg, 0)

    # positive pairs first, then negative, so the random draws keep their original order
    _add_attribute_noise(pairs_pos, d_pos_a)
    _add_attribute_noise(pairs_neg, d_neg_a)

    pairs = np.concatenate((pairs_pos, pairs_neg), 0)

    for pair in pairs:
        input_lang.addSentence(pair[0])
        input_lang.addSentence(pair[1])
        output_lang.addSentence(pair[2])

    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs

In [None]:
input_lang, output_lang, pairs = prepareData()

In [None]:
for pair in d_pos_ref:
    input_lang.addSentence(pair[0])
    output_lang.addSentence(pair[1])
for pair in d_neg_ref:
    input_lang.addSentence(pair[0])
    output_lang.addSentence(pair[1])

In [None]:
class Maxout(nn.Module):
    """Maxout over dimension 1: groups of ``pool_size`` consecutive entries are reduced to their maximum."""

    def __init__(self, pool_size):
        super().__init__()
        self._pool_size = pool_size

    def forward(self, x):
        """Return ``x`` with dimension 1 shrunk by a factor of ``pool_size``."""
        width = x.shape[1]
        assert width % self._pool_size == 0, \
            'Wrong input last dim size ({}) for Maxout({})'.format(width, self._pool_size)
        # split dim 1 into (groups, pool_size) and keep the largest value of each group
        grouped = x.view(x.shape[0], width // self._pool_size, self._pool_size, *x.shape[2:])
        return grouped.max(dim=2).values

In [None]:
class EncoderRNN(nn.Module):
    """Single-layer GRU encoder that consumes one token index per call."""

    def __init__(self, input_size, word_vec_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, word_vec_size)
        self.gru = nn.GRU(word_vec_size, hidden_size)

    def forward(self, input, hidden):
        """Embed one token, advance the GRU one step and return ``(output, hidden)``."""
        step_input = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(step_input, hidden)
        return output, hidden

    def initHidden(self):
        """Return an all-zero initial hidden state of shape (1, 1, hidden_size)."""
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
class DecoderRNN(nn.Module):
    """Single-layer GRU decoder producing log-probabilities over the output vocabulary."""

    def __init__(self, hidden_size, word_vec_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, word_vec_size)
        self.gru = nn.GRU(word_vec_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        self.maxout = Maxout(1)

    def forward(self, input, hidden):
        """Decode one step: returns ``(log_probs, new_hidden)`` for a single token index."""
        embedded = self.embedding(input).view(1, 1, -1)
        pooled = self.maxout(embedded)
        gru_output, hidden = self.gru(pooled, hidden)
        log_probs = self.softmax(self.out(gru_output[0]))
        return log_probs, hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, 1, hidden_size)."""
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
#Preparing Training Data
def indexesFromSentence(lang, sentence):
    """Map each single-space-separated token of ``sentence`` to its index in ``lang``.

    Raises KeyError for a token that is not in ``lang.word2index``.
    """
    lookup = lang.word2index
    tokens = sentence.split(' ')
    return [lookup[token] for token in tokens]


def tensorFromSentence(lang, sentence):
    """Encode ``sentence`` as a column tensor of token indexes terminated by EOS.

    Returns a long tensor of shape (num_tokens + 1, 1) on ``device``.
    """
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    flat = torch.tensor(token_ids, dtype=torch.long, device=device)
    return flat.view(-1, 1)


def tensorsFromPair(pair):
    """Convert a ``[content, attributes, sentence]`` triple into three index tensors.

    Content and attributes use ``input_lang``; the target sentence uses ``output_lang``.
    The result is ordered (content, attributes, target).
    """
    content, attributes, target = pair[0], pair[1], pair[2]
    return (
        tensorFromSentence(input_lang, content),
        tensorFromSentence(input_lang, attributes),
        tensorFromSentence(output_lang, target),
    )

In [None]:
#Training
MAX_LENGTH = 50

teacher_forcing_ratio = 0.5


def train(input_c_tensor, input_a_tensor, target_tensor, encoder_c, encoder_a, decoder, encoder_c_optimizer, encoder_a_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single (content, attributes, target) example.

    The content and attribute sequences are encoded separately; the two final
    hidden states are concatenated (content first) to initialise the decoder.

    Args:
        input_c_tensor: content token indexes, one per row.
        input_a_tensor: attribute token indexes, one per row.
        target_tensor: target token indexes, one per row.
        encoder_c, encoder_a, decoder: the three networks.
        encoder_c_optimizer, encoder_a_optimizer, decoder_optimizer: their optimizers.
        criterion: loss applied to each decoder step.
        max_length: kept for backward compatibility; no longer used. The original
            copied encoder outputs into unused buffers of this length, which only
            served to raise IndexError on inputs longer than ``max_length``.

    Returns:
        The summed step loss divided by the target length.
    """
    encoder_c_hidden = encoder_c.initHidden()
    encoder_a_hidden = encoder_a.initHidden()

    encoder_a_optimizer.zero_grad()
    encoder_c_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    target_length = target_tensor.size(0)

    # Only the final hidden states are needed; there is no attention over outputs.
    for ei in range(input_c_tensor.size(0)):
        _, encoder_c_hidden = encoder_c(input_c_tensor[ei], encoder_c_hidden)

    for ei in range(input_a_tensor.size(0)):
        _, encoder_a_hidden = encoder_a(input_a_tensor[ei], encoder_a_hidden)

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = torch.cat((encoder_c_hidden, encoder_a_hidden), 2)

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: feed the ground-truth token as the next input
            decoder_input = target_tensor[di]
        else:
            # Free running: feed the model's own best guess, stop once it emits EOS
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_c_optimizer.step()
    encoder_a_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [None]:
import time
import math

def asMinutes(s):
    """Format a duration in seconds as ``'<minutes>m <seconds>s'`` (both truncated to integers)."""
    minutes = math.floor(s / 60)
    seconds = s - minutes * 60
    return '%dm %ds' % (minutes, seconds)


def timeSince(since, percent):
    """Return ``'<elapsed> (- <remaining>)'`` for a job started at ``since``.

    ``percent`` is the completed fraction; the remaining time is extrapolated
    linearly from the elapsed time.
    """
    elapsed = time.time() - since
    estimated_total = elapsed / (percent)
    remaining = estimated_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [None]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    """Plot ``points`` as a line chart with y-axis ticks every 0.2.

    The original called plt.figure() before plt.subplots(), which created an
    extra, empty figure on every call; only the subplots figure is created now.
    """
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    ax.plot(points)

In [None]:
def trainIters(encoder_a, encoder_c, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    
    encoder_c_optimizer = optim.Adadelta(encoder_c.parameters(), lr=learning_rate)
    encoder_a_optimizer = optim.Adadelta(encoder_a.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adadelta(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs))
                      for i in range(n_iters)]
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_a_tensor = training_pair[0]
        input_c_tensor = training_pair[1]
        
        target_tensor = training_pair[2]
        loss = train(input_c_tensor, input_a_tensor, target_tensor, encoder_c, encoder_a,
                     decoder, encoder_c_optimizer, encoder_c_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [None]:
word_vec_size = 128
hidden_size = 512
encoder_c = EncoderRNN(input_lang.n_words, word_vec_size, hidden_size).to(device)
encoder_a = EncoderRNN(input_lang.n_words, word_vec_size, hidden_size).to(device)

decoder = DecoderRNN(hidden_size + hidden_size, word_vec_size, output_lang.n_words).to(device)

In [None]:
trainIters(encoder_c, encoder_a, decoder, 200000, print_every=1000)

In [None]:
def encoderStep(encoder_c, encoder_a, c, a, max_length=MAX_LENGTH):
    """Encode content ``c`` and attributes ``a`` and return the decoder's initial hidden state.

    Args:
        encoder_c: content encoder.
        encoder_a: attribute encoder.
        c: content string (tokens must exist in ``input_lang``).
        a: attribute string (tokens must exist in ``input_lang``).
        max_length: kept for backward compatibility; no longer used. The original
            filled unused output buffers of this length, which raised IndexError
            for inputs longer than ``max_length``.

    Returns:
        The two final hidden states concatenated along dimension 2, content first.
    """
    input_c_tensor = tensorFromSentence(input_lang, c)
    input_a_tensor = tensorFromSentence(input_lang, a)

    encoder_c_hidden = encoder_c.initHidden()
    encoder_a_hidden = encoder_a.initHidden()

    for ei in range(input_c_tensor.size()[0]):
        _, encoder_c_hidden = encoder_c(input_c_tensor[ei], encoder_c_hidden)

    for ei in range(input_a_tensor.size()[0]):
        _, encoder_a_hidden = encoder_a(input_a_tensor[ei], encoder_a_hidden)

    return torch.cat((encoder_c_hidden, encoder_a_hidden), 2)

In [None]:
def get_c_embedding(encoder_c, c, max_length=MAX_LENGTH):
    """Return the content encoder's final hidden state for the content string ``c``.

    Args:
        encoder_c: content encoder.
        c: content string (tokens must exist in ``input_lang``).
        max_length: kept for backward compatibility; no longer used. The original
            filled an unused output buffer of this length, which raised
            IndexError for inputs longer than ``max_length``.
    """
    input_c_tensor = tensorFromSentence(input_lang, c)

    encoder_c_hidden = encoder_c.initHidden()

    for ei in range(input_c_tensor.size()[0]):
        _, encoder_c_hidden = encoder_c(input_c_tensor[ei], encoder_c_hidden)

    return encoder_c_hidden

In [None]:
ALPHA = 0.7

class Node:
    """One beam-search hypothesis step, linked to the step that preceded it.

    Attributes:
        p_t: log-probability of this step's token.
        i: tensor holding this step's token index.
        d_hidden: decoder hidden state produced together with this token.
        lvl: decoding step (0 for the first generated token).
        node_parent: previous Node of the hypothesis, or None for the first step.
        p_sentence: accumulated log-probability of the hypothesis up to this node.
    """

    def __init__(self, p_t, i, d_hidden, lvl, node_parent=None):
        self.p_t, self.i = p_t, i
        self.d_hidden = d_hidden
        self.lvl = lvl
        self.node_parent = node_parent

        self.p_sentence = self.sentenceProb()

    def prepareToDecode(self):
        """Return ``(decoder_input, decoder_hidden)`` for expanding this node."""
        return self.i.squeeze(), self.d_hidden

    def sentenceProb(self):
        """Sum the step log-probabilities from the first step down to this node."""
        parent = self.node_parent
        if parent is not None:
            return parent.sentenceProb() + self.p_t  # log-probabilities add up
        return self.p_t

    def normProb(self):
        """Return the hypothesis probability raised to ``-1 / (lvl + 1)`` (a perplexity-style score)."""
        exponent = -1/(self.lvl+1)
        return torch.exp(self.sentenceProb()) ** exponent

    def getToken(self):
        """Return this node's token as a word of ``output_lang``."""
        return output_lang.index2word[self.i.item()]

    def getTokens(self):
        """Return the hypothesis as a list of words, first step first."""
        words = []
        node = self
        while node is not None:
            words.append(node.getToken())
            node = node.node_parent
        words.reverse()
        return words

    def getSentence(self):
        """Return the hypothesis as a space-joined string."""
        return ' '.join(self.getTokens())

In [None]:
BEAM_WIDTH = 10

def evaluate(encoder_c, encoder_a, decoder, c, a, max_length=MAX_LENGTH):
    """Decode a sentence from content ``c`` and attributes ``a`` using beam search.

    At every step the ``BEAM_WIDTH`` best live hypotheses (by summed
    log-probability) are each expanded with their ``BEAM_WIDTH`` most likely
    next tokens. Hypotheses that emit EOS are set aside as finished.

    Args:
        encoder_c, encoder_a, decoder: trained networks.
        c: content string.
        a: attribute string.
        max_length: maximum number of decoding steps.

    Returns:
        The tokens of the best finished hypothesis (including the final EOS
        marker). If nothing finished within ``max_length`` steps, the best
        unfinished hypothesis is returned instead (the original raised
        IndexError in that case). Returns [] when there is no hypothesis at all.
    """
    with torch.no_grad():
        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoderStep(encoder_c, encoder_a, c, a)

        nodes = []
        finished_nodes = []
        for di in range(max_length):
            if di == 0:
                # seed the beam with the best first tokens
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                top_ps, top_is = decoder_output.data.topk(BEAM_WIDTH)
                top_ps = top_ps.view(-1)
                top_is = top_is.view(-1)
                for index in range(len(top_is)):
                    nodes.append(Node(top_ps[index], top_is[index], decoder_hidden, di))
            else:
                prev_nodes = [x for x in nodes if x.lvl == di-1]
                prev_nodes = sorted(prev_nodes, key=lambda x: x.p_sentence.item(), reverse=True)
                prev_nodes = prev_nodes[:BEAM_WIDTH]
                if not prev_nodes:
                    # every hypothesis has ended; further steps would do nothing
                    break

                nodes = []
                for node in prev_nodes:
                    decoder_output, decoder_hidden = decoder(*node.prepareToDecode())
                    top_ps, top_is = decoder_output.data.topk(BEAM_WIDTH)
                    top_ps = top_ps.view(-1)
                    top_is = top_is.view(-1)

                    for index in range(len(top_is)):
                        i = top_is[index]
                        child_node = Node(top_ps[index], i, decoder_hidden, di, node)

                        if i.item() == EOS_token:
                            finished_nodes.append(child_node)
                        else:
                            nodes.append(child_node)

        # Prefer finished hypotheses; fall back to the live beam if none ended.
        candidates = finished_nodes if finished_nodes else nodes
        if not candidates:
            return []
        final_node = sorted(candidates, key=lambda x: x.p_sentence.item(), reverse=True)[0]
        return final_node.getTokens()

evaluate(encoder_c, encoder_a, decoder, "the food", "great")

In [None]:
def evaluateWithoutBeamSearch(encoder_c, encoder_a, decoder, c, a, max_length=MAX_LENGTH):
    """Decode a sentence from content ``c`` and attributes ``a`` greedily.

    At each step the single most probable token is emitted and fed back as the
    next decoder input, until EOS or ``max_length`` steps.

    Returns:
        list of decoded words; ends with '<EOS>' if the decoder emitted EOS.
    """
    with torch.no_grad():
        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoderStep(encoder_c, encoder_a, c, a)
        decoded_words = []

        # greedy decoding (no beam)
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)

            # decoder_output is indexed (batch, vocabulary); name the vocabulary
            # axis explicitly instead of relying on the deprecated implicit choice.
            topv, topi = F.softmax(decoder_output, dim=1).data.topk(1)

            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words
' '.join(evaluateWithoutBeamSearch(encoder_c, encoder_a, decoder, "the food is", "great"))

In [None]:
def evaluateRandomly(encoder_c, encoder_a, decoder, n=10):
    """Print ``n`` random training pairs together with the model's decoded output.

    For each sample: '>' shows the first field of the pair, '=' the second and
    '<' the sentence produced by evaluate().
    """
    for _ in range(n):
        sample = random.choice(pairs)
        first, second = sample[0], sample[1]
        print('>', first)
        print('=', second)
        decoded = evaluate(encoder_c, encoder_a, decoder, first, second)
        print('<', ' '.join(decoded))
        print('')

In [None]:
evaluateRandomly(encoder_c, encoder_a, decoder)

In [None]:
def DeleteAndRetrieve(sentence, style):
    """Transfer ``sentence`` to the opposite style with the trained DeleteAndRetrieve model.

    Args:
        sentence: source sentence.
        style: style of ``sentence`` (truthy = positive, falsy = negative), the
            same convention as TemplateBased's ``style_src`` and the way the
            demo cell calls this function.

    Returns:
        The decoded sentence as a space-joined string.

    Fixes over the original:
      * the content was always extracted as if the source were negative
        (hard-coded 0) instead of using ``style``;
      * retrieval was called with ``not style``; since retrieve() already looks
        in the corpus opposite to the style it is given, that fetched markers of
        the source's own style;
      * content is whitespace-normalised with get_c, matching how the training
        inputs were built.
    NOTE(review): confirm that no caller relies on ``style`` meaning the target style.
    """
    c_src = get_c(sentence, style)
    a_tgt = ' '.join(retrieve(sentence, style)['a'])
    return ' '.join(evaluate(encoder_c, encoder_a, decoder, c_src, a_tgt))

In [None]:
sentence = "we sit down and we got some really slow and lazy service ."
style_src = 0
DeleteAndRetrieve(sentence, style_src)

===============================================

# DeleteOnly

In [None]:
# Resources: https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html
# Had to modify & adapt most of the code in the tutorial since this isn't translation & data preprocessing is different

SOS_token = 0
EOS_token = 1


class Lang:
    """Vocabulary for the DeleteOnly model: word/index mappings plus word counts.

    Indexes 0 and 1 are reserved for the "SOS" and "EOS" markers.
    """

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # SOS and EOS are already counted

    def addSentence(self, sentence):
        """Add every single-space-separated token of ``sentence`` to the vocabulary."""
        for token in sentence.split(' '):
            self.addWord(token)

    def addWord(self, word):
        """Give an unseen word the next free index; otherwise increment its count."""
        if word in self.word2index:
            self.word2count[word] += 1
            return

        next_index = self.n_words
        self.word2index[word] = next_index
        self.word2count[word] = 1
        self.index2word[next_index] = word
        self.n_words = next_index + 1

In [None]:
#Notes: DELETEONLY first embeds the content
#c(x, vsrc) into a vector using an RNN. It then
#concatenates the final hidden state with a learned
#embedding for vtgt, and feeds this into an RNN
#decoder to generate y. The decoder attempts to
#produce words indicative of the source content
#and target attribute, while remaining fluent.

#Problem 1 -> Embed c into vector. -> Simple GRU autoencoder

# c -> encoder -> code > concatenated -> decoder -> y (sentence)
# v -> embedding -> code    ^

In [None]:
def prepareData():
    """Build the DeleteOnly training pairs and their two vocabularies.

    Each pair is `[get_c(sentence, style), sentence, style]`: entries from
    `d_pos` (style 1) come first, followed by entries from `d_neg` (style 0).
    The first element of every pair feeds the input vocabulary, the second
    the output vocabulary.

    Returns:
        (input_lang, output_lang, pairs)
    """
    # NOTE(review): assumes iterating d_pos / d_neg yields sentence strings --
    # confirm how they are defined at this point in the notebook.
    input_lang, output_lang = Lang("input"), Lang("output")

    pairs = [[get_c(sentence, 1), sentence, 1] for sentence in d_pos]
    pairs += [[get_c(sentence, 0), sentence, 0] for sentence in d_neg]

    for content, full_sentence, _style in pairs:
        input_lang.addSentence(content)
        output_lang.addSentence(full_sentence)

    print("Counted words:")
    for lang in (input_lang, output_lang):
        print(lang.name, lang.n_words)
    return input_lang, output_lang, pairs

In [None]:
# Build both vocabularies and the [content, sentence, style] training pairs.
input_lang, output_lang, pairs = prepareData()

In [None]:
# Extend the vocabularies with the words of the reference pairs:
# pair[0] goes to the input vocabulary, pair[1] to the output vocabulary.
# Positive references are processed before negative ones.
for ref_pairs in (d_pos_ref, d_neg_ref):
    for pair in ref_pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])

In [None]:
class EncoderRNN(nn.Module):
    """GRU encoder that consumes one token index per call.

    Args:
        input_size: number of rows in the embedding table (vocabulary size).
        word_vec_size: width of each word embedding.
        hidden_size: width of the GRU hidden state.
    """

    def __init__(self, input_size, word_vec_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, word_vec_size)
        self.gru = nn.GRU(word_vec_size, hidden_size)

    def forward(self, input, hidden):
        """Embed `input`, run one GRU step and return `(output, hidden)`."""
        # The embedding is reshaped to (1, 1, word_vec_size) before the GRU.
        step_input = self.embedding(input).view(1, 1, -1)
        return self.gru(step_input, hidden)

    def initHidden(self):
        """Return an all-zero (1, 1, hidden_size) hidden state on `device`."""
        return torch.zeros((1, 1, self.hidden_size), device=device)

In [None]:
class StyleEmbedder(nn.Module):
    """Learned embedding table with one vector per style id.

    Args:
        num_styles: number of distinct style ids.
        dimensions: width of each style vector.
    """

    def __init__(self, num_styles, dimensions):
        super().__init__()
        self.dimensions = dimensions
        self.embedding = nn.Embedding(num_styles, dimensions)

    def forward(self, input):
        """Look up the style vector and reshape it to (1, 1, -1)."""
        style_vector = self.embedding(input)
        return style_vector.view(1, 1, -1)

In [None]:
class Maxout(nn.Module):
    """Max-pool groups of `pool_size` consecutive entries along dimension 1.

    Dimension 1 of the input must be divisible by `pool_size`; the output has
    that dimension reduced by a factor of `pool_size`.
    """

    def __init__(self, pool_size):
        super().__init__()
        self._pool_size = pool_size

    def forward(self, x):
        channels = x.shape[1]
        assert channels % self._pool_size == 0, \
            'Wrong input last dim size ({}) for Maxout({})'.format(channels, self._pool_size)
        # Split dim 1 into (groups, pool_size) and keep the maximum of each group.
        grouped_shape = (x.shape[0], channels // self._pool_size, self._pool_size) + tuple(x.shape[2:])
        return x.view(grouped_shape).max(2)[0]

In [None]:
class DecoderRNN(nn.Module):
    """GRU decoder producing log-probabilities over the output vocabulary.

    Args:
        hidden_size: width of the GRU hidden state.
        word_vec_size: width of each word embedding.
        output_size: size of the output vocabulary.
    """

    def __init__(self, hidden_size, word_vec_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, word_vec_size)
        self.gru = nn.GRU(word_vec_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        self.maxout = Maxout(1)

    def forward(self, input, hidden):
        """Run one decoding step and return `(log_probs, hidden)`."""
        embedded = self.embedding(input).view(1, 1, -1)
        pooled = self.maxout(embedded)
        gru_out, hidden = self.gru(pooled, hidden)
        # gru_out[0] drops the leading length-1 dimension before the projection.
        log_probs = self.softmax(self.out(gru_out[0]))
        return log_probs, hidden

    def initHidden(self):
        """Return an all-zero (1, 1, hidden_size) hidden state on `device`."""
        return torch.zeros((1, 1, self.hidden_size), device=device)

In [None]:
#Preparing Training Data
def indexesFromSentence(lang, sentence):
    """Map each space-separated token of `sentence` to its index in `lang`.

    Raises:
        KeyError: if a token is missing from `lang.word2index`.
    """
    lookup = lang.word2index
    indexes = []
    for token in sentence.split(' '):
        indexes.append(lookup[token])
    return indexes


def tensorFromSentence(lang, sentence):
    """Encode `sentence` as a column tensor of token indices ending in EOS.

    Returns:
        A long tensor of shape (number_of_tokens + 1, 1) on `device`.
    """
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    column = torch.tensor(token_ids, dtype=torch.long, device=device)
    return column.view(-1, 1)

def tensorFromStyle(style):
    """Wrap a style id in a long tensor reshaped to (-1, 1) on `device`."""
    style_ids = torch.tensor(style, dtype=torch.long, device=device)
    return style_ids.view(-1, 1)

def tensorsFromPair(pair):
    """Convert a `[content, sentence, style]` pair into training tensors.

    Returns:
        (input_tensor, style_tensor, target_tensor)
    """
    source = tensorFromSentence(input_lang, pair[0])
    target = tensorFromSentence(output_lang, pair[1])
    return (source, tensorFromStyle(pair[2]), target)

In [None]:
#Training
# Default `max_length` of train() and the evaluation helpers; the decoding
# loops in the evaluation helpers run for at most this many steps.
MAX_LENGTH = 50

# Probability that train() feeds the ground-truth token back into the decoder
# (teacher forcing) instead of the decoder's own prediction.
teacher_forcing_ratio = 0.5


def train(input_tensor, style_tensor, target_tensor, encoder, style_embedder, decoder, encoder_optimizer, style_embedder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single training example.

    The content tokens are encoded one at a time; the final encoder hidden
    state is concatenated with the style embedding and used as the decoder's
    initial hidden state. With probability `teacher_forcing_ratio` the decoder
    is fed the ground-truth tokens, otherwise its own greedy predictions
    (stopping early once it predicts EOS).

    Args:
        input_tensor: content token indices, one per row.
        style_tensor: style id tensor passed to `style_embedder`.
        target_tensor: target token indices, one per row.
        encoder, style_embedder, decoder: the modules being trained.
        encoder_optimizer, style_embedder_optimizer, decoder_optimizer:
            one optimizer per module.
        criterion: loss applied to each decoder output and target token.
        max_length: unused; kept so existing callers keep working. No
            per-step encoder output buffer is allocated, so inputs longer
            than this value no longer raise an IndexError.

    Returns:
        The summed loss divided by the full target length (also when greedy
        decoding stopped early).
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    style_embedder_optimizer.zero_grad()

    target_length = target_tensor.size(0)

    # Only the final hidden state is needed; the per-step outputs are not
    # used by the decoder (there is no attention), so they are discarded.
    encoder_hidden = encoder.initHidden()
    for ei in range(input_tensor.size(0)):
        _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

    # Decoder starts from [encoder state ; style embedding] along dim 2.
    style_embedding = style_embedder(style_tensor)
    decoder_hidden = torch.cat((encoder_hidden, style_embedding), 2)
    decoder_input = torch.tensor([[SOS_token]], device=device)

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: feed the target as the next input.
            decoder_input = target_tensor[di]
        else:
            # Free running: feed the decoder's own best guess, detached from
            # the graph so gradients do not flow through the input.
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    style_embedder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [None]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    """Plot `points` as a line with y-axis major ticks every 0.2.

    Args:
        points: sequence of values to plot (trainIters passes averaged losses).
    """
    # A single plt.subplots() call creates both the figure and its axes; no
    # separate plt.figure() is needed (it would only leave an empty figure open).
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points)
    
import time
import math

def asMinutes(s):
    """Format a duration given in seconds as '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

def timeSince(since, percent):
    """Return '<elapsed> (- <remaining>)' for a task started at `since`.

    Args:
        since: start timestamp as returned by `time.time()`.
        percent: fraction of the work already done; the total duration is
            estimated as elapsed / percent.
    """
    elapsed = time.time() - since
    estimated_total = elapsed / percent
    remaining = estimated_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [None]:
def trainIters(encoder, style_embedder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train the three DeleteOnly modules for `n_iters` single-example steps.

    Training examples are drawn with replacement from the global `pairs` and
    converted to tensors up front. Each module gets its own Adadelta optimizer
    and the loss is NLLLoss. The average loss is printed every `print_every`
    steps and recorded every `plot_every` steps; the recorded averages are
    plotted at the end.
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # running sum, reset every print_every steps
    plot_loss_total = 0   # running sum, reset every plot_every steps

    encoder_optimizer = optim.Adadelta(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adadelta(decoder.parameters(), lr=learning_rate)
    style_embedder_optimizer = optim.Adadelta(style_embedder.parameters(), lr=learning_rate)

    training_pairs = [tensorsFromPair(random.choice(pairs)) for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    for step, (input_tensor, style_tensor, target_tensor) in enumerate(training_pairs, start=1):
        loss = train(input_tensor, style_tensor, target_tensor, encoder, style_embedder,
                     decoder, encoder_optimizer, style_embedder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            progress = step / n_iters
            print('%s (%d %d%%) %.4f' % (timeSince(start, progress),
                                         step, progress * 100, print_loss_avg))

        if step % plot_every == 0:
            plot_losses.append(plot_loss_total / plot_every)
            plot_loss_total = 0

    showPlot(plot_losses)

In [None]:
# Model sizes: word embedding width, encoder GRU state width, style embedding width.
word_vec_size = 128
hidden_size = 512
style_vec_size = 128
encoder1 = EncoderRNN(input_lang.n_words, word_vec_size, hidden_size).to(device)

# The decoder is initialised with the encoder state concatenated with the style
# embedding (see train()), so its hidden size is the sum of both widths.
decoder1 = DecoderRNN(hidden_size + style_vec_size, word_vec_size, output_lang.n_words).to(device)

# Embedding table with two rows, one per style id.
style_embedder1 = StyleEmbedder(2, style_vec_size).to(device)

In [None]:
# Train for 200,000 single-example steps, printing the average loss every 1,000 steps.
trainIters(encoder1, style_embedder1, decoder1, 200000, print_every=1000)

### Evaluation

In [None]:
def encoderStep_deleteOnly(encoder, style_embedder, sentence, style, max_length=MAX_LENGTH):
    """Build the decoder's initial hidden state for a content sentence and style.

    Args:
        encoder: encoder module, called once per input token.
        style_embedder: module mapping the style tensor to its embedding.
        sentence: space-separated content string; every token must be in
            `input_lang`.
        style: style id passed to `tensorFromStyle`.
        max_length: unused; kept so existing callers keep working. No
            per-step encoder output buffer is allocated, so sentences longer
            than this value no longer raise an IndexError.

    Returns:
        The final encoder hidden state concatenated with the style embedding
        along dimension 2.
    """
    input_tensor = tensorFromSentence(input_lang, sentence)
    style_tensor = tensorFromStyle(style)

    # Only the final hidden state is used; per-step outputs are discarded.
    encoder_hidden = encoder.initHidden()
    for ei in range(input_tensor.size(0)):
        _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

    style_embedding = style_embedder(style_tensor)
    return torch.cat((encoder_hidden, style_embedding), 2)

In [None]:
# Length-normalisation exponent. In the code visible here it is referenced only
# by the commented-out formula inside Node.normProb.
ALPHA = 0.7

class Node:
    """One token of a beam-search hypothesis, linked to its predecessor.

    Attributes are stored as given: `p_t` is this token's log-probability,
    `i` the token index, `d_hidden` the decoder hidden state after emitting
    it, `lvl` the decoding step, `node_parent` the previous node (None for
    the first token). `p_sentence` caches the summed log-probability of the
    whole chain at construction time.
    """

    def __init__(self, p_t, i, d_hidden, lvl, node_parent=None):
        self.p_t = p_t
        self.i = i
        self.d_hidden = d_hidden
        self.lvl = lvl
        self.node_parent = node_parent

        self.p_sentence = self.sentenceProb()

    def _ancestry(self):
        """Return the nodes of this hypothesis ordered from first token to this one."""
        chain = []
        current = self
        while current is not None:
            chain.append(current)
            current = current.node_parent
        chain.reverse()
        return chain

    def prepareToDecode(self):
        """Return `(decoder_input, decoder_hidden)` for expanding this node."""
        return self.i.squeeze(), self.d_hidden

    def sentenceProb(self):
        """Sum the log-probabilities along the chain (first token first)."""
        chain = self._ancestry()
        total = chain[0].p_t
        for later in chain[1:]:
            total = total + later.p_t  # addition because these are log-probabilities
        return total

    def normProb(self):
        """Return exp(sentence log-probability) raised to -1 / (lvl + 1)."""
        return torch.exp(self.sentenceProb()) ** (-1/(self.lvl+1))

    def getToken(self):
        """Look up this node's word in `output_lang`."""
        return output_lang.index2word[self.i.item()]

    def getTokens(self):
        """Return the words of the whole hypothesis, first token first."""
        return [member.getToken() for member in self._ancestry()]

    def getSentence(self):
        """Return the hypothesis as a single space-joined string."""
        return ' '.join(self.getTokens())
# Normalize 1/num words^alpha (alpha = 0.7)

In [None]:
def evaluateWithoutBeamSearch(encoder, style_embedder, decoder, sentence, style, max_length=MAX_LENGTH):
    """Greedily decode a sentence from `sentence` (content) and `style`.

    At every step the single most likely token is emitted and fed back into
    the decoder, for at most `max_length` steps.

    Returns:
        The list of decoded words; it ends with '<EOS>' when the decoder
        predicted the end-of-sequence token before `max_length` was reached.
    """
    with torch.no_grad():
        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoderStep_deleteOnly(encoder, style_embedder, sentence, style)

        decoded_words = []
        for _ in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            _, best = decoder_output.data.topk(1)
            token_id = best.item()

            if token_id == EOS_token:
                decoded_words.append('<EOS>')
                break

            decoded_words.append(output_lang.index2word[token_id])
            decoder_input = best.squeeze().detach()

        return decoded_words

In [None]:
# Beam size used by evaluate_deleteOnly: the number of hypotheses kept per step
# and the number of top-k continuations tried for each of them.
BEAM_WIDTH = 10

def evaluate_deleteOnly(encoder, style_embedder, decoder, sentence, style, max_length=MAX_LENGTH):
    """Decode a sentence from `sentence` (content) and `style` with beam search.

    At each step the BEAM_WIDTH best live hypotheses (by summed
    log-probability) are each extended with their BEAM_WIDTH most likely next
    tokens. Hypotheses that emit EOS are set aside as finished.

    Returns:
        The space-joined words of the finished hypothesis with the highest
        summed log-probability. If no hypothesis emitted EOS within
        `max_length` steps, the best unfinished hypothesis is returned
        instead; '' if there is no hypothesis at all.
    """
    with torch.no_grad():
        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoderStep_deleteOnly(encoder, style_embedder, sentence, style)

        nodes = []           # live hypotheses produced by the latest step
        finished_nodes = []  # hypotheses that ended with EOS

        for di in range(max_length):
            if di == 0:
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                top_ps, top_is = decoder_output.data.topk(BEAM_WIDTH)
                # NOTE(review): an EOS among the first-step candidates is kept
                # as a live hypothesis and expanded like any other token.
                for p_t, i in zip(top_ps.view(-1), top_is.view(-1)):
                    nodes.append(Node(p_t, i, decoder_hidden, di))
                continue

            if not nodes:
                # Every hypothesis has finished; nothing left to expand.
                break

            # Keep only the best BEAM_WIDTH hypotheses from the previous step.
            prev_nodes = sorted(nodes, key=lambda x: x.p_sentence.item(), reverse=True)[:BEAM_WIDTH]

            nodes = []
            for node in prev_nodes:
                decoder_output, decoder_hidden = decoder(*node.prepareToDecode())
                top_ps, top_is = decoder_output.data.topk(BEAM_WIDTH)

                for p_t, i in zip(top_ps.view(-1), top_is.view(-1)):
                    child_node = Node(p_t, i, decoder_hidden, di, node)
                    if i.item() == EOS_token:
                        finished_nodes.append(child_node)
                    else:
                        nodes.append(child_node)

        # Prefer finished hypotheses; fall back to the live beam so that a
        # decode that never reaches EOS does not raise IndexError.
        candidates = finished_nodes or nodes
        if not candidates:
            return ''
        final_node = max(candidates, key=lambda x: x.p_sentence.item())
        return final_node.getSentence()
# Quick check of the beam-search decoder on a short input with style 1.
evaluate_deleteOnly(encoder1, style_embedder1, decoder1, "the food", 1)

In [None]:
def evaluateRandomly(encoder, style_embedder, decoder, n=10):
    """Print `n` random training pairs next to the DeleteOnly model's output.

    For each sampled pair the content ('>'), the original sentence ('='),
    the style and the decoded sentence ('<') are printed.
    """
    # NOTE(review): a call earlier in the notebook uses `evaluateRandomly`
    # with (encoder_c, encoder_a, decoder); this definition appears to replace
    # that earlier function once this cell has run -- confirm.
    for _ in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        print('style: ', pair[2])

        output = evaluate_deleteOnly(encoder, style_embedder, decoder, pair[0], pair[2])
        # evaluate_deleteOnly returns an already-joined string; joining a
        # string again would insert a space between every character.
        output_sentence = output if isinstance(output, str) else ' '.join(output)

        print('<', output_sentence)
        print('')

In [None]:
evaluateRandomly(encoder1, style_embedder1, decoder1)

In [None]:
def DeleteOnly(sentence, style_src):
    """Transfer `sentence` to the opposite style with the DeleteOnly model.

    The encoder is trained on the content `get_c(sentence, style)` (see
    prepareData), so the same extraction is applied here before decoding;
    feeding the raw sentence would skip the delete step.

    Args:
        sentence: the source sentence.
        style_src: style of `sentence`; the output targets `not style_src`.

    Returns:
        The decoded sentence as returned by `evaluate_deleteOnly`.
    """
    content = get_c(sentence, style_src)
    return evaluate_deleteOnly(encoder1, style_embedder1, decoder1, content, not style_src)

In [None]:
sentence = "we sit down and we got some really slow and lazy service ."
style_src = 0
DeleteOnly(sentence, style_src)

### RetrieveOnly

In [None]:
def get_euclidean_distance(c, c2):
    """Return `torch.dist` between the `encoder_c` embeddings of `c` and `c2`."""
    first_embedding = get_c_embedding(encoder_c, c)
    second_embedding = get_c_embedding(encoder_c, c2)
    return torch.dist(first_embedding, second_embedding)

In [None]:
def RetrieveOnly(sentence, style_src):
    """Return the opposite-style sentence whose content is closest to `sentence`.

    The content of `sentence` is embedded once and compared (via `torch.dist`)
    with the content embedding of every sentence in the opposite-style
    dataset (`d_neg` when `style_src` is truthy, otherwise `d_pos`).

    Returns:
        The sentence with the smallest distance (the first one on ties), or
        "" when the opposite dataset is empty.
    """
    opposite_dataset = d_neg if style_src else d_pos
    style_tgt = not style_src

    c_src = get_c(sentence, style_src)
    # Loop-invariant: embed the source content once instead of once per
    # candidate. Assumes get_c_embedding is deterministic for a given
    # input -- TODO confirm.
    src_embedding = get_c_embedding(encoder_c, c_src)

    closest_sentence = ""
    min_distance = None
    for sentence_b in opposite_dataset:
        c_tgt = get_c(sentence_b, style_tgt)
        dist = torch.dist(src_embedding, get_c_embedding(encoder_c, c_tgt))
        if min_distance is None or dist < min_distance:
            min_distance = dist
            closest_sentence = sentence_b

    return closest_sentence

In [None]:
sentence = "we sit down and we got some really slow and lazy service ."
style_src = 0
RetrieveOnly(sentence, style_src)

### Comparison

In [None]:
sentence = "we sit down and we got some really slow and lazy service ."
style_src = 0

In [None]:
TemplateBased(sentence, style_src)

In [None]:
RetrieveOnly(sentence, style_src)

In [None]:
DeleteOnly(sentence, style_src)

In [None]:
DeleteAndRetrieve(sentence, style_src)

### Classifier

In [None]:
#Since a sentiment classifier is only a small & simple component, & is nothing new, I followed a tutorial instead of doing it 100% from scratch like above code.
# https://github.com/bentrevett/pytorch-sentiment-analysis/blob/master/1%20-%20Simple%20Sentiment%20Analysis.ipynb

In [None]:
TRAIN_SPLIT_PERCENT = 0.8
from fastai.text import *

In [None]:
# Random row split of d_all: each row lands in `train` with probability
# TRAIN_SPLIT_PERCENT, otherwise in `test`. The mask is sized from d_all
# itself, so it always matches the frame being indexed.
# NOTE(review): binding the name `train` here replaces the train() function
# defined for the seq2seq models; trainIters() cannot be re-run after this
# cell without re-executing that definition.
msk = np.random.rand(len(d_all)) < TRAIN_SPLIT_PERCENT
train = d_all[msk]
test = d_all[~msk]

In [None]:
# Language-model data built from the `text` column of the training split:
# a random 20% is held out for validation and batches contain 48 items.
data = (TextList.from_df(train, cols='text')
                .split_by_rand_pct(0.2)
                .label_for_lm()  
                .databunch(bs=48))
data.show_batch()

In [None]:
# Language-model learner (AWD_LSTM architecture) on the data built above.
learner = language_model_learner(data, AWD_LSTM, drop_mult=0.3)
# Run fastai's learning-rate finder; its results are plotted just below.
learner.lr_find()

# Plot the finder's curve; we typically pick the point where the slope is steepest
learner.recorder.plot()

# First stage: 5 one-cycle epochs at learning rate 1e-2
learner.fit_one_cycle(5, 1e-2, moms=(0.8,0.7))

# Second stage: unfreeze the model and train 5 more epochs at the lower rate 1e-3
learner.unfreeze()
learner.fit_one_cycle(5, 1e-3, moms=(0.8,0.7))

In [None]:
learner.save_encoder('fine_tuned_enc')

In [None]:
# Held-out texts, tokenised with the language model's vocabulary.
test_datalist = TextList.from_df(test, cols='text', vocab=data.vocab)

# Classifier data: same vocabulary as the language model, random 20%
# validation split, targets from the `labels` column, batches of 32.
# NOTE(review): predict_style thresholds the prediction at 0.5, which suggests
# the `labels` column is treated as a numeric target -- confirm how fastai
# interprets its dtype.
data_clas = (TextList.from_df(train, cols='text', vocab=data.vocab)
             .split_by_rand_pct(0.2)
             .label_from_df(cols= 'labels')
             .add_test(test_datalist)
             .databunch(bs=32))

data_clas.show_batch()

In [None]:
# Text classifier learner (AWD_LSTM architecture) on the classifier data.
learn_classifier = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5)

# Load the encoder that was saved under the name 'fine_tuned_enc'
learn_classifier.load_encoder('fine_tuned_enc')

# Start with the model frozen
learn_classifier.freeze()

# Run the learning-rate finder to select an appropriate learning rate
learn_classifier.lr_find()

# Plot the finder's curve; we typically pick the point where the slope is steepest
learn_classifier.recorder.plot()

# Stage 1: 5 one-cycle epochs at learning rate 2e-2
learn_classifier.fit_one_cycle(5, 2e-2, moms=(0.8,0.7))

# Stage 2: freeze_to(-2), then 5 epochs with learning rates spread over
# slice(1e-2/(2.6**4), 1e-2).
# NOTE(review): per fastai v1, freeze_to(-2) presumably leaves the last two
# layer groups trainable -- confirm against the installed version.
learn_classifier.freeze_to(-2)
learn_classifier.fit_one_cycle(5, slice(1e-2/(2.6**4),1e-2), moms=(0.8,0.7))

# Stage 3: freeze_to(-3), then 5 epochs with the smaller range
# slice(5e-3/(2.6**4), 5e-3)
learn_classifier.freeze_to(-3)
learn_classifier.fit_one_cycle(5, slice(5e-3/(2.6**4),5e-3), moms=(0.8,0.7))

# Display a sample of predictions
learn_classifier.show_results()

In [None]:
sentence = "we sit down and we got some really slow and lazy service ."
style_src = 0

def predict_style(sentence):
    """Classify `sentence` with `learn_classifier` and return 1 or 0.

    The first element of the learner's prediction provides a value via
    `.data[0]`; values above 0.5 map to 1, everything else to 0.
    """
    prediction = learn_classifier.predict(sentence)
    predicted_value = prediction[0].data[0]
    return 1 if predicted_value > 0.5 else 0

In [None]:
predict_style("bad")

### Get Scores

In [None]:
def get_scores(func, num_tests=None):
    """Evaluate a style-transfer function on the reference pairs and print scores.

    `func(sentence, source_style)` is applied to the first element of every
    reference pair. Positive references (source style 1) are processed before
    negative ones (source style 0). A hypothesis counts as correct when
    `predict_style` assigns it the opposite of its source style. BLEU is
    computed with `get_moses_multi_bleu` against the second element of each
    pair.

    Args:
        func: callable taking (sentence, source_style) and returning a string.
        num_tests: if given, only the first `num_tests // 2` pairs of each
            reference set are used; otherwise all of them.

    Prints every hypothesis/reference/prediction triple, then the accuracy
    and the BLEU score. Returns nothing.
    """
    if num_tests is None:
        pos_refs, neg_refs = d_pos_ref, d_neg_ref
    else:
        half = num_tests // 2
        pos_refs, neg_refs = d_pos_ref[:half], d_neg_ref[:half]

    num_correct = 0
    total = 0
    hypotheses, references = [], []

    # (reference pairs, source style given to func, style the output should get)
    runs = ((pos_refs, 1, 0), (neg_refs, 0, 1))
    for ref_pairs, source_style, expected_style in runs:
        for pair in ref_pairs:
            hypothesis = func(pair[0], source_style)
            reference = pair[1]

            hypotheses.append(hypothesis)
            references.append(reference)

            predicted_style = predict_style(hypothesis)
            if predicted_style == expected_style:
                num_correct += 1
            total += 1
            print("H, R, P: ", hypothesis, reference, predicted_style)

    # With no correct prediction (including the no-data case) accuracy is 0.
    accuracy = num_correct / total if num_correct > 0 else 0

    bleu_score = get_moses_multi_bleu(hypotheses, references, True)
    print("Accuracy: ", accuracy)
    print("BLEU: ", bleu_score)

In [None]:
get_scores(TemplateBased)

In [None]:
get_scores(RetrieveOnly)

In [None]:
get_scores(DeleteOnly)

In [None]:
get_scores(DeleteAndRetrieve)