Prototype of model explanation via LIME with help of extractive summary
---

---

In [1]:
import numpy as np

import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow import keras

import time

tfds.disable_progress_bar()

In [2]:
now = time.strftime("%Y-%m-%d_%H:%M")

In [37]:
import logging
import sys

logging.basicConfig(
    level=logging.DEBUG, 
    format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(filename=f'../../data/logs/v2-{now}.log'),
        logging.StreamHandler(sys.stdout)
    ]
)

l = logging.getLogger('prototype')
l.critical("Logging LIME with new TF model")

[{<ipython-input-37-240cd68e7195>:14} CRITICAL - Logging LIME with new TF model


---

Model-related
---

In [4]:
def load_model():
    """
    Define a function that loads a model to be explained and returns its instance
    """
    
    return keras.models.load_model("../../raw-data/lstm-model-sigmoid")    

In [5]:
model = load_model()
l.info("Model loaded")

[{<ipython-input-5-b2e056f55d37>:2} INFO - Model loaded


In [6]:
model.predict(["hahahahahahahahahaha this is the most boring film I have ever seen"])

array([[0.0706619]], dtype=float32)

In [7]:
model.predict_proba(["hahahahahahahahahaha this is the funniest film I have ever seen"])
# Even though model has function `predict_proba`, it is not sufficient for LIME
# LIME expects this predict_proba function to return probability for each of the predicted classes

Instructions for updating:
Please use `model.predict()` instead.
Instructions for updating:
Please use `model.predict()` instead.


array([[0.948632]], dtype=float32)

---

Explanation
---

#### 1. Preparation

In [8]:
from lime import lime_text

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
from sumy.summarizers.text_rank import TextRankSummarizer

import os

from functools import reduce

In [9]:
INPUT_DIR = "/home/tomasmizera/school/diploma/src/data/reviews"

LANGUAGE = "english"
SENTENCES_COUNT = 6
TOP_FEATURES_COUNT = 100

EXPL_IX = 1
SUMMARY_IX = 0
CLASSES = ['Positive', 'Negative'] # NOTE: Order matters!

MINIMAL_WORD_WEIGHT = 0.001
HP_FACTOR = 0

In [10]:
summarizer = TextRankSummarizer(Stemmer(LANGUAGE))
summarizer.stop_words = get_stop_words(LANGUAGE)

In [11]:
explanator = lime_text.LimeTextExplainer(class_names=CLASSES)

---

### 2. Execution

In [12]:
l.info("Starting an algorithm")

[{<ipython-input-12-9e84a1d64b6b>:1} INFO - Starting an algorithm


In [13]:
# define a decorator to log execusion time
# inspired by https://medium.com/pythonhive/python-decorator-to-measure-the-execution-time-of-methods-fa04cb6bb36d

def timeit(method):
    def timed(*args, **kw):
        timed.calls += 1
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        timed.time_taken += (te - ts) * 1000
        return result
    timed.calls = 0
    timed.time_taken = 0
    return timed

In [14]:
# @timeit # for LIME it is called once and for this model takes around 150ms with logging
def _predict_proba(_input):
    """
    Define a function that accepts array of instances and returns a probability for each class 
    _input - 1d array of instances
    Returns 2d array of [num of instances] x [num of classes] with probabilities
    """
    prediction = model.predict( _input )
    
    return np.append(prediction, 1 - prediction, axis=1)

In [15]:
def _explain_instance(_file, _explanator):
    return  _explanator.explain_instance(_file, _predict_proba, num_features=TOP_FEATURES_COUNT)

**Some examples of perturbed text**: \[ ..., ' great hm', ' great ', ' great hm', '  hm', '  ', '  hm', '  ', '  ', '  ', '  ', 'not great ', '  ', '  hm', '  ', '  ', 'not great ', '  ', ' great ', ' great ', '  hm', '  ', ' great ', 'not  hm', ' great hm', ' great ', ' great hm', '  hm', 'not  ', '  hm', 'not  ', ' great hm', 'not great ', ' great ', '  ', '  ', '  hm', '  ', '  ', '  ', 'not  hm', 'not  ', 'not great ', 'not  hm', 'not  hm', 'not great ', 'not  hm', '  ', ' great ', '  ', '  hm', 'not  hm', '  ', '  ', ' great ', '  ', '  ', ' great ', 'not  ', 'not  hm', ' great ', 'not  ', 'not  ', 'not  hm', 'not  ', '  ', 'not great ', '  hm', ' great hm', '  hm', '  ', '  ', 'not  hm', '  hm', 'not  ', '  ', ' great hm', 'not  ', ' great hm', 'not  hm', 'not  ', 'not  hm', '  ', ' great ', '  hm', ' great ', 'not  hm', 'not  ', 'not  ', ' great hm', 'not  hm', '  hm', '  hm', ' great ', '  ', 'not great ', '  hm', 'not great ', '  ', '  ', 'not  hm', 'not great ', '  ', '  ', ' great hm', 'not  hm', 'not  hm', ' great ', ' great hm', '  ', 'not  hm', ' great ', '  hm', ' great ', 'not great ', '  ', '  ', ' great hm', ' great hm', '  ', ' great ', 'not  hm', ' great ', 'not  ', ' great ', 'not great ', ' great ', 'not  ', 'not great ', '  ', '  ', '  ', 'not  ', ' great hm', ' great hm', '  ', '  ', 'not great ', '  ', '  ', 'not  ', ' great ', ' great ', 'not great ', '  ', '  ', '  ', '  ', '  ', '  ', '  hm', '  hm', 'not  ', 'not  hm', 'not  ', '  ', ' great ', '  hm', ' great hm', '  ', '  ', '  ', ' great hm', '  ', '  ', 'not  hm', '  ', ' great hm', ' great hm', ' great ', '  ', '  ', 'not  ', '  ', 'not  ', ' great hm', 'not great ', ' great hm', 'not  hm', 'not great ', 'not  ', 'not great ', '  ', 'not great ', '  hm', 'not  ', ' great ', '  ', '  ', ' great ', '  hm', 'not  ', 'not  ', 'not  ', '  ', ' great hm', ' great hm', ' great ', ' great ', 'not great ', ' great ', '  ', 'not great ', 'not great ', '  ', 'not  ', ' great ', ' great ', '  ', '  ', ' great ', 'not  ', ' great ', '  ', '  ', ' great ', 'not  hm', 'not  ', '  ', 'not great ', '  ', '  ', '  ', 'not great ', 'not great ', '  ', '  hm', '  hm', 'not  hm', 'not great ', ' great ', 'not  ', '  ', 'not  hm', 'not great ', 'not  ', 'not great ', 'not  hm', 'not  ', 'not  hm', ' great hm', ' great ', '  hm', '  ', '  hm', '  ', 'not  ', ' great ', '  ', '  hm', 'not  hm', 'not great ', '  ', '  ', ' great ', '  ', '  ', 'not  ', 'not  ', ' great ', '  ', 'not  hm', '  ', ' great hm', '  ', '  ', '  ', ' great ', ' great ', ' great ', ' great ', ' great ', '  hm', 'not  ', ' great ', 'not  hm', '  ', ' great hm', 'not great ', '  hm', '  hm', 'not  hm', 'not  ', 'not great ', '  ', '  ', '  ', 'not  ', '  ', ' great ', '  ', 'not great ', '  ', 'not  ', '  ', '  hm', ' great ', '  ', 'not  ', ' great hm', '  ', ' great ', 'not great ', '  ']

In [16]:
def input_from_files(path_to_files):
    """
    Loads all readable files in path_to_files directory
    Returns np.array with each files content as a separate element
    """
    
    def _read_text_file(filepath):
        with open(filepath, 'r') as f:
            return reduce(lambda a, b: a + b, f.readlines())
    
    files_it = os.scandir(path_to_files)
    files_contents = {}
    
    for file in files_it:
        if file.is_file(): 
            files_contents[file.name] = _read_text_file(file.path)
        
    return files_contents

In [17]:
npInput = input_from_files(INPUT_DIR)

In [18]:
list(npInput.keys())

['review-low.txt-test-setup',
 'review-med.txt-test-setup',
 'review-med.txt-test',
 'review-low.txt',
 'review-top.txt-test',
 'review-med.txt',
 'review-top.txt',
 'review-low.txt-test',
 'review-top.txt-test-setup']

In [36]:
def _summarize_doc_custom(_summarizer, _instance, _explanation):
    """
    Returns summary with altered weights based on explanation
    _summarizer - summy summarizer instance
    _instance - instance content string
    _explanation - LIME explanation
    """
    
    def _create_weight_graph(_summarizer, _instance_doc):
        return _summarizer.rate_sentences(_instance_doc)
    
    def _count_factor(_sentence, _explanation_words_weight) -> float: # returns boosting factor for sentence
        factor = 1.0
        exp_words = list(map(lambda x: x[0], _explanation_words_weight))
        for word in _sentence.words:
            if word in exp_words:
                factor += HP_FACTOR * abs(_explanation_words_weight[exp_words.index(word)][1])
        return factor # factor * 3 if factor != 1 else factor # TODO: Parameter tuning for factor scale
    
    parser = PlaintextParser.from_string(_instance, Tokenizer(LANGUAGE))
    graph = _create_weight_graph(_summarizer, parser.document)
    l.info("Using factor: " + str(HP_FACTOR))
    for sentence in graph.keys():
        factor = _count_factor(sentence, _explanation.as_list())
        graph[sentence] = graph[sentence] * factor 
        
    resulting_summary = _summarizer._get_best_sentences(parser.document.sentences, SENTENCES_COUNT, graph)
    
    return resulting_summary

In [20]:
def _summary_to_string(_summary):
    if len(_summary) <= 0:
        return ""
    
    summary_str = str(_summary[0])
    i = 1
    
    while(i < len(_summary)):
        summary_str += ' ' + str(_summary[i])
        i += 1
        
    return summary_str

In [21]:
def create_explanation_summary(_instance, _explanator, _summarizer):
    """
    Returns explanation summary for instance and words
    """
    
    explanation = _explain_instance(_instance, _explanator)
    summary = _summarize_doc_custom(_summarizer, _instance, explanation)
    return (_summary_to_string(summary), explanation.as_list())
    

def create_explanation_summaries(_instance_map, _explanator, _summarizer):
    """
    Returns summaries for all input elements
    _instance_map - map containing instance name and its content
    _explanator - LIME explanator instance
    _summarizer - summy summarizer instance
    """
    
    summaries = {}
    
    for instance in _instance_map.keys():
        summaries[instance] = create_explanation_summary(_instance_map[instance], _explanator, _summarizer)

    return summaries

In [22]:
def create_simple_summary(_instance, _summarizer):
    """
    Returns summary from instance
    """
    parser = PlaintextParser.from_string(_instance, Tokenizer(LANGUAGE))
    return _summary_to_string(_summarizer(parser.document, SENTENCES_COUNT))

def create_simple_summaries(_instance_map, _summarizer):
    """
    Returns summaries for all input instances
    _instance_map - map containing instance name and its content
    _summarizer - summy summarizer instance
    """
    
    summaries = {}
    
    for instance in _instance_map.keys():
        summaries[instance] = create_simple_summary(_instance_map[instance], _summarizer)
    
    return summaries

In [23]:
explanation_sums = create_explanation_summaries(npInput, explanator, summarizer);

--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.8/logging/__init__.py", line 1081, in emit
    msg = self.format(record)
  File "/usr/lib/python3.8/logging/__init__.py", line 925, in format
    return fmt.format(record)
  File "/usr/lib/python3.8/logging/__init__.py", line 664, in format
    record.message = record.getMessage()
  File "/usr/lib/python3.8/logging/__init__.py", line 369, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/tomasmizera/school/diploma/src/env/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/tomasmizera/school/diploma/src/env/lib/python3.8/site-packages/traitlets/config/applicati

In [24]:
simple_sums = create_simple_summaries(npInput, summarizer);

Summary visualization
---

---

In [25]:
import re
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML

def highlight_summary(_summary, _class_names = None, _title = None):
    """
    Highlights important words from LIME explanation
    Display content immediately and do not return anything
    """
    
    # Normalize values to custom bounds https://stackoverflow.com/a/48109733/7875594
    def normalize(values, bounds):
        if bounds['actual']['upper'] == bounds['actual']['lower']:
            return values
        return [bounds['desired']['lower'] + (abs(x) - bounds['actual']['lower']) * (bounds['desired']['upper'] - bounds['desired']['lower']) / 
                (bounds['actual']['upper'] - bounds['actual']['lower']) for x in values]

    colors = {}
    colors[0] = '16,171,232' # blue
    colors[1] = '199,175,16' # yellow-ish
    
    start_highlight_tag = lambda col, a: f'<mark style="background-color:rgba({colors[col]},{a});">'
    end_highlight_tag = '</mark>'
    
    raw_text = _summary[SUMMARY_IX]
    important_words_weights = _summary[EXPL_IX]
    important_words = list(map(lambda x: x[0], important_words_weights))
    maxv = round(abs(important_words_weights[0][1]), 5)
    minv = round(abs(important_words_weights[-1][1]), 5) # Here take abs for alpha calculations

    weights = list(map(lambda x: x[1], important_words_weights))
    upper_bound = 1
    lower_bound = 0.2
    # normalize weights to <1, 0.2> range to be usable as alpha color channel
    norm_weights = normalize(weights, {'actual': {'lower':minv, 'upper':maxv}, 'desired':{'lower':lower_bound, 'upper':upper_bound}})
    
    title = "<h2>Summary</h2>"
    if _title is not None:
        title = f'<h2>Summary: {_title}</h2>'

    legend = ""
    if _class_names is not None:
        legend = "<h4>Legend</h4>"
        legend += f'<span style="color:rgb({colors[0]});font-weight:bold">' + _class_names[0] + "</span><br>"
        legend += f'Most significant <canvas width="200" height="10" style="border:1px solid #000000; background-image: linear-gradient(to left, rgba({colors[0]},{lower_bound}), rgba({colors[0]},{upper_bound}));"></canvas> Least significant'
        legend += "<br>"
        legend += f'<span style="color:rgb({colors[1]});font-weight:bold">' + _class_names[1] + "</span><br>"
        legend += f'Most significant <canvas width="200" height="10" style="border:1px solid #000000; background-image: linear-gradient(to left, rgba({colors[1]},{lower_bound}), rgba({colors[1]},{upper_bound}));"></canvas> Least significant'
        legend += ""
    
    for ix, word in enumerate(important_words):
        wx = important_words_weights[ix][1]
        
        if abs(wx) < MINIMAL_WORD_WEIGHT:
            continue
            
        col = 1 if wx >= 0 else 0
        alpha = norm_weights[ix]
        
        # https://regex101.com/r/nNu7Rs/1
        pattern = r'(?<![><(=")\/])\b(' + word + r')\b(?!(:rgba)|(="back))'
        
        if word.isnumeric():
            # https://regex101.com/r/ZP4VV1/1
            pattern = r'(?<!\(|,)\b' + word + r'\b(?!\)|,)'
            
        raw_text = re.sub(pattern, start_highlight_tag(col, alpha) + word + end_highlight_tag, raw_text, flags=re.I)
    
        
    result = title + legend + '<h3>Text</h3>' + raw_text + '<p style="margin-bottom:1cm;"></p>'

    display(HTML(result))
    print(important_words_weights)

In [26]:
highlight_summary(explanation_sums['review-top.txt'], _class_names=CLASSES)

[('and', -0.12360869477749314), ('s', 0.09084782910133374), ('I', -0.06045640875081793), ('is', -0.05508562248025823), ('band', -0.05446852919743163), ('the', -0.04340921464769811), ('a', -0.04140570063269813), ('White', -0.04040556535179805), ('just', 0.03865039728158772), ('as', -0.037717587028933124), ('will', -0.037663202202166325), ('to', 0.03765804553587961), ('no', 0.036759445464938284), ('Rainey', 0.034683685101470914), ('it', -0.0346758681956784), ('element', -0.0345301253657332), ('bit', -0.0339521830869772), ('underrated', -0.03364845945683605), ('also', -0.03349725736888776), ('monologue', -0.03340889148670543), ('own', -0.03315278993350857), ('more', -0.031841215818146286), ('soul', -0.03170468903677064), ('studio', -0.031075711621125008), ('freedom', -0.03048879160980103), ('record', -0.030417526016294688), ('great', -0.030036586941750827), ('of', 0.029151612962679565), ('facing', -0.029085235131124522), ('deal', -0.028387102578291076), ('him', -0.028077825978579087), ('k

In [27]:
# longest instance from testing data
longest = 'Match 1: Tag Team Table Match Bubba Ray and Spike Dudley vs Eddie Guerrero and Chris Benoit Bubba Ray and Spike Dudley started things off with a Tag Team Table Match against Eddie Guerrero and Chris Benoit. According to the rules of the match, both opponents have to go through tables in order to get the win. Benoit and Guerrero heated up early on by taking turns hammering first Spike and then Bubba Ray. A German suplex by Benoit to Bubba took the wind out of the Dudley brother. Spike tried to help his brother, but the referee restrained him while Benoit and Guerrero ganged up on him in the corner. With Benoit stomping away on Bubba, Guerrero set up a table outside. Spike dashed into the ring and somersaulted over the top rope onto Guerrero on the outside! After recovering and taking care of Spike, Guerrero slipped a table into the ring and helped the Wolverine set it up. The tandem then set up for a double superplex from the middle rope which would have put Bubba through the table, but Spike knocked the table over right before his brother came crashing down! Guerrero and Benoit propped another table in the corner and tried to Irish Whip Spike through it, but Bubba dashed in and blocked his brother. Bubba caught fire and lifted both opponents into back body drops! Bubba slammed Guerrero and Spike stomped on the Wolverine from off the top rope. Bubba held Benoit at bay for Spike to soar into the Wassup! headbutt! Shortly after, Benoit latched Spike in the Crossface, but the match continued even after Spike tapped out. Bubba came to his brother\'s rescue and managed to sprawl Benoit on a table. Bubba leapt from the middle rope, but Benoit moved and sent Bubba crashing through the wood! But because his opponents didn\'t force him through the table, Bubba was allowed to stay in the match. The first man was eliminated shortly after, though, as Spike put Eddie through a table with a Dudley Dawg from the ring apron to the outside! Benoit put Spike through a table moments later to even the score. Within seconds, Bubba nailed a Bubba Bomb that put Benoit through a table and gave the Dudleys the win! Winner: Bubba Ray and Spike Dudley<br /><br />Match 2: Cruiserweight Championship Jamie Noble vs Billy Kidman Billy Kidman challenged Jamie Noble, who brought Nidia with him to the ring, for the Cruiserweight Championship. Noble and Kidman locked up and tumbled over the ring, but raced back inside and grappled some more. When Kidman thwarted all Noble\'s moves, Noble fled outside the ring where Nidia gave him some encouragement. The fight spread outside the ring and Noble threw his girlfriend into the challenger. Kidman tossed Nidia aside but was taken down with a modified arm bar. Noble continued to attack Kidman\'s injured arm back in the ring. Kidman\'s injured harm hampered his offense, but he continued to battle hard. Noble tried to put Kidman away with a powerbomb but the challenger countered into a facebuster. Kidman went to finish things with a Shooting Star Press, but Noble broke up the attempt. Kidman went for the Shooting Star Press again, but this time Noble just rolled out of harm\'s way. Noble flipped Kidman into a power bomb soon after and got the pin to retain his WWE Cruiserweight Championship! Winner: Jamie Noble<br /><br />Match 3: European Championship William Regal vs Jeff Hardy William Regal took on Jeff Hardy next in an attempt to win back the European Championship. Jeff catapulted Regal over the top rope then took him down with a hurracanrana off the ring apron. Back in the ring, Jeff hit the Whisper in the wind to knock Regal for a loop. Jeff went for the Swanton Bomb, but Regal got his knees up to hit Jeff with a devastating shot. Jeff managed to surprise Regal with a quick rollup though and got the pin to keep the European Championship! Regal started bawling at seeing Hardy celebrate on his way back up the ramp. Winner: Jeff Hardy<br /><br />Match 4: Chris Jericho vs John Cena Chris Jericho had promised to end John Cena\'s career in their match at Vengeance, which came up next. Jericho tried to teach Cena a lesson as their match began by suplexing him to the mat. Jericho continued to knock Cena around the ring until his cockiness got the better of him. While on the top rope, Jericho began to showboat and allowed Cena to grab him for a superplex! Cena followed with a tilt-a-whirl slam but was taken down with a nasty dropkick to the gut. The rookie recovered and hit a belly to belly suplex but couldn\'t put Y2J away. Jericho launched into the Lionsault but Cena dodged the move. Jericho nailed a bulldog and then connected on the Lionsault, but did not go for the cover. He goaded Cena to his feet so he could put on the Walls of Jericho. Cena had other ideas, reversing the move into a pin attempt and getting the 1-2-3! Jericho went berserk after the match. Winner: John Cena<br /><br />Match 5: Intercontinental Championship RVD vs Brock Lesnar via disqualification The Next Big Thing and Mr. Pay-Per-View tangled with the Intercontinental Championship on the line. Brock grabbed the title from the ref and draped it over his shoulder momentarily while glaring at RVD. Van Dam \'s quickness gave Brock fits early on. The big man rolled out of the ring and kicked the steel steps out of frustration. Brock pulled himself together and began to take charge. With Paul Heyman beaming at ringside, Brock slammed RVD to the hard floor outside the ring. From there, Brock began to overpower RVD, throwing him with ease over the top rope. RVD landed painfully on his back, then had to suffer from having his spine cracked against the steel ring steps. The fight returned to the ring with Brock squeezing RVD around the ribs. RVD broke away and soon after leveled Brock with a kick to the temple. RVD followed with the Rolling Thunder but Brock managed to kick out after a two-count. The fight looked like it might be over soon as RVD went for a Five-Star Frog Splash. Brock, though, hoisted Van Dam onto his shoulder and went for the F-5, but RVD whirled Brock into a DDT and followed with the Frog Splash! He went for the pin, but Heyman pulled the ref from the ring! The ref immediately called for a disqualification and soon traded blows with Heyman! After, RVD leapt onto Brock from the top rope and then threatened to hit the Van Terminator! Heyman grabbed RVD\'s leg and Brock picked up the champ and this time connected with the F-5 onto a steel chair! Winner: RVD<br /><br />Match 6: Booker T vs the Big Show Booker T faced the Big Show one-on-one next. Show withstood Booker T\'s kicks and punches and slapped Booker into the corner. After being thrown from the ring, Booker picked up a chair at ringside, but Big Show punched it back into Booker\'s face. Booker tried to get back into the game by choking Show with a camera cable at ringside. Booker smashed a TV monitor from the Spanish announcers\' position into Show\'s skull, then delivered a scissors kick that put both men through the table! Booker crawled back into the ring and Big Show staggered in moments later. Show grabbed Booker\'s throat but was met by a low blow and a kick to the face. Booker climbed the top rope and nailed a somersaulting leg drop to get the pin! Winner: Booker T<br /><br />Announcement: Triple H entered the ring to a thunderous ovation as fans hoped to learn where The Game would end up competing. Before he could speak, Eric Bishoff stopped The Game to apologize for getting involved in his personal business. If Triple H signed with RAW, Bischoff promised his personal life would never come into play again. Bischoff said he\'s spent the past two years networking in Hollywood. He said everyone was looking for the next breakout WWE Superstar, and they were all talking about Triple H. Bischoff guaranteed that if Triple H signed with RAW, he\'d be getting top opportunities coming his way. Stephanie McMahon stepped out to issue her own pitch. She said that because of her personal history with Triple H, the two of them know each other very well. She said the two of them were once unstoppable and they can be again. Bischoff cut her off and begged her to stop. Stephanie cited that Triple H once told her how Bischoff said Triple H had no talent and no charisma. Bischoff said he was young at the time and didn\'t know what he had, but he still has a lot more experience that Stephanie. The two continued to bicker back and forth, until Triple H stepped up with his microphone. The Game said it would be easy to say "screw you" to either one of them. Triple H went to shake Bischoff\'s hand, but pulled it away. He said he would rather go with the devil he knows, rather than the one he doesn\'t know. Before he could go any further, though, Shawn Michaels came out to shake things up. HBK said the last thing he wanted to do was cause any trouble. He didn\'t want to get involved, but he remembered pledging to bring Triple H to the nWo. HBK said there\'s nobody in the world that Triple H is better friends with. HBK told his friend to imagine the two back together again, making Bischoff\'s life a living hell. Triple H said that was a tempting offer. He then turned and hugged HBK, making official his switch to RAW! Triple H and HBK left, and Bischoff gloated over his victory. Bischoff said the difference between the two of them is that he\'s got testicles and she doesn\'t. Stephanie whacked Bischoff on the side of the head and left!<br /><br />Match 7: Tag Team Championship Match Christian and Lance Storm vs Hollywood Hogan and Edge The match started with loud "USA" chants and with Hogan shoving Christian through the ropes and out of the ring. The Canadians took over from there. But Edge scored a kick to Christian\'s head and planted a facebuster on Storm to get the tag to Hogan. Hogan began to Hulk up and soon caught Christian with a big boot and a leg drop! Storm broke up the count and Christian tossed Hogan from the ring where Storm superkicked the icon. Edge tagged in soon after and dropped both opponents. He speared both of them into the corner turnbuckles, but missed a spear on Strom and hit the ref hard instead. Edge nailed a DDT, but the ref was down and could not count. Test raced down and took down Hogan then leveled Edge with a boot. Storm tried to get the pin, but Edge kicked out after two. Riksihi sprinted in to fend off Test, allowing Edge to recover and spear Storm. Christian distracted the ref, though, and Y2J dashed in and clocked Edge with the Tag Team Championship! Storm rolled over and got the pinfall to win the title! Winners and New Tag Team Champions: Christian and Lance Storm<br /><br />Match 8: WWE Undisputed Championship Triple Threat Match. The Rock vs Kurt Angle and the Undertaker Three of WWE\'s most successful superstars lined up against each other in a Triple Threat Match with the Undisputed Championship hanging in the balance. Taker and The Rock got face to face with Kurt Angle begging for some attention off to the side. He got attention in the form of a beat down form the two other men. Soon after, Taker spilled out of the ring and The Rock brawled with Angle. Angle gave a series of suplexes that took down Rock, but the Great One countered with a DDT that managed a two-count. The fight continued outside the ring with Taker coming to life and clotheslining Angle and repeatedly smacking The Rock. Taker and Rock got into it back into the ring, and Taker dropped The Rock with a sidewalk slam to get a two-count. Rock rebounded, grabbed Taker by the throat and chokeslammed him! Angle broke up the pin attempt that likely would have given The Rock the title. The Rock retaliated by latching on the ankle lock to Kurt Angle. Angle reversed the move and Rock Bottomed the People\'s Champion. Soon after, The Rock disposed of Angle and hit the People\'s Elbow on the Undertaker. Angle tried to take advantage by disabling the Great One outside the ring and covering Taker, who kicked out after a two count. Outside the ring, Rock took a big swig from a nearby water bottle and spewed the liquid into Taker\'s face to blind the champion. Taker didn\'t stay disabled for long, and managed to overpower Rock and turn his attention to Angle. Taker landed a guillotine leg drop onto Angle, laying on the ring apron. The Rock picked himself up just in time to break up a pin attempt on Kurt Angle. Taker nailed Rock with a DDT and set him up for a chokeslam. ANgle tried sneaking up with a steel chair, but Taker caught on to that tomfoolery and smacked it out of his hands. The referee got caught in the ensuing fire and didn\'t see Angle knock Taker silly with a steel chair. Angle went to cover Taker as The Rock lay prone, but the Dead Man somehow got his shoulder up. Angle tried to pin Rock, but he too kicked out. The Rock got up and landed Angle in the sharpshooter! Angle looked like he was about to tap, but Taker kicked The Rock out of the submission hold. Taker picked Rock up and crashed him with the Last Ride. While the Dead Man covered him for the win, Angle raced in and picked Taker up in the ankle lock! Taker went delirious with pain, but managed to counter. He picked Angle up for the last ride, but Angle put on a triangle choke! It looked like Taker was about to pass out, but The Rock broke Angle\'s hold only to find himself caught in the ankle lock. Rock got out of the hold and watched Taker chokeslam Angle. Rocky hit the Rock Bottom, but Taker refused to go down and kicked out. Angle whirled Taker up into the Angle Slam but was Rock Bottomed by the Great One and pinned! Winner and New WWE Champion: The Rock<br /><br />~Finally there is a decent PPV! Lately the PPV weren\'t very good, but this one was a winner. I give this PPV a A-<br /><br />'

---

In [29]:
# %%timeit
# model.predict(list(npInput.values()))

In [30]:
# %%timeit
#for ins in list(npInput.values()):
#     model.predict([ins])

In [31]:
# %%timeit
# create_explanation_summaries(npInput, explanator, summarizer)

In [32]:
# %%timeit
# val = npInput.values()
# for v in val:
#     create_explanation_summary(v, explanator, summarizer)

---

In [33]:
def testLimeStability():    
    outs = np.array([])
    for _ in range(10):
        explanation_sums = create_explanation_summaries(inp, explanator, summarizer);
        outs = np.append(outs, model.predict([explanation_sums['review-med'][0]]))

    return outs

In [34]:
def experiment_pipeline():
    global HP_FACTOR
    l.info("Another pilot experiment - v2")
    
    import time

    start = time.time()
    
    # read input data
    inp = input_from_files(INPUT_DIR)
    
    # load model
    model = load_model()
    
    for fp in range(100):
        HP_FACTOR = fp
        
        l.info("Started working on factor: " + str(HP_FACTOR))
        # let model create summaries - both
        csummary = create_explanation_summaries(inp, explanator, summarizer)
        ssummary = create_simple_summaries(inp, summarizer)
        modelp = model.predict(list(inp.values()))

        csummary_texts = list(map( lambda x: x[SUMMARY_IX], csummary.values()))
        ssummary_texts = list(ssummary.values())

        mcsummaries = model.predict(csummary_texts)
        mssummaries = model.predict(ssummary_texts)

        l.info("Created summaries for factor: " + str(HP_FACTOR))
        
        assert( all([len(modelp) == len(ssummary), 
                    len(ssummary) == len(csummary),
                    len(csummary_texts) == len(ssummary),
                    len(mssummaries) == len(ssummary_texts)]) )

        modelp = np.append(modelp, mcsummaries, axis=1)
        modelp = np.append(modelp, mssummaries, axis=1)

        np.savetxt(f'../../data/logs/experiment/v2/v2-{fp}.csv', modelp, fmt='%1.5f', header="originalP,customSP,simpleSP")

    # log model prediction on summaries texts
    # log current HP_FACTOR
    end = time.time()
    print(end - start)
    l.info(f'time taken {end-start}')


In [38]:
INPUT_DIR = "/home/tomasmizera/school/diploma/src/data/reviews/only-long"
experiment_pipeline()

[{<ipython-input-34-b77a3c7de55e>:3} INFO - Another pilot experiment - v2
[{<ipython-input-34-b77a3c7de55e>:18} INFO - Started working on factor: 0
[{<ipython-input-36-030d4ae25f20>:22} INFO - Using factor: 0
[{<ipython-input-36-030d4ae25f20>:22} INFO - Using factor: 0
[{<ipython-input-36-030d4ae25f20>:22} INFO - Using factor: 0
[{<ipython-input-34-b77a3c7de55e>:30} INFO - Created summaries for factor: 0
[{<ipython-input-34-b77a3c7de55e>:18} INFO - Started working on factor: 1
[{<ipython-input-36-030d4ae25f20>:22} INFO - Using factor: 1
[{<ipython-input-36-030d4ae25f20>:22} INFO - Using factor: 1
[{<ipython-input-36-030d4ae25f20>:22} INFO - Using factor: 1
[{<ipython-input-34-b77a3c7de55e>:30} INFO - Created summaries for factor: 1
[{<ipython-input-34-b77a3c7de55e>:18} INFO - Started working on factor: 2
[{<ipython-input-36-030d4ae25f20>:22} INFO - Using factor: 2
[{<ipython-input-36-030d4ae25f20>:22} INFO - Using factor: 2
[{<ipython-input-36-030d4ae25f20>:22} INFO - Using factor: 2
[

KeyboardInterrupt: 

---

## Training data

In [None]:
import tensorflow_datasets as tfds

In [None]:
train_data, validation_data, test_data = tfds.load(
    name="imdb_reviews", 
    split=('train[:60%]', 'train[60%:]', 'test'),
    as_supervised=True)

In [None]:
len(test_data)

---

### TODO:
- [x] find a good pytorch/tf LSTM text classification model ~ maybe check datasets in LIME paper
- [x] create predict_proba based on the type of the framework
- [ ] allow hyperparameter tuning
- [ ] predict on created summaries ~ automatically -> (possible: save summaries to files and then load and pass them just as normal instance) 
- [ ] add better logging (more logs in this version)
- [ ] build and test quantitative experiment pipeline
- [ ] add prediction of model to visualization
- [ ] ~~refactor process to not store everything in RAM, rather put intermediate results to files~~
- [x] highlighting of important words from any summary (maybe save both, str summary and Sentence type summary - from sumy)
- [ ] extract it to separate script ?
- [ ] maybe find better dataset (longer texts) for data and train another model for it
- [ ] run quantitative experiment on all instances
- [ ] pick several (~6) explanations for user-study