In [1]:
import os
import copy
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mlp
import seaborn as sns
%matplotlib inline
from transformers import AutoTokenizer, AutoModelWithLMHead
import shap
from shap.utils import GenerateLogits
import scipy as sp
import nlp
import torch
import spacy
from shap.utils import assert_import, record_import_error, safe_isinstance, make_masks, OpChain, MaskedModel
from shap import maskers, links

In [2]:
# Directory that holds the pretrained ILM model files and `additional_ids_to_tokens.pkl`.
# Set the ILM_MODEL_DIR environment variable to use a different location; the original
# author's local path is kept as the fallback so existing setups keep working unchanged.
MODEL_DIR = os.environ.get(
    'ILM_MODEL_DIR',
    r'C:\Users\ryserrao\Microsoft_work\AIPLATFORM_SHAP\ilm_pretrained_model',
)

In [3]:
import os
import pickle

import ilm.tokenize_util

# Use the GPT-2 tokenizer variant from the ILM utilities for the encode/decode calls in this notebook.
tokenizer = ilm.tokenize_util.Tokenizer.GPT2

# Load the id -> token table stored next to the model files.
# NOTE(review): pickle.load can execute arbitrary code; only use a MODEL_DIR you trust.
extra_vocab_path = os.path.join(MODEL_DIR, 'additional_ids_to_tokens.pkl')
with open(extra_vocab_path, 'rb') as f:
    additional_ids_to_tokens = pickle.load(f)

# Reverse lookup (token -> id), used when blanks are replaced by infill tokens.
additional_tokens_to_ids = dict(
    (token, token_id) for token_id, token in additional_ids_to_tokens.items()
)

# Register the extra tokens with the tokenizer. A ValueError is reported as
# 'Already updated' instead of propagating -- presumably so the cell can be re-run;
# TODO confirm update_tokenizer raises ValueError only for that case.
try:
    ilm.tokenize_util.update_tokenizer(additional_ids_to_tokens, tokenizer)
except ValueError:
    print('Already updated')

print(additional_tokens_to_ids)

{'<|startofinfill|>': 50257, '<|endofinfill|>': 50258, '<|infill_document|>': 50259, '<|infill_paragraph|>': 50260, '<|infill_sentence|>': 50261, '<|infill_ngram|>': 50262, '<|infill_word|>': 50263}


In [4]:
import torch
from transformers import GPT2LMHeadModel

# Choose the device from what is actually available. Previously `device` was set to
# 'cpu' while the model was moved with an unconditional .cuda(), which contradicts the
# variable and fails on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the GPT-2 LM-head model stored in MODEL_DIR and move it to the chosen device.
model = GPT2LMHeadModel.from_pretrained(MODEL_DIR).to(device)

# Switch to evaluation mode; assigning the result to `_` keeps the module repr out of the cell output.
_ = model.eval()

In [39]:
from ilm.infer import infill_with_ilm
def infill(sentence, infill_types, num_infills=3):
    """Fill the ' _' blanks in `sentence` with the ILM model and print each candidate.

    Args:
        sentence: Text containing at least one ' _' placeholder per entry in
            `infill_types`.
        infill_types: Infill token strings (keys of `additional_tokens_to_ids`,
            e.g. '<|infill_word|>'), applied to the blanks from left to right.
        num_infills: Number of completions requested from `infill_with_ilm`.
            Defaults to 3, the value that used to be hard-coded.

    Raises:
        ValueError: If no blank is left in `sentence` for one of the infill types.
        KeyError: If an infill type is not a key of `additional_tokens_to_ids`.

    Returns:
        None. Each decoded completion is printed, followed by a line of 80 dashes.
    """
    # Encode the argument itself. Previously the notebook-global `context` was
    # encoded here, so the `sentence` parameter was silently ignored.
    context_ids = ilm.tokenize_util.encode(sentence, tokenizer)
    _blank_id = ilm.tokenize_util.encode(' _', tokenizer)[0]
    for infill_type in infill_types:
        # .index() returns the left-most blank still present and raises ValueError when none is left.
        context_ids[context_ids.index(_blank_id)] = additional_tokens_to_ids[infill_type]
    generated = infill_with_ilm(
        model,
        additional_tokens_to_ids,
        context_ids,
        num_infills=num_infills)
    for g in generated:
        print(ilm.tokenize_util.decode(g, tokenizer))
        print('-' * 80)

In [40]:
def infill_experiment(sentence,infill_exp_types):
    """Call `infill` once per infill type, printing a header line before each run.

    Args:
        sentence: Text handed unchanged to `infill` on every run.
        infill_exp_types: Iterable of infill type strings; each one is passed to
            `infill` as a single-element list.
    """
    for current_type in infill_exp_types:
        header = f'Experimenting with infilling type: {current_type}'
        print(header)
        single_type = [current_type]
        infill(sentence, single_type)

In [47]:
# Sample passage containing a single ' _' placeholder (before "some users"), which marks
# the blank to be infilled. .strip() removes the newlines that surround the text inside
# the triple quotes.
context = """
The problem is affecting people using the older versions of the PlayStation 3, called the "Fat" model.The problem isn't affecting the newer PS3 Slim systems that have been on sale since September last year. _ some users to avoid using their console for the time being."We hope to resolve this problem within the next 24 hours," a statement reads. "In the meantime, if you have a model other than the new slim PS3, we advise that you do not use your PS3 system, as doing so may result in errors in some functionality, such as recording obtained trophies, and not being able to restore certain data."We believe we have identified that this problem is being caused by a bug in the clock functionality incorporated in the system."The PlayStation Network is used by millions of people around the world.It allows users to play their friends at games like Fifa over the internet and also do things like download software or visit online stores.
""".strip()
# Infill token types to try on the blank; each string is a key of additional_tokens_to_ids.
infill_types=['<|infill_word|>','<|infill_ngram|>','<|infill_sentence|>','<|infill_paragraph|>']

In [48]:
# Run one infilling pass per entry in infill_types; infill_experiment prints a header
# for each type and infill prints the generated candidates.
infill_experiment(context,infill_types)

Experimenting with infilling type: <|infill_word|>
The problem is affecting people using the older versions of the PlayStation 3, called the "Fat" model.The problem isn't affecting the newer PS3 Slim systems that have been on sale since September last year. Do some users to avoid using their console for the time being."We hope to resolve this problem within the next 24 hours," a statement reads. "In the meantime, if you have a model other than the new slim PS3, we advise that you do not use your PS3 system, as doing so may result in errors in some functionality, such as recording obtained trophies, and not being able to restore certain data."We believe we have identified that this problem is being caused by a bug in the clock functionality incorporated in the system."The PlayStation Network is used by millions of people around the world.It allows users to play their friends at games like Fifa over the internet and also do things like download software or visit online stores.
----------

The problem is affecting people using the older versions of the PlayStation 3, called the "Fat" model.The problem isn't affecting the newer PS3 Slim systems that have been on sale since September last year. Most some users to avoid using their console for the time being."We hope to resolve this problem within the next 24 hours," a statement reads. "In the meantime, if you have a model other than the new slim PS3, we advise that you do not use your PS3 system, as doing so may result in errors in some functionality, such as recording obtained trophies, and not being able to restore certain data."We believe we have identified that this problem is being caused by a bug in the clock functionality incorporated in the system."The PlayStation Network is used by millions of people around the world.It allows users to play their friends at games like Fifa over the internet and also do things like download software or visit online stores.
-----------------------------------------------------------

In [22]:
# Passage with three consecutive ' _' placeholders ("_ _ _.") marking the blanks to infill.
# NOTE: this rebinds the notebook-level `context` variable that is also assigned elsewhere
# in this notebook, so the value in effect depends on which cell ran last.
context = """
The problem is affecting people using the older versions of the PlayStation 3, called the "Fat" model.The problem isn't 
affecting the newer PS3 Slim systems that have been on sale since September last year. _ _ _. "In the meantime, if you have a model other than the new slim PS3, we advise that you do not use your PS3 system, as doing so may result in errors in some functionality, such as recording obtained trophies, and not being able to restore certain data."We believe we have identified that this problem is being caused by a bug in the clock functionality incorporated in the system."The PlayStation Network is used by millions of people around the world.It allows users to play their friends at games like Fifa over the internet and also do things like download software or visit online stores.
""".strip()

# Tokenize the passage into a list of token ids that can be edited in place.
context_ids = ilm.tokenize_util.encode(context, tokenizer)

# Id of the ' _' placeholder token that marks each blank.
_blank_id = ilm.tokenize_util.encode(' _', tokenizer)[0]

# Replace the three blanks, left to right, with sentence-level infill tokens
# (.index() always returns the first blank still present).
# Other infill tokens that could be used here: '<|infill_word|>', '<|infill_ngram|>',
# '<|infill_paragraph|>'.
for infill_token in ['<|infill_sentence|>'] * 3:
    context_ids[context_ids.index(_blank_id)] = additional_tokens_to_ids[infill_token]

# Show the passage with the infill tokens substituted in.
decoded_context = ilm.tokenize_util.decode(context_ids, tokenizer)
print(decoded_context)

The problem is affecting people using the older versions of the PlayStation 3, called the "Fat" model.The problem isn't affecting the newer PS3 Slim systems that have been on sale since September last year.<|infill_sentence|><|infill_sentence|><|infill_sentence|>. "In the meantime, if you have a model other than the new slim PS3, we advise that you do not use your PS3 system, as doing so may result in errors in some functionality, such as recording obtained trophies, and not being able to restore certain data."We believe we have identified that this problem is being caused by a bug in the clock functionality incorporated in the system."The PlayStation Network is used by millions of people around the world.It allows users to play their friends at games like Fifa over the internet and also do things like download software or visit online stores.


In [23]:


# Request two completions for the prepared context_ids from the ILM model.
generated = infill_with_ilm(model, additional_tokens_to_ids, context_ids, num_infills=2)

# Print every completion as text, each preceded by an 80-dash separator line.
separator = '-' * 80
for g in generated:
    print(separator)
    completion_text = ilm.tokenize_util.decode(g, tokenizer)
    print(completion_text)

--------------------------------------------------------------------------------
The problem is affecting people using the older versions of the PlayStation 3, called the "Fat" model.The problem isn't affecting the newer PS3 Slim systems that have been on sale since September last year. If you play games you get injured you may experience abdominal cramps or can have to recover. The Playstation account you note your progress by setting performance as it. Hopefully if you lose track with the hack your input data until the next month.. "In the meantime, if you have a model other than the new slim PS3, we advise that you do not use your PS3 system, as doing so may result in errors in some functionality, such as recording obtained trophies, and not being able to restore certain data."We believe we have identified that this problem is being caused by a bug in the clock functionality incorporated in the system."The PlayStation Network is used by millions of people around the world.It allows 