# Initialize libraries and dataset

### Load libraries

In [9]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForMaskedLM, pipeline

### Load wikitext

In [10]:
dataset = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train')

Reusing dataset wikitext (/Users/thomaslemenestrel/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126)


# Pre-processing

### Find the right training data example

In [4]:
example = dataset[10]['text']

print(example)

 The game 's battle system , the BliTZ system , is carried over directly from Valkyira Chronicles . During missions , players select each unit using a top @-@ down perspective of the battlefield map : once a character is selected , the player moves the character around the battlefield in third @-@ person . A character can only act once per @-@ turn , but characters can be granted multiple turns at the expense of other characters ' turns . Each character has a field and distance of movement limited by their Action Gauge . Up to nine characters can be assigned to a single mission . During gameplay , characters will call out if something happens to them , such as their health points ( HP ) getting low or being knocked out by enemy attacks . Each character has specific " Potentials " , skills unique to each character . They are divided into " Personal Potential " , which are innate skills that remain unaltered unless otherwise dictated by the story and can either help or impede a character

### Tokenizing the example

In [5]:
hf_name = 'distilbert-base-cased'

In [6]:
tokenizer     = AutoTokenizer.from_pretrained(hf_name)
encoded_input = tokenizer.tokenize(example)

print(encoded_input)

['The', 'game', "'", 's', 'battle', 'system', ',', 'the', 'B', '##li', '##T', '##Z', 'system', ',', 'is', 'carried', 'over', 'directly', 'from', 'Val', '##ky', '##ira', 'Chronicles', '.', 'During', 'missions', ',', 'players', 'select', 'each', 'unit', 'using', 'a', 'top', '@', '-', '@', 'down', 'perspective', 'of', 'the', 'battlefield', 'map', ':', 'once', 'a', 'character', 'is', 'selected', ',', 'the', 'player', 'moves', 'the', 'character', 'around', 'the', 'battlefield', 'in', 'third', '@', '-', '@', 'person', '.', 'A', 'character', 'can', 'only', 'act', 'once', 'per', '@', '-', '@', 'turn', ',', 'but', 'characters', 'can', 'be', 'granted', 'multiple', 'turns', 'at', 'the', 'expense', 'of', 'other', 'characters', "'", 'turns', '.', 'Each', 'character', 'has', 'a', 'field', 'and', 'distance', 'of', 'movement', 'limited', 'by', 'their', 'Action', 'G', '##au', '##ge', '.', 'Up', 'to', 'nine', 'characters', 'can', 'be', 'assigned', 'to', 'a', 'single', 'mission', '.', 'During', 'gameplay

### Mask the 6th token (index 5)

In [7]:
encoded_input[5] = '[MASK]'
encoded_input = ' '.join(encoded_input)

print(encoded_input)

The game ' s battle [MASK] , the B ##li ##T ##Z system , is carried over directly from Val ##ky ##ira Chronicles . During missions , players select each unit using a top @ - @ down perspective of the battlefield map : once a character is selected , the player moves the character around the battlefield in third @ - @ person . A character can only act once per @ - @ turn , but characters can be granted multiple turns at the expense of other characters ' turns . Each character has a field and distance of movement limited by their Action G ##au ##ge . Up to nine characters can be assigned to a single mission . During gameplay , characters will call out if something happens to them , such as their health points ( HP ) getting low or being knocked out by enemy attacks . Each character has specific " Po ##ten ##tial ##s " , skills unique to each character . They are divided into " Personal Po ##ten ##tial " , which are inn ##ate skills that remain un ##altered unless otherwise dictated by the

### Run distillBERT to find the word for the [MASK] token

In [8]:
unmasker = pipeline('fill-mask', model=hf_name)
unmasker(encoded_input)

[{'score': 0.20464356243610382,
  'token': 5418,
  'token_str': 'mode',
  'sequence': 'The game\'s battle mode, the B # # li # # T # # Z system, is carried over directly from Val # # ky # # ira Chronicles. During missions, players select each unit using a top @ - @ down perspective of the battlefield map : once a character is selected, the player moves the character around the battlefield in third @ - @ person. A character can only act once per @ - @ turn, but characters can be granted multiple turns at the expense of other characters\'turns. Each character has a field and distance of movement limited by their Action G # # au # # ge. Up to nine characters can be assigned to a single mission. During gameplay, characters will call out if something happens to them, such as their health points ( HP ) getting low or being knocked out by enemy attacks. Each character has specific " Po # # ten # # tial # # s ", skills unique to each character. They are divided into " Personal Po # # ten # # t