# Import Dependencies

In [8]:
import torch
import textwrap
import numpy as np 
import pandas as pd 
from pprint import pprint
from transformers import pipeline

# Load/Download Sentiment Analysis Pipeline

In [12]:
filler = pipeline("fill-mask", device="cuda:0")
type(filler)

No model was supplied, defaulted to distilroberta-base and revision ec58a5b (https://huggingface.co/distilroberta-base).
Using a pipeline without specifying a model name and revision in production is not recommended.


transformers.pipelines.fill_mask.FillMaskPipeline

# Perform Some Test Cases

In [13]:
'''
"Two roads diverged in a yellow wood,",
"And sorry I could not travel both",
"And be one traveler, long I stood",
"And looked down one as far as I could",
"To where it bent in the undergrowth; "

'''

sample_texts = [
        "Two roads <mask> in a yellow wood,",
        "And sorry I could not <mask> both",
        "And be one traveler, long I <mask>",
        "And <mask> down one as far as I could",
        "To where it bent in the <mask>; "
        ]

filled_masks = filler(sample_texts)
filled_masks

[[{'score': 0.20124192535877228,
   'token': 8144,
   'token_str': ' wrapped',
   'sequence': 'Two roads wrapped in a yellow wood,'},
  {'score': 0.11673210561275482,
   'token': 2913,
   'token_str': ' covered',
   'sequence': 'Two roads covered in a yellow wood,'},
  {'score': 0.03971727192401886,
   'token': 10122,
   'token_str': ' painted',
   'sequence': 'Two roads painted in a yellow wood,'},
  {'score': 0.029909003525972366,
   'token': 3558,
   'token_str': ' ending',
   'sequence': 'Two roads ending in a yellow wood,'},
  {'score': 0.021850239485502243,
   'token': 30307,
   'token_str': ' draped',
   'sequence': 'Two roads draped in a yellow wood,'}],
 [{'score': 0.08102558553218842,
   'token': 109,
   'token_str': ' do',
   'sequence': 'And sorry I could not do both'},
  {'score': 0.07285445928573608,
   'token': 3679,
   'token_str': ' handle',
   'sequence': 'And sorry I could not handle both'},
  {'score': 0.06665847450494766,
   'token': 4960,
   'token_str': ' afford'

# LOAD DATA

In [14]:
df = pd.read_csv('data/bbc_text_cls.csv')
df

Unnamed: 0,text,labels
0,Ad sales boost Time Warner profit\n\nQuarterly...,business
1,Dollar gains on Greenspan speech\n\nThe dollar...,business
2,Yukos unit buyer faces loan claim\n\nThe owner...,business
3,High fuel prices hit BA's profits\n\nBritish A...,business
4,Pernod takeover talk lifts Domecq\n\nShares in...,business
...,...,...
2220,BT program to beat dialler scams\n\nBT is intr...,tech
2221,Spam e-mails tempt net shoppers\n\nComputer us...,tech
2222,Be careful how you code\n\nA new European dire...,tech
2223,US cyber security chief resigns\n\nThe man mak...,tech


In [17]:
def wrap(x):
    return textwrap.fill(
                        x, 
                        replace_whitespace=False, 
                        fix_sentence_endings=True
                        )

In [18]:
df = df[df.labels == 'business']
df

Unnamed: 0,text,labels
0,Ad sales boost Time Warner profit\n\nQuarterly...,business
1,Dollar gains on Greenspan speech\n\nThe dollar...,business
2,Yukos unit buyer faces loan claim\n\nThe owner...,business
3,High fuel prices hit BA's profits\n\nBritish A...,business
4,Pernod takeover talk lifts Domecq\n\nShares in...,business
...,...,...
505,Trial begins of Spain's top banker\n\nThe tria...,business
506,UK economy ends year with spurt\n\nThe UK econ...,business
507,HealthSouth ex-boss goes on trial\n\nThe forme...,business
508,Euro firms miss out on optimism\n\nMore than 9...,business


In [20]:
# randomly pick a text 
article = df.text.iloc[np.random.choice(len(df))]
print(wrap(article))

Qantas sees profits fly to record

Australian airline Qantas has
posted a record fiscal first-half profit thanks to cost-cutting
measures.

Net profit in the six months ending 31 December rose 28% to
A$458.4m ($357.6m; £191m) from a year earlier.  Analysts expected a
figure closer to A$431m.  Qantas shares fell almost 3%, however, after
it warned that earnings growth would slow in the second half.  Sales
will dip by at least A$30m after the Indian ocean tsunami devastated
many holiday destinations, Qantas said.

"The tsunami affected travel
patterns in ways that we were a bit surprised about," chief executive
Geoff Dixon explained.  "It certainly affected Japanese travel into
Australia.  As soon as the tsunami hit we saw ... a lessening with
bookings for Australia."  Higher fuel costs also are expected to eat
into earnings in coming months.  "We don't have as much hedging
benefit in the second half as we had in the first," said chief
financial officer Peter Gregg.  Qantas is facing inc

# Predictions on Poem

In [21]:
filler('Qantas sees profits fly to <mask>')

[{'score': 0.3668358325958252,
  'token': 4276,
  'token_str': ' zero',
  'sequence': 'Qantas sees profits fly to zero'},
 {'score': 0.09420368820428848,
  'token': 2576,
  'token_str': ' bottom',
  'sequence': 'Qantas sees profits fly to bottom'},
 {'score': 0.02868789993226528,
  'token': 40186,
  'token_str': ' infinity',
  'sequence': 'Qantas sees profits fly to infinity'},
 {'score': 0.02523326873779297,
  'token': 19728,
  'token_str': ' heel',
  'sequence': 'Qantas sees profits fly to heel'},
 {'score': 0.014089357107877731,
  'token': 8680,
  'token_str': ' profitability',
  'sequence': 'Qantas sees profits fly to profitability'}]

In [22]:
text = 'Qantas sees profits fly to zero' + \
  'Australian airline <mask> has posted a record fiscal first-half profit thanks to cost-cutting measures'

filler(text)

[{'score': 0.11308567970991135,
  'token': 5364,
  'token_str': ' operator',
  'sequence': 'Qantas sees profits fly to zeroAustralian airline operator has posted a record fiscal first-half profit thanks to cost-cutting measures'},
 {'score': 0.05692284554243088,
  'token': 8313,
  'token_str': ' Emirates',
  'sequence': 'Qantas sees profits fly to zeroAustralian airline Emirates has posted a record fiscal first-half profit thanks to cost-cutting measures'},
 {'score': 0.03350399062037468,
  'token': 265,
  'token_str': ' business',
  'sequence': 'Qantas sees profits fly to zeroAustralian airline business has posted a record fiscal first-half profit thanks to cost-cutting measures'},
 {'score': 0.02909381315112114,
  'token': 3619,
  'token_str': ' Express',
  'sequence': 'Qantas sees profits fly to zeroAustralian airline Express has posted a record fiscal first-half profit thanks to cost-cutting measures'},
 {'score': 0.028333093971014023,
  'token': 6599,
  'token_str': ' Delta',
  's

In [23]:
text = 'Qantas sees profits fly to zero' + \
  'Australian airline operator has posted a record fiscal first-half profit thanks to cost-cutting measures' + \
  'Net profit in the six months ending 31 December rose 28% to A$458.4m ($357.6m; £191m) from a year <mask>.'

filler(text)

[{'score': 0.8682662844657898,
  'token': 656,
  'token_str': ' earlier',
  'sequence': 'Qantas sees profits fly to zeroAustralian airline operator has posted a record fiscal first-half profit thanks to cost-cutting measuresNet profit in the six months ending 31 December rose 28% to A$458.4m ($357.6m; £191m) from a year earlier.'},
 {'score': 0.11375085264444351,
  'token': 536,
  'token_str': ' ago',
  'sequence': 'Qantas sees profits fly to zeroAustralian airline operator has posted a record fiscal first-half profit thanks to cost-cutting measuresNet profit in the six months ending 31 December rose 28% to A$458.4m ($357.6m; £191m) from a year ago.'},
 {'score': 0.008088293485343456,
  'token': 2052,
  'token_str': ' prior',
  'sequence': 'Qantas sees profits fly to zeroAustralian airline operator has posted a record fiscal first-half profit thanks to cost-cutting measuresNet profit in the six months ending 31 December rose 28% to A$458.4m ($357.6m; £191m) from a year prior.'},
 {'sco