In [1]:


import tensorflow as tf

# Get the GPU device name.
device_name = tf.test.gpu_device_name()

# The device name should look like the following:
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [2]:
import torch

# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla T4


In [3]:
!pip install transformers       #no need for this
!pip install simpletransformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/a3/78/92cedda05552398352ed9784908b834ee32a0bd071a9b32de287327370b7/transformers-2.8.0-py3-none-any.whl (563kB)
[K     |▋                               | 10kB 22.0MB/s eta 0:00:01[K     |█▏                              | 20kB 27.1MB/s eta 0:00:01[K     |█▊                              | 30kB 18.8MB/s eta 0:00:01[K     |██▎                             | 40kB 13.5MB/s eta 0:00:01[K     |███                             | 51kB 11.4MB/s eta 0:00:01[K     |███▌                            | 61kB 11.7MB/s eta 0:00:01[K     |████                            | 71kB 11.0MB/s eta 0:00:01[K     |████▋                           | 81kB 10.3MB/s eta 0:00:01[K     |█████▎                          | 92kB 10.0MB/s eta 0:00:01[K     |█████▉                          | 102kB 10.7MB/s eta 0:00:01[K     |██████▍                         | 112kB 10.7MB/s eta 0:00:01[K     |███████                         | 

In [4]:
!git clone https://github.com/ronitganguly/nlp-resources.git    #data is taken from https://rajpurkar.github.io/SQuAD-explorer/ and stored in my gut repositary which is of course public!

Cloning into 'nlp-resources'...
remote: Enumerating objects: 11, done.[K
remote: Counting objects: 100% (11/11), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 11 (delta 1), reused 5 (delta 1), pack-reused 0
Unpacking objects: 100% (11/11), done.


In [0]:
path_to_file='/content/nlp-resources/BERT_question-answering_trainingSet.json'

In [0]:
import json


with open(path_to_file, 'r') as f:
    train_data = json.load(f)

train_data = [item for topic in train_data['data'] for item in topic['paragraphs'] ]

In [0]:
# train_data is a list of dictionaries
# each dictionary has 2 keys: context and qas
# each qas is a list of dictionaries
# each dictionary in the list contains keys: answers, question id which is unique, is_impossible, question 
# is_impossible =True means it's impossible to answer the question from the context.

# Training: 

### BERT question answering model requires apex library from NVidia

In [0]:
from simpletransformers.question_answering import QuestionAnsweringModel


train_args = {
    'learning_rate': 3e-5,
    'num_train_epochs': 2,
    'max_seq_length': 384,
    'doc_stride': 128,
    'overwrite_output_dir': True,
    'reprocess_input_data': False,
    'train_batch_size': 2,
    'gradient_accumulation_steps': 8,
}

model = QuestionAnsweringModel('bert', 'bert-base-cased', args=train_args)

In [6]:
%%writefile setup.sh

git clone https://github.com/NVIDIA/apex
cd apex
pip install -v --no-cache-dir ./


Overwriting setup.sh


In [0]:
!sh setup.sh

In [0]:
import apex

In [9]:
from time import time
tic=time()

model.train_model(train_data)
toc=time()


100%|██████████| 130319/130319 [11:54<00:00, 182.50it/s]


Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


HBox(children=(IntProgress(value=0, description='Epoch', max=2, style=ProgressStyle(description_width='initial…

HBox(children=(IntProgress(value=0, description='Current iteration', max=66150, style=ProgressStyle(descriptio…

Running loss: 5.965102



Running loss: 5.903967



Running loss: 3.423162Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Running loss: 2.130188Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Running loss: 1.630353Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0
Running loss: 3.367854Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Running loss: 0.826381Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Running loss: 1.890193Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Running loss: 0.981015Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Running loss: 0.695339Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Running loss: 2.800758Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0
Running loss: 1.153553Gradient overflow.  Skipping step, loss scaler 0 reducing loss

HBox(children=(IntProgress(value=0, description='Current iteration', max=66150, style=ProgressStyle(descriptio…

Running loss: 0.160719Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Running loss: 0.847686Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Running loss: 0.485782Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0
Running loss: 0.061485Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Running loss: 0.112780Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Running loss: 0.029470Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Running loss: 0.165798Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Running loss: 0.623432Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0
Running loss: 2.057151Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0
Running loss: 1.284222Gradient overflow.  Skipping step, loss scaler 0 reducing los

In [30]:
#total time taken:

print(f"Time taken to train the model: {(toc-tic)/60**2:.2f} hours")

Time taken to train the model: 4.16 hours


# Evaluation:

## We have to take new examples like recent stories such as Corona virus paragraph or a newly released video game..


### Below paragraph has been taken from Corona virus wiki:

In [0]:
answer_text= "The Corona virus is primarily spread between people during close contact, often via small droplets produced by coughing, sneezing, or talking. While these droplets are produced when breathing out, they usually fall to the ground or onto surfaces rather than being infectious over long distances. People may also become infected by touching a contaminated surface and then touching their eyes, nose, or mouth. The virus can survive on surfaces up to 72 hours. It is most contagious during the first three days after the onset of symptoms, although spread may be possible before symptoms appear and in later stages of the disease. Common symptoms include fever, cough and shortness of breath. Complications may include pneumonia and acute respiratory distress syndrome. The time from exposure to onset of symptoms is typically around five days, but may range from two to fourteen days. There is no known vaccine or specific antiviral treatment. Primary treatment is symptomatic and supportive therapy."

In [0]:
question_text_1 = "How does the Corona virus spread?"
question_text_2=  "How can we treat patients?"

In [0]:
qas_dict={'context':answer_text,
          'qas':[
              {'id':0, 'question': question_text_1},
              {'id':1, 'question': question_text_2}
          ]
          }

In [18]:
model.predict([qas_dict])

100%|██████████| 2/2 [00:00<00:00, 110.69it/s]


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




[{'answer': 'via small droplets produced by coughing, sneezing, or talking',
  'id': 0},
 {'answer': 'symptomatic and supportive therapy', 'id': 1}]

## Below paragraph has been taken from a recently realeased PS4 game "Death Stranding" Wiki : https://en.wikipedia.org/wiki/Death_Stranding

In [0]:
answer_text="Death Stranding is an action game set in an open world, and also includes asynchronous online functions, although Kojima refers to Death Stranding as the first \"strand game\", an original genre characterized by the game's incorporation of social elements. Kojima compared this genre to how his earlier game Metal Gear—now considered to be a stealth game—was called an action game during its release because the stealth genre was not considered to exist at the time. The player controls Sam Bridges, a porter for a company known as Bridges. The player is tasked with delivering supply cargo to various isolated cities known as KNOTs, as well as isolated researchers and survivalists, while also connecting them to a communications system known as the Chiral Network.The player is evaluated by the company and recipients based on their performance (including via \"likes\" similar to social networks), including whether the cargo was delivered, and if it is intact among other factors. These merits are, in turn, used to level up the player's statistics, such as stability and weight capacity, and increase their standing with individual locations and characters (which can improve rewards). How cargo is packed by the player, and the overall weight being carried, affect Sam's ability to navigate through the environments. The player's main enemies include otherworldly creatures known as \"beached things\" (BTs), MULE (a cult of rogue, bandit-like porters influenced by an obsession with cargo, who attempt to steal deliveries so they can deliver it themselves), and Demens, MULEs who have begun killing porters to claim their cargo. BTs are surrounded by a rain known as \"timefall\", which damages the player's armor and cargo by speeding up their deterioration. BTs are normally invisible, but Sam's suit is equipped with a robotic sensor that points towards BTs he is in close proximity to, and the player can then scan the area to reveal them. As Sam is a \"Repatriate\", he is taken to an underwater world known as the \"Seam\" if he is killed, where he can \"swim\" back to his body to revive himself. However, being killed and consumed by a BT also results in a destructive explosion known as a \"voidout\", which permanently damages the location of the death with an untraversable crater. As players expand the coverage of the Chiral Network, they can access maps of areas, and use blueprints to produce consumable items and structures with the Portable Chiral Constructor (PCC, a device similar to a 3D printer), including ropes, bridges, and power generators used for charging battery-powered equipment. The Network is also used as the basis for the game's online functionality, where players can leave supplies, structures, and messages that can be viewed and used by other players, although structures will eventually be destroyed by Timefall after some time. The player can also recover cargo lost by other players to complete their delivery. The player does not directly encounter other players in the world."

In [0]:
question_text_1="What is the profession of the player?"
question_text_2="Who are the enemies?"
question_text_3="How are the battery-powered equipment charged?"
question_text_4 ="How is the player's performance evaluated?"
question_text_5= "Why Metal Gear was not called a stealth game during its release?"

In [0]:
qas_dict={'context':answer_text,
          'qas':[
              {'id':0, 'question': question_text_1},
              {'id':1, 'question': question_text_2},
              {'id':2, 'question': question_text_3},
              {'id':3, 'question': question_text_4},
              {'id':4, 'question': question_text_5}
          ]
          }

In [22]:
model.predict([qas_dict])

100%|██████████| 5/5 [00:00<00:00, 40.71it/s]


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




[{'answer': 'porter', 'id': 0},
 {'answer': 'otherworldly creatures known as "beached things" (BTs)', 'id': 1},
 {'answer': 'power generators', 'id': 2},
 {'answer': 'based on their performance (including via "likes" similar to social networks',
  'id': 3},
 {'answer': 'the stealth genre was not considered to exist at the time',
  'id': 4}]

## Below text has been taken from https://www.politico.com/news/2020/04/05/boris-johnson-admitted-to-hospital-with-covid-19-166708

In [0]:
answer_text= "U.K. Health Secretary Matt Hancock, who confirmed that he had also tested positive for coronavirus on the same day as the prime minister, has since recovered and resumed his public role at the forefront of the government’s pandemic response. Johnson’s fiancé Carrie Symonds has also been ill with COVID-19 symptoms and is self-isolating but has not been tested for the virus. News of Johnson’s admission to hospital came just over an hour after Queen Elizabeth II gave a rare televised address, which was pre-recorded, to rally spirits in what she said was an \“increasingly challenging time.\” Leading British politicians including Hancock, Keir Starmer, the new Labour leader, Scotland’s First Minister Nicola Sturgeon and London Mayor Sadiq Khan wished the prime minister well on social media."

question_text_1 = "What is the relationship between Carrie Symonds and Matt Hancock?"
question_text_2 = "What did the queen say?"

In [0]:
qas_dict={'context':answer_text,
          'qas':[
              {'id':0, 'question': question_text_1},
              {'id':1, 'question': question_text_2}
          ]
          }

In [38]:
model.predict([qas_dict])

100%|██████████| 2/2 [00:00<00:00, 128.92it/s]


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




[{'answer': 'fiancé', 'id': 0},
 {'answer': 'an \\“increasingly challenging time', 'id': 1}]

## Evaluation results:
### All the questions have been correctly answered!!