In [None]:
# Transformers installation
! pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


The sentiment of the following sentences is analyzed with the Hugging Face Transformers library and its ‘sentiment-analysis’ pipeline. The sentences are: "I like NLP course." and "I hate when my computer crashes."

In [None]:
from transformers import pipeline

# Name the checkpoint explicitly instead of relying on the task default, which
# can change between library releases and would make the results
# unreproducible.
# NOTE(review): this is presumably the default checkpoint that produced the
# recorded outputs - confirm against the "No model was supplied" warning
# printed by the unpinned call.
classifier = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)

In [None]:
# Single-sentence call: the pipeline returns a list holding one
# {'label', 'score'} dictionary for the input string.
classifier('I like NLP course.')

[{'label': 'POSITIVE', 'score': 0.9993118047714233}]

In [None]:
# Same call with the second example sentence; the cell output is the
# predicted label together with its score.
classifier('I hate when my computer crashes.')

[{'label': 'NEGATIVE', 'score': 0.9994106292724609}]

The classifier can also be called on a list of sentences, which are preprocessed and then fed to the model as a batch; it returns a list with one dictionary per sentence, each like the ones shown above:

In [None]:
# Score both example sentences with one batched call to the classifier.
sentences = [
    "I like NLP course.",
    "I hate when my computer crashes.",
]
results = classifier(sentences)

# One report line per sentence, with the score rounded to four decimals.
for result in results:
    print(f"label: {result['label']}, with score: {round(result['score'], 4)}")

label: POSITIVE, with score: 0.9993
label: NEGATIVE, with score: 0.9994


The Hugging Face Transformers library and its ‘zero-shot-classification’ pipeline are used to classify the sentence "Los Angeles Clippers is a good basketball team" into one of three given categories: sports, politics, education.

In [None]:
from transformers import pipeline

# Pin the checkpoint rather than relying on the task default, so the notebook
# keeps producing the same scores when the library default changes.
# NOTE(review): presumably the default that produced the recorded output -
# confirm against the warning printed by the unpinned call.
# Note that this rebinds `classifier`, replacing the sentiment pipeline.
classifier = pipeline(
    'zero-shot-classification',
    model='facebook/bart-large-mnli',
)

In [None]:
# Score the sentence against the three candidate labels. In the recorded
# output the labels come back reordered by descending score, so the first
# entry of 'labels' is the predicted category.
classifier('Los Angeles Clippers is a good basketball team',
           candidate_labels=['sports', 'politics', 'education'])

{'sequence': 'Los Angeles Clippers is a good basketball team',
 'labels': ['sports', 'education', 'politics'],
 'scores': [0.9973843693733215, 0.0016129021532833576, 0.001002754084765911]}

The sentence "Los Angeles Clippers is a good basketball team" is assigned to the category "sports", which receives by far the highest score (≈ 0.9974).

The Hugging Face Transformers library and its ‘text-generation’ pipeline are used to complete the following sentence: "In this month, the stock market will".

In [None]:
from transformers import pipeline

# Pin the checkpoint rather than relying on the task default, which can
# change between library releases.
# NOTE(review): presumably the default that produced the recorded output -
# confirm against the warning printed by the unpinned call.
gen = pipeline('text-generation', model='gpt2')

In [None]:
# Generation may sample tokens (the recorded completion looks sampled), so fix
# the random seed right before the call to make the completion reproducible
# across runs. The text will differ from a completion produced without a seed.
from transformers import set_seed

set_seed(42)
gen('In this month, the stock market will')



[{'generated_text': 'In this month, the stock market will continue tumbling, while many investors will not be able to see a sharp decline, and other sectors may be pushed out of control.\n\nThe U.S. economy contracted in July during the height of'}]

The Hugging Face Transformers library and its ‘fill-mask’ pipeline are used to fill in the blank in the sentence "Math course will teach you about <mask> topics".


In [None]:
from transformers import pipeline

# Pin the checkpoint rather than relying on the task default. This matters
# here because the literal mask token used in the prompt ('<mask>') depends
# on the model's tokenizer.
# NOTE(review): presumably the default that produced the recorded output -
# confirm against the warning printed by the unpinned call.
gen = pipeline('fill-mask', model='distilroberta-base')

In [None]:
# Ask for the two most likely fillers (top_k = 2) for the masked position.
# NOTE(review): '<mask>' has to be the mask token of the loaded model; the
# recorded output shows it was recognised - re-check if the model changes.
gen('Math course will teach you about <mask> topics',top_k = 2)

[{'score': 0.18224956095218658,
  'token': 30412,
  'token_str': ' mathematical',
  'sequence': 'Math course will teach you about mathematical topics'},
 {'score': 0.1431763917207718,
  'token': 10638,
  'token_str': ' math',
  'sequence': 'Math course will teach you about math topics'}]

The Hugging Face Transformers library and its ‘ner’ (Named Entity Recognition) pipeline are used to identify the person, organization, and location names in the sentence "Tim Cook is the CEO of Apple located in San Jose."


In [None]:
from transformers import pipeline

# `grouped_entities=True` is deprecated; `aggregation_strategy='simple'` is
# its replacement and likewise merges word pieces into whole entities
# ('Tim Cook', 'San Jose') with an 'entity_group' key.
# The checkpoint is pinned so results do not depend on the task default.
# NOTE(review): presumably the default that produced the recorded output -
# confirm against the warning printed by the unpinned call.
gen = pipeline(
    'ner',
    model='dbmdz/bert-large-cased-finetuned-conll03-english',
    aggregation_strategy='simple',
)



In [None]:
# Tag the sentence; in the recorded output each entity carries its group
# (PER / ORG / LOC), a score, and 'start'/'end' character offsets into the
# input string.
gen('Tim Cook is the CEO of Apple located in San Jose.')

[{'entity_group': 'PER',
  'score': 0.9997417,
  'word': 'Tim Cook',
  'start': 0,
  'end': 8},
 {'entity_group': 'ORG',
  'score': 0.99871206,
  'word': 'Apple',
  'start': 23,
  'end': 28},
 {'entity_group': 'LOC',
  'score': 0.9983774,
  'word': 'San Jose',
  'start': 40,
  'end': 48}]

The Hugging Face Transformers library and its ‘question-answering’ pipeline are used to find the answer to the following question in the given context: "In which state Los Angeles located?" The context is "Los Angeles is in California".

In [None]:
from transformers import pipeline

# Pin the checkpoint rather than relying on the task default, which can
# change between library releases.
# NOTE(review): presumably the default that produced the recorded output -
# confirm against the warning printed by the unpinned call.
gen = pipeline(
    'question-answering',
    model='distilbert-base-cased-distilled-squad',
)

In [None]:
# Extractive QA: the answer is a span of `context`. The 'start'/'end' values
# in the recorded output are character offsets into the context string, which
# here begins with a space (hence 'California' starts at offset 19).
gen(question='In which state Los Angeles located', context=' Los Angeles is in California')

{'score': 0.9838225245475769, 'start': 19, 'end': 29, 'answer': 'California'}

The Hugging Face Transformers library and its ‘summarization’ pipeline are used to summarize the given text.

In [None]:
from transformers import pipeline

# Pin the checkpoint rather than relying on the task default, which can
# change between library releases.
# NOTE(review): presumably the default that produced the recorded output -
# confirm against the warning printed by the unpinned call.
gen = pipeline('summarization', model='sshleifer/distilbart-cnn-12-6')

In [None]:
# Summarize one multi-line passage. The triple-quoted string is passed as-is
# (including its line breaks); the cell output is a list with a single
# {'summary_text': ...} dictionary.
gen(''' Australia was celebrated for its initial response to the Covid-19 pandemic,
and for getting its economy more or less back on track long ago. But with
that security has come complacency, particularly in the federal government,
which failed to secure enough vaccine doses to prevent the regular "circuit
breaker" lockdowns that come every time a handful of cases emerge, or even
the longer restrictions that Sydney is experiencing now. Australia's
borders, controlled by strict quarantine measures, have been all but shut
for more than a year. Now Australians, who basked in their early successes,
are wondering how much longer this can go on.''')

[{'summary_text': ' Australia was celebrated for its initial response to the Covid-19 pandemic . But with that security has come complacency, particularly in the federal government . The government failed to secure enough vaccine doses to prevent the regular "circuitbreaker" lockdowns that come every time a handful of cases emerge, or even the longer restrictions that Sydney is experiencing now .'}]

In [None]:
!pip install flair

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


The embeddings of all words in the following two sentences
Sentence 1: I went to a bank to deposit money.
Sentence 2: I sat near a bank of a river.
have been retrieved with GloVe and BERT embeddings, both loaded through the Flair library (the BERT checkpoint is downloaded from the Hugging Face model hub). Subsequently, the Euclidean distance between the two vectors of the word 'bank' has been calculated, once with the GloVe embeddings and once with the BERT embeddings.

In [None]:
import numpy as np
from flair.embeddings import WordEmbeddings
from flair.embeddings import TransformerWordEmbeddings
from flair.data import Sentence
from scipy.spatial import distance

In [None]:
# GloVe embedding: load Flair's 'glove' word vectors. The object is used
# below to attach one vector to every token of a sentence.

glove_embedding = WordEmbeddings('glove')

In [None]:
# First example: 'bank' as a financial institution. Flair tokenizes the text
# into 9 tokens (see the Sentence[9] output below); 'bank' is token index 4.
sentence_1 = Sentence(' I went to a bank to deposit money.')

In [None]:
# GloVe embedding for all words: embed() stores a vector on every token of
# sentence_1 in place; the cell output is the list of embedded sentences.

glove_embedding.embed(sentence_1)

[Sentence[9]: " I went to a bank to deposit money."]

In [None]:
# Show every token of sentence_1 followed by its embedding vector, with a
# blank gap after each token.

for token in sentence_1:
    print(token, token.embedding, "\n", sep="\n")

Token[0]: "I"
tensor([-0.0465,  0.6197,  0.5665, -0.4658, -1.1890,  0.4460,  0.0660,  0.3191,
         0.1468, -0.2212,  0.7924,  0.2991,  0.1607,  0.0253,  0.1868, -0.3100,
        -0.2811,  0.6051, -1.0654,  0.5248,  0.0642,  1.0358, -0.4078, -0.3801,
         0.3080,  0.5996, -0.2699, -0.7603,  0.9422, -0.4692, -0.1828,  0.9065,
         0.7967,  0.2482,  0.2571,  0.6232, -0.4477,  0.6536,  0.7690, -0.5123,
        -0.4433, -0.2187,  0.3837, -1.1483, -0.9440, -0.1506,  0.3001, -0.5781,
         0.2017, -1.6591, -0.0792,  0.0264,  0.2205,  0.9971, -0.5754, -2.7266,
         0.3145,  0.7052,  1.4381,  0.9913,  0.1398,  1.3474, -1.1753,  0.0040,
         1.0298,  0.0646,  0.9089,  0.8287, -0.4700, -0.1058,  0.5916, -0.4221,
         0.5733, -0.5411,  0.1077,  0.3978, -0.0487,  0.0646, -0.6144, -0.2860,
         0.5067, -0.4976, -0.8157,  0.1641, -1.9630, -0.2669, -0.3759, -0.9585,
        -0.8584, -0.7158, -0.3234, -0.4312,  0.4139,  0.2837, -0.7093,  0.1500,
        -0.2154, -0.3762, 

In [None]:
# Second example: 'bank' as a river bank. Also 9 tokens (see the Sentence[9]
# output below), with 'bank' again at token index 4.
sentence_2 = Sentence(' I sat near a bank of a river.')

In [None]:
# Attach GloVe vectors to every token of sentence_2 (stored on the tokens in
# place).
glove_embedding.embed(sentence_2)

[Sentence[9]: " I sat near a bank of a river."]

In [None]:
# Show every token of sentence_2 followed by its embedding vector, with a
# blank gap after each token.

for token in sentence_2:
    print(token, token.embedding, "\n", sep="\n")

Token[0]: "I"
tensor([-0.0465,  0.6197,  0.5665, -0.4658, -1.1890,  0.4460,  0.0660,  0.3191,
         0.1468, -0.2212,  0.7924,  0.2991,  0.1607,  0.0253,  0.1868, -0.3100,
        -0.2811,  0.6051, -1.0654,  0.5248,  0.0642,  1.0358, -0.4078, -0.3801,
         0.3080,  0.5996, -0.2699, -0.7603,  0.9422, -0.4692, -0.1828,  0.9065,
         0.7967,  0.2482,  0.2571,  0.6232, -0.4477,  0.6536,  0.7690, -0.5123,
        -0.4433, -0.2187,  0.3837, -1.1483, -0.9440, -0.1506,  0.3001, -0.5781,
         0.2017, -1.6591, -0.0792,  0.0264,  0.2205,  0.9971, -0.5754, -2.7266,
         0.3145,  0.7052,  1.4381,  0.9913,  0.1398,  1.3474, -1.1753,  0.0040,
         1.0298,  0.0646,  0.9089,  0.8287, -0.4700, -0.1058,  0.5916, -0.4221,
         0.5733, -0.5411,  0.1077,  0.3978, -0.0487,  0.0646, -0.6144, -0.2860,
         0.5067, -0.4976, -0.8157,  0.1641, -1.9630, -0.2669, -0.3759, -0.9585,
        -0.8584, -0.7158, -0.3234, -0.4312,  0.4139,  0.2837, -0.7093,  0.1500,
        -0.2154, -0.3762, 

In [None]:
# Compare only the GloVe vectors of the two 'bank' tokens (index 4 in both
# sentences). `token.embedding` concatenates every embedding stored on the
# token, so selecting by name keeps this result correct even when the cell is
# re-run after the BERT embeddings have been attached.
# .detach().cpu().numpy() also works when Flair keeps its tensors on a GPU,
# where np.array(tensor) raises.
glove_names = glove_embedding.get_names()
bank_glove_1 = sentence_1[4].get_embedding(glove_names).detach().cpu().numpy()
bank_glove_2 = sentence_2[4].get_embedding(glove_names).detach().cpu().numpy()

glove_dst = distance.euclidean(bank_glove_1, bank_glove_2)
print("Glove embedding: Euclidian distance between the embeddings for the word 'bank' used in 2 sentences = {}".format(glove_dst))

Glove embedding: Euclidian distance between the embeddings for the word 'bank' used in 2 sentences = 0.0


In [None]:
# Contextual embeddings from the multilingual, cased BERT base checkpoint,
# wrapped as Flair word embeddings (one vector per token).
bert_embedding = TransformerWordEmbeddings('bert-base-multilingual-cased')

In [None]:
bert_embedding.embed(sentence_1)

# The tokens still carry the GloVe vectors attached earlier, and
# `token.embedding` concatenates every stored embedding (868 values, starting
# with the GloVe numbers). Select the BERT vector by name so that only the
# contextual embedding is shown.
bert_names = bert_embedding.get_names()
for token in sentence_1:
    bert_vector = token.get_embedding(bert_names)
    print(token)
    print(bert_vector)
    print(bert_vector.shape)

Token[0]: "I"
tensor([-4.6539e-02,  6.1966e-01,  5.6647e-01, -4.6584e-01, -1.1890e+00,
         4.4599e-01,  6.6035e-02,  3.1910e-01,  1.4679e-01, -2.2119e-01,
         7.9239e-01,  2.9905e-01,  1.6073e-01,  2.5324e-02,  1.8678e-01,
        -3.1001e-01, -2.8108e-01,  6.0515e-01, -1.0654e+00,  5.2476e-01,
         6.4152e-02,  1.0358e+00, -4.0779e-01, -3.8011e-01,  3.0801e-01,
         5.9964e-01, -2.6991e-01, -7.6035e-01,  9.4222e-01, -4.6919e-01,
        -1.8278e-01,  9.0652e-01,  7.9671e-01,  2.4825e-01,  2.5713e-01,
         6.2320e-01, -4.4768e-01,  6.5357e-01,  7.6902e-01, -5.1229e-01,
        -4.4333e-01, -2.1867e-01,  3.8370e-01, -1.1483e+00, -9.4398e-01,
        -1.5062e-01,  3.0012e-01, -5.7806e-01,  2.0175e-01, -1.6591e+00,
        -7.9195e-02,  2.6423e-02,  2.2051e-01,  9.9714e-01, -5.7539e-01,
        -2.7266e+00,  3.1448e-01,  7.0522e-01,  1.4381e+00,  9.9126e-01,
         1.3976e-01,  1.3474e+00, -1.1753e+00,  3.9503e-03,  1.0298e+00,
         6.4637e-02,  9.0887e-01,  8.

In [None]:
# Inspect the BERT vector of 'bank' (token index 4) in sentence 1.
# `.embedding` would return GloVe and BERT concatenated, so the BERT part is
# selected by name.
bank_bert_1 = sentence_1[4].get_embedding(bert_embedding.get_names())
print(sentence_1[4])
print(bank_bert_1.shape)
print(bank_bert_1)

Token[4]: "bank"
torch.Size([868])
tensor([ 4.1869e-01, -9.2211e-01,  4.8684e-02,  1.1798e-01,  2.2062e-01,
        -3.2506e-01, -7.8456e-01,  4.3787e-01, -1.4762e-01,  6.3100e-02,
         2.3754e-01,  1.4837e+00, -5.4762e-01,  8.0406e-01, -3.7728e-03,
         2.4949e-01, -1.5523e-01, -6.6263e-01, -6.0428e-02,  1.0121e-01,
         5.1163e-01,  4.3285e-01, -6.0858e-01,  2.2083e-01, -6.2269e-01,
        -9.3740e-01, -1.9981e-01, -6.9866e-01, -3.3680e-01,  4.0864e-01,
         4.6296e-01,  5.9369e-01, -5.6550e-01,  2.4968e-01, -3.7977e-01,
        -8.4865e-01,  6.0765e-01, -3.5362e-02,  6.2735e-02, -9.4900e-01,
         1.3185e-01, -1.0301e+00, -7.7346e-02,  8.1087e-01,  1.8624e-01,
         4.5122e-01,  3.6329e-01,  4.0375e-01, -7.7568e-01, -1.2271e+00,
        -4.7763e-01,  9.4753e-02,  1.0842e+00,  7.1649e-01, -8.9553e-01,
        -2.4286e+00, -6.2272e-01, -7.4197e-01,  1.4537e+00,  1.1202e+00,
         1.2085e-01, -5.2417e-01, -7.2460e-03,  4.6182e-01,  8.2402e-01,
         5.7731e

In [None]:
bert_embedding.embed(sentence_2)

# The tokens still carry the GloVe vectors attached earlier, and
# `token.embedding` concatenates every stored embedding (868 values, starting
# with the GloVe numbers). Select the BERT vector by name so that only the
# contextual embedding is shown.
bert_names = bert_embedding.get_names()
for token in sentence_2:
    bert_vector = token.get_embedding(bert_names)
    print(token)
    print(bert_vector)
    print(bert_vector.shape)

Token[0]: "I"
tensor([-4.6539e-02,  6.1966e-01,  5.6647e-01, -4.6584e-01, -1.1890e+00,
         4.4599e-01,  6.6035e-02,  3.1910e-01,  1.4679e-01, -2.2119e-01,
         7.9239e-01,  2.9905e-01,  1.6073e-01,  2.5324e-02,  1.8678e-01,
        -3.1001e-01, -2.8108e-01,  6.0515e-01, -1.0654e+00,  5.2476e-01,
         6.4152e-02,  1.0358e+00, -4.0779e-01, -3.8011e-01,  3.0801e-01,
         5.9964e-01, -2.6991e-01, -7.6035e-01,  9.4222e-01, -4.6919e-01,
        -1.8278e-01,  9.0652e-01,  7.9671e-01,  2.4825e-01,  2.5713e-01,
         6.2320e-01, -4.4768e-01,  6.5357e-01,  7.6902e-01, -5.1229e-01,
        -4.4333e-01, -2.1867e-01,  3.8370e-01, -1.1483e+00, -9.4398e-01,
        -1.5062e-01,  3.0012e-01, -5.7806e-01,  2.0175e-01, -1.6591e+00,
        -7.9195e-02,  2.6423e-02,  2.2051e-01,  9.9714e-01, -5.7539e-01,
        -2.7266e+00,  3.1448e-01,  7.0522e-01,  1.4381e+00,  9.9126e-01,
         1.3976e-01,  1.3474e+00, -1.1753e+00,  3.9503e-03,  1.0298e+00,
         6.4637e-02,  9.0887e-01,  8.

In [None]:
# Inspect the BERT vector of 'bank' (token index 4) in sentence 2.
# `.embedding` would return GloVe and BERT concatenated, so the BERT part is
# selected by name.
bank_bert_2 = sentence_2[4].get_embedding(bert_embedding.get_names())
print(sentence_2[4])
print(bank_bert_2.shape)
print(bank_bert_2)

Token[4]: "bank"
torch.Size([868])
tensor([ 4.1869e-01, -9.2211e-01,  4.8684e-02,  1.1798e-01,  2.2062e-01,
        -3.2506e-01, -7.8456e-01,  4.3787e-01, -1.4762e-01,  6.3100e-02,
         2.3754e-01,  1.4837e+00, -5.4762e-01,  8.0406e-01, -3.7728e-03,
         2.4949e-01, -1.5523e-01, -6.6263e-01, -6.0428e-02,  1.0121e-01,
         5.1163e-01,  4.3285e-01, -6.0858e-01,  2.2083e-01, -6.2269e-01,
        -9.3740e-01, -1.9981e-01, -6.9866e-01, -3.3680e-01,  4.0864e-01,
         4.6296e-01,  5.9369e-01, -5.6550e-01,  2.4968e-01, -3.7977e-01,
        -8.4865e-01,  6.0765e-01, -3.5362e-02,  6.2735e-02, -9.4900e-01,
         1.3185e-01, -1.0301e+00, -7.7346e-02,  8.1087e-01,  1.8624e-01,
         4.5122e-01,  3.6329e-01,  4.0375e-01, -7.7568e-01, -1.2271e+00,
        -4.7763e-01,  9.4753e-02,  1.0842e+00,  7.1649e-01, -8.9553e-01,
        -2.4286e+00, -6.2272e-01, -7.4197e-01,  1.4537e+00,  1.1202e+00,
         1.2085e-01, -5.2417e-01, -7.2460e-03,  4.6182e-01,  8.2402e-01,
         5.7731e

In [None]:
# Distance between the BERT vectors of 'bank' (token index 4) in the two
# sentences. The BERT embedding is selected by name because `.embedding`
# returns GloVe and BERT concatenated (868 values). The GloVe parts are
# identical for both tokens (distance 0.0 above), so the distance itself is
# unchanged, but the vectors and shapes shown are now the BERT ones.
# .detach().cpu().numpy() also works when Flair keeps its tensors on a GPU.
bert_names = bert_embedding.get_names()
bank_bert_1 = sentence_1[4].get_embedding(bert_names)
bank_bert_2 = sentence_2[4].get_embedding(bert_names)

print(sentence_1[4])
print(bank_bert_1.shape)

print('---------------------------------')

print(sentence_2[4])
print(bank_bert_2.shape)

print('---------------------------------')

bert_dst = distance.euclidean(
    bank_bert_1.detach().cpu().numpy(),
    bank_bert_2.detach().cpu().numpy(),
)

print("BERT embeddings: Euclidian distance between the embeddings for the word ‘bank’ used in 2 sentences = {}".format(bert_dst))

Token[4]: "bank"
torch.Size([868])
---------------------------------
Token[4]: "bank"
torch.Size([868])
---------------------------------
BERT embeddings: Euclidian distance between the embeddings for the word ‘bank’ used in 2 sentences = 14.108161926269531
