In [1]:
from adaptnlp import EasyWordEmbeddings, EasyStackedEmbeddings, EasyDocumentEmbeddings

# Straightforward Embeddings

### *Pretrained model keys are listed in the Transformers documentation and in Flair's tutorials

## Example of producing embeddings using EasyWordEmbeddings

In [2]:
# Sample input text passed to every embedding call in this notebook
example_text = "This is Albert.  My last name is Einstein.  I like physics and atoms."

In [3]:
# Instantiate the word-embeddings wrapper with no arguments; the pretrained model
# is selected per call through `model_name_or_path` in the cells that follow
embeddings = EasyWordEmbeddings()

In [4]:
# Embed the example text with GPT-2 (a list of flair Sentence objects is generated)
sentences = embeddings.embed_text(example_text, model_name_or_path="gpt2")
# Print only the first token's embedding of the first sentence to keep the output short
first_token = next(iter(sentences[0]), None)
if first_token is not None:
    print(first_token.get_embedding())

tensor([-0.1524, -0.0703,  0.5778,  ..., -0.3797, -0.3565,  2.4139],
       device='cuda:0')


In [5]:
# Same call as the GPT-2 cell, but now requesting BERT embeddings: the model is
# chosen by the `model_name_or_path` argument of embed_text()
sentences = embeddings.embed_text(example_text, model_name_or_path="bert-base-cased")
# Print only the first token's embedding of the first sentence to keep the output short
first_token = next(iter(sentences[0]), None)
if first_token is not None:
    print(first_token.get_embedding())

tensor([ 0.5918, -0.4142,  1.0203,  ...,  0.4004, -0.1586,  1.0107],
       device='cuda:0')


In [6]:
# Same call again, but now requesting RoBERTa embeddings: the model is
# chosen by the `model_name_or_path` argument of embed_text()
sentences = embeddings.embed_text(example_text, model_name_or_path="roberta-base")
# Print only the first token's embedding of the first sentence
first_token = next(iter(sentences[0]), None)
if first_token is not None:
    print(first_token.get_embedding())

tensor([ 3.3757e-02,  5.2783e-01, -6.2026e-02, -1.0129e-01,  8.1527e-01,
         5.0778e-01, -6.9540e-02,  7.9886e-02, -3.3002e-01,  8.0280e-02,
        -9.5056e-02, -3.7590e-01, -2.4488e-01,  1.2541e-02,  1.4148e-01,
         2.9701e-01, -4.6033e-01, -1.7297e-01,  9.4156e-03, -1.6250e-01,
        -2.4242e-01,  3.7214e-01,  8.8796e-03,  2.1160e-01, -2.4286e-01,
         1.3693e-01,  2.5863e-01, -2.0122e-01, -4.2645e-02,  1.0488e-01,
        -3.9044e-02, -1.1922e-01,  1.6976e-01,  3.5874e-01,  3.6035e-02,
        -1.4893e-02,  3.6945e-01,  1.3586e-01,  1.5267e-01,  1.8039e-02,
        -1.1443e-02, -3.8058e-01, -9.9154e-02,  2.1226e-01, -4.2931e-03,
         2.0853e-01, -4.1712e-01,  8.6358e-03,  2.5362e-02, -1.2906e-02,
        -2.4237e-01,  6.7392e-02, -8.2088e-02, -1.0465e-01, -2.8965e-01,
         4.4008e-01, -2.1734e-01, -5.6794e-02,  1.9137e-01, -1.8721e-01,
        -4.6830e-02,  1.7417e-01, -2.8660e-01,  9.6234e-02,  4.2202e-01,
        -1.3933e-01,  7.8239e-02,  2.0238e-01,  2.2

## Producing stacked embeddings with EasyStackedEmbeddings

In [7]:
# Instantiate the stacked-embeddings wrapper from two pretrained model names.
# Note: this rebinds `embeddings`, replacing the EasyWordEmbeddings instance created earlier
embeddings = EasyStackedEmbeddings("bert-base-cased", "xlnet-base-cased")

May need a couple moments to instantiate...


In [8]:
# Run embed_text() on the stacked-embeddings instance; no model name is passed here
# because the models were given when the instance was constructed
sentences = embeddings.embed_text(example_text)
# Print only the first token's embedding of the first sentence to keep the output short
first_token = next(iter(sentences[0]), None)
if first_token is not None:
    print(first_token.get_embedding())

tensor([ 0.5918, -0.4142,  1.0203,  ..., -0.1045, -1.2841,  0.0192],
       device='cuda:0')


## Document Embeddings with EasyDocumentEmbeddings

In [9]:
# Instantiate the document embedder from two pretrained model names.
# Note: this rebinds `embeddings` again; the embed_pool() and embed_rnn() calls
# in the following cells are made on this instance
embeddings = EasyDocumentEmbeddings("bert-base-cased", "xlnet-base-cased")

May need a couple moments to instantiate...
Pooled embedding loaded
RNN embeddings loaded


In [10]:
# Document pool embedding: the embedding is read from the result's first element
# rather than from individual tokens.
# NOTE(review): the result is indexed with [0], so it is presumably a list holding
# one flair Sentence that represents the whole document — confirm against the adaptnlp docs
text = embeddings.embed_pool(example_text)
# Get the text/document embedding (shown as this cell's output)
text[0].get_embedding()

tensor([ 0.4216,  0.0123,  0.3136,  ..., -0.0683, -0.3761, -0.0974],
       device='cuda:0', grad_fn=<CatBackward>)

In [11]:
# Same as the pooled cell, but using the document RNN embedding instead.
# As there, the document embedding is read from the first element of the result
text = embeddings.embed_rnn(example_text)
# Get the text/document embedding (shown as this cell's output)
text[0].get_embedding()

tensor([ 0.6869,  0.8342, -0.9680, -0.2715,  0.2795, -0.8108, -0.4427,  0.4212,
        -0.8906,  0.7585,  0.1219,  0.6713, -0.7826,  0.1598, -0.3123, -0.5157,
        -0.9682,  0.4898,  0.7514, -0.8477,  0.4772, -0.2872,  0.9695, -0.3948,
        -0.3644,  0.8077,  0.5765,  0.9452,  0.1830,  0.1115, -0.7094,  0.8865,
         0.9721,  0.7024,  0.7235,  0.1546,  0.2707, -0.3844, -0.3979, -0.9088,
        -0.5137, -0.1299,  0.6560, -0.7347, -0.1360,  0.9693,  0.7245,  0.6635,
        -0.8372, -0.4400, -0.2479, -0.9833, -0.0384, -0.6965, -0.2441,  0.2078,
        -0.9871,  0.7516,  0.5990, -0.4252, -0.6084, -0.8859,  0.0810, -0.9508,
        -0.0802,  0.9544, -0.4850, -0.6775,  0.4424, -0.6980,  0.6440,  0.3130,
         0.6960, -0.7615,  0.9561, -0.2989, -0.8480,  0.8545, -0.9670, -0.9131,
         0.7906, -0.6413,  0.0053, -0.8684,  0.4129,  0.4803,  0.3190,  0.6832,
         0.5421,  0.2341,  0.7407, -0.1879, -0.6010,  0.9640,  0.8652,  0.9695,
        -0.8757,  0.4910,  0.7947,  0.17