In [1]:
from adaptnlp import EasyWordEmbeddings, EasyStackedEmbeddings, EasyDocumentEmbeddings

# Straightforward Embeddings

### *Pretrained model keys are listed in the Hugging Face Transformers documentation and in Flair's tutorials

## Example of producing embeddings using EasyWordEmbeddings

In [2]:
# Sample passage passed to every embedding call in this notebook.
# Adjacent literals are concatenated by Python; the two-space gaps between
# sentences are part of the text and are kept exactly.
example_text = (
    "This is Albert.  "
    "My last name is Einstein.  "
    "I like physics and atoms."
)

In [3]:
# Create the word-embedding helper. No model is named here: the cells that
# follow pick one per call through the `model_name_or_path` argument of
# embed_text().
embeddings = EasyWordEmbeddings()

In [4]:
# Embed the example text with the pretrained "gpt2" key. embed_text() produces
# a list of flair Sentence objects.
sentences = embeddings.embed_text(example_text, model_name_or_path="gpt2")
# Print only the first token's embedding of the first sentence to keep the
# output short; nothing is printed if that sentence has no tokens.
token = next(iter(sentences[0]), None)
if token is not None:
    print(token.get_embedding())

tensor([-0.1524, -0.0703,  0.5778,  ..., -0.3797, -0.3565,  2.4139],
       device='cuda:0')


In [5]:
# Same embed_text() call as before, now selecting BERT by passing the
# "bert-base-cased" key as `model_name_or_path`.
sentences = embeddings.embed_text(example_text, model_name_or_path="bert-base-cased")
# Print only the first token's embedding of the first sentence; nothing is
# printed if that sentence has no tokens.
token = next(iter(sentences[0]), None)
if token is not None:
    print(token.get_embedding())

tensor([ 0.5918, -0.4142,  1.0203,  ...,  0.4004, -0.1586,  1.0107],
       device='cuda:0')


In [6]:
# Same embed_text() call again, now selecting RoBERTa by passing the
# "roberta-base" key as `model_name_or_path`.
sentences = embeddings.embed_text(example_text, model_name_or_path="roberta-base")
# Print only the first token's embedding of the first sentence; nothing is
# printed if that sentence has no tokens.
token = next(iter(sentences[0]), None)
if token is not None:
    print(token.get_embedding())

tensor([ 3.3757e-02,  5.2783e-01, -6.2026e-02, -1.0129e-01,  8.1527e-01,
         5.0778e-01, -6.9540e-02,  7.9886e-02, -3.3002e-01,  8.0280e-02,
        -9.5056e-02, -3.7590e-01, -2.4488e-01,  1.2541e-02,  1.4148e-01,
         2.9701e-01, -4.6033e-01, -1.7297e-01,  9.4156e-03, -1.6250e-01,
        -2.4242e-01,  3.7214e-01,  8.8796e-03,  2.1160e-01, -2.4286e-01,
         1.3693e-01,  2.5863e-01, -2.0122e-01, -4.2645e-02,  1.0488e-01,
        -3.9044e-02, -1.1922e-01,  1.6976e-01,  3.5874e-01,  3.6035e-02,
        -1.4893e-02,  3.6945e-01,  1.3586e-01,  1.5267e-01,  1.8039e-02,
        -1.1443e-02, -3.8058e-01, -9.9154e-02,  2.1226e-01, -4.2931e-03,
         2.0853e-01, -4.1712e-01,  8.6358e-03,  2.5362e-02, -1.2906e-02,
        -2.4237e-01,  6.7392e-02, -8.2088e-02, -1.0465e-01, -2.8965e-01,
         4.4008e-01, -2.1734e-01, -5.6794e-02,  1.9137e-01, -1.8721e-01,
        -4.6830e-02,  1.7417e-01, -2.8660e-01,  9.6234e-02,  4.2202e-01,
        -1.3933e-01,  7.8239e-02,  2.0238e-01,  2.2

## Producing stacked embeddings with EasyStackedEmbeddings

In [7]:
# Build a stacked embedder from two pretrained model keys: "bert-base-cased"
# and "xlnet-base-cased".
# NOTE: this rebinds `embeddings`, replacing the EasyWordEmbeddings instance
# created earlier in the notebook; re-run that cell before going back to the
# single-model examples.
# Instantiation may take a few moments (see the message printed by this cell).
embeddings = EasyStackedEmbeddings("bert-base-cased", "xlnet-base-cased")

May need a couple moments to instantiate...


In [8]:
# Call embed_text() on the stacked embedder. No model key is passed here: the
# two models to stack were given when the embedder was constructed.
sentences = embeddings.embed_text(example_text)
# Print only the first token's embedding of the first sentence; nothing is
# printed if that sentence has no tokens.
token = next(iter(sentences[0]), None)
if token is not None:
    print(token.get_embedding())

tensor([ 0.5918, -0.4142,  1.0203,  ..., -0.1045, -1.2841,  0.0192],
       device='cuda:0')


## Document Embeddings with EasyDocumentEmbeddings

In [9]:
# Build a document embedder on top of the same two stacked model keys,
# "bert-base-cased" and "xlnet-base-cased".
# NOTE: this rebinds `embeddings` again, replacing the EasyStackedEmbeddings
# instance from the previous section.
# The cell output reports a pooled and an RNN embedding being loaded;
# presumably these back the embed_pool() and embed_rnn() calls used in the
# following cells (confirm against the adaptnlp documentation).
embeddings = EasyDocumentEmbeddings("bert-base-cased", "xlnet-base-cased")

May need a couple moments to instantiate...
Pooled embedding loaded
RNN embeddings loaded


In [10]:
# Pooled document embedding: the text is embedded as one document instead of
# token by token.
# NOTE(review): the result is still indexed with [0], so it appears to be a
# one-element sequence holding the document-level Sentence; confirm against
# the adaptnlp documentation.
text = embeddings.embed_pool(example_text)
# Take the first item and display its text/document embedding as the cell's
# output.
document = text[0]
document.get_embedding()

tensor([ 0.4216,  0.0123,  0.3136,  ..., -0.0683, -0.3761, -0.0974],
       device='cuda:0', grad_fn=<CatBackward>)

In [11]:
# Same document-level flow as the pooled example, but using the RNN-based
# document embedding via embed_rnn().
text = embeddings.embed_rnn(example_text)
# Take the first item and display its text/document embedding as the cell's
# output.
document = text[0]
document.get_embedding()

tensor([ 7.2329e-02, -4.3343e-01, -2.6151e-01, -2.0612e-01,  6.8425e-01,
        -4.7493e-02, -6.8987e-01, -2.5405e-01,  4.3807e-02, -4.3772e-01,
        -1.0571e-01,  7.5407e-02,  4.5788e-01,  6.4685e-01, -5.9863e-02,
         5.0038e-01,  1.6544e-01, -9.8675e-02, -2.9197e-01, -1.0568e-01,
        -1.2376e-01,  1.5152e-01, -3.9265e-01, -5.6044e-01, -7.7540e-01,
        -3.6526e-01, -1.5909e-01, -2.2194e-01,  8.7446e-02,  6.9650e-02,
         1.9783e-01,  8.9473e-03,  5.2198e-01,  4.6220e-02,  7.4506e-01,
        -4.1262e-01, -3.4982e-01, -7.9022e-01, -1.5705e-01, -3.5391e-01,
        -8.7835e-02,  3.4633e-01,  3.5077e-01, -1.0179e-01,  1.9139e-02,
        -4.7291e-01,  6.2119e-01, -1.4826e-01, -8.4816e-02,  2.0001e-01,
         4.5577e-01,  1.2306e-01,  2.8628e-01, -2.0212e-01,  2.3836e-01,
         7.9868e-01, -1.7068e-02,  4.9447e-01, -2.8615e-01, -1.9351e-01,
        -5.3810e-01,  2.2558e-01,  3.7929e-01, -1.5124e-01,  5.1027e-02,
         2.3011e-01,  1.7311e-02, -7.5651e-02, -6.5