In [None]:
import torch
from sentence_transformers import SentenceTransformer
from small_concept_model.model import SmallConceptModel
from small_concept_model.inverter import PreNet
from small_concept_model.train import train_scm, train_inverter
from small_concept_model.data import get_bookcorpus_scm, get_bookcorpus_inverter
from small_concept_model.auto import build_scm, build_inverter

---

In [None]:
# Build the SCM through the `build_scm` factory, selecting it by the name
# "small_multilingual" (presumably a named preset — confirm in small_concept_model.auto).
# NOTE(review): `model` is rebound further down by SmallConceptModel(**scm_configs),
# so in a top-to-bottom run this instance is replaced before training.
model = build_scm("small_multilingual")

In [3]:
# Inverter used to turn embeddings back into text (see `inverter.invert` below).
inverter = build_inverter("paraphrase_multilingual")

# Sentence encoder, placed on the GPU; requires a CUDA device to be available.
encoder = SentenceTransformer(
    "paraphrase-multilingual-MiniLM-L12-v2",
    device="cuda",
)

In [33]:
# Round trip: encode one sentence to a tensor, then ask the inverter to
# turn that embedding back into text.
text = "I am tired of stressing out"
vec = encoder.encode(text, convert_to_tensor=True)

# NOTE(review): the positional 30 and 0.6 are presumably a maximum length and
# a sampling temperature (cf. `max_len` / `temperature` in `pipe.generate`) —
# confirm against the signature of `inverter.invert`.
reconstruction = inverter.invert(vec, 30, 0.6)
print(reconstruction)

 being tired is exhausting . '' i am being too busy sobbing .


In [10]:
# Architecture hyperparameters; unpacked as SmallConceptModel(**scm_configs).
scm_configs = dict(
    d_model=512,
    d_embed=384,  # also used as the last dimension of the forward-pass test input
    d_ff=4 * 512,  # evaluates to 2048
    n_heads=4,
    n_layers=3,
    dropout=0.0,
    max_seq_len=128,
)

# Optimisation settings; unpacked as train_scm(model, dataset, **train_configs).
train_configs = dict(
    lr=1e-3,
    weight_decay=1e-3,
    batch_size=32,
    num_epochs=1,
)

In [33]:
# Instantiate the SCM from the explicit hyperparameters in `scm_configs`
# (this rebinds `model`, replacing any instance created earlier).
model = SmallConceptModel(**scm_configs)

# Smoke-test the forward pass on a random tensor of shape (8, 16, d_embed).
x = torch.rand([8, 16, scm_configs["d_embed"]])

# Only the output shape matters here, so skip building an autograd graph.
with torch.no_grad():
    y = model(x)

# The recorded output of this cell shows the output shape equal to the input
# shape; fail loudly instead of relying on someone eyeballing the printout.
assert y.shape == x.shape, f"unexpected output shape {tuple(y.shape)}"
print(y.shape)

torch.Size([8, 16, 384])


In [4]:
# (Re)create the sentence encoder on the GPU and pass it to the BookCorpus
# loader for the SCM.
encoder = SentenceTransformer(
    "paraphrase-multilingual-MiniLM-L12-v2",
    device="cuda",
)

# NOTE(review): the meaning of the positional 32 is not visible from this
# notebook (possibly an encoding batch size) — confirm against
# `get_bookcorpus_scm`. The recorded run of this cell took about 11 minutes.
dataset = get_bookcorpus_scm(encoder, 32)

Batches: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [10:46<00:00, 77.31it/s]


Train the model on the BookCorpus dataset prepared above, using the settings in `train_configs` (a single epoch).

In [36]:
from datasets import load_dataset

# Fetch the "train" split of the sorted BookCorpus dataset.
# NOTE(review): `data` is not referenced by any other cell visible in this
# notebook (training uses `dataset`) — confirm whether this cell is still needed.
data = load_dataset(
    "francescoortame/bookcorpus-sorted-100k16x",
    split="train",
)

In [35]:
# Train `model` on `dataset`; lr, weight_decay, batch_size and num_epochs are
# taken from `train_configs`. The recorded run logs the loss every 100 batches.
train_scm(model, dataset, **train_configs)

Epoch [1/1]  Batch [100/3125]  Loss: 0.048322
Epoch [1/1]  Batch [200/3125]  Loss: 0.049153
Epoch [1/1]  Batch [300/3125]  Loss: 0.048909
Epoch [1/1]  Batch [400/3125]  Loss: 0.047952
Epoch [1/1]  Batch [500/3125]  Loss: 0.049336
Epoch [1/1]  Batch [600/3125]  Loss: 0.048689
Epoch [1/1]  Batch [700/3125]  Loss: 0.050626
Epoch [1/1]  Batch [800/3125]  Loss: 0.048821
Epoch [1/1]  Batch [900/3125]  Loss: 0.048079
Epoch [1/1]  Batch [1000/3125]  Loss: 0.050075
Epoch [1/1]  Batch [1100/3125]  Loss: 0.048651
Epoch [1/1]  Batch [1200/3125]  Loss: 0.048846
Epoch [1/1]  Batch [1300/3125]  Loss: 0.049641
Epoch [1/1]  Batch [1400/3125]  Loss: 0.049652
Epoch [1/1]  Batch [1500/3125]  Loss: 0.046929
Epoch [1/1]  Batch [1600/3125]  Loss: 0.048214
Epoch [1/1]  Batch [1700/3125]  Loss: 0.047378
Epoch [1/1]  Batch [1800/3125]  Loss: 0.047083
Epoch [1/1]  Batch [1900/3125]  Loss: 0.048429
Epoch [1/1]  Batch [2000/3125]  Loss: 0.048467
Epoch [1/1]  Batch [2100/3125]  Loss: 0.047782
Epoch [1/1]  Batch [22

## Pipeline

In [31]:
from small_concept_model.pipeline import Pipeline

# Move the SCM to the GPU (the encoder was also created with device="cuda").
model = model.to("cuda")
# Build a fresh inverter with the same name that was used earlier in the notebook.
inverter = build_inverter("paraphrase_multilingual")
# Combine encoder, SCM and inverter into a single text-in / text-out pipeline.
pipe = Pipeline(encoder, model, inverter)

In [39]:
# Two consecutive seed sentences for the pipeline.
texts = [
    "lexi stretched her arms up over her head as she waited patiently for jack to return and ravish her body .",
    "she heard the door open , and soft voices echoed down the hall toward her .",
]

# Left as the cell's last expression so the notebook displays the returned list.
# The recorded output holds seven strings — presumably one per input sentence
# plus `n_future_steps` generated continuations (confirm in Pipeline.generate).
pipe.generate(
    texts,
    n_future_steps=5,
    sigma_noise=0.0,
    temperature=0.0,
    max_len=30,
)

[' she waited for her body to rest before she lifted her head and lifted her head up for her to rest . . . . she was exhausted . .',
 ' she heard the door open , and the door opened softly . the door opened , and the door opened softly . doors . doors . doors . doors .',
 " she said , and then she turned to the door , and the door opened . '' she said , and then she turned to the door , and the",
 " '' oh , she said , pointing at the door . '' and then she said , `` we should be talking about the door . '' . '' ''",
 " '' i should say , but i mean , he is a good liar . '' '' he replied . '' and then he said , `` oh , sorry",
 " '' i should say , but i mean , we are talking about the goddamned thing . '' '' he replied . '' i mean , we are",
 " '' i should say , but i mean , we are talking about the goddamned thing . '' '' he said . '' i mean , we are"]