A TensorFlow.js implementation of the transformer architecture from the paper Attention Is All You Need.
See this code in action at https://stellarapp.net
- TransformerEncoder
- TransformerDecoder
- MultiHeadAttention
- TokenAndPositionalEmbedding (sinusoidal)
These layers are serializable and compatible with the default layers in TFJS.
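For example, the encoder can sit next to stock TFJS layers inside a plain tf.sequential model. The sketch below is only an illustration: the TransformerEncoder config keys (numHeads, embedDim) are assumed to mirror the decoder example further down, and the vocabulary/sequence sizes are placeholders.
import * as tf from "@tensorflow/tfjs";
import * as tfc from "./tfjs-transformer";

// Hypothetical text classifier: custom embedding + encoder feeding stock pooling/dense layers
const encoderModel = tf.sequential({
  layers: [
    new tfc.TokenAndPositionalEmbedding({ embedDim: 32, vocabularySize: 10000, inputShape: [64] }),
    new tfc.TransformerEncoder({ numHeads: 4, embedDim: 32 }), // config keys assumed
    tf.layers.globalAveragePooling1d({}),
    tf.layers.dense({ units: 2, activation: "softmax" })
  ]
});
encoderModel.summary();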
Install TensorFlow.js
npm i @tensorflow/tfjs
Copy the tfjs-transformer folder into your project
import * as tf from "@tensorflow/tfjs";
import * as tfc from "./tfjs-transformer";
const vocabularySize = 50257;
const embeddingSize = 32;
const sequenceLength = 64;
const model = tf.sequential({ // or new tfc.GPTModel
  layers: [
    // to mask a specific token (e.g. 50256 for BPE), set `maskToken`
    // so that the embedding layer returns a mask that gets passed
    // through the decoder layers
    new tfc.TokenAndPositionalEmbedding({ embedDim: embeddingSize, vocabularySize, inputShape: [sequenceLength], maskToken: undefined }),
    new tfc.TransformerDecoder({ numHeads: 4, embedDim: embeddingSize, causal: true }),
    new tfc.TransformerDecoder({ numHeads: 4, embedDim: embeddingSize, causal: true }),
    tf.layers.dense({ units: vocabularySize, activation: "softmax" })
  ]
});
model.compile({ loss: "sparseCategoricalCrossentropy", optimizer: "adam", metrics: ["accuracy"] });
model.summary();
// model.fit(...)
// serialize model
model.save("downloads://TFJS-Transformer-Model", { includeOptimizer: true });
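Loading the saved artifacts back relies on TFJS being able to deserialize the custom classes. The sketch below makes two assumptions: that the classes expose the static className metadata tf.serialization.registerClass expects, and that the library does not already register them on import (in which case the register calls are redundant).
import * as tf from "@tensorflow/tfjs";
import * as tfc from "./tfjs-transformer";

// Make the custom classes known to the deserializer (assumption: they define static `className`)
tf.serialization.registerClass(tfc.TokenAndPositionalEmbedding);
tf.serialization.registerClass(tfc.TransformerDecoder);

// Load the model.json + weights written by model.save("downloads://..."),
// e.g. picked from <input type="file"> elements, via the stock browserFiles IO handler
async function loadSavedModel(jsonFile: File, weightsFile: File) {
  const model = await tf.loadLayersModel(tf.io.browserFiles([jsonFile, weightsFile]));
  model.summary();
  return model;
}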
Tokenize a training corpus with the included BytePairDataset tokenizer:
import * as tfc from "./tfjs-transformer";
import BPEVocab from "./vocab.json";
const sequenceLength = 64;
// THIS WILL NOT WORK UNTIL YOU
// somehow fetch("./merges.txt") and split the text by \n to get a string[]
const merges: string[] = [];
const tokenizer = new tfc.BytePairDataset({
  vocabulary: new Map(Object.entries(BPEVocab)),
  merges,
  sentenceLength: sequenceLength
});
const LARGE_BODY_OF_TEXT = "HERE GOES SOME LARGE BODY OF TEXT";
// these go into model.fit(samples, labels)
const { samples, labels } = tokenizer.dataset(LARGE_BODY_OF_TEXT);
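As a rough sketch, these tensors plug straight into model.fit on the compiled model from the example above; the epoch and batch settings below are illustrative, not values from this repo.
// `model` is the compiled model from the training example above
await model.fit(samples, labels, {
  epochs: 10,    // illustrative
  batchSize: 32, // illustrative
  shuffle: true,
  callbacks: { onEpochEnd: (epoch, logs) => console.log(`epoch ${epoch}`, logs?.loss) }
});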
The unit tests use Jest.
Install Jest
npm install --save-dev jest
npx jest
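If you want to add a test of your own, a minimal Jest sketch could look like the following; the expected output shape is an assumption (one embedDim-sized vector per token), not something verified against this repo's tests.
// transformer.test.ts (hypothetical test file, not part of the repo)
import * as tf from "@tensorflow/tfjs";
import * as tfc from "./tfjs-transformer";

test("TokenAndPositionalEmbedding maps token ids to [batch, seq, embedDim]", () => {
  const layer = new tfc.TokenAndPositionalEmbedding({ embedDim: 8, vocabularySize: 100, inputShape: [16] });
  const out = layer.apply(tf.zeros([2, 16])) as tf.Tensor;
  expect(out.shape).toEqual([2, 16, 8]); // assumed shape
});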