In [1]:
import numpy as np
import pandas as pd
import re
import torch
from torch import nn
import os
import sklearn
import json
from tqdm.notebook import tqdm
import Levenshtein

# ordinal encoding

In [2]:
path_to_data_folder = "../data/"

In [3]:
with open(path_to_data_folder + 'character_to_prediction_index.json') as json_file:
    CHAR2ORD = json.load(json_file)
    
ORD2CHAR = {j:i for i,j in CHAR2ORD.items()}
    
display(pd.Series(CHAR2ORD).to_frame('Ordinal Encoding'))

Unnamed: 0,Ordinal Encoding
,0
!,1
#,2
$,3
%,4
&,5
',6
(,7
),8
*,9


# hyperparameters

In [4]:
#IS_INTERACTIVE = os.environ['KAGGLE_KERNEL_RUN_TYPE'] == 'Interactive'
SEED = 42
DEBUG = True
N_UNIQUE_CHARACTERS = len(CHAR2ORD) + 1 + 1 + 1 + 1#
PAD_TOKEN = len(CHAR2ORD) # Padding
SOS_TOKEN = len(CHAR2ORD) + 1 # Start Of Sentence
EOS_TOKEN = len(CHAR2ORD) + 2 # End Of Sentence
NAN_TOKEN = len(CHAR2ORD) + 3
BATCH_SIZE = 64
NUM_EPOCHS = 2 #if IS_INTERACTIVE else 100
NUM_WARMUP_EPOCHS = 10
WEIGHT_DECAY = 0.05
NUM_WORKERS = 2
TRAIN_MODEL = True
LOAD_WEIGHTS = False
MAX_LR = 1e-3
WARMUP_METHOD = 'exp'
USE_VAL = True
MAX_PHRASE_LENGTH = 32

In [5]:
torch.manual_seed(SEED)

<torch._C.Generator at 0x7efeb08dbd30>

# data manipulation

In [6]:
ORD2CHAR[PAD_TOKEN] = "<PAD>"
ORD2CHAR[SOS_TOKEN] = "<SOS>"
ORD2CHAR[EOS_TOKEN] = "<EOS>"
ORD2CHAR[NAN_TOKEN] = "<NAN>"
CHAR2ORD["<PAD>"] = PAD_TOKEN
CHAR2ORD["<SOS>"] = SOS_TOKEN
CHAR2ORD["<EOS>"] = EOS_TOKEN
CHAR2ORD["<NAN>"] = NAN_TOKEN

# Load data

In [7]:
if USE_VAL:
    # TRAIN
    X_train = np.load(path_to_data_folder + '/X_train.npy')
    y_train = np.load(path_to_data_folder + '/y_train.npy')[:,:MAX_PHRASE_LENGTH]
    N_TRAIN_SAMPLES = len(X_train)
    # VAL
    X_val = np.load(path_to_data_folder + '/X_val.npy')
    y_val = np.load(path_to_data_folder + '/y_val.npy')[:,:MAX_PHRASE_LENGTH]
    N_VAL_SAMPLES = len(X_val)
    # Shapes
    print(f'X_train shape: {X_train.shape}, X_val shape: {X_val.shape}')
# Train On All Data
else:
    # TRAIN
    X_train = np.load(path_to_data_folder + '/X.npy')
    y_train = np.load(path_to_data_folder + '/y.npy')[:,:MAX_PHRASE_LENGTH]
    N_TRAIN_SAMPLES = len(X_train)
    print(f'X_train shape: {X_train.shape}')

X_train shape: (54719, 128, 164), X_val shape: (7236, 128, 164)


in X i've right hand, left hand, lips coords(x,y) for i in range(number of frames)

and in y i have char for X hands and lips position

the main problem is that there are 128 frames and only 31 letters, so i gotta combine some frames that represent the same letter

In [8]:
y_train.shape

(54719, 32)

# Baseline

In [9]:
kernel_size = 2
stride = 2

In [10]:
class SignRecognition(nn.Module):
    def __init__(self, frames, kernel_size=2, stride=2):
        super().__init__()
        self.conv1 = nn.Conv2d(frames, frames // 4, kernel_size=kernel_size, stride=stride)
        self.bn1 = nn.BatchNorm2d(frames // 4)
        self.lin1 = nn.Linear(41, 128)
        self.lin2 = nn.Linear(128, N_UNIQUE_CHARACTERS)
        self.gelu = torch.nn.GELU()
        self.softmax = nn.Softmax(dim=-1)
        

    def forward(self, x):
        x = self.conv1(x) # 128 41 1
        x = self.gelu(self.bn1(x))
        x = x.squeeze(dim=-1)
        x = self.lin1(x)
        x = self.gelu(x)
        x = self.lin2(x)
        x = self.softmax(x)
        return x

In [11]:
class SelfAttention(nn.Module):
    def __init__(self, embed_size, heads):
        super().__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads
        
        assert (self.head_dim * heads == embed_size), "Embed size needs to be divisible by heads"
        self.values = nn.Linear(embed_size, embed_size, bias=False)
        self.keys = nn.Linear(embed_size, embed_size, bias=False)
        self.queries = nn.Linear(embed_size, embed_size, bias=False)
        self.fc_out = nn.Linear(heads*self.head_dim, embed_size) # concat them
        
    def forward(self, values, keys, query, mask):
        N = query.shape[0]
        value_len, key_len, query_len = values.shape[1], keys.shape[1], query.shape[1]
        
        values = self.values(values)
        keys = self.keys(keys)
        queries = self.queries(query)
        
        # split embedding into self.heads pieces
        values = values.reshape(N, value_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        queries = query.reshape(N, query_len, self.heads, self.head_dim)
        
        # energy shape: (N, heads, query_len, key_len) table with attention on
        # each word from target to input
        energy = torch.einsum("nqhd,nkhd->nhqk", [queries, keys])
        
        if mask is not None:
            energy = energy.masked_fill(mask == 0, float("-1e20"))
            
        attention = torch.softmax(energy / (self.embed_size ** (1/2)), dim=3)
        # since value_len == key_len i use l for both
        out = torch.einsum("nhql,nlhd->nqhd", [attention, values]).reshape(
            N, query_len, self.heads*self.head_dim,
        ) # flatten last 2 dimensions
        
        out = self.fc_out(out)
        return out
        

class TransformerBlock(nn.Module):
    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super().__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)
        
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, forward_expansion*embed_size),
            nn.ReLU(),
            nn.Linear(forward_expansion*embed_size, embed_size)
        )
        self.dropout = nn.Dropout(dropout)
        
        
    def forward(self, value, key, query, mask):
        attention = self.attention(value, key, query, mask)
        
        x = self.dropout(self.norm1(attention + query))
        forward = self.feed_forward(x)
        out = self.dropout(self.norm2(forward + x))
        return out
    
class Encoder(nn.Module):
    def __init__(
            self,
            scr_vocab_size,
            embed_size,
            num_layers,
            heads,
            device,
            forward_expansion,
            dropout,
            max_length
    ):
        super().__init__()
        self.embed_size = embed_size
        self.device = device
       # self.word_embedding = nn.Embedding(scr_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        
        self.layers = nn.ModuleList(
            [
                TransformerBlock(embed_size, heads, dropout, forward_expansion)
                for _ in range(num_layers)
            ]
        )    
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x, mask):
        N, seq_length, vocab = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(device)
        
        out = self.dropout(x + self.position_embedding(positions))
        # x B, Seq_len, vocab_size
        # pos B, Seq_len, n_embd
        for layer in self.layers:
            # since we are in encoder and values, queries and keys are the same
            out = layer(out, out, out, mask)
            
        return out
    
class DecoderBlock(nn.Module):
    def __init__(self, embed_size, heads, forward_expansion, dropout, device):
        super().__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm = nn.LayerNorm(embed_size)
        self.transformer_block = TransformerBlock(
            embed_size, heads, dropout, forward_expansion
        )
        self.dropout = nn.Dropout(dropout)
        
    # valule and key are from encoder
    def forward(self, x, value, key, src_mask, trg_mask):
        attention = self.attention(x, x, x, trg_mask)
        query = self.dropout(self.norm(attention + x))
        out = self.transformer_block(value, key, query, src_mask)
        return out
    
class Decoder(nn.Module):
    def __init__(
            self,
            trg_vocab_size,
            embed_size, 
            num_layers,
            heads,
            forward_expansion,
            dropout,
            device,
            max_length
    ):
        super().__init__()
        self.device = device
        self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        
        self.layers = nn.ModuleList(
            [
                DecoderBlock(embed_size, heads, forward_expansion, dropout, device)
                for _ in range(num_layers)
            ]
        )
        
        self.fc_out = nn.Linear(embed_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x, enc_out, src_mask, trg_mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        x = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
        
        for layer in self.layers:
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)
        
        out = self.fc_out(x) 
        return out
        
        
class Transformer(nn.Module):
    def __init__(
            self,
            scr_vocab_size,
            trg_vocab_size,
            src_pad_idx,
            trg_pad_idx,
            embed_size=63,
            num_layers=6,
            forward_expansion=4,
            heads=9,
            dropout=0,
            device="cuda",
            max_length=128
    ):
        super().__init__()
        
        self.encoder = Encoder(
            scr_vocab_size,
            embed_size,
            num_layers,
            heads,
            device,
            forward_expansion,
            dropout,
            max_length
        )
        
        self.decoder = Decoder(
            trg_vocab_size,
            embed_size,
            num_layers,
            heads,
            forward_expansion,
            dropout,
            device,
            max_length
        )
        
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device
        
    def make_src_mask(self, src):
        # (N, 1, 1, src_length)
        # src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
        # return src_mask.to(self.device)
        return None
    
    def make_trg_mask(self, trg):
        N, trg_length = trg.shape
        trg_mask = torch.tril(torch.ones((trg_length, trg_length))).expand(
            N, 1, trg_length, trg_length
        )
        return trg_mask.to(self.device)
    
    def forward(self, src, trg):
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src = self.encoder(src, src_mask)
        out = self.decoder(trg, enc_src, src_mask, trg_mask)
        return out
    

In [12]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [13]:
class Model(nn.Module):
    def __init__(self, src_vocab_size, trg_vocab_size, src_pad_idx, trg_pad_idx, device=device):
        super().__init__()
        self.cnn = SignRecognition(128)
        self.transformer = Transformer(src_vocab_size, trg_vocab_size, src_pad_idx, trg_pad_idx, device=device)

    def forward(self, x, decoder_input_ids):
        x = self.cnn(x) # [B, T, Vocab_size]
        x = self.transformer(x, decoder_input_ids)
        return x
        

In [14]:
src_pad_idx = 0
trg_pad_idx = 0
src_vocab_size = 63
trg_vocab_size = 63

In [15]:
model = Model(src_vocab_size, trg_vocab_size, src_pad_idx, trg_pad_idx).to(device)
model

Model(
  (cnn): SignRecognition(
    (conv1): Conv2d(128, 32, kernel_size=(2, 2), stride=(2, 2))
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (lin1): Linear(in_features=41, out_features=128, bias=True)
    (lin2): Linear(in_features=128, out_features=63, bias=True)
    (gelu): GELU(approximate='none')
    (softmax): Softmax(dim=-1)
  )
  (transformer): Transformer(
    (encoder): Encoder(
      (position_embedding): Embedding(128, 63)
      (layers): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): SelfAttention(
            (values): Linear(in_features=63, out_features=63, bias=False)
            (keys): Linear(in_features=63, out_features=63, bias=False)
            (queries): Linear(in_features=63, out_features=63, bias=False)
            (fc_out): Linear(in_features=63, out_features=63, bias=True)
          )
          (norm1): LayerNorm((63,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((

In [16]:
class Tokenizer:
    def __init__(self):
        pass
    def decode(self, out: torch.tensor) -> str:
        """
        decode output of model into text
        """
        text = []
        for x in out:
            token = x.item()
            if token == PAD_TOKEN:
                continue # what if pad token will be in the middle of sentence
            text.append(ORD2CHAR[token])
        return ''.join(text)
    
    def encode(self, string: str) -> torch.tensor:
        """
        encode string into vocab size space so that i can put it as my target while training
        """
        out = []
        for symbol in string:
            out.append(CHAR2ORD[symbol])
        
        return torch.tensor(out)        


In [17]:
tokenizer = Tokenizer()

# Preprocess targets

In [18]:
X_train = torch.from_numpy(X_train)
y_train = torch.from_numpy(y_train).type(torch.LongTensor)
if USE_VAL:
    X_val = torch.from_numpy(X_val)
    y_val = torch.from_numpy(y_val).type(torch.LongTensor)

In [19]:
y_train[0:4]

tensor([[18,  0, 34, 49, 36, 36, 42, 39, 46, 52, 50, 36, 61, 59, 59, 59, 59, 59,
         59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59],
        [39, 36, 45, 51, 32, 40, 39, 52, 33, 50, 13, 34, 46, 44, 61, 59, 59, 59,
         59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59],
        [16, 18, 23, 18,  0, 54, 40, 43, 43, 40, 32, 44,  0, 43, 32, 45, 40, 36,
         49, 61, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59],
        [24, 23, 23,  0, 37, 49, 32, 45, 42, 43, 40, 45,  0, 43, 32, 45, 36, 61,
         59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59]])

In [20]:
X_train[4:8, 20:30, :10]

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.,

In [21]:
num_examples, frames, features = X_train.shape
X_train = X_train.view(num_examples, frames, features // 2, 2)
X_train.shape

torch.Size([54719, 128, 82, 2])

In [22]:
if USE_VAL:
    num_examples, frames, features = X_val.shape
    X_val = X_val.view(num_examples, frames, features // 2, 2)
    X_val.shape

In [23]:
y_train = y_train.type(torch.LongTensor)
if USE_VAL:
    y_val = y_val.type(torch.LongTensor)

In [24]:
decoder_input_ids_train = torch.zeros_like(y_train)
decoder_input_ids_train.shape

torch.Size([54719, 32])

In [25]:
for i in range(len(y_train)):
    decoder_input_ids_train[i] = torch.concat((torch.tensor([SOS_TOKEN]), y_train[i][:-1]))

In [26]:
if USE_VAL:
    decoder_input_ids_val = torch.zeros_like(y_val)
    decoder_input_ids_val.shape

In [27]:
if USE_VAL:
    for i in range(len(y_val)):
        decoder_input_ids_val[i] = torch.concat((torch.tensor([SOS_TOKEN]), y_val[i][:-1]))

In [28]:
decoder_input_ids_train = decoder_input_ids_train.type(torch.LongTensor)
if USE_VAL:
    decoder_input_ids_val = decoder_input_ids_val.type(torch.LongTensor)

# Creating Dataloader

In [29]:
from torch.utils.data import DataLoader


In [30]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, data, labels, decoder_input_ids):
        self.data = data
        self.labels = labels
        self.decoder_input_ids = decoder_input_ids

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        x = self.data[index]
        y = self.labels[index]
        decoder_input_ids = self.decoder_input_ids[index]
        return x, y, decoder_input_ids


In [31]:
train_dataset = CustomDataset(X_train, y_train, decoder_input_ids_train)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)

In [32]:
val_dataset = CustomDataset(X_val, y_val, decoder_input_ids_val)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)

# Levenshtein distance

In [33]:
import time

In [34]:
def encode_target(target: torch.tensor):
    answer = []
    for x in target:
        if x != PAD_TOKEN:
            answer.append(ORD2CHAR[x.item()])
    return "".join(answer)


In [35]:
def metric(pred: torch.tensor, target:torch.tensor):
    # shouldn't count pad token 
    # if i would use it in model
    # since levenshtein distance is not linear i'll return N and D of each batch and then sum them
    D = 0
    N = 0
    for i in range(len(pred)):# through batches
        p = encode(pred[i])
        t = encode_target(target[i])
        distance = Levenshtein.distance(p, t)
        
        D += distance
        N += len(p) + len(t)
    
    return N, D


In [36]:
# # testing metric
# for idx, (x, y) in enumerate(train_dataloader):
#     out = model(x)
#     print(metric(out, y))
#     if idx > 5:
#         break

# train parameters(optimizer, loss, etc)

In [36]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr = 1e-6
)

# Train loop

In [37]:
# for g in optimizer.param_groups:
#     g['lr'] = 1e-3

overfitting on single example

real training starts

In [37]:
epoch_losses = []
for epoch in tqdm(range(NUM_EPOCHS * 10)):
    total_loss = 0
    # loop = tqdm(train_dataloader, leave=False)
    #train_NDs = [] # array with pairs (N, D) 
    for (x, y, decoder_input_ids) in tqdm(train_dataloader, leave=False):
        x = x.to(device)
        y = y.to(device)
        decoder_input_ids = decoder_input_ids.to(device)

        out = model(x, decoder_input_ids)
        #print(out.shape)        
        optimizer.zero_grad()
        loss = loss_fn(out.view(-1, N_UNIQUE_CHARACTERS), y.view(-1))
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        # if epoch % 5 == 0:
        #     train_NDs.append(metric(out, y))
    
    avg_loss = total_loss / len(train_dataloader)
    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] Average Loss: {avg_loss}")
        


        # validation score 
    # CHANGE NAME FOR LOOP!!!!!!!!!!!!!!!!!!!!!
        # loop = tqdm(val_dataloader, leave=False)
        # for (x, y) in loop:
        #     x = x.to(device)
        #     y = y.to(device)
        #     with torch.no_grad():
        #         out = model(x)
        #     loss = loss_fn(out.view(BATCH_SIZE, -1, 31), y)
        #     val_losses.append(loss.item())
        
        # print(f"Validation loss on epoch №{epoch + 1} = {sum(val_losses) / len(val_losses)}\nbtw train loss = {mean_loss}")
        
        
    #val_losses = []
    # if epoch % 5 == 0:
    #     # compute levenshtein distance on validation data
    #     loop = tqdm(val_dataloader, leave=False)
    #     val_NDs = []
    #     for (x,y) in loop:
    #         x = x.to(device)
    #         y = y.to(device)
    #         with torch.no_grad():
    #             out = model(x)
    #         val_NDs.append(metric(out, y))
            
    #     train_distance = 1 - sum(x[1] for x in train_NDs) / sum(x[0] for x in train_NDs)
    #     val_distance = 1 - sum(x[1] for x in val_NDs) / sum(x[0] for x in val_NDs)
        
        
        
    #     print(f"epoch№ {epoch + 1}\ntrain_distance = {train_distance}\nval_distance = {val_distance}\n\n")
            
        


ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html

# inference

In [55]:
#model.load_state_dict(torch.load("../models/first_model.pth.tar", map_location=torch.device(device)))

In [36]:
#model.eval()

In [45]:
class ModelForInference(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.model.eval()

    def forward(self, x):
        if len(x.shape) == 3:
            x = x.unsquuze(0)
        trg = torch.full((x.shape[0], 1), SOS_TOKEN, dtype=torch.long).to(device)
        for _ in range(MAX_PHRASE_LENGTH):
            with torch.no_grad():
                out = self.model(x, trg)[:, -1, :]
            token = out.argmax(-1).unsqueeze(-1)
            trg = torch.cat((trg, token), -1)
        
            if torch.all(token == EOS_TOKEN):
                break    
    
        return trg

    def postprocess(self, x):
        pass

In [46]:
inference = ModelForInference(model).to(device)


In [47]:
x = X_train[0:BATCH_SIZE]
out = inference(x)
out.shape

torch.Size([64, 33])

In [48]:
out

tensor([[60, 23, 31,  ...,  7, 31, 44],
        [60, 23, 31,  ...,  7, 31, 44],
        [60, 23, 31,  ...,  7, 31, 44],
        ...,
        [60, 23, 31,  ...,  7, 31, 44],
        [60, 23, 31,  ...,  7, 31, 44],
        [60, 23, 31,  ...,  7, 31, 44]])

In [41]:
import onnx
import tensorflow as tf
import onnx_tf

2023-08-12 00:56:21.791344: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-12 00:56:21.842016: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-12 00:56:21.842887: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [44]:
input_shape = (1, 128, 82, 2)
dummy_input = torch.randn(input_shape)
onnx_model_path = '../models/my_transformer.onnx'
torch.onnx.export(inference, dummy_input, onnx_model_path, verbose=False)

  if torch.all(token == EOS_TOKEN):


verbose: False, log level: Level.ERROR



In [49]:
onnx_model = onnx.load(onnx_model_path)

# Convert the ONNX model to TensorFlow format
tf_model_path = 'my_transformer.pb'
tf_rep = onnx_tf.backend.prepare(onnx_model)
tf_rep.export_graph(tf_model_path)

# Convert the TensorFlow model to TensorFlow Lite format
converter = tf.compat.v1.lite.TFLiteConverter.from_saved_model(tf_model_path)
tflite_model = converter.convert()

# Save the TensorFlow Lite model to a file
with open('my_transformer.tflite', 'wb') as f:
    f.write(tflite_model)



KeyError: in user code:

    File "/home/qklent/programming/machine_learning/alfa_bank_receipts/.venv/lib/python3.11/site-packages/onnx_tf/backend_tf_module.py", line 99, in __call__  *
        output_ops = self.backend._onnx_node_to_tensorflow_op(onnx_node,
    File "/home/qklent/programming/machine_learning/alfa_bank_receipts/.venv/lib/python3.11/site-packages/onnx_tf/backend.py", line 347, in _onnx_node_to_tensorflow_op  *
        return handler.handle(node, tensor_dict=tensor_dict, strict=strict)
    File "/home/qklent/programming/machine_learning/alfa_bank_receipts/.venv/lib/python3.11/site-packages/onnx_tf/handlers/handler.py", line 59, in handle  *
        return ver_handle(node, **kwargs)
    File "/home/qklent/programming/machine_learning/alfa_bank_receipts/.venv/lib/python3.11/site-packages/onnx_tf/handlers/backend/conv.py", line 15, in version_11  *
        return cls.conv(node, kwargs["tensor_dict"])
    File "/home/qklent/programming/machine_learning/alfa_bank_receipts/.venv/lib/python3.11/site-packages/onnx_tf/handlers/backend/conv_mixin.py", line 29, in conv  *
        x = input_dict[node.inputs[0]]

    KeyError: 'input.1'
