In [1]:
import torch

In [2]:
from transformers import AutoTokenizer
# Load the tokenizer stored next to the notebook and register the three
# date placeholder tokens as additional special tokens.
tokenizer = AutoTokenizer.from_pretrained("./persianTokenizer")
special_tokens_dict = {"additional_special_tokens": ["[<year>]", "[<month>]", "[<day>]"]}
# add_special_tokens returns the number of tokens that were newly added;
# leaving it as the last expression shows that count as the cell output.
tokenizer.add_special_tokens(special_tokens_dict)

  from .autonotebook import tqdm as notebook_tqdm


3

In [3]:
from huggingface_hub import PyTorchModelHubMixin

class Transformer(torch.nn.Module, PyTorchModelHubMixin):
    """Encoder-only Transformer mapping token ids to per-position vocabulary logits.

    Token embeddings are summed with learned positional embeddings, run through
    a stack of ``torch.nn.TransformerEncoderLayer`` blocks and projected to
    vocabulary size by a linear head.

    Args:
        input_dim: Number of rows in the learned positional-embedding table,
            i.e. the longest sequence ``forward`` accepts.
        model_dim: Width of the embeddings and of every encoder layer. The
            feed-forward sub-layer uses ``2 * model_dim`` units.
        num_heads: Number of attention heads in each encoder layer.
        num_layers: Number of stacked encoder layers.
        output_dim: Vocabulary size; it sizes both the token-embedding table
            and the output projection.
        dropout: Dropout probability used inside each encoder layer.
        device: Device the encoder layers are created on. The embeddings and
            the output head are created on the default device, so move the
            whole model with ``.to(...)`` after construction.
    """

    def __init__(self, input_dim, model_dim, num_heads, num_layers, output_dim, dropout=0.1, device="cuda"):
        super().__init__()

        # Attribute names are part of the checkpoint format (state-dict keys),
        # so they must not be renamed.
        self.embedding = torch.nn.Embedding(output_dim, model_dim)
        self.positional_encoding = torch.nn.Embedding(input_dim, model_dim)
        encoder_layer = torch.nn.TransformerEncoderLayer(
            d_model=model_dim,
            nhead=num_heads,
            dim_feedforward=model_dim * 2,
            activation=torch.nn.functional.gelu,
            batch_first=True,
            bias=False,
            # Honour the constructor argument instead of a hard-coded 0.1.
            dropout=dropout,
            device=device,
        )
        self.en = torch.nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers,
            enable_nested_tensor=False,
        )
        self.fc_train = torch.nn.Linear(model_dim, output_dim)

    def forward(self, x):
        """Return logits of shape ``(batch, seq_len, output_dim)`` for token ids ``x``.

        Args:
            x: 2-D integer tensor of token ids, ``(batch, seq_len)``.

        Raises:
            ValueError: If ``seq_len`` exceeds the size of the positional table.
        """
        _, seq_len = x.shape
        max_positions = self.positional_encoding.num_embeddings
        if seq_len > max_positions:
            # Fail early with a readable message; without this check the
            # positional lookup fails with an out-of-range index error.
            raise ValueError(
                f"Sequence length {seq_len} exceeds the maximum of "
                f"{max_positions} positions supported by this model."
            )

        positions = torch.arange(seq_len, device=x.device)
        # The (seq_len, model_dim) positional embeddings broadcast over the batch.
        hidden = self.embedding(x) + self.positional_encoding(positions)
        hidden = self.en(hidden)

        return self.fc_train(hidden)

    
    

In [4]:
# Hyper-parameters of the date-extraction model.
DATE_CONFIG = {
    "vocab_size": 25003,    # passed to the model as output_dim
    "context_length": 32,   # passed to the model as input_dim
    "emb_dim": 256,         # passed to the model as model_dim
    "n_heads": 4,           # passed to the model as num_heads
    "n_layers": 4,          # passed to the model as num_layers
    "drop_rate": 0.1,       # dropout probability intended for the model
}


In [6]:
# Build the model from DATE_CONFIG. The configured drop rate is forwarded
# explicitly rather than relying on the constructor's default value.
model = Transformer(
    input_dim=DATE_CONFIG["context_length"],
    model_dim=DATE_CONFIG["emb_dim"],
    num_heads=DATE_CONFIG["n_heads"],
    num_layers=DATE_CONFIG["n_layers"],
    output_dim=DATE_CONFIG["vocab_size"],
    dropout=DATE_CONFIG["drop_rate"],
)
# Move every sub-module to the GPU; as the last expression this also
# displays the module tree.
model.to("cuda")

Transformer(
  (embedding): Embedding(25003, 256)
  (positional_encoding): Embedding(32, 256)
  (en): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=False)
        )
        (linear1): Linear(in_features=256, out_features=512, bias=False)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=512, out_features=256, bias=False)
        (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (fc_train): Linear(in_features=256, out_features=25003, bias=True)
)

In [7]:
# Restore the trained weights. weights_only=True limits unpickling to tensors
# and plain containers, which is all a state dict needs; it avoids executing
# arbitrary pickled code and silences the torch.load FutureWarning.
model.load_state_dict(torch.load("bertV12.pth", weights_only=True))
model.to("cuda")

  model.load_state_dict(torch.load("bertV12.pth"))


Transformer(
  (embedding): Embedding(25003, 256)
  (positional_encoding): Embedding(32, 256)
  (en): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=False)
        )
        (linear1): Linear(in_features=256, out_features=512, bias=False)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=512, out_features=256, bias=False)
        (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (fc_train): Linear(in_features=256, out_features=25003, bias=True)
)

In [8]:
# Add up the element counts of every parameter tensor in the model.
total_params = sum(map(lambda tensor: tensor.numel(), model.parameters()))
print(f"Total number of parameters: {total_params:,}")

Total number of parameters: 14,933,931


In [9]:
def text_to_token_ids(text, tokenizer):
    """Encode ``text`` with ``tokenizer`` and return the ids as a batch of one.

    Args:
        text: String to encode.
        tokenizer: Object exposing ``encode(text)`` that returns token ids.

    Returns:
        Tensor of the encoded ids with a leading batch dimension of size 1.
    """
    ids = tokenizer.encode(text)
    # Prepend the batch axis expected by the model.
    return torch.unsqueeze(torch.tensor(ids), dim=0)


def token_ids_to_text(token_ids, tokenizer):
    """Decode a tensor of token ids back into text.

    Args:
        token_ids: Tensor of ids; a leading dimension of size 1 (the batch
            axis) is dropped before decoding, other shapes pass through as-is.
        tokenizer: Object exposing ``decode(ids)``.

    Returns:
        Whatever ``tokenizer.decode`` returns for the list of ids.
    """
    ids_without_batch = torch.squeeze(token_ids, dim=0)
    id_list = ids_without_batch.tolist()
    return tokenizer.decode(id_list)

In [10]:
def predict_masked(model, tokenizer, input, deivce):
    """Predict the masked date appended to ``input`` and return it as text.

    The input text is suffixed with the template
    ``[MASK][MASK][MASK][MASK]-[MASK][MASK]-[MASK][MASK]``, encoded, and run
    through ``model`` as a batch of one. The highest-scoring token is taken
    at every position, and the ten predictions just before the final position
    are decoded.

    Args:
        model: Callable module returning ``(batch, seq_len, vocab)`` logits.
            It is switched to eval mode and left in that mode.
        tokenizer: Tokenizer used for both encoding and decoding.
        input: Text containing the date to normalise.
        deivce: Device the encoded ids are moved to (the parameter name is
            kept as-is for compatibility with existing callers).

    Returns:
        The decoded text of the selected predictions.
    """
    mask_template = "[MASK][MASK][MASK][MASK]-[MASK][MASK]-[MASK][MASK]"

    model.eval()
    encoded = tokenizer.encode(input + " " + mask_template)
    batch = torch.tensor(encoded).to(deivce).unsqueeze(0)

    with torch.no_grad():
        # The batch holds one sequence, so index 0 yields its (seq_len, vocab) logits.
        best_ids = model(batch)[0].argmax(dim=-1)

    # Drop the final position and keep the ten before it.
    # NOTE(review): assumes the tokenizer appends exactly one trailing special
    # token and that the template encodes to ten tokens - confirm.
    return token_ids_to_text(best_ids[-11:-1], tokenizer)

In [11]:
# Smoke test on a single date string; the output recorded for this cell is '1402-08-12'.
predict_masked(model,tokenizer,"12 آبان 1402","cuda")

'1402-08-12'

In [11]:
# Write the model's current weights (state dict only) to bertV12.pth.
# NOTE(review): this is the same file the load cell reads, and the execution
# counter In [11] appears twice - confirm the notebook runs cleanly top to bottom.
torch.save(model.state_dict(),"bertV12.pth")

In [12]:
# Upload the model to the Hub repository "BERT-Text2Date".
# NOTE(review): presumably requires an authenticated Hugging Face session - confirm.
model.push_to_hub("BERT-Text2Date")

model.safetensors: 100%|██████████| 59.7M/59.7M [05:35<00:00, 178kB/s]   


CommitInfo(commit_url='https://huggingface.co/Parssky/BERT-Text2Date/commit/36fb1a2a38da9b8dff7767902ae5b843182c1983', commit_message='Push model using huggingface_hub.', commit_description='', oid='36fb1a2a38da9b8dff7767902ae5b843182c1983', pr_url=None, pr_revision=None, pr_num=None)

In [13]:
# Upload the tokenizer under the same repository name used for the model.
tokenizer.push_to_hub("BERT-Text2Date")

CommitInfo(commit_url='https://huggingface.co/Parssky/BERT-Text2Date/commit/0a92ec5d2867dada01862f2601f15cb740753b34', commit_message='Upload tokenizer', commit_description='', oid='0a92ec5d2867dada01862f2601f15cb740753b34', pr_url=None, pr_revision=None, pr_num=None)