In [43]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset, random_split

import lightning as L

import numpy as np
import pandas as pd
import csv
import os
from torch.utils.data import TensorDataset
import re

In [44]:
class PositionEncoding(nn.Module):
    """Fixed (non-learned) sinusoidal position encoding added to embeddings.

    A table of shape ``(1, max_len, d_model)`` is precomputed once and stored
    as the buffer ``pe``. Even feature columns hold ``sin(pos / 10000^(i/d_model))``
    and odd feature columns hold the matching ``cos`` term.

    Args:
        d_model: Size of the embedding (feature) dimension.
        max_len: Number of positions to precompute.
    """

    def __init__(self, d_model, max_len):
        super().__init__()

        position = torch.arange(start=0, end=max_len, dtype=torch.float).unsqueeze(1)
        embedding_index = torch.arange(start=0, end=d_model, step=2).float()

        div_term = 1/torch.tensor(10000.0)**(embedding_index / d_model)

        # (max_len, ceil(d_model / 2)): one angle per (position, frequency) pair.
        angles = position * div_term

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even columns,
        # so drop the surplus frequency; for even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Leading singleton axis lets the table broadcast over the batch axis.
        # A buffer follows .to()/.cuda() and is saved in state_dict but is
        # never handed to the optimizer.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, word_embeddings):
        """Add the position table to ``word_embeddings``.

        Args:
            word_embeddings: Tensor whose last two axes are
                ``(seq_len, d_model)``, e.g. ``(batch, seq_len, d_model)``.

        Returns:
            Tensor of the broadcast shape (same as the input for 3-D input).

        Raises:
            RuntimeError: from the broadcast add if ``seq_len`` exceeds
                ``max_len`` or the feature size differs from ``d_model``.
        """
        # The table's position axis is dim 1 (dim 0 is the broadcast axis), so
        # slice it by the input's sequence length rather than its batch size.
        seq_len = word_embeddings.size(-2)
        return word_embeddings + self.pe[:, :seq_len, :]

In [45]:
class BinaryToNum(nn.Module):
    '''
    Decode a 64-wide vector laid out like an IEEE 754 double into one number.

    Input: 64 features on the last axis | Output: 1 feature on the last axis.
    Layout of the last axis: [0] sign, [1:12] exponent (most significant bit
    first), [12:64] fraction (weights 2^-1 ... 2^-52).
    Any number of leading (batch) axes is accepted, including none.

    The decode is sign * 2^(exponent - 1023) * (1 + fraction) for every input;
    there is no special handling for an all-zero or all-one exponent field.
    Both weight tables are frozen (requires_grad=False), but gradients still
    flow back to the input, so real-valued "soft" bits can be used.

    NOTE(review): the weights are created as float32, so with float32 inputs
    the 52 fraction bits cannot all be resolved and 2^exponent becomes inf
    once the unbiased exponent goes past the float32 range (about 127).
    '''

    def __init__(self):
        super().__init__()

        self.exponent = nn.Linear(in_features=11, out_features=1, bias=False)
        self.fraction = nn.Linear(in_features=52, out_features=1, bias=False)

        with torch.no_grad():
            # 2^10 ... 2^0: the exponent field read as an unsigned integer.
            exponent_powers = torch.tensor([2.0**i for i in range(10, -1, -1)], dtype=torch.float32)
            self.exponent.weight.copy_(exponent_powers.unsqueeze(0))

            # 2^-1 ... 2^-52: the fraction field read as a binary fraction.
            fraction_powers = torch.tensor([2.0**(-i) for i in range(1, 53)], dtype=torch.float32)
            self.fraction.weight.copy_(fraction_powers.unsqueeze(0))

        # Fixed decoding tables, not trainable parameters.
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, input):
        '''
        input: tensor of shape (..., 64) | returns: tensor of shape (..., 1)
        '''
        # Ellipsis indexing slices the last axis whatever the number of
        # leading axes, so unbatched (64,) and (batch, 64) inputs both work.
        sign_bits = input[..., :1]
        exponent_bits = input[..., 1:12]
        fraction_bits = input[..., 12:]

        sign_value = 1 - 2 * sign_bits                         # bit 0 -> +1, bit 1 -> -1
        exponent_value = self.exponent(exponent_bits) - 1023   # remove the exponent bias
        fraction_value = self.fraction(fraction_bits) + 1.0    # implicit leading 1

        decimal_value = torch.pow(2, exponent_value) * fraction_value
        return sign_value * decimal_value


In [50]:
class WordToNum(nn.Module):
    """Map a sequence of token ids to a single number.

    Pipeline: token embedding -> sinusoidal position encoding -> Transformer
    encoder -> one scalar per token position -> BinaryToNum decode. Each
    token position supplies one of the 64 values BinaryToNum consumes, so
    inputs must be exactly 64 tokens long.

    Args:
        vocab_size: Number of distinct token ids.
        embed_dim: Embedding / model width; must be divisible by ``nhead``.
        max_seq_len: Number of positions precomputed by the position encoding.
        nhead: Attention heads per encoder layer.
        dim_feedforward: Hidden width of each encoder layer's feed-forward net.
        num_layers: Number of stacked encoder layers.
    """

    def __init__(self, vocab_size, embed_dim, max_seq_len,
                 nhead=8, dim_feedforward=512, num_layers=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.positional_encoding = PositionEncoding(embed_dim, max_seq_len)

        # batch_first=True: tensors here are (batch, seq, embed). Without it
        # the encoder treats dim 0 as the sequence axis and attention would
        # mix different samples of the batch instead of positions.
        # NOTE(review): nn.TransformerEncoder works on its own copies of this
        # layer, so the parameters under `self.transformer` look unused in
        # forward; the attribute is kept so existing state_dict keys still load.
        self.transformer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(self.transformer, num_layers=num_layers)

        self.bit_layer = nn.Linear(embed_dim, 1)
        self.numeric_layer = BinaryToNum()

    def forward(self, tokens):
        """Return one number per sequence.

        Args:
            tokens: Integer tensor of token ids, shape ``(batch, 64)``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        embeddings = self.embedding(tokens)
        positional_embeddings = self.positional_encoding(embeddings)

        context_vectors = self.encoder(positional_embeddings)

        # Drop only the trailing size-1 feature axis. A bare squeeze() would
        # also drop the batch axis when batch == 1 and break the decoder.
        # NOTE(review): these are unbounded linear outputs, not probabilities;
        # confirm whether a sigmoid was intended before the bit decode.
        bit_probs = self.bit_layer(context_vectors).squeeze(-1)

        return self.numeric_layer(bit_probs)

In [None]:
def load_data_from_csv(file_path):
    """Read a CSV file and return its contents as a pandas DataFrame.

    Args:
        file_path: Path (or any source accepted by ``pd.read_csv``) of the
            CSV file to load.

    Returns:
        The parsed ``pandas.DataFrame``. The frame was previously read and
        then discarded, so callers always received ``None``.
    """
    return pd.read_csv(file_path)
    

In [52]:
# Shape smoke test: push one random batch through every stage of the model
# separately, then through the assembled WordToNum, checking each shape.
vocab_size = 1000
embed_dim = 128
max_seq_len = 64   # BinaryToNum consumes 64 values (1 sign + 11 exponent + 52 fraction)
batch_size = 32

# Stage 1: token ids -> embeddings
test_model = nn.Embedding(vocab_size, embed_dim)
test_input = torch.randint(0, vocab_size, (batch_size, max_seq_len))
output = test_model(test_input)
assert output.shape == (batch_size, max_seq_len, embed_dim)
print(output.shape)

# Stage 2: add position information (shape unchanged)
test_model = PositionEncoding(embed_dim, max_seq_len)
output = test_model(output)
assert output.shape == (batch_size, max_seq_len, embed_dim)
print(output.shape)

# Stage 3: contextualise with the Transformer encoder (shape unchanged)
transformer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=8, dim_feedforward=512, batch_first=True)
test_model = nn.TransformerEncoder(transformer, num_layers=2)
output = test_model(output)
assert output.shape == (batch_size, max_seq_len, embed_dim)
print(output.shape)

# Stage 4: one scalar per position. squeeze(-1) removes only the feature
# axis, so the check still holds if batch_size is set to 1.
test_model = nn.Linear(embed_dim, 1)
output = test_model(output).squeeze(-1)
assert output.shape == (batch_size, max_seq_len)
print(output.shape)

# Stage 5: decode the 64 per-position values into one number per sample
test_model = BinaryToNum()
output = test_model(output)
assert output.shape == (batch_size, 1)
print(output.shape)

# End to end: the assembled model must give the same final shape
test_model = WordToNum(vocab_size, embed_dim, max_seq_len)
output = test_model(test_input)
assert output.shape == (batch_size, 1)
print(output.shape)

torch.Size([32, 64, 128])
torch.Size([32, 64, 128])
torch.Size([32, 64, 128])
torch.Size([32, 64])
torch.Size([32, 1])
torch.Size([32, 1])


