In [2]:
import torch
import os

# Switch the working directory to the repository checkout before the `sms`
# imports below are executed.
# NOTE(review): presumably this is what makes the `sms` package importable from
# the notebook — confirm. The path is machine-specific.
os.chdir("c:/Users/cunn2/OneDrive/DSML/Project/thesis-repo")

from sms.exp1.config_classes import load_config_from_launchplan
from sms.exp1.run_training import build_encoder, build_projector
from sms.exp1.models.siamese import SiameseModel

# config = load_config_from_launchplan("sms/exp1/runs/run_20240926_162652/original_launchplan.yaml")

# encoder = build_encoder(config.model_dump())
# projector = build_projector(config.model_dump())

# model = SiameseModel(encoder, projector)

# print(encoder)
# print(projector)
# print(model)

In [3]:
# bert
from torch import nn
import numpy as np

class TokenAndPositionalEmbeddingLayer(nn.Module):
    """Project each sequence element to `emb_dim` and add sinusoidal position codes.

    Args:
        input_dim: number of features per sequence element (Conv1d in-channels).
        emb_dim: size of the embedding produced for every element.
        max_len: number of positions for which encodings are precomputed.
    """

    def __init__(self, input_dim, emb_dim, max_len):
        super().__init__()
        self.max_len = max_len
        self.emb_dim = emb_dim
        self.input_dim = input_dim
        # A kernel-size-1 convolution applies the same linear map at every step.
        self.token_emb = nn.Conv1d(self.input_dim, self.emb_dim, 1)
        # Registered as a buffer so `.to(device)` / `.cuda()` move it together with
        # the module; non-persistent so the state_dict keys stay exactly as before.
        self.register_buffer(
            "pos_emb",
            self.positional_encoding(self.max_len, self.emb_dim),
            persistent=False,
        )

    def get_angles(self, pos, i, emb_dim):
        """Return `pos / 10000^(2 * (i // 2) / emb_dim)`, broadcast over `pos` and `i`."""
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(emb_dim))
        return pos * angle_rates

    def positional_encoding(self, position, emb_dim):
        """Build the float32 encoding table of shape (1, position, emb_dim).

        Even embedding indices hold the sine of the angle, odd indices the cosine.
        """
        angle_rads = self.get_angles(
            np.arange(position)[:, np.newaxis],
            np.arange(emb_dim)[np.newaxis, :],
            emb_dim,
        )

        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
        # Leading axis of size 1 lets the table broadcast over the batch dimension.
        pos_encoding = angle_rads[np.newaxis, ...]
        return torch.tensor(pos_encoding, dtype=torch.float32)

    def forward(self, x):
        """Embed `x` of shape (batch, seq_len, input_dim) into (batch, seq_len, emb_dim)."""
        # Conv1d expects channels first: (batch, input_dim, seq_len).
        x = torch.permute(x, (0, 2, 1))
        x = self.token_emb(x)
        # Scale by sqrt(emb_dim); done out of place so the conv output is not mutated.
        x = x * torch.sqrt(torch.tensor(self.emb_dim, dtype=torch.float32))
        x = torch.permute(x, (0, 2, 1))
        # Only the first seq_len positions of the precomputed table are needed.
        # `.to` is a no-op when the buffer already lives on x's device.
        return x + self.pos_emb[:, : x.shape[1]].to(x.device)

class BertEncoder(nn.Module):
    """Transformer encoder that maps a sequence to a single latent vector.

    Args:
        config: mapping read via `.get`; recognised keys and their defaults are
            `d_model` (128), `n_layers` (4), `max_seq_len` (512), `n_heads` (8),
            `d_ff` (4 * d_model) and `dropout_rate` (0.1).
        input_shape: number of features per sequence element.
        d_latent: size of the returned embedding.
    """

    def __init__(self, config, input_shape=2, d_latent=64):
        super().__init__()
        self.d_input = input_shape
        self.d_latent = d_latent
        self.d_model = config.get("d_model", 128)
        self.n_layers = config.get("n_layers", 4)

        self.emb = TokenAndPositionalEmbeddingLayer(
            input_dim=self.d_input, emb_dim=self.d_model, max_len=config.get("max_seq_len", 512)
        )

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.d_model,
            nhead=config.get("n_heads", 8),
            dim_feedforward=config.get("d_ff", self.d_model * 4),
            dropout=config.get("dropout_rate", 0.1),
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer, num_layers=self.n_layers
        )
        self.fc = nn.Linear(self.d_model, self.d_latent)
        self.pool = nn.AdaptiveAvgPool1d(1)

    def forward(self, batch):
        """Encode `batch` of shape (batch_size, padded_seq_length, point_dim).

        Returns:
            Tensor of shape (batch_size, d_latent): the mean over the sequence axis
            of the per-step latent vectors.
        """
        # All-False mask: no position is treated as padding, every step is attended to.
        # NOTE(review): sequences padded upstream with a sentinel value are therefore
        # still attended to and averaged in — confirm this is intended.
        padding_mask = torch.zeros(
            batch.shape[:2], dtype=torch.bool, device=batch.device
        )
        batch_emb = self.emb(batch)             # (batch_size, padded_seq_length, d_model)
        # nn.TransformerEncoder names this argument `src_key_padding_mask`; the former
        # `batch_key_padding_mask` keyword does not exist and raised a TypeError.
        batch_emb = self.transformer_encoder(
            batch_emb, src_key_padding_mask=padding_mask
        )                                       # (batch_size, padded_seq_length, d_model)
        batch_emb = self.fc(batch_emb)          # (batch_size, padded_seq_length, d_latent)
        batch_emb = torch.permute(batch_emb, (0, 2, 1))  # (batch_size, d_latent, padded_seq_length)
        batch_emb = self.pool(batch_emb)            # (batch_size, d_latent, 1)
        batch_emb = torch.squeeze(batch_emb, dim=2)  # (batch_size, d_latent)

        return batch_emb
    
# Load the training chunks. `weights_only=False` is passed explicitly because the
# file is a trusted local artifact and appears to hold plain NumPy arrays rather
# than tensors (NOTE(review): confirm); relying on the implicit default emits a
# FutureWarning and is expected to break once that default becomes True.
# Full unpickling can execute arbitrary code, so never point this at untrusted files.
data = torch.load(
    r"C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\data\exp1\train_data.pt",
    weights_only=False,
)
# Longest chunk in the dataset; used below as the padding target length.
max_length = max(len(chunk) for chunk in data)
print(max_length)

# Hand-written stand-in for a dumped launchplan config, so the encoder can be
# built without reading a YAML file.
dumped_lp_config = {
    "encoder": {
        "type": "BertEncoder",
        "params": {
            "config": {
                "d_model": 128,
                "n_layers": 4,
                "n_heads": 8,
                "d_ff": 512,
                "d_expander": 256,
                "dropout_rate": 0.1,
                "max_seq_len": 512
            }
        }
    },
    "dims": {
        "input_shape": 2,
        "d_latent": 64
    },
    "input": {
        "make_relative_pitch": True,
        "normalize_octave": False,
        "piano_roll": False,
        "quantize": False,
        "rest_pitch": -1,
        "steps_per_bar": 32,
        "pad_sequence": True,
        "pad_val": -1000,
        "goal_seq_len": max_length
    }
}

encoder = build_encoder(dumped_lp_config)


36


  data = torch.load(r"C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\data\exp1\train_data.pt")


In [4]:
# Display the module tree of the encoder that was just built.
encoder

BertEncoder(
  (emb): TokenAndPositionalEmbeddingLayer(
    (token_emb): Conv1d(2, 128, kernel_size=(1,), stride=(1,))
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=512, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=512, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (fc): Linear(in_features=128, out_features=64, bias=True)
  (pool): AdaptiveAvgPool1d(output_size=1)
)

In [5]:
from sms.src.synthetic_data.formatter import InputFormatter

# Use the first ten chunks as a small example batch.
data_ex = data[:10]

# The formatter is configured from the "input" section of the config dict.
formatter = InputFormatter(**dumped_lp_config['input'])

# Format every chunk, cast to float32 and copy before handing the array to torch.
formatted_arrays = (
    formatter(chunk).astype(np.float32).copy() for chunk in data_ex
)
formatted_data_list = [torch.from_numpy(array) for array in formatted_arrays]

# shape [num_chunks, *input_shape]
formatted_data_stacked = torch.stack(formatted_data_list, dim=0)

In [6]:
# Smoke-test a forward pass on the stacked example batch.
# NOTE(review): this runs with gradients enabled and without an explicit
# `encoder.eval()`; if the encoder is still in training mode, dropout makes the
# values differ between runs — confirm whether that matters here.
encoder(formatted_data_stacked)

tensor([[-0.1694,  0.6252, -1.2926,  0.3294,  0.7557,  0.0592,  0.5362,  0.8396,
         -0.2769,  0.7450, -0.0067,  0.3244,  0.6042,  0.5901,  0.2478,  0.5685,
          0.0820, -0.6198, -0.8019, -0.3445,  1.0504, -0.4134, -0.6586,  0.0849,
          0.4765, -0.1593,  0.8542,  0.3546,  0.7130,  0.0470,  0.3648,  0.6061,
          0.0992,  0.2861, -0.2881,  0.5077,  0.5903,  0.0130, -0.3429, -0.5028,
          0.9624, -0.2409,  0.0261, -0.1905, -0.3776, -0.0946, -0.7997,  0.4746,
         -0.1005, -0.2990,  0.5326,  0.4430,  0.1011, -1.3552, -0.4408, -0.2314,
         -0.8058,  0.0802, -0.3196, -0.3843, -0.7096,  0.5022, -0.1368, -0.1357],
        [-0.2532,  0.4248, -0.8243,  0.2140,  0.5478,  0.1332,  0.4364,  0.5299,
         -0.4181,  0.6948, -0.2095,  0.2311,  0.8965,  0.6979,  0.3164,  0.4584,
         -0.0208, -0.6818, -0.6604, -0.2539,  0.8566, -0.4693, -0.6859,  0.0717,
          0.3832,  0.0359,  0.8730,  0.3418,  0.4909,  0.1239,  0.3962,  0.7210,
          0.3641,  0.3725, 

In [7]:
# Formatter with only padding switched on; every other option keeps its default.
formatter = InputFormatter(pad_sequence=True)

# Print one chunk before and after formatting to compare them side by side.
first_chunk = data[0]
print(first_chunk)
print(formatter(first_chunk))


[[ 0.2 67. ]
 [ 1.  74. ]
 [ 2.  76. ]
 [ 0.8 74. ]]
[[ 2.0e-01  6.7e+01]
 [ 1.0e+00  7.4e+01]
 [ 2.0e+00  7.6e+01]
 [ 8.0e-01  7.4e+01]
 [-1.0e+03 -1.0e+03]
 [-1.0e+03 -1.0e+03]
 [-1.0e+03 -1.0e+03]
 [-1.0e+03 -1.0e+03]
 [-1.0e+03 -1.0e+03]
 [-1.0e+03 -1.0e+03]
 [-1.0e+03 -1.0e+03]
 [-1.0e+03 -1.0e+03]]


In [8]:
# Scratch check: a non-zero element sum of a tensor converts to True.
bool(torch.tensor([5, 2]).sum())

True

In [11]:
# Build the encoder from a launchplan file this time, instead of the inline dict.
config = load_config_from_launchplan(r"C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\sms\exp1\launchplans\transformer_rel.yaml")
dumped_lp_config = config.model_dump()
encoder = build_encoder(dumped_lp_config)

# Format the first ten chunks with the launchplan's input settings.
data_ex = data[:10]
formatter = InputFormatter(**dumped_lp_config['input'])
formatted_data_list = []
for chunk in data_ex:
    formatted_chunk = formatter(chunk).astype(np.float32).copy()
    formatted_data_list.append(torch.from_numpy(formatted_chunk))
# shape [num_chunks, *input_shape]
formatted_data_stacked = torch.stack(formatted_data_list, dim=0)

# True wherever every feature of a step equals -1000.
pad_rows = torch.all(formatted_data_stacked == -1000, dim=-1)
print(sum(pad_rows))              # per position: number of chunks flagged there
print(bool(torch.sum(pad_rows)))  # whether any step at all is flagged
print(formatted_data_stacked.shape)

encoder(formatted_data_stacked)


tensor([ 0,  0,  0,  1,  4,  5,  5,  7,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10])
True
torch.Size([10, 38, 2])


tensor([[ 0.2980, -0.1535, -0.1493,  ..., -0.2699, -0.0711, -0.3754],
        [ 0.4129,  0.0276, -0.0893,  ..., -0.2565,  0.0164, -0.1155],
        [ 0.1244,  0.1676, -0.2458,  ..., -0.0757, -0.0278, -0.1218],
        ...,
        [ 0.3211, -0.0522, -0.1985,  ..., -0.1279, -0.1014, -0.0584],
        [ 0.3891,  0.1941, -0.0784,  ..., -0.0367,  0.1587, -0.3743],
        [ 0.1002,  0.0830, -0.1873,  ...,  0.1531,  0.1510, -0.0222]],
       grad_fn=<SqueezeBackward1>)