# 모델 & 데이터 로딩

In [101]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.nn import Transformer
import torch.nn.functional as F
import math
import random
import numpy as np

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # Shape: (1, max_len, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x shape: (batch_size, seq_len, d_model)
        x = x + self.pe[:, :x.size(1), :]  # Broadcasting positional encodings
        return x  # Shape: (batch_size, seq_len, d_model)
    
class NormEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(NormEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # Shape: (1, max_len, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x, lengths):
        # x shape: (batch_size, seq_len, d_model)
        x = x + self.pe[:, :x.size(1), :]  # Broadcasting positional encodings
        return x  # Shape: (batch_size, seq_len, d_model)


class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        assert d_model % num_heads == 0  # d_model must be divisible by num_heads
        self.d_k = d_model // num_heads
        self.num_heads = num_heads
        self.linears = nn.ModuleList([nn.Linear(d_model, d_model) for _ in range(4)])  # Q, K, V, and output projections

    def forward(self, query, key, value, mask=None):
        # query, key, value shape: (batch_size, seq_len, d_model)
        batch_size = query.size(0)

        # Linear projections
        query, key, value = [l(x).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)
                             for l, x in zip(self.linears, (query, key, value))]
        # After projection and reshaping: (batch_size, num_heads, seq_len, d_k)

        # Scaled dot-product attention
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.d_k)
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)
        attn = F.softmax(scores, dim=-1)
        attn_output = torch.matmul(attn, value)
        # attn_output shape: (batch_size, num_heads, seq_len, d_k)

        # Concatenate heads and apply final linear projection
        attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, -1, self.num_heads * self.d_k)
        return self.linears[-1](attn_output)  # Shape: (batch_size, seq_len, d_model)

class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff):
        super(FeedForward, self).__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.linear2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        # x shape: (batch_size, seq_len, d_model)
        return self.linear2(F.relu(self.linear1(x)))  # Shape: (batch_size, seq_len, d_model)
    
class EncoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super(EncoderLayer, self).__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = FeedForward(d_model, d_ff)
        self.layernorm1 = nn.LayerNorm(d_model)
        self.layernorm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask=None):
        # x shape: (batch_size, seq_len, d_model)
        attn_output = self.self_attn(x, x, x, mask)  # Self-attention
        x = x + self.dropout(attn_output)  # Add & Norm
        x = self.layernorm1(x)

        ff_output = self.feed_forward(x)  # Feed forward
        x = x + self.dropout(ff_output)  # Add & Norm
        return self.layernorm2(x)  # Shape: (batch_size, seq_len, d_model)
    
class DecoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super(DecoderLayer, self).__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.cross_attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = FeedForward(d_model, d_ff)
        self.layernorm1 = nn.LayerNorm(d_model)
        self.layernorm2 = nn.LayerNorm(d_model)
        self.layernorm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, memory, src_mask=None, tgt_mask=None):
        # x shape: (batch_size, tgt_len, d_model)
        # memory shape: (batch_size, src_len, d_model)

        attn_output = self.self_attn(x, x, x, tgt_mask)  # Self-attention
        x = x + self.dropout(attn_output)  # Add & Norm
        x = self.layernorm1(x)

        attn_output = self.cross_attn(x, memory, memory, src_mask)  # Cross-attention
        x = x + self.dropout(attn_output)  # Add & Norm
        x = self.layernorm2(x)

        ff_output = self.feed_forward(x)  # Feed forward
        x = x + self.dropout(ff_output)  # Add & Norm
        return self.layernorm3(x)  # Shape: (batch_size, tgt_len, d_model)
    
class Encoder(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, num_layers, vocab_size, max_len, dropout=0.1):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_len)
        self.layers = nn.ModuleList([EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.dropout = nn.Dropout(dropout)

    def forward(self, src, src_mask=None):
        # src shape: (batch_size, src_len)
        x = self.embedding(src)  # Embedding
        # x shape: (batch_size, src_len, d_model)
        x = self.positional_encoding(x)
        x = self.dropout(x)
        for layer in self.layers:
            x = layer(x, src_mask)
        return x  # Shape: (batch_size, src_len, d_model)
    
class Decoder(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, num_layers, vocab_size, max_len, dropout=0.1):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_len)
        self.layers = nn.ModuleList([DecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.dropout = nn.Dropout(dropout)

    def forward(self, tgt, memory, src_mask=None, tgt_mask=None):
        # tgt shape: (batch_size, tgt_len)
        x = self.embedding(tgt)  # Embedding
        # x shape: (batch_size, tgt_len, d_model)
        x = self.positional_encoding(x)
        x = self.dropout(x)
        for layer in self.layers:
            x = layer(x, memory, src_mask, tgt_mask)
        return x  # Shape: (batch_size, tgt_len, d_model)
    
class InstDecoder(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, num_layers, vocab_size, max_len, dropout=0.1):
        super(InstDecoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.onehot_to_dmodel = nn.Linear(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_len)
        self.layers = nn.ModuleList([DecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.dropout = nn.Dropout(dropout)

    def forward(self, tgt, memory, src_mask=None, tgt_mask=None):
        # tgt shape: (batch_size, tgt_len, n_class)
        x = self.onehot_to_dmodel(tgt)  # Embedding
        # x shape: (batch_size, tgt_len, n_class, d_model)
        x = self.positional_encoding(x)
        x = self.dropout(x)
        for layer in self.layers:
            x = layer(x, memory, src_mask, tgt_mask)
        return x  # Shape: (batch_size, tgt_len, d_model)
    
class Transformer(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, d_model=512, num_heads=8, d_ff=2048, num_layers=6, max_len=5000, dropout=0.1):
        super(Transformer, self).__init__()
        self.encoder = Encoder(d_model, num_heads, d_ff, num_layers, src_vocab_size, max_len, dropout)
        self.decoder = Decoder(d_model, num_heads, d_ff, num_layers, tgt_vocab_size, max_len, dropout)
        self.out = nn.Linear(d_model, tgt_vocab_size)
        
    def generate_src_mask(self, src):
        # src shape: (batch_size, src_len)
        return (src != 0).unsqueeze(1).unsqueeze(2)  # Shape: (batch_size, 1, 1, src_len)

    def generate_tgt_mask(self, tgt):
        # tgt shape: (batch_size, tgt_len)
        tgt_len = tgt.size(1)
        tgt_mask = (tgt != 0).unsqueeze(1).unsqueeze(2)  # Padding mask: (batch_size, 1, 1, tgt_len)
        nopeak_mask = torch.tril(torch.ones((tgt_len, tgt_len), device=tgt.device)).bool()  # Look-ahead mask
        tgt_mask = tgt_mask & nopeak_mask.unsqueeze(0)  # Combined mask: (batch_size, 1, tgt_len, tgt_len)
        return tgt_mask

    def forward(self, src, tgt, src_mask=None, tgt_mask=None):
        # src shape: (batch_size, src_len)
        # tgt shape: (batch_size, tgt_len)
        
        src_mask = self.generate_src_mask(src)  # Generate source mask
        tgt_mask = self.generate_tgt_mask(tgt)  # Generate target mask

        memory = self.encoder(src, src_mask)  # Encoder output
        # memory shape: (batch_size, src_len, d_model)

        output = self.decoder(tgt, memory, src_mask, tgt_mask)  # Decoder output
        # output shape: (batch_size, tgt_len, d_model)

        return self.out(output)  # Final output projection, shape: (batch_size, tgt_len, tgt_vocab_size)
    
    
class InstEncoder(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, num_layers, vocab_size, inst_size, max_len, dropout=0.1):
        super(InstEncoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_len)
        self.layers = nn.ModuleList([EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.dropout = nn.Dropout(dropout)
        self.inst_proj = nn.Linear(d_model, inst_size)
        
    def generate_src_mask(self, src):
        # src shape: (batch_size, src_len)
        return (src != 0).unsqueeze(1).unsqueeze(2)  # Shape: (batch_size, 1, 1, src_len)

    def forward(self, src, src_mask=None):
        # src shape: (batch_size, src_len)
        x = self.embedding(src)  # Embedding
        # x shape: (batch_size, src_len, d_model)
        
        src_mask = self.generate_src_mask(src)  # Generate source mask
        
        x = self.positional_encoding(x)
        x = self.dropout(x)
        for layer in self.layers:
            x = layer(x, src_mask)
        x = self.inst_proj(x)
        return x  # Shape: (batch_size, src_len, inst_size)
    
class InstNoPEEncoder(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, num_layers, vocab_size, inst_size, max_len, dropout=0.1):
        super(InstNoPEEncoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_len)
        self.layers = nn.ModuleList([EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.dropout = nn.Dropout(dropout)
        self.inst_proj = nn.Linear(d_model, inst_size)
        
    def generate_src_mask(self, src):
        # src shape: (batch_size, src_len)
        return (src != 0).unsqueeze(1).unsqueeze(2)  # Shape: (batch_size, 1, 1, src_len)

    def forward(self, src, src_mask=None):
        # src shape: (batch_size, src_len)
        x = self.embedding(src)  # Embedding
        # x shape: (batch_size, src_len, d_model)
        
        src_mask = self.generate_src_mask(src)  # Generate source mask
        
        # x = self.positional_encoding(x)
        x = self.dropout(x)
        for layer in self.layers:
            x = layer(x, src_mask)
        x = self.inst_proj(x)
        return x  # Shape: (batch_size, src_len, inst_size)

class InstNormEncoder(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, num_layers, vocab_size, inst_size, max_len, dropout=0.1):
        super(InstEncoder, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model//2)
        self.norm_pos = nn.Embedding(100, d_model//2)
        self.positional_encoding = PositionalEncoding(d_model, max_len)
        self.layers = nn.ModuleList([EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.dropout = nn.Dropout(dropout)
        self.inst_proj = nn.Linear(d_model, inst_size)
        
    def generate_src_mask(self, src):
        # src shape: (batch_size, src_len)
        return (src != 0).unsqueeze(1).unsqueeze(2)  # Shape: (batch_size, 1, 1, src_len)

    def forward(self, src, src_mask=None):
        # src shape: (batch_size, src_len)
        x = self.embedding(src)  # Embedding
        norm_pos = self.embedding()
        # x shape: (batch_size, src_len, d_model)
        
        src_mask = self.generate_src_mask(src)  # Generate source mask
        
        # x = self.positional_encoding(x)
        x = self.dropout(x)
        for layer in self.layers:
            x = layer(x, src_mask)
        x = self.inst_proj(x)
        return x  # Shape: (batch_size, src_len, inst_size)

class InstTransformer(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, d_model=512, num_heads=8, d_ff=2048, num_layers=6, max_len=5000, dropout=0.1):
        super(InstTransformer, self).__init__()
        self.encoder = Encoder(d_model, num_heads, d_ff, num_layers, src_vocab_size, max_len, dropout)
        self.decoder = InstDecoder(d_model, num_heads, d_ff, num_layers, tgt_vocab_size, max_len, dropout)
        self.out = nn.Linear(d_model, tgt_vocab_size)
        # self.inst_num = 132
        
    def generate_src_mask(self, src):
        # src shape: (batch_size, src_len)
        return (src != 0).unsqueeze(1).unsqueeze(2)  # Shape: (batch_size, 1, 1, src_len)

    def generate_tgt_mask(self, tgt):
        # tgt shape: (batch_size, tgt_len, n_class)
        tgt = torch.mean(tgt, dim=2)
        tgt_len = tgt.size(1)
        tgt_mask = (tgt != 0).unsqueeze(1).unsqueeze(2)  # Padding mask: (batch_size, 1, 1, tgt_len)
        nopeak_mask = torch.tril(torch.ones((tgt_len, tgt_len), device=tgt.device)).bool()  # Look-ahead mask
        tgt_mask = tgt_mask & nopeak_mask.unsqueeze(0)  # Combined mask: (batch_size, 1, tgt_len, tgt_len)
        return tgt_mask

    def forward(self, src, tgt, src_mask=None, tgt_mask=None):
        # src shape: (batch_size, src_len)
        # tgt shape: (batch_size, tgt_len)
        
        src_mask = self.generate_src_mask(src)  # Generate source mask
        tgt_mask = self.generate_tgt_mask(tgt)  # Generate target mask

        memory = self.encoder(src, src_mask)  # Encoder output
        # memory shape: (batch_size, src_len, d_model)

        output = self.decoder(tgt, memory, src_mask, tgt_mask)  # Decoder output
        # output shape: (batch_size, tgt_len, d_model)

        return self.out(output)  # Final output projection, shape: (batch_size, tgt_len, tgt_vocab_size)
    
    def infer(self, src, tgt, length=1024):
        output = self.forward(src, tgt)

In [102]:
import torch
import json
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from sklearn.model_selection import train_test_split

class InstDataset(Dataset):
    def __init__(self, data):
        super().__init__()
        self.data = data
        inst_vocab_path = '/workspace/pj/data/vocabs/inst.json'
        chord_vocab_path = '/workspace/pj/data/vocabs/chord.json'
        with open(inst_vocab_path, 'r') as file:
            self.inst_vocab = json.load(file)
        with open(chord_vocab_path, 'r') as file:
            self.chord_vocab = json.load(file)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text_seq = self.data[idx]
        
        if isinstance(text_seq, str):
            toks = text_seq.split()
            
        l_toks = len(toks)
        ratio = 4
        chord_list = []
        inst_in_measure = []
        inst_list = []
        
        for idx in range(0, l_toks, ratio):
            t1, t2, t3, t4 = toks[idx : idx + 4]
            if t1[0] == 'H':
                chord_list.append(t1)

            if t4[0] == 'x' or t4[0] == 'X' or t4[0] == 'y' or t4 == '<unk>':
                inst_in_measure.append(t4)
                
            if (t1[0] == 'm' or t1[0] == 'M') and len(chord_list) > 0:
                inst_list.append(inst_in_measure)
                inst_in_measure = []
        inst_list.append(inst_in_measure)
        
        chord_tensor = [self.chord_vocab[chd] for chd in chord_list]
        inst_tensor, length = self.convert_inst_to_onehot(inst_list)
        
        target_chord_tensor = [2] + chord_tensor[:766] + [1]
        target_chord_tensor = torch.tensor(target_chord_tensor)
        
        target_inst_tensor = inst_tensor

        return target_chord_tensor, target_inst_tensor, length+2
    
    def convert_inst_to_onehot(self, inst_list):
        base_tensor = torch.zeros(len(inst_list), 133)
        bos_tensor = torch.zeros(1, 133)
        eos_tensor = torch.zeros(1, 133)
        bos_tensor[:,2] = 1
        eos_tensor[:,1] = 1
        
        for idx, inst_in_measure in enumerate(inst_list):
            if len(inst_in_measure) == 0:
                continue
            else:
                for inst in inst_in_measure:
                    base_tensor[idx, self.inst_vocab[inst]] = 1
        inst_tensor = torch.cat((bos_tensor,base_tensor[:766,:],eos_tensor), dim=0)
        return inst_tensor, len(inst_list)
    
def create_dataloaders(batch_size):
    raw_data_path = '../../../workspace/pj/data/corpus/raw_corpus_bpe.txt'
    # raw_data_path = '../../../workspace/data/corpus/first_5_lines_bpe.txt'
    raw_data = []
    with open(raw_data_path, 'r') as f:
        for line in tqdm(f, desc="reading original txt file..."):
            raw_data.append(line.strip())
            
    train, val_test = train_test_split(raw_data, test_size=0.1, random_state=5)
    val, test = train_test_split(val_test, test_size=0.2, random_state=5)
    # train, val_test = train_test_split(raw_data, test_size=0.5, random_state=5)
    # val, test = train_test_split(val_test, test_size=0.2)
    
    train_dataset = InstDataset(train)
    val_dataset = InstDataset(val)
    test_dataset = InstDataset(test)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate_batch)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, collate_fn=collate_batch)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=collate_batch)

    # return train_loader, True, True
    return train_loader, val_loader, test_loader

def collate_batch(batch):
    chords, insts, length = zip(*batch)
    # padding_value = <eos>
    chord_padded = pad_sequence(chords, padding_value=0, batch_first=True)
    inst_padded = pad_sequence(insts, padding_value=0, batch_first=True)
    return chord_padded, inst_padded, length



# train_loader, val_loader, test_loader = create_dataloaders(2)

# for chord, inst, length in train_loader:
#     print(chord)
#     print(chord.shape)
#     print(inst.shape)
#     print(inst[:,0])
#     print(inst[:,1])
#     print(inst[:,-1])
#     print(length)
#     break

In [105]:
def subset_accuracy(outputs, targets, lengths):
    # Apply sigmoid to outputs to get probabilities
    probs = torch.sigmoid(outputs)  # probs: [batch_size, max_length, num_classes]
    
    # Convert probabilities to binary predictions
    preds = (probs > 0.5).float()  # preds: [batch_size, max_length, num_classes]
    
    # Create a mask based on lengths to ignore padding positions
    mask = torch.zeros_like(preds, dtype=torch.bool)
    
    for idx, length in enumerate(lengths):
        mask[idx, :length, :] = 1  # Only consider up to the length specified for each batch

    ones_tensor = torch.ones_like(targets)
    zeros_tensor = torch.zeros_like(targets)
    # Calculate correct, extra, and lack predictions with masking
    correct = (preds == targets)
    z_z = ((correct == torch.logical_not(targets)) & mask).sum().item()
    o_o = ((correct == targets) & mask).sum().item()
    extra = ((preds == targets + 1) & mask).sum().item()
    lack = ((preds == targets - 1) & mask).sum().item()
    
    # Target - Pred    
    zero_zero = torch.sum(((targets == 0) & (preds == 0)) & mask).item()
    zero_one = torch.sum(((targets == 0) & (preds == 1)) & mask).item()
    one_zero = torch.sum(((targets == 1) & (preds == 0)) & mask).item()
    one_one = torch.sum(((targets == 1) & (preds == 1)) & mask).item()
    
    total_cnt = sum(lengths)
    
    return total_cnt, zero_zero, one_one, zero_one, one_zero

# 모델 데이터 변수 설정

In [112]:
name = f'Proj_2'
model = InstEncoder(d_model=512, num_heads=8, d_ff=2048, num_layers=6, vocab_size=150, inst_size=133, max_len=1024)
model.load_state_dict(torch.load('/workspace/pj/out/inst/Test/model_135_0.239615_0.147177.pt'))
train_loader, val_loader, test_loader = create_dataloaders(2)


reading original txt file...: 46188it [00:15, 3069.33it/s]


# 샘플 확인

In [113]:
torch.set_printoptions(profile="full")
for (chords, targets, lengths) in tqdm(train_loader, ncols=60):
    print(chords)
    print(targets)

    outputs = model(chords)
        
    total_cnt, z_z, o_o, extra, lack = subset_accuracy(outputs, targets, lengths)
    print(z_z)
    print(o_o)
    print(extra)
    print(lack)
    break

  0%|                             | 0/20785 [00:00<?, ?it/s]

tensor([[  2,  15,  44,  92,  61,  15,  44,  92,  61,  16,  92,  93,  61,  16,
          92,  93,  62,  15,  21,  92,  61,  15,  21,  92,  61,  16,  92,  92,
          65,  16,  92,  92,  61, 117,  92,  92,  61,  61,  21,  92,  16,  15,
          92,  92,  61,  15,  92,  92,  61,  15,  34,  92,  16,  15,  92,  93,
          61,  16,  37,  92,  84, 117,  37,  92, 116, 117,   7,  92,  61,  16,
          37,   6,  59,  15,  93,  93,  61,  15,  93,  93,  61,  16,  92,  92,
          65,  16,  92,  92,  61,  16,  92,  92,  61,  61, 118,  92,  61,  16,
          36,  92,  61,  16, 122,  92,  61, 100,  34,  92,  61,  16,  36,   7,
          61,  15,  92,  92,  61,  15,  92,  92,  61,  15,  92,  92,  61,  15,
          92,  92,  61,  16,  92,  92,  61,  15,  40,  92,  61,  16,  92,  92,
          61,  15,  93,  92,  61,  16,  36,   6,  16,  16,  36,  93,  62,  16,
          36,  37,  62,   1],
        [  2, 114, 114, 114, 133, 114, 114, 114,  62, 114, 114, 114,  62, 118,
         114, 118, 114

  0%|                             | 0/20785 [00:01<?, ?it/s]

30282
2571
1195
0





In [114]:
print(z_z)
print(o_o)
print(extra)
print(lack)

30282
2571
1195
0


In [115]:
probs = torch.sigmoid(outputs)  # probs: [batch_size, max_length, num_classes]
    
# Convert probabilities to binary predictions
preds = (probs > 0.5).float()

print(preds)

tensor([[[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
          0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0.,
          0., 1., 1., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
          0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0.,
          0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0

In [15]:
pos_weight = torch.ones([64])
print(pos_weight.shape)

torch.Size([64])


In [99]:
import torch

# Example tensors
tensor1 = torch.tensor([0, 1, 1, 0, 0, 1])
tensor2 = torch.tensor([0, 0, 1, 1, 0, 1])

# Calculate pairs
zero_zero = torch.sum((tensor1 == 0) & (tensor2 == 0)).item()
zero_one = torch.sum((tensor1 == 0) & (tensor2 == 1)).item()
one_zero = torch.sum((tensor1 == 1) & (tensor2 == 0)).item()
one_one = torch.sum((tensor1 == 1) & (tensor2 == 1)).item()

# Display the results
print(f"0-0: {zero_zero}")
print(f"0-1: {zero_one}")
print(f"1-0: {one_zero}")
print(f"1-1: {one_one}")


0-0: 2
0-1: 1
1-0: 1
1-1: 2


In [129]:
train_loader, val_loader, test_loader = create_dataloaders(64)


reading original txt file...: 46188it [00:19, 2410.54it/s]


In [130]:
device = torch.device("cuda:4" if torch.cuda.is_available() else "cpu")
total_measure_len = 0
total_inst_sum = torch.zeros([133]).int().to(device)
torch.set_printoptions(profile="full")
for (chords, targets, lengths) in tqdm(test_loader, ncols=60):

    targets = torch.sum(targets.int().to(device), dim=(0,1)).to(device)
    total_inst_sum = targets + total_inst_sum
    total_measure_len += sum(lengths)
    # total_cnt, z_z, o_o, extra, lack = subset_accuracy(outputs, targets, lengths)
print(total_measure_len)
print(total_inst_sum)

  0%|                                | 0/15 [00:00<?, ?it/s]

100%|███████████████████████| 15/15 [01:05<00:00,  4.35s/it]

114287
tensor([    0,   924,   924,     0, 36708,  1263,   397,   567,   905,    98,
         1206,     0,   814, 10657,   332,  6131,  5666,  4297,  3583,    55,
          230,   279,   438,  1548,   277,   635,   724,     0,  2289,  2315,
          587,  2254,   200,  1396,  1729,   157,  4099,  6838,  3694,   416,
          145,   217,   485,   499, 26980, 13975, 20846, 19990,   830,  7076,
         7811, 23216, 40163,   766,   915,   109,  7653,   712,   289,   116,
        39078, 39133, 34916,   777, 44946,  1672,   442,   111,  2366, 22267,
        19255, 15992, 35862,  3166, 35737, 48890, 12766, 51063,  1406,   504,
          121,    40,   274,   221,   827,   837,    57,    66,     0,    68,
            0,   202,   188,   596,   241,    56,    18,    14,   202,     3,
            0,    32,   129,   200,   236,     0,     3,    26,     0,   724,
           74,    69,   128,    36,    28,    10,   189,     0,     0,    90,
          167,   368,     0,    25,     0,     0,     0, 




In [131]:
for (chords, targets, lengths) in tqdm(val_loader, ncols=60):

    targets = torch.sum(targets.int().to(device), dim=(0,1)).to(device)
    total_inst_sum = targets + total_inst_sum
    total_measure_len += sum(lengths)
    # total_cnt, z_z, o_o, extra, lack = subset_accuracy(outputs, targets, lengths)
print(total_measure_len)
print(total_inst_sum)

100%|███████████████████████| 58/58 [04:54<00:00,  5.08s/it]

599763
tensor([     0,   4619,   4619,      0, 194689,   5963,   2904,   3006,   4043,
          1865,   5910,   1144,   4199,  46075,   2347,  30494,  26470,  20009,
         21011,   1575,   2001,   1447,   1673,   8121,   2218,   5168,   3859,
           589,  13026,  10995,   3762,  12184,   2909,   6010,   4936,    800,
         25684,  29864,  18695,   3947,   1035,   1270,   1680,   1725, 133476,
         83210, 112155, 110429,   6010,  35518,  39569, 123554, 209919,   5973,
          6197,   1437,  41111,   3282,   1407,    803, 206607, 194703, 184401,
          3298, 246582,   7145,   2799,   1283,  13321, 111674,  94783,  80731,
        202936,  19339, 194458, 263524,  72578, 274172,   5851,   5062,    262,
           433,   3253,   2275,   3255,   3337,   2120,    159,    176,    982,
           170,   2839,   1002,   2214,    785,   1160,    363,    239,    452,
           334,    696,    212,    259,    738,    527,    124,    301,    151,
           325,   2918,    121,  




In [6]:
import torch
insts = torch.tensor([     1,   4619,   4619,      1, 194689,   5963,   2904,   3006,   4043,
          1865,   5910,   1144,   4199,  46075,   2347,  30494,  26470,  20009,
         21011,   1575,   2001,   1447,   1673,   8121,   2218,   5168,   3859,
           589,  13026,  10995,   3762,  12184,   2909,   6010,   4936,    800,
         25684,  29864,  18695,   3947,   1035,   1270,   1680,   1725, 133476,
         83210, 112155, 110429,   6010,  35518,  39569, 123554, 209919,   5973,
          6197,   1437,  41111,   3282,   1407,    803, 206607, 194703, 184401,
          3298, 246582,   7145,   2799,   1283,  13321, 111674,  94783,  80731,
        202936,  19339, 194458, 263524,  72578, 274172,   5851,   5062,    262,
           433,   3253,   2275,   3255,   3337,   2120,    159,    176,    982,
           170,   2839,   1002,   2214,    785,   1160,    363,    239,    452,
           334,    696,    212,    259,    738,    527,    124,    301,    151,
           325,   2918,    121,    465,    474,    692,    592,     79,   1128,
            24,    720,    164,   1640,   1216,     15,    469,     20,     99,
           235,     76,     21,     96,    131,     28, 221543])

zeros = 599763 - insts
insts = insts.float()
pos = ((zeros/insts)).floor().int()
# print(zeros)
print(pos)
print(pos.shape)

tensor([599762,    128,    128, 599762,      2,     99,    205,    198,    147,
           320,    100,    523,    141,     12,    254,     18,     21,     28,
            27,    379,    298,    413,    357,     72,    269,    115,    154,
          1017,     45,     53,    158,     48,    205,     98,    120,    748,
            22,     19,     31,    150,    578,    471,    356,    346,      3,
             6,      4,      4,     98,     15,     14,      3,      1,     99,
            95,    416,     13,    181,    425,    745,      1,      2,      2,
           180,      1,     82,    213,    466,     44,      4,      5,      6,
             1,     30,      2,      1,      7,      1,    101,    117,   2288,
          1384,    183,    262,    183,    178,    281,   3771,   3406,    609,
          3527,    210,    597,    269,    763,    516,   1651,   2508,   1325,
          1794,    860,   2828,   2314,    811,   1137,   4835,   1991,   3970,
          1844,    204,   4955,   1288, 

In [137]:
for (chords, targets, lengths) in tqdm(train_loader, ncols=60):

    targets = torch.sum(targets.int().to(device), dim=(0,1)).to(device)
    total_inst_sum = targets + total_inst_sum
    total_measure_len += sum(lengths)
    # total_cnt, z_z, o_o, extra, lack = subset_accuracy(outputs, targets, lengths)
print(total_measure_len)
print(total_inst_sum)

100%|█████████████████████| 650/650 [49:32<00:00,  4.57s/it]

6084876
tensor([      0,   46188,   46188,       0, 1965170,   52627,   19953,   27346,
          43178,   12233,   56133,    8498,   46038,  446219,   30396,  316394,
         299141,  201873,  204341,   14132,   17016,   16367,   17421,   88733,
          21916,   45745,   32097,    9057,  121791,  110299,   37992,  132749,
          27637,   47773,   45304,    5699,  251588,  315148,  180803,   28895,
          11680,    9634,   19766,   15624, 1326497,  871168, 1146786, 1073349,
          83884,  355458,  372425, 1268948, 2122342,   81723,   68123,   16416,
         407334,   26430,    9717,    7823, 2069709, 1954102, 1836015,   34275,
        2426194,   74003,   24758,   12064,  130216, 1108359,  931372,  782923,
        2007699,  192456, 1959928, 2628553,  738617, 2753066,   52856,   37749,
           4444,    4338,   26182,   21912,   35167,   34416,   17968,    2895,
           1462,    8413,    1562,   25468,   11404,   14087,    6955,   11182,
           4150,    3995,    571




In [5]:
import torch

insts = torch.tensor([      1,   46188,   46188,       1, 1965170,   52627,   19953,   27346,
          43178,   12233,   56133,    8498,   46038,  446219,   30396,  316394,
         299141,  201873,  204341,   14132,   17016,   16367,   17421,   88733,
          21916,   45745,   32097,    9057,  121791,  110299,   37992,  132749,
          27637,   47773,   45304,    5699,  251588,  315148,  180803,   28895,
          11680,    9634,   19766,   15624, 1326497,  871168, 1146786, 1073349,
          83884,  355458,  372425, 1268948, 2122342,   81723,   68123,   16416,
         407334,   26430,    9717,    7823, 2069709, 1954102, 1836015,   34275,
        2426194,   74003,   24758,   12064,  130216, 1108359,  931372,  782923,
        2007699,  192456, 1959928, 2628553,  738617, 2753066,   52856,   37749,
           4444,    4338,   26182,   21912,   35167,   34416,   17968,    2895,
           1462,    8413,    1562,   25468,   11404,   14087,    6955,   11182,
           4150,    3995,    5718,    4049,   11097,    2869,    3242,    7486,
           6618,    1661,    3252,    2992,    6783,   29266,    2395,    8733,
           4926,    7649,   10733,    1368,   12724,     401,    8195,    2366,
          13414,   11592,    1480,    4097,     260,     756,    2571,     278,
            218,     793,     471,    1569, 2227362])

zeros = 6084876 - insts
insts = insts.float()
pos = torch.ceil((zeros/insts)).int()
# print(zeros)
print(pos)
print(torch.median(pos.float()))
print(torch.mean(pos.float()))
print(pos.shape)

tensor([6084875,     131,     131, 6084875,       3,     115,     304,     222,
            140,     497,     108,     716,     132,      13,     200,      19,
             20,      30,      29,     430,     357,     371,     349,      68,
            277,     133,     189,     671,      49,      55,     160,      45,
            220,     127,     134,    1067,      24,      19,      33,     210,
            520,     631,     307,     389,       4,       6,       5,       5,
             72,      17,      16,       4,       2,      74,      89,     370,
             14,     230,     626,     777,       2,       3,       3,     177,
              2,      82,     245,     504,      46,       5,       6,       7,
              3,      31,       3,       2,       8,       2,     115,     161,
           1369,    1402,     232,     277,     173,     176,     338,    2101,
           4162,     723,    3895,     238,     533,     431,     874,     544,
           1466,    1523,    1064,    15

In [6]:
import torch

# Example tensor
float_tensor = torch.tensor([1.5, 2.3, 3.7])

# Convert to integer tensor with rounding up
int_tensor = torch.ceil(float_tensor).int()

print(int_tensor)
print(float_tensor[:-1])


tensor([2, 3, 4], dtype=torch.int32)
tensor([1.5000, 2.3000])
