In [1]:
import os
from music21 import converter, instrument, note, chord

def load_abc_files(directory):
    melody_data = []

    # Iterate through all files in the directory
    for filename in os.listdir(directory)[:100]:
        try:
            if filename.endswith('.abc'):
                file_path = os.path.join(directory, filename)
                # Parse the ABC file
                score = converter.parse(file_path)
    
                # Extract notes and durations
                for element in score.flatten().notes:
                    if isinstance(element, note.Note):
                        melody_data.append((str(element.nameWithOctave), str(element.quarterLength)))
                    elif isinstance(element, chord.Chord):
                        # For chords, take the first note and its duration
                        melody_data.append((str(element.notes[0].nameWithOctave), str(element.quarterLength)))
        except:
            continue

    return melody_data

# Load ABC files
abc_directory = 'abc_files'
melody_tokens = load_abc_files(abc_directory)

# Format the data into the desired 1D array
formatted_melody = []
for note, duration in melody_tokens:
    formatted_melody.append(note)
    formatted_melody.append(duration)

In [2]:
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Separate notes and durations
notes = formatted_melody[::2]  # Every second item starting from index 0
durations = formatted_melody[1::2]  # Every second item starting from index 1

# Encode notes and durations
note_encoder = LabelEncoder()
duration_encoder = LabelEncoder()

note_encoder.fit(notes)
duration_encoder.fit(durations)

# Convert to numerical format
encoded_notes = note_encoder.transform(notes)
encoded_durations = duration_encoder.transform(durations)

# Combine into a single array
tokens = np.array([encoded_notes, encoded_durations]).flatten()

print(tokens)

[31 31  6 ...  8 12 11]


In [3]:
seq_length = 10  # Length of each input sequence
sequences = []
for i in range(len(tokens) - seq_length):
    sequences.append(tokens[i:i + seq_length + 1])  # +1 for the target

sequences = np.array(sequences)

print(sequences)

[[31 31  6 ... 19  6 32]
 [31  6 19 ...  6 32 40]
 [ 6 19  6 ... 32 40 28]
 ...
 [ 8 12  2 ...  2  0  8]
 [12  2  0 ...  0  8 12]
 [ 2  0  0 ...  8 12 11]]


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim

class MelodyTransformer(nn.Module):
    def __init__(self, n_tokens, d_model=32, n_heads=1, n_layers=1, dropout=0.5):
        super(MelodyTransformer, self).__init__()
        self.embedding = nn.Embedding(n_tokens, d_model)
        self.positional_encoding = nn.Parameter(torch.zeros(1, 500, d_model))  # Adjust max length as needed
        self.transformer = nn.Transformer(d_model, n_heads, num_encoder_layers=n_layers, num_decoder_layers=n_layers, dropout=dropout, batch_first=True)
        self.fc_out = nn.Linear(d_model, n_tokens)

    def forward(self, src, tgt):
        src_emb = self.embedding(src) + self.positional_encoding[:, :src.size(1), :]
        tgt_emb = self.embedding(tgt) + self.positional_encoding[:, :tgt.size(1), :]
        
        output = self.transformer(src_emb, tgt_emb)
        
        output = self.fc_out(output)
        return output

# Initialize model
n_tokens = len(note_encoder.classes_) + len(duration_encoder.classes_)
model = MelodyTransformer(n_tokens)

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

MelodyTransformer(
  (embedding): Embedding(156, 32)
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)
          )
          (linear1): Linear(in_features=32, out_features=2048, bias=True)
          (dropout): Dropout(p=0.5, inplace=False)
          (linear2): Linear(in_features=2048, out_features=32, bias=True)
          (norm1): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.5, inplace=False)
          (dropout2): Dropout(p=0.5, inplace=False)
        )
      )
      (norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
    )
    (decoder): TransformerDecoder(
      (layers): ModuleList(
        (0): TransformerDecoderLayer(
          (self_attn):

In [9]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Hyperparameters
epochs = 1
batch_size = 32
learning_rate = 0.001

# Prepare dataset
X = torch.tensor(sequences[:, :-1], dtype=torch.long).to(device)  # Input sequences on GPU
y = torch.tensor(sequences[:, 1:], dtype=torch.long).to(device)   # Target sequences on GPU
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Training loop
for epoch in range(epochs):
    model.train()  # Set the model to training mode
    total_loss = 0

    for batch_X, batch_y in dataloader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)  # Move batch data to GPU
        optimizer.zero_grad()  # Zero the gradients

        # Forward pass
        output = model(batch_X, batch_y)  # Pass src and tgt to the model

        # Compute loss
        loss = criterion(output.view(-1, n_tokens), batch_y.view(-1))  # Reshape for loss calculation
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights

        total_loss += loss.item()  # Accumulate loss

    # Print average loss for the epoch
    print(f'Epoch {epoch + 1}, Loss: {total_loss / len(dataloader):.4f}')

Epoch 1, Loss: 0.1098


In [13]:
import torch.nn.functional as F

def convert_to_note_duration(generated_sequence, note_encoder, duration_encoder):
    generated_notes = note_encoder.inverse_transform(generated_sequence[::2])
    generated_durations = duration_encoder.inverse_transform(generated_sequence[1::2])

    melody = []
    for note, duration in zip(generated_notes, generated_durations):
        melody.append(str(note))
        melody.append(str(duration))

    return melody

def apply_duration_penalty(logits, max_duration=16.0):
    # Create a penalty factor based on the duration values
    # Assuming the logits correspond to the indices of durations
    duration_penalty = torch.arange(len(logits)).float()  # Create a tensor of indices
    penalty = (duration_penalty / max_duration) ** 2  # Example penalty: quadratic scaling
    penalty = penalty.to(logits.device)  # Move penalty to the same device as logits

    # Apply the penalty to the logits
    logits -= penalty  # Subtract penalty from logits to reduce the probability of higher durations
    return logits

def generate_melody(model, seed, length=20, device='cpu', temperature=1.0, top_k=None, valid_tokens=None):
    model.eval()  # Set the model to evaluation mode
    generated = seed.tolist()  # Start with the seed sequence
    input_seq = torch.tensor(seed, dtype=torch.long).unsqueeze(0).to(device)  # Shape: (1, seq_length)

    # Set to track recent sequences
    recent_sequences = set()
    max_repeats = 3

    n_sequences = 0
    while n_sequences < length:
        with torch.no_grad():  # Disable gradient calculation
            output = model(input_seq, input_seq)
            logits = output[:, -1, :] / temperature
            
            # Apply duration penalty
            logits = apply_duration_penalty(logits)

            # Apply top-k sampling if specified
            if top_k is not None:
                top_k_values, top_k_indices = torch.topk(logits, top_k)
                top_k_probs = F.softmax(top_k_values, dim=-1)
                next_token = torch.multinomial(top_k_probs, num_samples=1)
                next_token = top_k_indices[0, next_token]  # Get the original token index
            else:
                probabilities = F.softmax(logits, dim=-1)
                next_token = torch.multinomial(probabilities, num_samples=1)

            # Convert next_token to a simple integer
            next_token = next_token.item()

            # Convert next_token to its corresponding string label
            if next_token < len(note_encoder.classes_):
                if next_token >= len(note_encoder.classes_):
                    continue
                next_token_str = note_encoder.classes_[next_token]
            else:
                if next_token >= next_token - len(note_encoder.classes_):
                    continue
                next_token_str = duration_encoder.classes_[next_token - len(note_encoder.classes_)]

            # Check if the generated token is in the list of valid tokens
            if next_token_str not in valid_tokens:
                print("invalid_token")
                continue  # Skip this token if it's not valid

            # Check for recent sequences
            generated_sequence = tuple(generated[-2:] + [next_token])  # Consider last 2 + next token
            if generated_sequence in recent_sequences:
                print("repeats")
                # If the sequence is already in the recent set, skip this token
                continue

            # Add the generated token to the list
            generated.append(next_token)

            # Update recent sequences
            recent_sequences.add(generated_sequence)
            if len(recent_sequences) > max_repeats:
                recent_sequences.pop()  # Keep the set size manageable

            # Update input_seq for the next iteration
            input_seq = torch.cat((input_seq, torch.tensor([[next_token]], dtype=torch.long).to(device)), dim=1)
            n_sequences += 1
            # print(f"Progress: {(n_sequences/length)*100}%")
    return generated

seed = sequences[0, :-1]  # Use the first sequence as a seed
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Ensure the device is set

valid_tokens = np.concatenate((note_encoder.classes_, duration_encoder.classes_)).tolist()
generated_sequence = generate_melody(model, seed, length=200, device=device, temperature=1.5, top_k=10, valid_tokens=valid_tokens)

# Decoding the generated sequence
melody = convert_to_note_duration(generated_sequence, note_encoder, duration_encoder)
print("Generated Melody:", melody)

Generated Melody: ['E4', '13.0', 'A4', '10.5', 'A4', '13.125', 'E4', '10.5', 'C5', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'A4', '0.875', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '1.25', 'B-5', '

In [None]:
from music21 import stream, note, meter, tempo, midi

def melody_to_midi(melody, output_file='generated_melody.mid', tempo_bpm=120, time_signature='4/4'):
    # Create a music21 stream
    melody_stream = stream.Stream()

    # Add a tempo mark
    melody_stream.append(tempo.MetronomeMark(number=tempo_bpm))

    # Add a time signature
    time_signature_parts = time_signature.split('/')
    time_signature_obj = meter.TimeSignature(time_signature)
    melody_stream.append(time_signature_obj)

    # Iterate over the melody and create notes and durations
    for i in range(0, len(melody), 2):
        note_name = melody[i]  # Note
        duration_value = float(melody[i + 1])  # Duration

        # Create a music21 note
        new_note = note.Note(note_name)
        new_note.quarterLength = duration_value  # Set the duration

        # Append the note to the stream
        melody_stream.append(new_note)

    # Write the stream to a MIDI file
    melody_stream.write('midi', fp=output_file)
    print(f'MIDI file saved as {output_file}')

melody_to_midi(melody, output_file='generated_melody.mid', tempo_bpm=360, time_signature='4/4')