In [1]:
import pretty_midi

In [2]:
import numpy as np
import pandas as pd

In [None]:
# Build a C-major triad played by a cello and save it as a MIDI file.
cello_c_chord = pretty_midi.PrettyMIDI()
# Look up the program number for 'Cello' and create the instrument from it
cello_program = pretty_midi.instrument_name_to_program('Cello')
cello = pretty_midi.Instrument(program=cello_program)
# One Note per pitch name: each name is converted to its MIDI note number,
# and every note starts at 0s and ends at .5s
cello.notes.extend(
    pretty_midi.Note(velocity=100,
                     pitch=pretty_midi.note_name_to_number(pitch_name),
                     start=0,
                     end=.5)
    for pitch_name in ['C5', 'E5', 'G5']
)
# Attach the instrument to the PrettyMIDI object and write the file out
cello_c_chord.instruments.append(cello)
cello_c_chord.write('cello-C-chord.mid')

In [None]:
# Load a MIDI file, report a few summary statistics, transpose it and
# synthesize audio from the result.
midi_data = pretty_midi.PrettyMIDI('example.mid')
# Print an empirical estimate of its global tempo
print(midi_data.estimate_tempo())
# Compute the relative amount of each semitone across the entire song,
# a proxy for key. The chroma matrix is computed once and reused below.
chroma = midi_data.get_chroma()
total_velocity = sum(sum(chroma))
print([sum(semitone)/total_velocity for semitone in chroma])
# Shift all notes up by 5 semitones.
# NOTE: the pitches are modified in place, so re-running this cell without
# reloading the file shifts the notes again.
for instrument in midi_data.instruments:
    # Don't want to shift drum notes
    if not instrument.is_drum:
        for note in instrument.notes:
            note.pitch += 5
# Synthesize the resulting MIDI data using sine waves
audio_data = midi_data.synthesize()

In [3]:
# Load the MIDI file inspected by the following cells (rebinds `midi_data`).
midi_data = pretty_midi.PrettyMIDI('ashover_simple_chords_5.mid')

In [11]:
# Print the tempo estimate computed by pretty_midi for the loaded file.
print(midi_data.estimate_tempo())

192.0


In [9]:
# Apply the built-in sum to the chroma matrix and print the result.
# NOTE(review): presumably this adds the rows together, leaving one total per
# column -- confirm against the shape returned by get_chroma().
print(sum(midi_data.get_chroma()))

[105. 105. 105. ... 320. 320. 320.]


In [10]:
# Tempo estimate again, this time shown as the cell's output value rather than printed.
midi_data.estimate_tempo()

192.0

In [17]:
# Synthesize the MIDI data; the returned array is only displayed, not stored.
midi_data.synthesize()

array([0.        , 0.02263388, 0.04517781, ..., 0.        , 0.        ,
       0.        ])

In [16]:
# Print the full note list of every instrument in the file.
for i in midi_data.instruments:
    print(i.notes)

[Note(start=0.000000, end=0.248958, pitch=69, velocity=105), Note(start=0.250000, end=0.498958, pitch=74, velocity=105), Note(start=0.500000, end=0.748958, pitch=73, velocity=80), Note(start=0.750000, end=0.998958, pitch=74, velocity=80), Note(start=1.000000, end=1.248958, pitch=76, velocity=80), Note(start=1.250000, end=1.748958, pitch=78, velocity=95), Note(start=1.750000, end=2.248958, pitch=69, velocity=80), Note(start=2.250000, end=2.748958, pitch=71, velocity=105), Note(start=2.750000, end=3.248958, pitch=76, velocity=80), Note(start=3.250000, end=3.748958, pitch=73, velocity=95), Note(start=3.750000, end=4.248958, pitch=69, velocity=80), Note(start=4.250000, end=4.498958, pitch=74, velocity=105), Note(start=4.500000, end=4.748958, pitch=73, velocity=80), Note(start=4.750000, end=4.998958, pitch=74, velocity=80), Note(start=5.000000, end=5.248958, pitch=76, velocity=80), Note(start=5.250000, end=5.498958, pitch=78, velocity=95), Note(start=5.500000, end=5.748958, pitch=76, veloci

In [3]:
import pretty_midi
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as data
import os
import random

import sys

# Download the midi library from here: http://deeplearning.net/tutorial/rnnrbm.html
# and add its folder to the import path with sys.path.append, as done in the next cell.
# Direct link: http://www.iro.umontreal.ca/~lisa/deep/midi.zip


In [4]:
# Make the downloaded midi helper package importable.
# The path uses forward slashes throughout: a backslash before "CUHK_SZ_DL"
# would form the invalid escape sequence "\C" inside a normal string literal.
sys.path.append("C:/Users/seanz/Documents/WORKFILE/CUHKSZ/Deep Learning/SILDES/CUHK_SZ_DL/Final_project/music generation/1.rnn-sequence-generation/midi/")

from utils import midiread, midiwrite

In [5]:
def midi_filename_to_piano_roll(midi_filename):
    """Read a MIDI file and return its binarized, transposed piano roll.

    The file is parsed with ``midiread`` using ``dt=0.3``; the ``piano_roll``
    attribute of the result is transposed and every positive entry is set
    to 1.

    midi_filename -- path of the MIDI file, passed straight to ``midiread``.

    Returns the transposed piano roll. Callers in this file treat its second
    axis as the time axis.
    """
    binary_roll = midiread(midi_filename, dt=0.3).piano_roll.transpose()

    # Binarize in place: any pressed note (value > 0) becomes 1
    binary_roll[binary_roll > 0] = 1

    return binary_roll


def pad_piano_roll(piano_roll, max_length=132333, pad_value=0, num_pitches=88):
    """Pad a piano roll along its time axis up to a fixed length.

    piano_roll  -- 2-D array whose first axis is the pitch axis and whose
                   second axis is time.
    max_length  -- number of time steps in the returned array.
    pad_value   -- value written to every position past the original length.
    num_pitches -- number of rows in the returned array. Defaults to 88, the
                   row count this function has always produced.

    Returns a new float64 array of shape ``(num_pitches, max_length)`` whose
    leading columns hold ``piano_roll`` and whose remaining columns hold
    ``pad_value``.

    Raises ValueError if ``piano_roll`` has more than ``max_length`` time
    steps, since it could not be stored without truncation.
    """
    original_piano_roll_length = piano_roll.shape[1]

    if original_piano_roll_length > max_length:
        raise ValueError(
            "piano roll has %d time steps, which exceeds max_length=%d"
            % (original_piano_roll_length, max_length))

    # dtype is given explicitly so the result is float64 even when
    # pad_value is an int.
    padded_piano_roll = np.full((num_pitches, max_length), pad_value,
                                dtype=np.float64)
    padded_piano_roll[:, :original_piano_roll_length] = piano_roll

    return padded_piano_roll


class NotesGenerationDataset(data.Dataset):
    """Dataset producing next-step prediction samples from a folder of MIDI files.

    Every entry of ``midi_folder_path`` is treated as one sample. Each item is
    a triple ``(input, target, length)`` where the target is the input shifted
    by one time step, and both are padded to ``longest_sequence_length``.
    """

    def __init__(self, midi_folder_path, longest_sequence_length=1491):
        """Collect the file list of ``midi_folder_path``.

        midi_folder_path        -- folder whose entries are the samples.
        longest_sequence_length -- length every sample is padded to. Pass
                                   ``None`` to compute it from the files
                                   (this reads every file once).
        """
        self.midi_folder_path = midi_folder_path
        self.longest_sequence_length = longest_sequence_length

        # A real list is required here: on Python 3 ``map`` returns a one-shot
        # iterator that supports neither len() nor indexing, and would be
        # exhausted by update_the_max_length(). Sorting makes the
        # index -> file mapping reproducible, since os.listdir order is
        # arbitrary.
        self.midi_full_filenames = [
            os.path.join(midi_folder_path, filename)
            for filename in sorted(os.listdir(midi_folder_path))
        ]

        if longest_sequence_length is None:
            self.update_the_max_length()

    def update_the_max_length(self):
        """Recomputes the longest sequence constant of the dataset.

        Reads all the midi files from the midi folder and finds the max
        length. Raises ValueError (from ``max``) if the folder is empty.
        """
        self.longest_sequence_length = max(
            midi_filename_to_piano_roll(filename).shape[1]
            for filename in self.midi_full_filenames
        )

    def __len__(self):
        """Return the number of files in the dataset."""
        return len(self.midi_full_filenames)

    def __getitem__(self, index):
        """Load file ``index`` and return ``(input, target, length)``.

        input  -- FloatTensor, time-major after the final transpose, padded
                  with 0.
        target -- LongTensor of the same layout holding the piano roll shifted
                  one step ahead, padded with -100 (the default
                  ``ignore_index`` of ``nn.CrossEntropyLoss``).
        length -- LongTensor with the single unpadded sequence length.
        """
        midi_full_filename = self.midi_full_filenames[index]

        piano_roll = midi_filename_to_piano_roll(midi_full_filename)

        # -1 because the input/target pair is built by shifting one step
        sequence_length = piano_roll.shape[1] - 1

        # Target at time t is the input at time t + 1
        input_sequence = piano_roll[:, :-1]
        ground_truth_sequence = piano_roll[:, 1:]

        # Pad every sequence to the same length, otherwise batching won't work
        input_sequence_padded = pad_piano_roll(
            input_sequence, max_length=self.longest_sequence_length)

        ground_truth_sequence_padded = pad_piano_roll(
            ground_truth_sequence,
            max_length=self.longest_sequence_length,
            pad_value=-100)

        # Put the time axis first
        input_sequence_padded = input_sequence_padded.transpose()
        ground_truth_sequence_padded = ground_truth_sequence_padded.transpose()

        return (torch.FloatTensor(input_sequence_padded),
                torch.LongTensor(ground_truth_sequence_padded),
                torch.LongTensor([sequence_length]))

    
def post_process_sequence_batch(batch_tuple):
    """Sort a padded batch by decreasing length and trim it for packing.

    batch_tuple -- ``(input_sequences, output_sequences, lengths)``. The first
                   two are indexed here as (batch, time, features) tensors and
                   ``lengths`` as a (batch, 1) tensor of unpadded lengths.

    Returns a triple:
      * the inputs, sorted, trimmed to the longest sequence in the batch and
        transposed to (time, batch, features);
      * the outputs, sorted and trimmed, still (batch, time, features);
      * the sorted lengths as a plain list of Python ints.
    """
    input_sequences, output_sequences, lengths = batch_tuple

    # Split into per-sample chunks so the three tensors can be sorted together
    per_sample = zip(input_sequences.split(split_size=1),
                     output_sequences.split(split_size=1),
                     lengths.split(split_size=1))

    # Longest sequence first (sorted() is stable, so ties keep their order)
    per_sample_sorted = sorted(per_sample,
                               key=lambda sample: int(sample[2]),
                               reverse=True)

    sorted_inputs, sorted_outputs, sorted_lengths = zip(*per_sample_sorted)

    input_sequence_batch_sorted = torch.cat(sorted_inputs)
    output_sequence_batch_sorted = torch.cat(sorted_outputs)
    lengths_batch_sorted = torch.cat(sorted_lengths)

    # Trim the padded time axis down to the longest sequence of this batch.
    # The bound is converted to a plain int before it is used in the slice.
    longest = int(lengths_batch_sorted[0, 0])
    input_sequence_batch_sorted = input_sequence_batch_sorted[:, :longest, :]
    output_sequence_batch_sorted = output_sequence_batch_sorted[:, :longest, :]

    input_sequence_batch_transposed = input_sequence_batch_sorted.transpose(0, 1)

    # pytorch's api for rnns wants lengths to be a list of ints. A list is
    # built explicitly because on Python 3 ``map`` would only give a lazy
    # iterator.
    lengths_batch_sorted_list = [int(length) for length in lengths_batch_sorted]

    return input_sequence_batch_transposed, output_sequence_batch_sorted, lengths_batch_sorted_list

In [6]:
# Training split. longest_sequence_length is left at the class default, so no
# scan over the files happens at construction time.
# NOTE(review): the folder is an absolute, machine-specific path -- consider
# moving it to a configurable data directory.
trainset = NotesGenerationDataset('C:/Users/seanz/Documents/WORKFILE/CUHKSZ/Deep Learning/SILDES/CUHK_SZ_DL/Final_project/music generation/1.rnn-sequence-generation/Nottingham/Nottingham/train/')

# Shuffled batches of 120 samples; an incomplete final batch is dropped.
trainset_loader = torch.utils.data.DataLoader(trainset, batch_size=120,
                                              shuffle=True, num_workers=4, drop_last=True)

In [7]:
# Validation split. longest_sequence_length=None makes the dataset call
# update_the_max_length(), which reads every file in the folder up front.
# NOTE(review): the recorded output of this cell is
# "TypeError: It is not a valid midi file!" -- presumably the folder contains
# an entry the midi reader cannot parse; verify the folder contents.
valset = NotesGenerationDataset('C:/Users/seanz/Documents/WORKFILE/CUHKSZ/Deep Learning/SILDES/CUHK_SZ_DL/Final_project/music generation/1.rnn-sequence-generation/Nottingham/Nottingham/valid/', longest_sequence_length=None)

# Unshuffled batches of 30 samples; the final partial batch is kept.
valset_loader = torch.utils.data.DataLoader(valset, batch_size=30, shuffle=False, num_workers=4, drop_last=False)

TypeError: It is not a valid midi file!

In [8]:
class RNN(nn.Module):
    """LSTM that predicts, for every time step, a two-way score per note.

    Pipeline: linear encoder -> multi-layer LSTM run on packed sequences ->
    linear projection to ``num_classes`` scores -> each score ``s`` expanded to
    the pair ``(s, 1 - s)`` so that a two-class cross entropy can be applied.
    """

    def __init__(self, input_size, hidden_size, num_classes, n_layers=2):
        """Create the layers.

        input_size  -- number of features per time step of the input.
        hidden_size -- width of the encoder output and of the LSTM state.
        num_classes -- number of scores produced per time step.
        n_layers    -- number of stacked LSTM layers.
        """
        super(RNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        self.n_layers = n_layers

        self.notes_encoder = nn.Linear(in_features=input_size, out_features=hidden_size)

        self.lstm = nn.LSTM(hidden_size, hidden_size, n_layers)

        self.logits_fc = nn.Linear(hidden_size, num_classes)

    def forward(self, input_sequences, input_sequences_lengths, hidden=None):
        """Run the network on a padded, time-major batch.

        input_sequences         -- tensor of shape (time, batch, input_size).
        input_sequences_lengths -- unpadded length of every sequence, passed
                                   to ``pack_padded_sequence`` (which by
                                   default expects decreasing order).
        hidden                  -- optional initial LSTM state.

        Returns ``(logits_flatten, hidden)`` where ``logits_flatten`` has shape
        (batch * time * num_classes, 2) and ``hidden`` is the final LSTM state.
        """
        notes_encoded = self.notes_encoder(input_sequences)

        # Here we run rnns only on non-padded regions of the batch
        packed = torch.nn.utils.rnn.pack_padded_sequence(notes_encoded, input_sequences_lengths)
        outputs, hidden = self.lstm(packed, hidden)
        # unpack (back to padded); the returned lengths are not needed
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)

        # (time, batch, classes) -> (batch, time, classes)
        logits = self.logits_fc(outputs).transpose(0, 1).contiguous()

        # Since the BCE loss doesn't support masking, we use the crossentropy:
        # every score becomes a two-entry row (score, 1 - score)
        binary_logits = torch.stack((logits, 1 - logits), dim=3).contiguous()

        logits_flatten = binary_logits.view(-1, 2)

        return logits_flatten, hidden

In [10]:
# Model with 88 input features, 512 hidden units and 88 outputs per time step.
# It stays on the CPU here; the GPU variants are left commented out.
rnn = RNN(input_size=88, hidden_size=512, num_classes=88)
# rnn = rnn.cuda()
# Training loss.
criterion = nn.CrossEntropyLoss()
# criterion = nn.CrossEntropyLoss().cuda()

# Validation loss, created with size_average=False.
# NOTE(review): unlike the model and the training loss, this one is moved with
# .cuda() -- confirm that is intended. size_average is also flagged as
# deprecated in recent PyTorch in favour of reduction='sum'; check against the
# installed version.
criterion_val = nn.CrossEntropyLoss(size_average=False).cuda()

# Adam optimizer over all model parameters.
learning_rate = 0.001
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)




In [None]:
# Set up two live-updating plots (training and validation loss).
# NOTE(review): this cell depends on the external pytorch-segmentation-detection
# repository and on absolute paths from another machine -- verify they exist
# locally before running.
%matplotlib notebook

import sys, os
sys.path.append("/home/daniil/repos/pytorch-segmentation-detection/")
sys.path.insert(0, '/home/daniil/repos/pytorch-segmentation-detection/vision/')

from matplotlib import pyplot as plt

# Importing a list that can be bound to a figure and updates
# the figure when it is appended to
from pytorch_segmentation_detection.utils.visualization import VizList

# Create the figure with two stacked axes and bind one list to each
f, (loss_axis, validation_axis) = plt.subplots(2, 1)

# Start each axis with an empty line
loss_axis.plot([], [])
validation_axis.plot([], [])

loss_list = VizList()
val_list = VizList()

loss_list.bind_to_axis(loss_axis)
val_list.bind_to_axis(validation_axis)

loss_axis.set_title('Training Loss')
validation_axis.set_title('Validation Loss')

plt.tight_layout()