In [None]:
%tensorflow_version 2.x
import tensorflow as tf
print("Tensorflow version " + tf.__version__)

try:
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
  print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
except ValueError:
  raise BaseException('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!')

tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

In [1]:

!pip3 install torch matplotlib tqdm livelossplot gdown "pypianoroll>=1.0.2"

#!pip3 install py_midi
#import midi

Collecting livelossplot
  Downloading livelossplot-0.5.5-py3-none-any.whl (22 kB)
Collecting pypianoroll>=1.0.2
  Downloading pypianoroll-1.0.4-py3-none-any.whl (26 kB)
Collecting pretty-midi>=0.2.8
  Downloading pretty_midi-0.2.9.tar.gz (5.6 MB)
[K     |████████████████████████████████| 5.6 MB 7.2 MB/s 
Collecting mido>=1.1.16
  Downloading mido-1.2.10-py2.py3-none-any.whl (51 kB)
[K     |████████████████████████████████| 51 kB 5.6 MB/s 
Collecting ipython==7.*
  Downloading ipython-7.32.0-py3-none-any.whl (793 kB)
[K     |████████████████████████████████| 793 kB 53.0 MB/s 
Collecting prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0
  Downloading prompt_toolkit-3.0.29-py3-none-any.whl (381 kB)
[K     |████████████████████████████████| 381 kB 55.9 MB/s 
Building wheels for collected packages: pretty-midi
  Building wheel for pretty-midi (setup.py) ... [?25l[?25hdone
  Created wheel for pretty-midi: filename=pretty_midi-0.2.9-py3-none-any.whl size=5591955 sha256=30bf5f51c18a34e6ffa366

In [2]:
from IPython.display import clear_output
from ipywidgets import interact, IntSlider

import os, os.path, shutil
import zipfile
import random
import json
import pickle
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import torch
import pypianoroll
import pretty_midi
from pypianoroll import Multitrack, Track
from tqdm import tqdm
from livelossplot import PlotLosses
from livelossplot.outputs import MatplotlibPlot



import itertools
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import moviepy.editor as mpy
import music21
from IPython import display
import seaborn as sns

from google.colab import drive

drive.mount('/content/drive', force_remount=True)

Imageio: 'ffmpeg-linux64-v3.3.1' was not found on your computer; downloading it now.
Try 1. Download from https://github.com/imageio/imageio-binaries/raw/master/ffmpeg/ffmpeg-linux64-v3.3.1 (43.8 MB)
Downloading: 8192/45929032 bytes (0.0%)2449408/45929032 bytes (5.3%)4849664/45929032 bytes (10.6%)7561216/45929032 bytes (16.5%)10403840/45929032 bytes (22.7%)12558336/45929032 bytes (27.3%)14819328/45929032 bytes (32.3%)17178624/45929032 bytes (37.4%)19259392/45929032 bytes (41.9%)21078016/45929032 bytes (45.9%)22986752/45929032 bytes (50.0%)25673728/45929032 bytes (55.9%)28557312/45929032 bytes (62.2%)

In [24]:
# Data
n_tracks = 4  # number of tracks
n_pitches = 72  # number of pitches
lowest_pitch = 24  # MIDI note number of the lowest pitch
n_samples_per_song = 8  # number of samples to extract from each song in the datset
n_measures = 4  # number of measures per sample
beat_resolution = 4  # temporal resolution of a beat (in timestep)
programs = [0, 0, 0, 0]  # program number for each track
is_drums = [False, False, False, False]  # drum indicator for each track
track_names = ['Soprano', 'Alto', 'Tenor', 'Bass']  # name of each track
tempo = 100

# Training
batch_size = 16
latent_dim = 128
n_steps = 1000

# Sampling
sample_interval = 10  # interval to run the sampler (in step)
n_samples = 4

#Directories
!mkdir -p midiDataset

In [None]:
cwd = os.getcwd()  # Get the current working directory (cwd)
files = os.listdir(cwd)  # Get all the files in that directory
print("Files in %r: %s" % (cwd, files))

Files in '/content': ['.config', 'drive', 'midiDataset', 'sample_data']


In [9]:
"""Midi dataset."""

from typing import Tuple
from torch import Tensor

import torch
from torch import nn
from torch.utils.data import Dataset

import numpy as np
from music21 import midi
from music21 import converter
from music21 import note, stream, duration, tempo


class MidiDataset(Dataset):
    """MidiDataset.
    Parameters
    ----------
    path: str
        Path to dataset.
    split: str, optional (default="train")
        Split of dataset.
    n_bars: int, optional (default=2)
        Number of bars.
    n_steps_per_bar: int, optional (default=16)
        Number of steps per bar.
    """

    def __init__(
        self,
        path: str,
        split: str = "train",
        n_bars: int = 2,
        n_steps_per_bar: int = 16,
    ) -> None:
        """Initialize."""
        self.n_bars = n_bars
        self.n_steps_per_bar = n_steps_per_bar
        dataset = np.load(path, allow_pickle=True, encoding="bytes")[split]
        self.data_binary, self.data_ints, self.data = self.__preprocess__(dataset)

    def __len__(self) -> int:
        """Return the number of samples in dataset."""
        return len(self.data_binary)

    def __getitem__(self, index: int) -> Tensor:
        """Return one samples from dataset.
        Parameters
        ----------
        index: int
            Index of sample.
        Returns
        -------
        Tensor:
            Sample.
        """
        return torch.from_numpy(self.data_binary[index]).float()

    def __preprocess__(self, data: np.ndarray) -> Tuple[np.ndarray]:
        """Preprocess data.
        Parameters
        ----------
        data: np.ndarray
            Data.
        Returns
        -------
        Tuple[np.ndarray]:
            Data binary, data ints, preprocessed data.
        """
        data_ints = []
        for x in data:
            skip = True
            skip_rows = 0
            while skip:
                if not np.any(np.isnan(x[skip_rows: skip_rows + 4])):
                    skip = False
                else:
                    skip_rows += 4
            #print(x.shape)
            if self.n_bars * self.n_steps_per_bar < x.shape[0]:
                data_ints.append(x[skip_rows: self.n_bars * self.n_steps_per_bar + skip_rows, :])
        
        data_ints = np.array(data_ints)

        #print(data_ints.shape)
        self.n_songs = data_ints.shape[0]
        self.n_tracks = data_ints.shape[2]
        data_ints = data_ints.reshape([self.n_songs, self.n_bars * self.n_steps_per_bar, self.n_tracks])
        #print(data_ints.shape)
        max_note = 83
        mask = np.isnan(data_ints)
        data_ints[mask] = max_note + 1
        max_note = max_note + 1
        data_ints = data_ints.astype(int)
        print(data_ints.shape)
        
        num_classes = max_note + 1
        data_binary = np.eye(num_classes)[data_ints]
        data_binary[data_binary == 0] = -1
        data_binary = np.delete(data_binary, max_note, -1)
        data_binary = data_binary.transpose([0, 1, 3, 2])
        print(data_binary.shape)
        return data_binary, data_ints, data


def binarise_output(output: np.ndarray) -> np.ndarray:
    """Binarize output.
    Parameters
    ----------
    output: np.ndarray
        Output array.
    """
    max_pitches = np.argmax(output, axis=-1)
    return max_pitches


def postprocess(
    output: np.ndarray,
    n_tracks: int = 4,
    n_bars: int = 2,
    n_steps_per_bar: int = 16,
) -> stream.Score:
    """Postprocess output.
    Parameters
    ----------
    output: np.ndarray
        Output array.
    n_tracks: int, (default=4)
        Number of tracks.
    n_bars: int, (default=2)
        Number of bars.
    n_steps_per_bar: int, (default=16)
        Number of steps per bar.
    """
    parts = stream.Score()
    parts.append(tempo.MetronomeMark(number=66))
    max_pitches = binarise_output(output)
    midi_note_score = np.vstack([
        max_pitches[i].reshape([n_bars * n_steps_per_bar, n_tracks]) for i in range(len(output))
    ])
    for i in range(n_tracks):
        last_x = int(midi_note_score[:, i][0])
        s = stream.Part()
        dur = 0
        for idx, x in enumerate(midi_note_score[:, i]):
            x = int(x)
            if (x != last_x or idx % 4 == 0) and idx > 0:
                n = note.Note(last_x)
                n.duration = duration.Duration(dur)
                s.append(n)
                dur = 0
            last_x = x
            dur = dur + 0.25
        n = note.Note(last_x)
        n.duration = duration.Duration(dur)
        s.append(n)
        parts.append(s)
    #print(parts)
    return parts

In [28]:
def save_pianoroll_as_midi(pianoroll,
                  programs=programs,
                  track_names=track_names,
                  is_drums=is_drums,
                  tempo=tempo,           # in bpm
                  beat_resolution=beat_resolution,  # number of time steps
                  destination_path="/content/midiDataset/"
                  ):
    
    pianoroll = pianoroll > 0

    print(pianoroll.shape)

    # Reshape batched pianoroll array to a single pianoroll array
    pianoroll_ = pianoroll.reshape((-1, pianoroll.shape[2], pianoroll.shape[3]))

    print(pianoroll_.shape)

    measure_resolution = 4 * beat_resolution
    tempo_array = np.full((4 * 4 * measure_resolution, 1), tempo)

    # Create the tracks

    #for item in pianoroll_:
    tracks = []
    for idx in range(pianoroll_.shape[2]):
        tracks.append(pypianoroll.BinaryTrack(
            track_names[idx], programs[idx], is_drums[idx], pianoroll_[..., idx])) 

      
    multitrack = pypianoroll.Multitrack(
        tracks=tracks, tempo=tempo_array, resolution=beat_resolution)
      
        #print(multitrack)
        
    multitrack.write('./midiDataset/test.mid')
    print('Midi saved to ', destination_path)
    return destination_path    
    

In [31]:
from torch.utils import data
from numpy.core.fromnumeric import shape
d = np.load("/content/drive/MyDrive/Colab Notebooks/Honours code/Jsb16thSeparated.npz", allow_pickle=True, encoding = 'latin1')

path = ("/content/drive/MyDrive/Colab Notebooks/Honours code/Jsb16thSeparated.npz")

dataset = MidiDataset(path=path).data_binary

#dataset_2 = postprocess(output)
#print(dataset)
print(d.files)
train = d['train']
test = d['test']
valid = d['valid']

#print(dataset.data)
#MidiDataset.__getitem__(dataset, 2)
save_pianoroll_as_midi(dataset)
#print(train[7].shape)
#print(track)

#show_pianoroll(track)



(229, 32, 4)
(229, 32, 84, 4)
['test', 'train', 'valid']
(229, 32, 84, 4)
(7328, 84, 4)
Midi saved to  /content/midiDataset/


'/content/midiDataset/'

In [29]:
training_data = np.load('/content/drive/MyDrive/Colab Notebooks/Honours code/train-1.npy')
#print(training_data[0].shape)

save_pianoroll_as_midi(training_data)

(229, 32, 128, 4)
(7328, 128, 4)
Midi saved to  /content/midiDataset/


'/content/midiDataset/'

In [None]:
#display pianoroll

multitrack.plot()