# Make Lots of Chorales from "Schmücke dich, o liebe Seele" by JS Bach BWV 180 
<p>The goals of this round:
    
- Create many 16 voice chorales and save them to files
    
 </p>
<p>Please note that the Csound instance in this notebook requires sample files of a Bösendorfer piano, which are licensed and cannot be included in the repository. The calls to Csound won't work without significant installation and configuration work.</p>

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data
import matplotlib.pyplot as plt
import pandas as pd
import mido
import time
from midi2audio import FluidSynth
from IPython.display import Audio, display
import os
import muspy
import piano 
import subprocess
from numpy.random import default_rng
rng = default_rng(42) # random seed in parens.

# Run the model on the first CUDA GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Short alias for the functional softmax.
softmax = torch.nn.functional.softmax

# Prefix prepended to 'midi_files/<name>' when Chorale.play saves a MIDI file.
base_dir = ''
# Csound orchestra/score file read by load_csound_piano().
CSD_FILE = 'goldberg_aria1.csd'
# NOTE(review): not referenced in the visible part of the notebook - confirm it is still needed.
NOTES_FILE = "goldberg_aria1.mac.csv"
# Log file name handed to piano.start_logger() in load_csound_piano().
LOGNAME = 'goldberg5.log'

In [None]:
print(rng.standard_normal())

## Load the csd file in prep for sending notes to Csound
This function creates a csound instance by loading a csd file, 
returning the ctcsound.CsoundPerformanceThread object and the ctcsound.Csound object
the caller can then send score events to the pt (CsoundPerformanceThread) object, and 
when finished, close the ctcsound.Csound object.
Caller needs to assemble a python list that looks like this:

<code>
#                  Inst Start  Dur   Vel   Ton  Oct  Voic Ste Envl Glis Upsm Renv 2-gl 3r-gl  Mult chan
# pfields contains [1, 0.3125, 1.0, 68.0, 90.0, 5.0, 1.0, 7.0, 0.0, 0.0, 0.0, 1.0, 0.0, 4.0, 10.0, 2.0]

</code>

In [None]:
def load_csound_piano():
    """
    Start logging, read the csd file named by CSD_FILE and hand its content
    to piano.load_csound.

    Returns the tuple (pt, cs): the second and first values returned by
    piano.load_csound, in that (swapped) order.
    """
    piano.start_logger(fname=LOGNAME)
    log = piano.logging
    log.info(f'Logging messages to: {LOGNAME}')

    csd_text, line_count = piano.load_csd(CSD_FILE)
    log.info(f'Loaded the csd file {CSD_FILE}. There are {line_count} lines read containg {len(csd_text)} bytes')

    csound_obj, perf_thread = piano.load_csound(csd_text)
    return (perf_thread, csound_obj)

In [None]:
# Global dimensions shared by the Chorale class, the model and the sampling helpers.

I = 4 # number of voices
T = 32 # length of samples (32 = two 4/4 measures in 1/16th note increments)
# Pitches are stored as offsets 0..56; the notebook adds/subtracts 30 to convert
# to/from MIDI note numbers (presumably MIDI 30..86 - confirm against the training data).
P = (86-30) +1 # number of different pitches
print(f'I voices: {I}, T sample length: {T}, P number of distinct pitches in the input chorales: {P}')

In [None]:
# Convert a piano roll of shape (voices, time_steps) into a MIDI file object.
# Each entry is either np.nan (a rest) or a MIDI pitch in 1..127; a value of 0
# is also treated as a rest, as the original truthiness checks did.
#
# Generalised from the original four-voice version to any number of voices.
# The chorales assembled in this notebook are 16 voices by 264 time steps.
def piano_roll_to_midi(piece):
    """
    Build a mido.MidiFile from a piano roll.

    piece is array-like of shape (voices, time_steps). Entry (v, t) is the
    MIDI pitch sounded by voice v during the t-th 1/16th note; np.nan or 0
    means the voice is silent. Consecutive equal pitches are tied into one
    held note.

    Returns a mido.MidiFile holding one tempo track followed by one track per
    voice, in row order. Call its .save() method to write a midi file.

    Raises ValueError if piece is not two-dimensional.
    """
    # Accept lists as well as arrays; float dtype lets np.nan mark rests.
    piece = np.asarray(piece, dtype=float)
    if piece.ndim != 2:
        raise ValueError(
            f'piece must have shape (voices, time_steps), got {piece.shape}')

    bpm = 50
    microseconds_per_beat = 60 * 1000000 / bpm
    # mido's default is 480 ticks per beat and each column is a 16th note
    ticks_per_sixteenth = 120

    mid = mido.MidiFile()

    # create a track containing tempo data
    metatrack = mido.MidiTrack()
    metatrack.append(mido.MetaMessage('set_tempo',
                                      tempo=int(microseconds_per_beat), time=0))
    mid.tracks.append(metatrack)

    # One track per row of the roll. The voice count comes from the argument
    # itself rather than from a notebook global.
    for voice_row in piece:
        track = mido.MidiTrack()
        mid.tracks.append(track)
        track.append(mido.Message(
            'program_change', program=0, time=0)) # choir aahs=52, piano = 0

        sounding = None  # pitch currently held by this voice, None while resting
        elapsed = 0      # ticks since the last message written to this track
        for value in voice_row:
            pitch = None if (np.isnan(value) or value == 0) else int(value)
            if pitch != sounding:
                if sounding is not None:
                    track.append(mido.Message('note_off', note=sounding,
                                              velocity=64, time=elapsed))
                    elapsed = 0
                if pitch is not None:
                    track.append(mido.Message('note_on', note=pitch,
                                              velocity=64, time=elapsed))
                    elapsed = 0
            sounding = pitch
            elapsed += ticks_per_sixteenth

        # Release whatever is still held so the final note does not hang.
        if sounding is not None:
            track.append(mido.Message('note_off', note=sounding,
                                      velocity=64, time=elapsed))

    return mid

In [None]:
class Chorale:
    """
    A class to store and manipulate an array self.arr that stores a chorale.

    self.arr has shape (I, T) with integer pitch offsets in range(P);
    self.one_hot is the matching one-hot array of shape (I, T, P).
    """
    def __init__(self, arr, subtract_30=False):
        # arr is an integer array of shape (I, T) = (4, 32) with values in range(0, 57).
        # Pass subtract_30=True when arr still holds MIDI note numbers.
        self.arr = arr.copy()
        if subtract_30:
            self.arr -= 30

        # the one_hot representation of the array
        reshaped = self.arr.reshape(-1)
        self.one_hot = np.zeros((I*T, P))
        r = np.arange(I*T)
        self.one_hot[r, reshaped] = 1
        self.one_hot = self.one_hot.reshape(I, T, P)

    def to_image(self):
        # visualize the four tracks as images, one subplot per voice
        fig, axs = plt.subplots(1, 4)
        for index, title in enumerate(('soprano', 'alto', 'tenor', 'bass')):
            # transpose so pitch runs vertically, then flip so high pitches are on top
            roll = self.one_hot[index].transpose()
            axs[index].imshow(np.flip(roll, axis=0), cmap='hot', interpolation='nearest')
            axs[index].set_title(title)
        fig.set_figheight(5)
        fig.set_figwidth(15)
        return fig, axs

    def play(self, filename='midi_track.mid'):
        # display an in-notebook widget for playing audio
        # saves the midi file as a file named filename in base_dir/midi_files

        # piano_roll_to_midi takes (voices, time_steps), which is the layout
        # self.arr already has, so only the +30 shift back to MIDI numbers is needed.
        midi_arr = self.arr + 30
        midi = piano_roll_to_midi(midi_arr)
        midi.save(base_dir + 'midi_files/' + filename)
        # NOTE(review): play_midi is not defined in the visible part of the notebook,
        # and it is given the path without base_dir - confirm that is intended.
        play_midi('midi_files/' + filename,10)

    def elaborate_on_voices(self, voices, model):
        # voices is an iterable of voice indices (0, 1, 2 or 3) to keep fixed.
        # The kept voices are copied from self.arr, the others start random and
        # are resampled by harmonize(); returns whatever harmonize() returns.
        mask = np.zeros((I, T))
        y = np.random.randint(P, size=(I, T))
        for i in voices:
            mask[i] = 1
            y[i] = self.arr[i].copy()
        return harmonize(y, mask, model)

    # I think we could improve this scoring method. It's pretty lame.
    def score(self):
        # Returns (consonance_score, note_score):
        #   consonance_score counts, over every time step, the voice pairs whose
        #   interval (mod 12) is marked consonant in the table below;
        #   note_score counts the pitch changes between adjacent time steps.
        consonance_dict = {0: 1, 1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 0,
                           7: 1, 8: 1, 9: 1, 10: 0, 11: 0}
        # Take the dimensions from the array instead of hard-coding 4 and 32.
        num_voices, num_steps = self.arr.shape

        consonance_score = 0
        for k in range(num_steps):
            for i in range(num_voices):
                for j in range(i):
                    interval = int(self.arr[i, k] - self.arr[j, k]) % 12
                    consonance_score += consonance_dict[interval]

        note_score = int(np.count_nonzero(self.arr[:, 1:] != self.arr[:, :-1]))
        return consonance_score, note_score
        
# harmonize a melody
def harmonize(y, C, model):
    """
    Generate an artificial Bach chorale by repeated resampling.

    y is the starting array of pitches and C is a 0/1 array of the same
    (I, T) shape. Positions where C == 1 are kept exactly as given in y;
    every other position is resampled by model.pred over 2*I*T rounds.
    In each round a random subset of the free positions is additionally
    treated as known; the probability of a position staying unknown starts
    at .999 and decays linearly to .001 over the first 3/4 of the rounds.

    Example: to harmonize the soprano line, fill y[0] with the soprano,
    leave the rest of y random, and set C[0] to 1 and C[1:] to 0.
    """
    model.eval()
    with torch.no_grad():
        total_rounds = int(2*I*T)
        alpha_max = .999
        alpha_min = .001
        eta = 3/4

        current = y
        for round_index in range(total_rounds):
            # probability that a free position stays unknown in this round
            p = np.maximum(alpha_min, alpha_max - round_index*(alpha_max-alpha_min)/(eta*total_rounds))
            extra_known = np.random.choice(2, size = C.shape, p=[p, 1-p])

            # known positions for this round: the fixed ones plus the sampled extras
            known = C.copy()
            known += extra_known
            known[C==1] = 1

            proposal = model.pred(current, known)
            # positions treated as known keep their previous value
            proposal[known==1] = current[known==1]
            current = proposal
        return current
    
def generate_random_chorale(model): #
    """
    Sample a brand new chorale: start harmonize() from uniformly random
    pitches with an all-zero mask, so that no position is held fixed.
    """
    random_start = np.random.randint(P, size=(I, T)).astype(int)
    nothing_fixed = np.zeros((I, T)).astype(int)
    return harmonize(random_start, nothing_fixed, model)

In [None]:
hidden_size = 32

class Unit(nn.Module):
    """
    Residual block: conv -> batchnorm -> relu -> conv -> batchnorm, then the
    block input is added back before the final relu. The channel count
    (hidden_size) and the spatial size are unchanged.
    """
    def __init__(self):
        super().__init__()
        # Attribute names and creation order are kept as they were so that
        # previously saved state dicts still match.
        self.conv1 = nn.Conv2d(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(hidden_size)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.batchnorm2 = nn.BatchNorm2d(hidden_size)
        self.relu2 = nn.ReLU()

    def forward(self, x):
        hidden = self.relu1(self.batchnorm1(self.conv1(x)))
        hidden = self.batchnorm2(self.conv2(hidden))
        # residual connection around both convolutions
        return self.relu2(hidden + x)
    
    

class Net(nn.Module):
    """
    A CNN where you input a starter chorale and a mask and it outputs a prediction for the values
    in the starter chorale away from the mask that are most like the training data.

    Layout: an initial conv/batchnorm/relu, sixteen residual Units
    (attributes unit1 .. unit16) and a final fully connected layer.
    """
    # Number of residual units. The attribute names unit1..unit16 are part of
    # the saved checkpoint's keys, so they must not change.
    _NUM_UNITS = 16

    def __init__(self):
        super(Net, self).__init__()
        self.initial_conv = nn.Conv2d(2*I, hidden_size, 3, padding=1)
        self.initial_batchnorm = nn.BatchNorm2d(hidden_size)
        self.initial_relu = nn.ReLU()
        for k in range(1, self._NUM_UNITS + 1):
            setattr(self, f'unit{k}', Unit())
        self.affine = nn.Linear(hidden_size*T*P, I*T*P)

    def forward(self, x, C):
        # x is a tensor of shape (N, I, T, P)
        # C is a tensor of 0s and 1s of shape (N, I, T)
        # returns a tensor of shape (N, I, T, P)

        # get the number of batches
        N = x.shape[0]

        # tile the array C out to a tensor of shape (N, I, T, P)
        tiled_C = C.view(N, I, T, 1)
        tiled_C = tiled_C.repeat(1, 1, 1, P)

        # mask x and combine it with the mask to produce a tensor of shape (N, 2*I, T, P)
        y = torch.cat((tiled_C*x, tiled_C), dim=1)

        # apply the convolution and relu layers
        y = self.initial_conv(y)
        y = self.initial_batchnorm(y)
        y = self.initial_relu(y)
        for k in range(1, self._NUM_UNITS + 1):
            y = getattr(self, f'unit{k}')(y)

        # reshape before applying the fully connected layer
        y = y.view(N, hidden_size*T*P)
        y = self.affine(y)

        # reshape to (N, I, T, P)
        y = y.view(N, I, T, P)

        return y

    def pred(self, y, C):
        # y is an array of shape (I, T) with integer entries in [0, P)
        # C is an array of shape (I, T) consisting of 0s and 1s
        # the entries of y away from the support of C should be considered 'unknown'
        # returns an integer array of shape (I, T): one pitch sampled per position
        # from the softmax of the network output

        # x is shape (I, T, P) one-hot representation of y
        compressed = y.reshape(-1)
        x = np.zeros((I*T, P))
        r = np.arange(I*T)
        x[r, compressed] = 1
        x = x.reshape(I, T, P)

        # prep x and C for plugging into the model
        x = torch.as_tensor(x, dtype=torch.float32, device=device).reshape(1, I, T, P)
        C2 = torch.as_tensor(C, dtype=torch.float32, device=device).reshape(1, I, T)

        # plug x and C2 into the model
        with torch.no_grad():
            logits = self.forward(x, C2).view(I, T, P).cpu().numpy().astype(np.float64)

        # Softmax over the pitch axis. Subtracting the per-position maximum
        # leaves the result unchanged but keeps exp() from overflowing.
        logits -= logits.max(axis=-1, keepdims=True)
        probs = np.exp(logits)
        probs /= probs.sum(axis=-1, keepdims=True)
        cum_probs = np.cumsum(probs, axis=-1) # shape (I, T, P)

        # Inverse-CDF sampling: the number of cumulative values <= u is the
        # index of the first one > u. Clamp to P-1 in case rounding leaves
        # the last cumulative value just below u.
        u = np.random.rand(I, T) # shape (I, T)
        sampled = (cum_probs <= u[..., np.newaxis]).sum(axis=-1)
        return np.minimum(sampled, P - 1)

In [None]:
model = Net().to(device) # need this in order to load the model.

In [None]:
# Load the previously trained weights from model1.pt in the working directory.
# map_location makes a checkpoint that was saved on a GPU load on whichever
# device this session actually uses (including a CPU-only machine).
model.load_state_dict(torch.load('model1.pt', map_location=device))

In [None]:
def pad_number(n):
    """
    Return n as a string left-padded with zeros to at least five characters,
    so that numbered file names sort correctly (7 -> '00007').

    Numbers that already need five or more digits are returned unpadded.
    """
    # The earlier ceil(log10(n)) digit count was one short for exact powers
    # of ten, producing six-character names such as '000001' and '000010'.
    return str(n).zfill(5)


## Decompression of model output back to the 2 1/2 measure segment.
<p>This section turns the 32-slot output of the model back into a 40-slot segment. We compress each segment before it goes into the model, so we decompress it when it comes out. Decompression does several things:
    
- expands the end of segment for 0,1,2,3 and convert it to 4x40 array
- fixes the end of the 4,5,6th by adding padding to convert to a 4x40 array
    
This will be called just after emerging from the model and before the midi file is written    

In [None]:
def decompress_end_of_segment(this_segment):
    # Expects one segment of shape (4, 32) and returns a new (4, 40) integer array.
    # Slots 0-23 are copied unchanged; each of the last 8 slots (24-31) is
    # written twice in a row, stretching them over slots 24-39.
    compact = np.asarray(this_segment)

    expanded = np.zeros((4, 40), dtype=int) # shape of the segment before compression
    expanded[:, :24] = compact[:4, :24]
    expanded[:, 24:] = np.repeat(compact[:4, 24:32], 2, axis=1)

    return expanded # all four voices, all 40 slots

def decompress(arr):
    # arr holds the 7 segments of the chorale, each of shape (4, 32).
    # Returns a new (7, 4, 40) integer array:
    #   segments 0-3 (the four 2 1/2 measure phrases) have their last 8 slots
    #   stretched back out to 16 by decompress_end_of_segment;
    #   segments 4-6 are copied and padded with 8 trailing zeros.
    restored = np.zeros((7, 4, 40), dtype=int)

    # only the first four segments were compressed on the way in
    for index in range(min(len(arr), 4)):
        restored[index] = decompress_end_of_segment(arr[index])

    zero_tail = np.zeros((4, 8))  # pad the end of the segment with zeros
    for index in (4, 5, 6):
        restored[index] = np.concatenate((arr[index], zero_tail), axis=1)
    return restored

## Transpose from the key of C to the original key
This is done to restore the key of the input midi file. I found that model inputs in the key of C are harmonized much better than those in other keys. I thought the model handled this by being trained on transpositions to different keys, but my experience suggests otherwise.
Add the value of root (F is 5) to each note in the array, with the exception of the 0's, which have to remain the same 0. 

In [None]:
def transpose_up_segment(my_segment,root):
    # Returns a copy of my_segment in which every entry greater than 0 has
    # root added to it. Entries of 0 (rests / padding) are left untouched.
    shifted = np.copy(my_segment)
    sounding = shifted > 0
    shifted[sounding] = shifted[sounding] + root
    return shifted

def transpose_up(segments,root):
    """
    Transpose every segment up by root semitones.

    segments is an array of segments (for example shape (7, 4, 40)). Returns
    a copy in which every entry greater than 0 has root added; entries of 0
    (rests / padding) stay 0. The input is not modified. This applies the
    same rule as transpose_up_segment, to all segments at once.
    """
    # The earlier loop never advanced its segment counter, so only index 0
    # was ever written and the other segments came back untransposed.
    transposed = np.copy(segments)
    sounding = transposed > 0
    transposed[sounding] = transposed[sounding] + root
    return transposed

## MIDI helper functions 
1. Save an array to a midi file
2. Generate a random chorale
3. Harmonize one particular line in a chorale and let the model figure out the other notes.

In [None]:
# Save an array as a midi file. It can be any length
def save_midi_chorale(prediction, id_number):
    """
    Save an existing chorale in a midi file named {id_number}midi.mid,
    where id_number is zero-padded by pad_number().

    prediction is transposed before it is handed to piano_roll_to_midi.
    NOTE(review): piano_roll_to_midi indexes its argument as (voice, time),
    so the transpose implies prediction arrives as (time, voices) - confirm
    against the callers, some of which appear to pass (voices, time).
    """
    # Keep the data as an ndarray: piano_roll_to_midi uses .shape and [v, i]
    # indexing, neither of which works on the nested list .tolist() produced.
    piano_roll = np.asarray(prediction).transpose()
    midi_output = piano_roll_to_midi(piano_roll)
    save_name = str(pad_number(id_number)) + 'midi.mid'
    midi_output.save(save_name)
    

# generate a totally random chorale out of whole cloth
def save_midi_random(id_number):
    """
    Sample a new chorale from the global model and write it to a midi file.
    """
    # the model works with pitch offsets, so shift by 30 to get midi note numbers
    midi_pitches = generate_random_chorale(model) + 30
    save_midi_chorale(midi_pitches, id_number)
    
# keep one voice, and choose the other voices as you might if you were Bach on deadline.
def save_midi_harm(base_chorale, keep, id_number):
    """
    Keep the voice with index keep and let the global model harmonize the
    other three around it, then decompress, transpose up by the global root,
    add 30 and save the result as a midi file.

    base_chorale is passed to Chorale() unchanged, so it has to hold pitch
    offsets already (midi note number minus 30); this function does not
    subtract 30 itself. That keeps the model from seeing a value above 56.

    NOTE(review): elaborate_on_voices returns a single segment, while
    decompress iterates over a stack of 7 segments - confirm this path still
    works as intended.
    """
    harmonized = Chorale(base_chorale).elaborate_on_voices([keep], model)
    restored = transpose_up(decompress(harmonized), root)
    save_midi_chorale(restored + 30, id_number)
    

In [None]:
import logging
def start_logger(fname = 'coconet.log'):
    """
    Send all log records at DEBUG level and above to the file fname.

    The file is opened in 'w' mode, so it is truncated on every call. A
    handler that an earlier call attached for the same file is removed and
    closed first; without that, re-running the cell would write every record
    once per earlier call and leak open file handles.
    """
    import os  # local import so the function works regardless of cell order

    logger = logging.getLogger()
    target = os.path.abspath(fname)
    for handler in list(logger.handlers):
        if isinstance(handler, logging.FileHandler) and handler.baseFilename == target:
            logger.removeHandler(handler)
            handler.close()

    fhandler = logging.FileHandler(filename=fname, mode='w')
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fhandler.setFormatter(formatter)
    logger.addHandler(fhandler)
    logger.setLevel(logging.DEBUG)
start_logger()  

## Messing with BWV 180 Schmucke dich, o liebe Seele
This next section divides the above-named chorale by Bach into segments that are two measures long, to match the model input dimensions. Some have to be compressed, some lengthened, and some are already two measures long. The requirement is for segments of 32 1/16th notes.
<img src='BWV 180 Schmücke dich, o liebe Seele.png' width="800" height="400">
<p>Here is a table of segments that I will create</p>

| segment | note# | measures | start & | end keys |
| :-: | :-: | :-: | :-: | :-: |
| 0 | 1-9 | 2 1/2 | F | F |
| 1 | 10-18 | 2 1/2 | C | F | 
| 2 | 19-27 | 2 1/2 | F | C |
| 3 | 28-36 | 2 1/2 | F | C |
| 4 | 37-44 | 2 | C | C |
| 5 | 44-52 | 2 | A min | C |
| 6 | 53 | 1 | C | C |

<p>Segments 0 through 3 need to have the final 16 1/16th notes reduced to 8 1/16th notes, and then get the output of the model stretched back out again. The segments 4 and 5 don't need adjusting. Segment 6 needs to be doubled in length, by repeating the last measure a second time. When played, the chord will just be held a bit longer.


In [None]:
!ls -lth '/home/prent/Downloads/chorales_018007b_(c)greentree.mid'

In [None]:
# load the BWV 180 Schmucke dich, o liebe Seele Chorale - nice variety of phrase lengths.
# load a midi file into a list called sample - load the entire file, all tracks, all notes in all tracks
# if the midi file has a key signature, it will print what it is. 
# the notes will be transposed by the loader to the key of C, by subtracting the root from each note. F = 5
file_name = '/home/prent/Downloads/chorales_018007b_(c)greentree.mid'

## Load a midi file into a numpy array
Set certain values:

- the numpy array of the whole piece is stored in the variable "sample"
- store the root key and mode (F major, for example)
- print the values of the time signature (it must be 4/4 or you will need to do some extra work), the quarter note clicks, and the clicks per 1/16th note
- any transpositions that must be performed to restore the original key
- print the first 5 notes in each voice
- print the shape of the variable "sample" containing the whole midi file

In [None]:
# read in a midi file, check the key, load into piano roll, set up np.array containing Nx4 sample.
# calling program should slice the returned array as needed to create two measure segments for sending into the prediction model.

def midi_to_input(midi_file):
    """
    Read a midi file and sample it once per 1/16th note into a (N, 4) array.

    Returns (sample, root, mode):
      sample - integer array of shape (time_steps, 4). Column 3 holds the
               lowest sounding pitch at that step, column 2 the next one up,
               and so on; slots with no note stay 0. Every pitch has root
               subtracted, i.e. the piece is transposed down into C.
      root   - root of the first key signature in the file, or 0 when the
               file has none.
      mode   - mode of the first key signature, or "major" when the file
               has none.

    When more than four notes sound at once only the four lowest are kept.
    A trailing partial 1/16th note at the end of the file is ignored.
    """
    music = muspy.read(midi_file)
    if music.key_signatures: # check if the midi file includes a key signature - some don't
        root = music.key_signatures[0].root
        mode = music.key_signatures[0].mode # major or minor
    else:
        print('Warning: no key signature found. Assuming C major')
        mode = "major"
        root = 0
    if music.time_signatures: # check if the midi file includes a time signature - some don't
        numerator = music.time_signatures[0].numerator
        denominator = music.time_signatures[0].denominator
    else:
        print('Warning: no time signature found. Assuming 4/4')
        numerator = 4
        denominator = 4
    # turn it into a boolean piano roll with one row per click in the midi file
    piano_roll = muspy.to_pianoroll_representation(music,encode_velocity=False)
    q = music.resolution # quarter note value in this midi file.
    q16 = q // 4 # my desired resolution is by 1/16th notes
    print(f'time signatures: {numerator}/{denominator}')
    time_steps = piano_roll.shape[0] // q16
    print(f'music.resolution is q: {q}. q16: {q16} time_steps: {time_steps} 1/16th notes')
    sample= np.zeros(shape=(time_steps,4)).astype(int) # default is float unless .astype(int)

    # Look at the first click of every 1/16th note. Iterating over time_steps
    # (rather than over every q16-th click of the roll) keeps the row index
    # inside sample even when the roll length is not a multiple of q16.
    for time_interval in range(time_steps):
        sounding = np.flatnonzero(piano_roll[time_interval * q16]) # midi note numbers, ascending
        # lowest note -> voice 3 (bass), then upwards; zip stops after four
        # voices so extra notes cannot wrap around to negative indices
        for voice, pitch in zip(range(3, -1, -1), sounding):
            sample[time_interval][voice] = pitch - root # transposed by the key if not C which is 0
    return (sample,root,mode)

In [None]:
# Read the midi file into the (time_steps, 4) array "sample" and remember its key.
sample, root, mode = midi_to_input(file_name) # sample is time interval, voice
# Note names indexed by pitch class, used only to print the original key.
keys = ['C ','C#','D ','D#','E ','F ','F#','G ','G#','A ','A#','B ']
print(f'{file_name}, \n{keys[root]} {mode} transposed into C and then used to create the segments')
# Print the first five time steps, one line per step with all voices of that step.
i = 0
for t in sample: # for each time interval
    i += 1
    for v in t: # for each voice
        print(v,' ' , end='')
    print('')
    if i > 4: break

print(f'sample.shape: {sample.shape}. dtype(sample): {type(sample[0,0])}')

## Divide the sample into segments based on phrase length
In this case, the 1st four segments are 2 1/2 measures long. That Bach guy was full of surprises. The next two are repeats and can be discarded for now. The 4th and 5th are 2 measures long, which is what the model expects. The final one is the closing chord. At the end of this cell, you have a variable called "segment" which contains an array of 0 through 6 segments of the piece, each with 40 time slots for each of 4 voices.

In [None]:
# sample is a piano roll of pitches in 1/16th note intervals, indexed as (time interval, voice).
# This cell cuts it into 7 phrase segments stored in "segment" with shape (7, 4, 40).

seg_num = 0 # index into the segment array
segment = np.zeros((7,4,40),dtype=int)  # seg_num, voices, 1/16th note values
print(f'seg_num\tlength\tstart\tend')
pad8 = np.zeros((8,4)) # 8 zero rows (time steps) for each of four voices, used to pad segments 4 & 5 to 40 slots

phrase_len = int(4 * 4 * 2.5) # the first phrases are 2 1/2 measures long: 4*4*2.5 = 40 1/16th notes
for i in range(6): # six 40-slot phrases in the sample; four of them become seg_num 0,1,2,3
    start = i * phrase_len 
    end = (i + 1) * phrase_len
    if i in (2,3): # phrases 2 & 3 repeat the first two phrases, so they are skipped
        pass
        # print(f'Ignore segments 2 & 3 they are repeats. seg_num: {seg_num}')
    else:
        print(f'{seg_num}\t{phrase_len}\t{start}\t{end-1}')
        transfer = sample[start:end]
        segment[seg_num] = transfer.transpose() # (40, 4) -> (4, 40)
        seg_num += 1
    
phrase_len = int(4 * 4 * 2) # 32 1/16th notes   
for i in range(6, 8): # seg_num: 4 & 5
    start = end # continue where the previous phrase stopped
    end = (start + phrase_len)
    print(f'{seg_num}\t{phrase_len}\t{start}\t{end-1}')
    transfer = np.concatenate((sample[start:end],pad8),axis=0) # 32 real slots followed by 8 slots of zero padding. The padding is ignored later.
    segment[seg_num] = transfer.transpose()
    seg_num += 1

phrase_len = int(4 * 2) # 8 1/16th note slots taken from the closing chord
for i in range(8,9): # seg_num 6
    start = end 
    end = (start + phrase_len)
    print(f'{seg_num}\t{phrase_len}\t{start}\t{end-1}')
    transfer = sample[start:end], # the trailing comma makes this a 1-tuple, so that transfer*5 below is a tuple of five copies
    transfer = np.concatenate(transfer*5) # put 5 copies of the 8 1/16th notes one after the other to fill out 40 slots. Ignore the later slots.
    segment[seg_num] = transfer.transpose()

## Compress the 40 slot segments down to 32 slots
This is done to match the model requirements. We create a helper function that compresses the last 16 slots down to 8 by skipping every other note in the 16. This is not as crude as the clipping that was done in the model, but it loses some information that cannot be recovered upon decompression. At the end of this process, we have a 7,4,32 array with 7 segments that are all 32 1/16th notes in length, in a variable called "sub_segment".

In [None]:
# This function takes a (voices, 40) array and returns a new (voices, 32) array.
# It compresses the last 16 slots into 8 slots by keeping every other slot.
def compress_end_of_segment(input_array):
    """
    Compress the tail of a 40-slot segment so it fits in 32 slots.

    Slots 0-23 are kept unchanged; of slots 24-39 only the even ones
    (24, 26, ..., 38) are kept, so 16 1/16th notes shrink to 8.

    input_array is array-like of shape (voices, 40). A new array of shape
    (voices, 32) is returned; input_array itself is left untouched (the
    earlier version overwrote slots 24-31 of the caller's array and
    returned a view into it).
    """
    source = np.asarray(input_array)
    untouched_head = source[:, :24]
    thinned_tail = source[:, 24:40:2]
    return np.concatenate((untouched_head, thinned_tail), axis=1)

In [None]:
# compress segments 0,1,2,3 from 40 slots to 32 slots for all four voices
# Segments 4, 5 & 6 are not compressed here; they are only cut to their first 32 slots at the end of the cell.
# print(segment)
print(segment.shape)
pad8 = np.reshape(pad8,(4,8)) # pad8 was created as (8,4); it is needed as (4,8) to pad along the time axis
for seg_num in range(4): # we need to take the 40 slot arrays and reduce them to 32 slots.
    print(f'seg_num: {seg_num} before compression') 
    print(f'segment[{seg_num}]: {segment[seg_num][0]}')
    my_segment = compress_end_of_segment(segment[seg_num])
    print('after compression')
    print(f'my_segment: {my_segment[0]}')
    # store the 32 compressed slots followed by 8 zeros so the row still fits the 40-slot array
    segment[seg_num] = np.concatenate((my_segment,pad8),axis=1)
sub_segment = segment[:,:,:32] # chop off the 33-40'th 1/16th note in the piano roll leaving 32 slots    

In [None]:
sub_segment.shape

## Harmonize to the bass part
This next cell does everything the previous one did, except that it also adds harmonized versions of the segments.

- Takes the segments of the original midi file, which have been compressed down to 7 segments of 32 1/16th notes.
- passes that through the harmonization function of the chorale class, preserving the bass line, letting the model figure out new soprano, alto, and tenor lines.
- decompresses the prediction output by taking the last 8 slots and turning them into 16 slots for a total of 40 in the segment
- transposes the segments back to the original key. 
- concatenates all the segments into one long array. Make sure to only include 32 slots for segments 4 and 5. 
<code>trans_segment[4,:,:32],trans_segment[5,:,:32]</code>
- passes that to generate a midi file
- plays the midi file

## Create a 16xN array stacking 4 predictions on top of each other
What comes in is a 7x4x40 array of notes: 7 segments, each with 4 voices, and each voice with 40 notes in each segment. Segments 4 & 5 are padded with 8 additional zero notes, which have to be stripped away. I create a set of 4 predictions using only 32 of the notes in each segment. The predictions are based on harmonization of one part, the one specified by keep = 3, which is the bass part. I stack those 4 predictions on top of each other to create a 16 voice array of 16 by 264 notes. This cell takes about 10 minutes to execute.

In [None]:
%%time
def stack_em(sub_segment,keep):
    """Stack four model harmonizations of every segment into one 16-voice roll.

    For each segment in ``sub_segment`` the model is asked four times to
    elaborate on the voice index ``keep``.  Every prediction is transposed
    with ``transpose_up_segment`` and stored in a 40-slot buffer; the four
    buffers are reshaped into one (16, 40) block per segment.  The blocks are
    finally joined along the time axis, taking only the first 32 slots of
    segments 4 and 5.

    Relies on the notebook-level names ``Chorale``, ``model``, ``root``,
    ``decompress_end_of_segment`` and ``transpose_up_segment``.

    Args:
        sub_segment: iterable of per-segment note arrays (at most 7 segments fit
            into the preallocated buffer).
        keep: index of the voice handed to ``elaborate_on_voices``.

    Returns:
        Integer array with 16 rows and 4*40 + 2*32 + 40 = 264 columns.
    """
    n_predictions = 4
    slots = 40
    zero_tail = np.zeros((4, 8))  # fills slots 32-39 of the segments that are not decompressed
    predictions = np.zeros((n_predictions, 4, slots), dtype=int)  # the four transposed predictions of one segment
    stacked = np.zeros((7, 16, slots), dtype=int)  # one 16-voice block per segment

    for seg_index, seg in enumerate(sub_segment):
        print(f'\nbeginning prediction for segment {seg_index}')
        needs_expansion = seg_index < 4  # only segments 0-3 go through decompress_end_of_segment
        for pred in range(n_predictions):
            # author's note: this model call takes about 31 seconds of wall clock time per prediction.
            # 30 is subtracted before the model sees the notes and added back afterwards.
            prediction = Chorale(seg - 30).elaborate_on_voices([keep], model) + 30
            print(f'prediction {pred} complete. last 8 notes of the soprano part {pred}: {prediction[0,24:]}')
            if needs_expansion:
                transposed = transpose_up_segment(decompress_end_of_segment(prediction), root)
            else:
                # the transposed (4, 32) result is padded so it fits the (4, 40) buffer
                short = transpose_up_segment(prediction[:, :32], root)
                transposed = np.concatenate((short, zero_tail), axis=1)
            predictions[pred] = transposed
        stacked[seg_index] = predictions.reshape(16, slots)

    # Concatenate rather than reshape because segments 4 and 5 contribute only 32 slots.
    pieces = [
        stacked[0],
        stacked[1],
        stacked[2],
        stacked[3],
        stacked[4, :, :32],
        stacked[5, :, :32],
        stacked[6],
    ]
    return np.concatenate(pieces, axis=1)

In [None]:
# Build eight chorales per preserved voice index and store each one in its own file.
# The outer range is 1, so only voice index 0 is generated here.
for v in range(1):
    for i in range(8):
        concat_chorale = stack_em(sub_segment, v)
        # np.save appends ".npy" automatically when the name does not already end with it
        outfile = f'saved_chorale{v}{i}.npy'
        np.save(outfile, concat_chorale)
!ls -lth *.npy
    # saved_chorale = np.load(outfile)

## Play a piano roll of any dimension
This function takes in an array of (voices,time_steps) of indeterminate shape. It sends notes to csound, which renders them to an audio wave file.
<p>There are several adjustments that can be made. Velocity is the proxy for how hard the keys are pressed. If it's less than 62, then the csound instance chooses extremely soft samples, as if the player is barely touching the keys. Here is a complete breakdown of velocity and sample sets:</p>
    
| velocity | sample set | max volume |
| :-: | :-: | :-: |
| 60 | 25 | 24 | 
| 62 | 31 | 16 |    
| 64 | 39 | 15 |
| 66 | 47 | 13 |    
| 68 | 63 | 10 |
| 70 | 78 | 10 |
| 72 | 85 | 10 |
 
<p>Notice that the range of the velocity is very small. Anything 60 or less uses the 25 sample set, which is very soft and quiet, while anything 72 or more uses the 85 sample set. There are three additional sample sets at 99, 113, and 127, which I left out of the csound csd file to reduce storage. I think it might be better to eliminate some of the higher notes, every other sample, and include the louder ones, but that's a job for another day.</p>

In [None]:
def piano_roll_to_csound(piece,velocity,volume,tpq):
    """Send a piano roll to Csound as note events and display the rendered audio.

    Each row of ``piece`` is one voice and each column one 1/16th-note time
    step.  Consecutive equal values within a voice are merged into one note
    event.  Values below 12 (octave 0, which includes 0) advance the clock but
    produce no event.  The final run of every voice is emitted as well, so a
    roll that does not end in padding keeps its last notes.

    Relies on the notebook-level names ``load_csound_piano``, ``piano``,
    ``rng``, ``Audio`` and ``display``.

    Args:
        piece: iterable of voices, each an iterable of integer note numbers.
        velocity: base velocity; every note gets ``np.random.randint(-3, 2)``
            added so that neighbouring sample sets are mixed.
        volume: value placed in the volume p-field of every event.
        tpq: time per quarter note; one time step lasts ``tpq / 4``.

    Returns:
        None.  The function logs and sends the events, waits, stops the
        performance thread and displays the wav file.
    """
    # Same file name as LOGNAME, which load_csound_piano passes to the logger.
    log_file = 'goldberg5.log'
    try:
        os.remove(log_file) # make sure the log starts over with a fresh log file
    except FileNotFoundError:
        pass
    pt,cs = load_csound_piano() # load the csd file and return a performance thread and a Csound instance, start the logger.
    piano.logging.info('ins star dur vel ton oc voi stero env glis upsa rEnv 2nd 3rd vol chan')
    tp16th = tpq / 4 # time per 1/16th note
    hold = 0.2 # how long to hold to make more of a legato
    pfields = []
    start_time = 0 # defined up front so the summary print below also works for an empty piece

    def queue_note(note, start, length, voice_index):
        """Append one score event for ``note`` unless it lies in octave 0 or below."""
        octave = note // 12
        if octave <= 0:
            return
        tone = note - 12 * octave
        random_velocity = velocity + np.random.randint(-3,2) # choose different sample sets based on greater or lesser velocity
        random_start = start + round(rng.standard_normal()/75,5) # small random offset of the onset
        if random_start < 0: random_start = 0
        stereo = voice_index // 4 * 4 + 2 # 2 2 2 2 6 6 6 6 10 10 10 10 14 14 14 14 to space out the pianos
        #               inst start        duration       velocity         tone  octave      voice ster   env  glis upsa rEnv 2nd  3rd  mult    chan
        pfields.append([1, random_start, length + hold, random_velocity, tone, octave - 1, 1.0, stereo, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, volume, 1])

    for v, voice in enumerate(piece): # once for each voice
        prev_note = None
        duration = 0
        start_time = 0
        for note in voice: # one note for each time step, e.g. [69 69 69 69 67 67 67 67 ...]
            if prev_note is None:
                prev_note = note
            if note == prev_note:
                duration += tp16th # add another 1/16th note duration to the current run
            else:
                queue_note(prev_note, start_time, duration, v)
                start_time += duration
                duration = tp16th
                prev_note = note
        if prev_note is not None:
            # flush the run that was still open when the voice ended
            queue_note(prev_note, start_time, duration, v)

    pfields.sort() # This is done automatically by csound when called from the command line, but not when note events are sent

    print(f'list of notes is {len(pfields)} long')
    for event in pfields:
        pt.scoreEvent(0, 'i', event) # here is where the notes are sent to ctcsound
        piano.logging.info(event)

    piano.printMessages(cs)
    delay_time =  max(10,len(pfields) // 35) # need enough time to prevent csound being told to stop processing prematurely
    print(f'about to delay to allow ctcsound to process the notes. delay_time: {delay_time}')
    print(f'last start time was at {start_time}. Set f0 to {start_time+1}')
    time.sleep(delay_time) # once you hit the next line csound stops
    pt.stop() # this is important I think. It closes the output file.
    pt.join()
    piano.printMessages(cs)
    cs.reset()
    # raw string: the pattern is unchanged, but "\|" is no longer an invalid escape sequence
    subprocess.run(['grep', r'invalid\|range\|error\|replacing\|overall', log_file]) # look in the log for important messages
    audio = Audio('/home/prent/Music/sflib/goldberg_aria1.wav')
    display(audio)

## Remove any dissonant notes from each time step
This is a hack to remove some of the notes that aren't in the key of F major. I'm not happy with it. Some issues:

- Who am I to doubt coconet's wisdom? Well, I'm stacking four chorales on top of each other, and each one may be coherent to itself, but it has no knowledge of any of the other three chorales. So I've got that going for me.
- I replace the note that is "bad" with the root note of the piece's key, F. I tried replacing it with the most common note, but what if that common note was not actually in the scale? I ended up with a loop.

In [None]:
def _get_scale(root: int, mode: str):
    """Return the scale mask for a specific root."""
    if mode == "major":
        c_scale = np.array([1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1], bool)
    elif mode == "minor":
        c_scale = np.array([1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0], bool)
    else:
        raise ValueError("`mode` must be either 'major' or 'minor'.")
    return np.roll(c_scale, root)

# This function checks the consonance of a single time step (e.g. 16 notes).
# The caller uses the returned index to replace offending notes one at a time and re-score.
def quick_score(time_step, root, mode="major"):
    """Score how many notes of one time step belong to a scale.

    Args:
        time_step: iterable of integer note numbers sounding together.
        root: scale root, forwarded to ``_get_scale``.
        mode: 'major' or 'minor' (case-insensitive).  It is an explicit
            parameter because the function previously read a ``mode`` name
            that it never defined itself.

    Returns:
        A tuple ``(fraction, bad_note)``: the fraction of notes whose pitch
        class is in the scale, and the index of the last note that is not
        (0 when every note is in the scale).  For an empty ``time_step`` the
        result is ``(nan, 0)`` so callers can always unpack two values.
    """
    scale = _get_scale(root, mode.lower())
    notes = list(time_step)
    if not notes:
        return float("nan"), 0
    in_scale_count = 0
    bad_note = 0
    for index, note in enumerate(notes):
        if scale[note % 12]:
            in_scale_count += 1
        else:
            bad_note = index
    return in_scale_count / len(notes), bad_note

In [None]:
def strip_dissonant(chorale, root=5, mode='major'):
    """Replace every note that is outside the scale with ``root + 60``.

    A note is outside the scale when its pitch class (``note % 12``) is not
    set in the mask returned by ``_get_scale(root, mode)``.  The replacement
    note has the pitch class of the root, which is always part of the scale,
    so a single pass is enough and no retry loop is needed.

    Args:
        chorale: integer NumPy array of note numbers, typically
            (voices, time_steps).  It is modified in place.
        root: scale root; the default 5 is F.
        mode: 'major' or 'minor' (case-insensitive).

    Returns:
        The same array, with all out-of-scale notes replaced.
    """
    in_scale = _get_scale(root, mode.lower())[chorale % 12]
    chorale[~in_scale] = root + 60
    return chorale

In [None]:
# print(chorale[:4,:40])
def arpeggiate(chorale,mask,step=2):
    """Silence selected slots of a piano roll by multiplying blocks with a 0/1 mask.

    The roll is treated as consecutive blocks, each as wide as ``mask``.
    Starting with the first block, every ``step``-th block is multiplied
    element-wise by ``mask``; the blocks in between are left untouched.
    Columns beyond the last complete block are never masked.

    An earlier loop that added random octave offsets to a loop variable was
    removed: it never wrote anything back, so it had no effect on the result.

    Args:
        chorale: (voices, time_steps) array of note numbers, modified in place.
        mask: (voices, width) array of 0s and 1s.  Other values would scale the
            note numbers and can produce invalid MIDI numbers.
        step: distance between masked blocks (2 masks every other block).

    Returns:
        The same ``chorale`` array.
    """
    width = mask.shape[1]
    for block in range(0, chorale.shape[1] // width, step):
        start = block * width
        end = start + width
        # plain assignment (not *=) so a float mask can be applied to an integer roll
        chorale[:,start:end] = mask * chorale[:,start:end]
    return chorale

In [None]:
# The mask may only contain 0s and 1s. Any other factor scales the MIDI number itself:
# multiplying by 2 would turn 65 into 130, which is not a valid MIDI number.
# One row per voice (16), one column per 1/16th-note slot of an 8-slot block.
mask = np.array([
    # 1st part
    [0,0,0,1,1,0,1,1],
    [0,0,1,1,0,1,1,1],
    [0,1,1,0,1,1,1,0],
    [1,1,1,1,1,1,0,1],
    # 2nd part
    [0,1,1,1,0,1,1,1],
    [0,0,1,1,0,0,1,1],
    [0,0,0,1,0,0,0,1],
    [1,1,1,1,1,0,1,0],
    # 3rd part
    [0,0,1,1,0,1,1,1],
    [0,1,1,1,0,0,0,1],
    [0,0,0,1,0,0,1,1],
    [1,1,1,0,1,0,1,0],
    # 4th part
    [0,0,0,1,1,0,1,1],
    [0,0,1,1,0,1,1,1],
    [0,1,1,0,1,1,1,0],
    [1,1,1,0,1,1,0,1],
], dtype=float)

# Load one saved chorale, clean it, arpeggiate it and store the result next to the input.
numpy_file = os.path.join('numpy_chorales','saved_chorale399.npy')
chorale = np.load(numpy_file)
chorale = strip_dissonant(chorale)
new_chorale = arpeggiate(chorale,mask)
np.save(os.path.join('numpy_chorales','arpeggio399.npy'),new_chorale)

## Play some of the randomly generated chorales stored earlier
In the cells above, we created and saved some chorales based on preserved voices of the Schmucke chorale. They are named with this convention:
<code>
    +-- name collection
    |            +-- voice that was preserved. 0: soprano, 1: alto, 2:tenor, 3:bass
    |            |+-- which one of several made, this is the sixth in the series
    saved_chorale06.npy
</code>   
<p>Save them to a wav file and also a midi file.</p>

In [None]:
# Load the chorale array that the following cell converts to MIDI and plays.
# NOTE(review): the cell above writes arpeggio399.npy, while this one reads arpeggio398.npy - confirm the file exists.
play_chorale = np.load(os.path.join('numpy_chorales','arpeggio398.npy'))

In [None]:
# Kept for reference from the author (disabled):
# play_chorale = transpose_up_segment(play_chorale,5) # chorales stored in the key of C by mistake.
# print(play_chorale[:16,240:]) # print the final 24 notes
print(f'shape sent to piano_roll_to_csound: {play_chorale.shape}')

# Write a MIDI file: piano roll -> mido object -> muspy music -> file on disk.
midi_output = piano_roll_to_midi(play_chorale)
music = muspy.from_mido(midi_output)
muspy.write_midi('apreggio398.mid',music)

# Render the same roll through Csound.
tpq = 1.8 # time per quarter note
piano_roll_to_csound(
    play_chorale,   # array containing the notes, 16x264 or other shape
    velocity=69,    # velocity used to choose the sample set
    volume=13,      # overall volume
    tpq=tpq,        # time per quarter note
)

## Evaluate the various artificial chorales using Muspy metrics

- read in the numpy arrays in a folder, one at a time
- convert them to midi
- load them into a music class
- evaluate them by some metrics
- sort the results by one metric
- print them all

Docs on muspy:
https://salu133445.github.io/muspy/

In [None]:
def sort_key(elem):
    """Return the negated value, so an ascending sort yields descending order."""
    return (-elem)

dirname = 'numpy_chorales'
# Index of the metric used for ranking. 7 is scale consistency (see the row layout below).
# This column is a choice made while repairing the sort; change it to rank by another metric.
SORT_COLUMN = 7
metrics = []
print('\tfile\t\tpitch\tpitch\tpitch\tpitch\tpoly-\tpoly\tscale\tclass\tconson\tnote')
print('\tname\t\trange\tuse\tclas\tentr\tphony\trate\tconsist\tentropy\tscore\tscore')
for filename in sorted(os.listdir(dirname)):
    if not filename.endswith('.npy'):
        continue # only the saved numpy arrays can be handed to np.load
    path = os.path.join(dirname,filename)
    chorale = np.load(path) # bring in a premade chorale stored as a numpy array (16,264)
    # Placeholders: the consonance scoring via quick_score is not wired in here.
    consonant = 0
    note_score = 0
    midi_output = piano_roll_to_midi(chorale) # convert to mido object
    music = muspy.from_mido(midi_output) # convert mido to muspy music
    # Row layout: 0 name, 1 pitch range, 2 pitches used, 3 pitch classes used, 4 pitch entropy,
    # 5 polyphony, 6 polyphony rate, 7 scale consistency, 8 pitch class entropy, 9 consonant, 10 note score
    metric = [os.path.basename(filename),
        muspy.pitch_range(music),
        muspy.n_pitches_used(music),
        muspy.n_pitch_classes_used(music),
        round(muspy.pitch_entropy(music),3),
        round(muspy.polyphony(music),3),
        round(muspy.polyphony_rate(music),3),
        round(muspy.scale_consistency(music),3),
        round(muspy.pitch_class_entropy(music),3),
        consonant, note_score]
    metrics.append(metric)
# Pass a callable as the key. The previous `key=sort_key()` called sort_key without an argument
# and raised TypeError before anything was sorted.
metrics.sort(key=lambda row: sort_key(row[SORT_COLUMN]))
for metric in metrics:
    # every value after the name is preceded by a tab, exactly as in the header lines
    print('{:15}'.format(metric[0]), *[item for value in metric[1:] for item in ('\t', value)])

## Convolve with the impulse response from Teatro Alcorcon in Madrid from Angelo Farina Collection
<p>The next few cells require a great deal of installation work to accomplish. You need to install the following:
    
- Csound - available in most Linux repos with the operating system's standard program installer
- sox 
- ffmpeg
   

In [None]:
# Run the csound command-line program on goldberg_aria1c.csd.
# Presumably this csd performs the convolution named in the heading of this section - confirm in the csd file.
!csound goldberg_aria1c.csd

In [None]:
# List the wav file that the sox cell below takes as its input (presumably written by the csound run - confirm).
!ls -lth /home/prent/Music/sflib/goldberg_aria1a-c.wav

In [None]:
# Process the wav with sox in four passes: reverse it, apply the `silence` effect,
# reverse it back, and apply `silence` once more, writing goldberg_aria1-t9.wav.
# Presumably this trims silence from both ends of the recording - confirm against the sox manual.
!sox /home/prent/Music/sflib/goldberg_aria1a-c.wav save1.wav reverse
!sox save1.wav save2.wav silence 1 0.01 0.01
!sox save2.wav save1.wav reverse
!sox save1.wav /home/prent/Music/sflib/goldberg_aria1-t9.wav silence 1 0.01 0.01
# delete the two intermediate files, then list the final result
!rm save1.wav
!rm save2.wav
!ls -lth /home/prent/Music/sflib/goldberg_aria1-t9.wav

In [None]:
# Convert the wav to mp3 with ffmpeg (-y overwrites an existing output file, -b:a 320k sets the audio bitrate),
# then show an audio player for the mp3 in the notebook.
!ffmpeg -y -i /home/prent/Music/sflib/goldberg_aria1-t9.wav -b:a 320k /home/prent/Music/sflib/goldberg_aria1-t9.mp3
audio = Audio('/home/prent/Music/sflib/goldberg_aria1-t9.mp3')
display(audio)