In [None]:
# Environment setup via shell commands (apt-get implies a Debian/Ubuntu host such as Colab).
# Install the mitdeeplearning helper package (imported below as `mdl`).
!pip install mitdeeplearning
# Refresh the apt package index, then install the abcmidi and timidity packages.
!apt-get update -y
!apt-get install -y abcmidi timidity
# Download the abc2wav script, pinned to a specific commit, into the current
# working directory and mark it executable so it can be run as ./abc2wav.
!wget https://raw.githubusercontent.com/aamini/introtodeeplearning/1fae3136f01b12c93b182c30599c93285ffeb38f/mitdeeplearning/bin/abc2wav
!chmod 755 abc2wav

In [None]:
import mitdeeplearning as mdl
from IPython.display import Audio

import os
import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu' )

In [None]:
# Load the training songs through the mitdeeplearning lab-1 helper.
# NOTE(review): `songs` is presumably a list of ABC-notation strings (it is
# indexed and joined as such below) — confirm against the helper's docs.
songs = mdl.lab1.load_training_data()

# Pick one song (index 100 is arbitrary) and print it to inspect the ABC format.
example_song = songs[100]
print("\nExample song: ")
print(example_song)

Found 817 songs in text

Example song: 
X:32
T:Crabs in the Skillet
Z: id:dc-jig-26
M:6/8
L:1/8
K:G Dorian
D|G3 d2F|G2A B2c|d_ed cBA|dcB AGF|!
G3 d2F|G2A B2d|c=Bc fed|cAF F2:|!
d|gag gfe|fgf fed|cde fed|cAG G2d|!
gag gfe|fgf fed|cde fed|cAF F2:|!
D|G3 A3|B3 c3|d_ed cBA|dcB AGF|!
G3 A3|B3 d3|c=Bc fed|cAF F2:|!


In [None]:
def save_song_to_abc(song, filename="tmp"):
    """Write `song` to the text file `<filename>.abc`.

    Args:
        song: the text to write (an ABC-notation tune in this notebook).
        filename: path without extension; ".abc" is appended to it.

    Returns:
        The `filename` argument unchanged (i.e. without the ".abc" suffix),
        so callers can derive sibling files such as `<filename>.wav`.
    """
    abc_path = f"{filename}.abc"
    with open(abc_path, "w") as abc_file:
        abc_file.write(song)
    return filename

def abc2wav(abc_file):
    """Run the abc2wav script on `abc_file` through the system shell.

    The script is looked up at `<cwd>/bin/abc2wav` first. If that file does
    not exist but `<cwd>/abc2wav` does (this notebook downloads the script
    straight into the working directory), the latter is used instead.

    Args:
        abc_file: path of the .abc file to convert.

    Returns:
        The status value returned by `os.system` for the command.
    """
    import shlex  # local import so the block stays self-contained

    path_to_tool = os.path.join(os.getcwd(), 'bin', 'abc2wav')
    fallback_tool = os.path.join(os.getcwd(), 'abc2wav')
    if not os.path.isfile(path_to_tool) and os.path.isfile(fallback_tool):
        path_to_tool = fallback_tool

    # Quote both parts so paths containing spaces or shell metacharacters are
    # passed to the shell as single, literal arguments.
    cmd = "{} {}".format(shlex.quote(path_to_tool), shlex.quote(abc_file))
    return os.system(cmd)

# Write the example song to tmp.abc ("tmp" is save_song_to_abc's default name),
# run the downloaded abc2wav script on it, and embed an audio player for the
# resulting .wav file. The shell line hard-codes tmp.abc, so it only matches
# `basename` while the default filename is used.
basename = save_song_to_abc(example_song)
! ./abc2wav tmp.abc
Audio(data=basename+'.wav')

In [None]:
# Concatenate every song into one long string, separated by a blank line.
songs_joined = "\n\n".join(songs)

# The vocabulary is the sorted list of distinct characters in that string.
vocab = sorted(set(songs_joined))
print("There are", len(vocab), "unique characters in the dataset")

# Lookup tables: character -> integer index, and index -> character.
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

# Show the first 20 entries of the character -> index mapping.
print('{')
for char in vocab[:20]:
    print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')

There are 83 unique characters in the dataset
{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '"' :   3,
  '#' :   4,
  "'" :   5,
  '(' :   6,
  ')' :   7,
  ',' :   8,
  '-' :   9,
  '.' :  10,
  '/' :  11,
  '0' :  12,
  '1' :  13,
  '2' :  14,
  '3' :  15,
  '4' :  16,
  '5' :  17,
  '6' :  18,
  '7' :  19,
  ...
}


In [None]:
def vectorize_string(string):
  """Map each character of `string` to its index in the global `char2idx`.

  Returns a NumPy array with one entry per character. A character missing
  from `char2idx` raises KeyError.
  """
  indices = list(map(char2idx.__getitem__, string))
  return np.array(indices)

# Encode the whole joined corpus as one array of character indices.
vectorized_songs = vectorize_string(songs_joined)

In [None]:
def get_batch(vectorized_songs, seq_length, batch_size):
  """Sample a random batch of (input, target) index sequences.

  Each target sequence is its input sequence shifted one position to the
  right, so the target at step t is the input at step t + 1.

  Args:
    vectorized_songs: 1-D array of integer character indices.
    seq_length: number of time steps in every example.
    batch_size: number of examples to draw (start positions are sampled
      with replacement via np.random.choice).

  Returns:
    (x_batch, y_batch): two torch.int64 tensors of shape
    [batch_size, seq_length].

  Raises:
    ValueError: if the data is too short to cut a window of `seq_length`
      inputs plus the one extra element needed for the shifted targets.
  """
  vectorized_songs = np.asarray(vectorized_songs)
  # index of the last element, i.e. the last position usable as a target
  n = vectorized_songs.shape[0] - 1
  if n - seq_length < 1:
    raise ValueError(
        "need more than seq_length + 1 = {} elements to sample a batch, got {}".format(
            seq_length + 1, vectorized_songs.shape[0]))

  # randomly choose the starting indices for the examples in the training batch
  idx = np.random.choice(n - seq_length, batch_size)

  # positions[b, t] = idx[b] + t; one row of consecutive indices per example
  positions = idx[:, None] + np.arange(seq_length)
  x_batch = vectorized_songs[positions]
  y_batch = vectorized_songs[positions + 1]

  # Cast to int64 explicitly: the embedding lookup and cross-entropy targets
  # should not depend on the platform's default NumPy integer width.
  return torch.from_numpy(x_batch).long(), torch.from_numpy(y_batch).long()

In [None]:
# Sanity check: draw a single 5-step example and print, for every time step,
# the input character next to the character the model is expected to predict.
x_batch, y_batch = get_batch(vectorized_songs, seq_length=5, batch_size=1)

# np.squeeze drops the size-1 batch dimension so the loop walks over time steps.
for i, (input_idx, target_idx) in enumerate(zip(np.squeeze(x_batch), np.squeeze(y_batch))):
    print("Step {:3d}".format(i))
    print("  input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
    print("  expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))

Step   0
  input: 59 ('d')
  expected output: 59 ('d')
Step   1
  input: 59 ('d')
  expected output: 61 ('f')
Step   2
  input: 61 ('f')
  expected output: 82 ('|')
Step   3
  input: 82 ('|')
  expected output: 2 ('!')
Step   4
  input: 2 ('!')
  expected output: 0 ('\n')


In [None]:
class RNN(nn.Module):
  """Character-level language model: embedding -> single-layer LSTM -> linear logits.

  Args:
    vocab_size: number of distinct tokens (size of the embedding table and of
      the output logits).
    embed_size: dimensionality of the token embeddings.
    hidden_size: number of LSTM hidden units.
  """

  def __init__(self, vocab_size, embed_size, hidden_size):
    super(RNN, self).__init__()

    self.hidden_size = hidden_size
    self.vocab_size  = vocab_size
    # Not read by any method of this class; kept so existing code that
    # accesses the attribute keeps working.
    self.teacher_forcing_prob = 0.5

    self.embed  = nn.Embedding(vocab_size, embed_size)
    self.rnn    = nn.LSTM(embed_size, hidden_size, batch_first=True)
    self.linear = nn.Linear(hidden_size, vocab_size)

  def _init_state(self, x):
    """Return zero (hidden, cell) states for a batch shaped like `x`.

    The states are created with the device and dtype of the model's own
    parameters, so the module does not rely on a notebook-level `device`
    variable.
    """
    batch = x.size(0)
    hidden = self.embed.weight.new_zeros(1, batch, self.hidden_size)
    cell   = self.embed.weight.new_zeros(1, batch, self.hidden_size)
    return hidden, cell

  def forward(self, x, teacher_forcing=True):
    """Compute next-token logits for every position of `x`.

    Args:
      x: integer token tensor of shape N x S (batch x sequence).
      teacher_forcing: when True, the whole ground-truth sequence `x` is fed
        to the LSTM in one pass. When False, the call is delegated to
        `forward_inference`.

    Returns:
      Logits of shape N x S x vocab_size.
    """
    if not teacher_forcing:
      return self.forward_inference(x)

    hidden, cell = self._init_state(x)

    embed = self.embed(x)
    output, (hidden, cell) = self.rnn(embed, (hidden, cell))
    output = self.linear(output)

    # N x S x D
    return output

  def forward_inference(self, x):
    """Free-running rollout seeded with the first token of each row of `x`.

    Only `x[:, 0:1]` is used as input; `x.size(1)` sets the number of steps.
    At every step the next input is sampled from the softmax of the current
    logits. The sampled tokens themselves are not returned, only the logits.

    Returns:
      Logits of shape N x S x vocab_size.
    """
    hidden, cell = self._init_state(x)

    next_input = x[:, 0:1]
    outputs = []
    for _ in range(x.size(1)):

      embed = self.embed(next_input)
      output, (hidden, cell) = self.rnn(embed, (hidden, cell))
      # drop the length-1 time dimension: N x 1 x H -> N x H before projecting
      output = self.linear(output.squeeze(1))

      outputs.append(output)

      # Explicit dim: normalise over the vocabulary axis. The sampling is
      # detached, so no gradient flows through the choice of the next token.
      probs = F.softmax(output, dim=-1).detach()
      next_input = torch.multinomial(probs, num_samples=1)

    # stacked as S x N x D, permuted to N x S x D
    return torch.stack(outputs).permute(1, 0, 2)

In [None]:
### Hyperparameter setting and optimization ###

# Optimization parameters:
num_training_iterations = 2000  # Increase this to train longer
batch_size = 5    # Experiment between 1 and 64
seq_length = 100  # Experiment between 50 and 500
# NOTE(review): the optimizer cell builds Adam with a hard-coded lr=5e-4, so
# this value is not actually used there — confirm which one is intended.
learning_rate = 5e-3  # Experiment between 1e-5 and 1e-1

# Model parameters: 
vocab_size = len(vocab)  # one output logit per distinct character
embedding_dim = 256 
rnn_units = 1024  # Experiment between 1 and 2048

# Build the model, move it to the selected device, and print its layers.
net = RNN(vocab_size, embedding_dim, rnn_units).to(device)
print(net)

RNN(
  (embed): Embedding(83, 256)
  (rnn): LSTM(256, 1024, batch_first=True)
  (linear): Linear(in_features=1024, out_features=83, bias=True)
)


In [None]:
# Loss: cross-entropy between the predicted logits and the target indices.
criterio  = nn.CrossEntropyLoss().to(device)
# Adam over all model parameters.
# NOTE(review): lr is hard-coded to 5e-4 here; the `learning_rate` variable set
# in the hyperparameter cell (5e-3) is ignored — confirm which is intended.
optimizer = optim.Adam(net.parameters(), lr=5e-4)

In [None]:
# Training loop: sample a batch, run the network, compute the loss, log every
# 100 iterations, then back-propagate and update the weights.
net.train()
loss_acumulada = []  # loss of every iteration so far
# np.random.rand() returns values in [0, 1), so a threshold of 1.1 means
# teacher forcing is used on every iteration.
teacher_forcing_prob = 1.1
for iter in range(num_training_iterations):

  # fetch a batch of data
  x_batch, y_batch = get_batch(vectorized_songs, seq_length=seq_length, batch_size=batch_size)

  # move it to the selected device (GPU when available)
  x_batch = x_batch.to(device)
  y_batch = y_batch.to(device)
  
  # run the batch through the network

  teacher_forcing = True if np.random.rand() < teacher_forcing_prob else False
  saida = net(x_batch, teacher_forcing)

  # compute the loss: flatten the (batch, step) dimensions so every time step
  # counts as one classification example
  # if not teacher_forcing:
  #   print(saida.size())
  loss = criterio(saida.reshape(saida.size(0)*saida.size(1), saida.size(2)), 
                  y_batch.view(y_batch.size(0)*y_batch.size(1)))

  loss_acumulada.append(loss.item())

  #### print a sample of the output
  if (iter+1) % 100 == 0:
    # mean/std are taken over all iterations since the start, not the last 100
    arr = np.asarray(loss_acumulada)
    print('*'*50)
    print(f'Iter {iter+1} - Loss:{arr.mean():.2f} +/- {arr.std():.2f}\n' )

    # greedy decoding: most likely character at each step of the first example
    value, indices = torch.topk( saida[0].detach().cpu(), 1, dim=-1 )
    # indices = torch.multinomial( F.softmax(saida[0]).detach().cpu(), num_samples=1 ) 
    out = [idx2char[idx] for idx in indices.squeeze(1)]
    print(''.join(out))
    print('*'*50)

  # reset the gradients
  optimizer.zero_grad()
  # compute the new gradients
  loss.backward()
  # update the network weights
  optimizer.step()

**************************************************
Iter 100 - Loss:0.78 +/- 0.09

gaeaA2 cdec|gee2 acgc|e
egdcc decAB|c2ec cAGE|=EFD EDAB|cAGE EDD:|!

X:361
T:Himestone Rock
Z: id:dc
**************************************************
**************************************************
Iter 200 - Loss:0.75 +/- 0.09

dfBffeB AFA|!
F3B EFE|DFA B2F|DdB AFE|FDD D2:|!
A|fdB ABd|faa afd|cfg eed|!ee efg|!
fdB ABd|faa afa|
**************************************************
**************************************************
Iter 300 - Loss:0.74 +/- 0.09

T:1/8
K:D Major
A|A2B AFFD|B3A Bcde|ADB AFED|Bfed BddB|!

|f2d e3e|f3d ecde|f3e e3fg|afef BddB|f
f3d
**************************************************
**************************************************
Iter 400 - Loss:0.72 +/- 0.10

A GGG2|BeG2 AAdc|BGG2 AGG2|AcFF2 GfdcA|G
dGG2 DGG2|AdG2 cAdc|B1 d2G2dDGGFG|AGFF2 GcdcA||[2 dAGG Aege
**************************************************
**************************************************
Iter 500 - Lo

# Criando novas músicas!

### Gerando toda a sequência de uma vez

In [None]:
# Generate text with the free-running (non-teacher-forced) path of the model.
# Only the first character of each row seeds the rollout; the length of
# start_string (1000) sets how many steps are generated.
start_string = "X"*1000
input_eval = [char2idx[s] for s in start_string]
# Stack the same encoded row twice -> input of shape 2 x 1000.
input_eval = torch.cat( 2*[torch.from_numpy(np.expand_dims(input_eval, 0))] )

# No gradients are needed for generation; no_grad avoids building the
# autograd graph for the whole 1000-step rollout.
with torch.no_grad():
  saida = net(input_eval.to(device), teacher_forcing=False).detach().cpu()
print(saida.size())
text_generated = []
text_generated2 = []  # not used in this cell; kept in case other cells refer to it

# NOTE(review): the model returns logits only, not the tokens it sampled and
# fed back internally. The characters below are sampled again from those
# logits, so they may differ from the tokens the model conditioned on.
for i in range(saida.size(1)):

  # logits of step i for the first sequence, kept 2-D (1 x vocab)
  s = saida[0][i:i+1]
  # explicit dim: normalise over the vocabulary axis (avoids the implicit-dim warning)
  indices = torch.multinomial( F.softmax(s, dim=-1), num_samples=1 )
  text_generated.append(idx2char[indices])

# Print the seed character followed by the sampled continuation.
print(start_string[0], end='')
print(''.join(text_generated))



torch.Size([2, 1000, 83])
X:230
T:Oirliiinl's
Z: id:dc-jig-635
M:6/8
L:1/8
K:A Major
DED2D G2 F2B|AGE BBA|GBE AAG|cAE D3DG^ d2d|!
gf BBB|GFG cEF|G2B cBf|gee ece|dBA G2:|!

|GBd g3|eed cAG|GBd c3|fad gAF|!
G2 F2g|dBG GAB|AAe f3A:!
G3d G2|Ad G2-|A
df2 g2f|gbd cdb|G2d g3|f2E ^AF|!FG A3D||!
Bee f2g|cAF G2E|ded =BE|DEA ccA|G2G AFD:!
d3G _2g|fed cdc|fgG eAA|d2e FEF|c
A2D G2B|d3 2c|d2eddcG|BED D2:|!
eX:1

T: eay et
Z: id:dc-heel-376
M:C
L:1/8
K:D Maxolydian
AGe|a3 eag|ffA|Bcg ecB|cAA cAA||!

BAc|dce|cee ega|bbf a2e|e
dAA gAc|ede eff|g2a gfe|fef g2f gf:!
fBd G2G|B2G|Gff efd 3 f2 g2|d4:d2|B4|d2||!
d4|g4 f2|g4 c2 B2|A4 AG|A3D DF|!4:|!

2|da dB f4 gfd2|B2 A Bd|d2 EC|B2 A2|F4 D2|A2 G2|F6 G4 c2|G3 fe|!4BB2 de|!
d2 BA DA|B3 d3 f2|ge f/A2B|!3 A2|A2|B2 A2 e2|aa d2 a2|b2|g2 d3 BAAA
d2 dd B2 c2 d2|d6 B2 G2|D2||!
dX:24
T:LitooehIall n's Fplee e
e
n
Z: id:dc-rig-1

M:6/8
L:1/8
K:D Mixor
D|G2GA2|F2  DE BE|FG EG E2|D
F/B c2|BGB Fc|Bc A2|!
c2 E>B|BB DB|G2 DF|E
E3F E2|C2 A2 g2 B/Bc|c2 DF/2:]!

X:26T:Bonn's Fa

  del sys.path[0]


In [None]:
# A single tune in ABC notation, hard-coded as a string so it can be converted
# to audio below.
# NOTE(review): presumably copied from an earlier model sample — confirm.
song="X:142\nT:Naareng t'Kiel\nZ: id:dc-reel-21\nM:C\nL:1/8\nK:A Major\nAE|D3CA D2FA|defe aggf|ABFA BAFA|G2BG DEFE|!\nd2B Bd A|Begf efdB|feBA BAde|efdB AFF:|!\nB|fdde e2|a|fBaf d2ed|ffgf gbfe|deAF dEE2|!"

In [None]:
# Show the tune with its newlines rendered.
print(song)

X:142
T:Naareng t'Kiel
Z: id:dc-reel-21
M:C
L:1/8
K:A Major
AE|D3CA D2FA|defe aggf|ABFA BAFA|G2BG DEFE|!
d2B Bd A|Begf efdB|feBA BAde|efdB AFF:|!
B|fdde e2|a|fBaf d2ed|ffgf gbfe|deAF dEE2|!


In [None]:
# Write the tune to tmp.abc (the default filename), run the abc2wav script on
# it, and embed an audio player for the resulting .wav file. The shell line
# hard-codes tmp.abc, so it only matches `basename` while the default
# filename is used.
basename = save_song_to_abc(song)
!./abc2wav tmp.abc
Audio(data=basename+'.wav')

4.02 February 22 2017 abc2midi
writing MIDI file tmp.mid
Playing tmp.mid
MIDI file: tmp.mid
Format: 0  Tracks: 1  Divisions: 480
Text: note track
Sequence: Naareng t'Kiel
Text: Z:id:dc-reel-21
Output tmp.wav
Playing time: ~43 seconds
Notes cut: 0
Notes lost totally: 0
Playing tmp.wav
MIDI file: tmp.wav
tmp.wav: Not a MIDI file!
