## Importing Libraries

In [None]:
import tensorflow 
import numpy as np 
import pandas as pd 
from collections import Counter
import random
import IPython
from IPython.display import Image, Audio
import music21
from music21 import *
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adamax
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
%matplotlib inline
import sys
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter("ignore")
np.random.seed(42)

## Loading Data

### Loading and parsing the midi files as stream

In [None]:
import os  # needed for the directory listing below; not imported at the top of the notebook

# Directory that holds the MIDI files.
filepath = "../input/beeth/"

# Parse every ".mid" file in the directory into a music21 object.
# The listing is sorted because os.listdir returns entries in arbitrary order,
# which would otherwise let the order of the corpus change between runs.
all_midis = []
for i in sorted(os.listdir(filepath)):
    if i.endswith(".mid"):
        tr = os.path.join(filepath, i)
        midi = converter.parse(tr)
        all_midis.append(midi)

### Getting the list of notes as corpus

In [None]:
def extract_notes(file):
    """Flatten parsed scores into one list of note and chord tokens.

    Args:
        file: Iterable of parsed scores (the notebook passes the list built
            from ``converter.parse``).

    Returns:
        A list of strings, in the order ``recurse()`` yields the elements of
        each part. A ``note.Note`` is stored as ``str(element.pitch)``; a
        ``chord.Chord`` is stored as the entries of ``element.normalOrder``
        joined with ``"."``. Every other element type is ignored.
    """
    notes = []
    for j in file:
        songs = instrument.partitionByInstrument(j)
        # NOTE(review): some music21 versions return None here when the score
        # cannot be partitioned by instrument. Fall back to walking the score
        # itself instead of failing on `None.parts`.
        parts = songs.parts if songs is not None else [j]
        for part in parts:
            for element in part.recurse():
                if isinstance(element, note.Note):
                    notes.append(str(element.pitch))
                elif isinstance(element, chord.Chord):
                    notes.append(".".join(str(n) for n in element.normalOrder))

    return notes

# Turn all parsed scores into a single flat token list and report its size.
Corpus = extract_notes(all_midis)
corpus_size = len(Corpus)
print("Total notes in evey Beethoven midi in the dataset: ", corpus_size)

## Data Exploration

### Analysing the first 50 values in corpus

In [None]:
# Peek at the beginning of the corpus.
first_fifty = Corpus[:50]
print("First fifty values in the Corpus:", first_fifty)

### Printing the music sheet

In [None]:
def show(music):
    """Write *music* in the "lily.png" format and display the result inline.

    The value returned by ``music.write`` is converted to a string and handed
    to ``Image``, which is then shown with ``display``.
    """
    written = music.write("lily.png")
    picture = Image(str(written))
    display(picture)
    
def chords_n_notes(Snippet):
    """Convert a sequence of corpus tokens into a music21 stream.

    Args:
        Snippet: Iterable of string tokens. A token that contains "." or
            consists only of digits is read as a chord whose "."-separated
            parts are integers passed to ``note.Note``; any other token is
            passed to ``note.Note`` unchanged as a single note.

    Returns:
        A ``stream.Stream`` holding exactly one note or chord per token,
        placed at consecutive offsets 0, 1, 2, ...
    """
    Melody = []
    for offset, token in enumerate(Snippet):
        if "." in token or token.isdigit():
            # In case it is a chord
            chord_notes = [note.Note(int(part)) for part in token.split(".")]
            # Build the chord once, after all of its notes are collected.
            # Creating and appending it inside the per-note loop would emit
            # one partial chord per note, all stacked at the same offset.
            element = chord.Chord(chord_notes)
        else:
            # In case it is a note
            element = note.Note(token)
        element.offset = offset
        Melody.append(element)

    return stream.Stream(Melody)

# Rebuild the first hundred corpus tokens as a stream and draw its score.
snippet_tokens = Corpus[:100]
Melody_Snippet = chords_n_notes(snippet_tokens)
show(Melody_Snippet)

### Playing the above sheet music

In [None]:
# Embed the audio clip stored at the path below. The Audio object is kept as
# the last expression of the cell so that the notebook renders the player.
snippet_wav = "../input/music-generated-lstm/Corpus_Snippet.wav"
print("Sample Audio From Data")
IPython.display.Audio(snippet_wav)

### Examining every note in the corpus

In [None]:
# Tally how many times each distinct token occurs in the corpus.
count_num = Counter(Corpus)
unique_total = len(count_num)
print("Total of unique notes in the corpus: ", unique_total)

### Exploring the note-count dictionary

In [None]:
# Split the tally into two parallel lists: the tokens and their counts.
Notes = [*count_num]
Recurrence = [*count_num.values()]

# Average recurrency for a note in corpus
def Average(lst):
    """Return the arithmetic mean of the values in *lst*.

    Raises:
        ZeroDivisionError: If *lst* is empty.
    """
    total = sum(lst)
    count = len(lst)
    return total / count
# Summarise the occurrence counts: mean, largest and smallest.
mean_recurrence = Average(Recurrence)
print("Average recurrency for a note in corpus: ", mean_recurrence)
highest_recurrence = max(Recurrence)
print("Most frequent note in corpus appeared: ", highest_recurrence, " times")
lowest_recurrence = min(Recurrence)
print("Least frequent note in corpus appeared: ", lowest_recurrence, " time")

### Plotting the distribution of notes

In [None]:
# Histogram of how often each distinct token occurs, in bins 50 counts wide.
plt.figure(figsize=(18,3),facecolor="#97BACB")
# np.arange excludes its stop value, so the range is extended by one bin
# width. Without this the last bin edge lies below max(Recurrence) and the
# most frequent tokens are silently left out of the histogram.
bins = np.arange(0, max(Recurrence) + 50, 50)
plt.hist(Recurrence, bins=bins, color="#97BACB")
# Reference line at 100 occurrences, the cut-off the notebook uses for rare notes.
plt.axvline(x=100,color="#DBACC1")
plt.title("Frequency distribution of notes in the corpus")
plt.xlabel("Frequency of chords in corpus")
plt.ylabel("Number of chords")
plt.show()

- Getting a list of rare notes

In [None]:
# Collect every token whose total count in the corpus is below 100.
rare_note = [token for token, occurrences in count_num.items() if occurrences < 100]

print("Total number of notes that occur less than 100 times: ", len(rare_note))

- Eliminating the rare notes

In [None]:
# Drop every occurrence of a rare token from the corpus.
# The list is rebuilt instead of being edited during iteration: calling
# Corpus.remove() inside a loop over Corpus makes the iterator skip the
# element that follows each removal (and remove() deletes the first match,
# not the current one), so some rare tokens would be left behind.
rare_lookup = set(rare_note)  # set membership is O(1); the list scan was O(len(rare_note))
# Slice assignment keeps the same list object, as the in-place removal did.
Corpus[:] = [element for element in Corpus if element not in rare_lookup]

print("Length of corpus after elemination the rare notes: ", len(Corpus))

## Data Preprocessing

### Storing all the unique characters present in my corpus to build a mapping dictionary

In [None]:
# Sorted vocabulary of the distinct tokens present in the corpus.
symb = sorted(set(Corpus))

L_corpus, L_symb = len(Corpus), len(symb)

# Token -> integer index, and the inverse lookup from index back to token.
mapping = {token: index for index, token in enumerate(symb)}
reverse_mapping = dict(enumerate(symb))

print("Total number of characters: ", L_corpus)
print("Number of unique characters: ", L_symb)

### Encoding and splitting the corpus into features and targets

In [None]:
# Number of consecutive tokens in each input window.
length = 40

# Encode the corpus once, then slide a window of `length` tokens across it:
# each window is one feature row and the token right after it is the target.
encoded = [mapping[token] for token in Corpus]
window_starts = range(L_corpus - length)
features = [encoded[start:start + length] for start in window_starts]
targets = [encoded[start + length] for start in window_starts]

L_datapoints = len(targets)
print("Total number of sequences in the corpus: ", L_datapoints)

In [None]:
# Reshape the windows to (L_datapoints, length, 1) and divide the integer
# codes by the vocabulary size, which scales them into [0, 1).
X = (np.reshape(features, (L_datapoints, length, 1)))/ float(L_symb)
# One-hot encode the targets. num_classes is pinned to the vocabulary size so
# that y always has one column per symbol in `mapping`; when left unset the
# width is inferred from the largest target index and can come out smaller
# than the vocabulary.
y = tensorflow.keras.utils.to_categorical(targets, num_classes=L_symb)

In [None]:
# Hold out 20% of the sequences as the seed set, using a fixed random state.
X_train, X_seed, y_train, y_seed = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Model Building