In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import IPython.display as ipd

from utils.midi import *
from utils.data import *

In [None]:
# Read the MIDI collection under `path` into a list of per-song frames.
# NOTE(review): min_notes presumably filters out songs with fewer notes than
# the threshold -- confirm against utils.midi.get_dfs_from_midi.
path = './data/midi/'
min_notes_per_song = 50
dfs = get_dfs_from_midi(path, min_notes=min_notes_per_song)

In [None]:
# Row count of every song frame, with a few summary statistics.
lens = list(map(len, dfs))
length_summary = (
    ('Number of songs:', len(dfs)),
    ('Max length:', max(lens)),
    ('Min length:', min(lens)),
    ('Mean length:', np.mean(lens)),
)
for label, value in length_summary:
    print(label, value)

# Bar chart of the lengths in ascending order.
sorted_lens = sorted(lens)
plt.bar(range(len(sorted_lens)), sorted_lens);

In [None]:
# Duration of melodies: the largest 'End' value found in each song.
durations = [df['End'].max() for df in dfs]
plt.hist(durations, bins=100);
# Shortest and longest duration, shown as the cell output.
(min(durations), max(durations))

In [None]:
# Pitch range per song: lowest pitch, highest pitch and the span between them.
pitch_mins = np.array([df['Pitch'].min() for df in dfs])
pitch_maxs = np.array([df['Pitch'].max() for df in dfs])
pitch_ranges = pitch_maxs - pitch_mins

# Histogram of the spans using 128 equal-width bins over [0, 128].
plt.hist(pitch_ranges, bins=128, range=(0, 128));

In [None]:
# One floating bar per song, drawn from its lowest to its highest pitch.
fig, ax = plt.subplots(figsize=(20, 5))
song_positions = range(len(pitch_mins))
pitch_spans = pitch_maxs - pitch_mins
ax.bar(song_positions, height=pitch_spans, bottom=pitch_mins, width=1.0);

In [None]:
# Measure the gap between the start of each note and the end of the note in
# the row before it, then bucket the songs:
#   neg_gaps -- at least one note starts before the previous row's note ends
#   pos_gaps -- at least one note starts after the previous row's note ends
# A song can land in both lists.
neg_gaps, pos_gaps = [], []
for df in dfs:
    next_starts = df['Start'].values[1:]
    prev_ends = df['End'].values[:-1]
    note_gaps = next_starts - prev_ends
    smallest, largest = note_gaps.min(), note_gaps.max()
    if smallest < 0:
        neg_gaps.append(df)
    if largest > 0:
        pos_gaps.append(df)
print(len(neg_gaps), len(pos_gaps))

In [None]:
# Allow up to 500 rows in the rendered output so a whole song can be inspected.
pd.set_option('display.max_rows', 500)

# The gap analysis cell produces `neg_gaps` and `pos_gaps`; there is no `gaps`
# variable, so the old `gaps[0]` only worked with leftover kernel state.
# Prefer a song with a negative gap (a note starting before the previous one
# ends) and fall back to one with a positive gap.
# NOTE(review): confirm which of the two lists was meant to be inspected.
gap_examples = neg_gaps or pos_gaps
# Display the first example; shows nothing when no song has any gap.
gap_examples[0] if gap_examples else None

In [None]:
# Sampling rate handed to both the synthesizer and the audio widget.
Fs = 44100

# The gap analysis cell produces `neg_gaps` and `pos_gaps`; there is no `gaps`
# variable, so the old `gaps[0]` raised NameError on a fresh kernel.
# Prefer a song with a negative gap and fall back to one with a positive gap.
# NOTE(review): confirm which of the two lists was meant to be auditioned.
gap_examples = neg_gaps or pos_gaps
if not gap_examples:
    raise ValueError('no song with a non-zero gap between consecutive notes to synthesize')

# Convert the song frame back to MIDI and render it to a waveform.
# NOTE(review): df_to_midi presumably returns a pretty_midi.PrettyMIDI object
# (it exposes .synthesize(fs=...)) -- confirm in utils.midi.
midi_data = df_to_midi(gap_examples[0])
audio_data = midi_data.synthesize(fs=Fs)
ipd.Audio(audio_data, rate=Fs)

In [None]:
# Build the training dataset from all songs.
# NOTE(review): sample_len / stride presumably describe a sliding window over
# each song and cols the column positions to keep -- confirm in utils.data.
dataset = MIDIDataset(
    dfs,
    sample_len=50,
    cols=[0, 1, 2],
    scale=1.,
    stride=10,
)

# Shuffled batches of 64 loaded in the main process (num_workers=0); an
# incomplete final batch is dropped.
loader_options = dict(batch_size=64, shuffle=True, num_workers=0, drop_last=True)
dataloader = DataLoader(dataset, **loader_options)

In [None]:
# Number of batches the loader yields per pass over the dataset.
n_batches = len(dataloader)
n_batches

In [None]:
# Stage switches for the XML preprocessing below. Booleans replace the former
# 1/0 integers; `True == 1`, so any existing `== 1` comparison still holds.
is_get_data = True
is_get_matrix = False

if is_get_data:
    # Run the lead-sheet XML through the helper pipeline step by step.
    # NOTE(review): the helpers come from the utils star imports; what each
    # stage filters or transforms is not visible here -- confirm there.
    a = './data/XML/b/beatles/hey-jude/chorus.xml'
    list_file = get_listfile(a)
    list_ = check_chord_type(list_file)
    list_of_four_beat = beats_(list_)

    # get_key returns seven lists, unpacked here as the c..b key lists.
    (c_key_list, d_key_list, e_key_list, f_key_list,
     g_key_list, a_key_list, b_key_list) = get_key(list_of_four_beat)
    note_list_all, dur_list_all = transform_note(
        c_key_list, d_key_list, e_key_list, f_key_list,
        g_key_list, a_key_list, b_key_list,
    )
    in_range, note_list_all_c, dur_list_all_c = check_melody_range(note_list_all, dur_list_all)

    print('total normal chord: {}'.format(len(list_)))
    print('total in four: {}'.format(len(list_of_four_beat)))
    # Report the size of the range-checked output. The previous code printed
    # len(note_list_all), i.e. the count *before* the range check, under this
    # label and never used anything check_melody_range returned.
    # NOTE(review): `in_range` may already be this count -- confirm.
    print('melody in range: {}'.format(len(note_list_all_c)))

if is_get_matrix:
    # Reload the range-checked notes and durations from disk.
    # NOTE(review): these files are not written in this cell; presumably an
    # earlier stage saves them. If they were saved as object arrays, np.load
    # also needs allow_pickle=True -- confirm.
    note_list_all_c = np.load('note_list_all_c.npy')
    dur_list_all_c = np.load('dur_list_all_c.npy')

    # Build the sample matrices and persist them next to the notebook.
    data_x, prev_x, zero_counter = build_matrix(note_list_all_c, dur_list_all_c)
    np.save('data_x.npy', data_x)
    np.save('prev_x.npy', prev_x)

    print('final tab num: {}'.format(len(note_list_all_c)))
    print('songs not long enough: {}'.format(zero_counter))
    print('sample shape: {}, prev sample shape: {}'.format(data_x.shape, prev_x.shape))

In [None]:
# Run the chord-type check on a single lead-sheet file, wrapped in a list.
path = './data/XML/b/beatles/hey-jude/chorus.xml'
single_file_list = [path]
check_chord_type(single_file_list)

In [None]:
# Parse the XML file at `path` and grab its root element.
# NOTE(review): ET is presumably xml.etree.ElementTree made available through
# one of the star imports at the top of the notebook -- confirm.
chorus_file = ET.parse(path)
root = chorus_file.getroot()

# Text of every <fb> element followed by every <borrowed> element, in
# document order within each tag.
check_list = [
    element.text
    for tag_name in ('fb', 'borrowed')
    for element in root.iter(tag=tag_name)
]
# Total number of elements visited, and how many of them carry no text.
counter = len(check_list)
None_counter = sum(1 for text in check_list if text is None)

print(check_list)
print(counter)
print(None_counter)

In [None]:
# Parse the XML file at `path` again and keep its root element for the cells
# that follow.
chorus_file = ET.parse(path)
root = chorus_file.getroot()

In [None]:
# Print three text fields of every <note> element, read by child position.
# NOTE(review): positions 3, 4 and 5 are assumed to hold the duration, the
# note name and the octave -- this depends on the XML schema; confirm.
DUR_POS, NOTE_POS, OCTAVE_POS = 3, 4, 5
for note_element in root.iter(tag='note'):
    note = note_element[NOTE_POS].text
    dur = note_element[DUR_POS].text
    octave = note_element[OCTAVE_POS].text
    print(note, dur, octave)