In [None]:
import os
import tomllib

# Root output folder for the large dataset variant.
# NOTE: this cell and the 'multi_small' configuration cell assign the same
# names; whichever of the two is executed last defines the active settings.
output_dir = 'multi_huge'
# makedirs(exist_ok=True) is safe to re-run and avoids the check-then-create
# race of `if not os.path.exists(...): os.mkdir(...)`.
os.makedirs(output_dir, exist_ok=True)

# Basic configuration
# Instrument numbers to render. Each value is passed as `instrument=` to
# numpy2midi and names an `inst<N>` sub-folder in the generation cell
# (presumably General MIDI program numbers -- TODO confirm).
instrument = [
    0, 1, 2, 6, 7,
    8, 10, 14,
    19, 21, 22,
    24, 25, 30, 38,
    40, 42, 44, 46, 48,
    53, 54,
    56, 60, 61,
    66, 68, 71,
    72, 77,
    81, 88, 98
]   # 33 instruments
midi_num = 300                          # pieces generated per instrument
octave_weight = [1, 3, 5, 6, 5, 3, 1]   # forwarded to Notes(octave_weight=...)
frames = 660                            # frames per generated piece

# When True, the generation cell also saves a `<id>.cqt.npy` array per piece.
do_CQT = False

# CQT configuration: read the 'CQT' table from the model's TOML config.
with open('../../model/config.toml', 'br') as f:
    CQTconfig = tomllib.load(f)['CQT']
# Duration of one frame: hop / fs
# (seconds, assuming hop is in samples and fs in Hz -- TODO confirm).
s_per_frame = CQTconfig['hop'] / CQTconfig['fs']
# Duration of one full piece, in the same unit as s_per_frame.
piece_len = frames * s_per_frame


In [None]:
import os
import tomllib

# Root output folder for the small dataset variant.
# NOTE: this cell and the 'multi_huge' configuration cell assign the same
# names; whichever of the two is executed last defines the active settings.
output_dir = 'multi_small'
# makedirs(exist_ok=True) is safe to re-run and avoids the check-then-create
# race of `if not os.path.exists(...): os.mkdir(...)`.
os.makedirs(output_dir, exist_ok=True)

# Basic configuration
# Instrument numbers to render. Each value is passed as `instrument=` to
# numpy2midi and names an `inst<N>` sub-folder in the generation cell
# (presumably General MIDI program numbers -- TODO confirm).
instrument = [
    0, 1, 2, 6, 7,
    8, 10, 14,
    19, 21,
    24, 25, 30,
    42, 44, 46, 48,
    53, 54,
    56, 60, 61,
    66, 68, 71,
    72, 77,
    81, 88, 98
]   # 30 instruments
midi_num = 24                           # pieces generated per instrument
octave_weight = [1, 3, 5, 6, 5, 3, 1]   # forwarded to Notes(octave_weight=...)
frames = 660                            # frames per generated piece

# When True, the generation cell also saves a `<id>.cqt.npy` array per piece.
do_CQT = False

# CQT configuration: read the 'CQT' table from the model's TOML config.
with open('../../model/config.toml', 'br') as f:
    CQTconfig = tomllib.load(f)['CQT']
# Duration of one frame: hop / fs
# (seconds, assuming hop is in samples and fs in Hz -- TODO confirm).
s_per_frame = CQTconfig['hop'] / CQTconfig['fs']
# Duration of one full piece, in the same unit as s_per_frame.
piece_len = frames * s_per_frame


In [2]:
# Imports for dataset generation. The order matters: each sys.path.append
# must run before the imports that follow it.
# `gen` is imported before sys.path is modified, so it must already be
# importable (presumably a module next to this notebook -- TODO confirm).
from gen import Notes
import numpy as np
import torchaudio
import sys
# Make the parent directory importable before importing `fluidsynth`
# (presumably a project-local wrapper that provides Synth -- TODO confirm).
sys.path.append('..')
from fluidsynth import Synth
# Make the directory two levels up importable before importing from the
# `utils` and `model` packages (presumably located there -- TODO confirm).
sys.path.append('../..')
from utils.midiarray import numpy2midi
from utils.wavtool import cutWave
from model.CQT import CQTsmall
from model.layers import EnergyNorm

In [3]:
# Note generator used by the generation cell; the length parameters are
# interpreted by gen.Notes (their semantics are not visible in this notebook).
generator = Notes(octave_weight=octave_weight, len_range=(3, 120), len_mean=24, len_sigma=-1)

# Synthesizer constructed with the sample rate taken from the CQT configuration.
s = Synth(CQTconfig['fs'], gain=0.8)

# CQT front end: every keyword below is copied verbatim from the CQT config
# table under the same name.
cqt = CQTsmall(
    CQTconfig['fs'],
    **{key: CQTconfig[key] for key in ('fmin', 'octaves', 'bins_per_octave', 'hop', 'filter_scale')},
    requires_grad=False,
)

# Normalisation applied to the CQT output before it is saved.
norm = EnergyNorm(output_type=0)

In [11]:
# Generate the dataset, grouped by instrument: one `inst<N>` sub-folder per
# instrument, holding `<id>.npy`, `<id>.mid`, `<id>.wav` and, when do_CQT is
# set, `<id>.cqt.npy` for every piece.
for inst in instrument:
    sub_dir = os.path.join(output_dir, f"inst{inst}")
    # Idempotent and race-free; also recreates output_dir if it was removed
    # after the configuration cell ran.
    os.makedirs(sub_dir, exist_ok=True)
    for midi_id in range(midi_num):
        np_name = os.path.join(sub_dir, f"{midi_id}.npy")
        midi_name = os.path.join(sub_dir, f"{midi_id}.mid")
        wav_name = os.path.join(sub_dir, f"{midi_id}.wav")

        # Random note array of `frames` frames. The two float arguments are
        # generator parameters defined by gen.Notes (meaning not visible here).
        np_midi = generator.generate(frames, 0.165, 0.7)
        np.save(np_name, np_midi)
        # Convert the array to a MIDI file for this instrument, then render
        # that MIDI file to a wav file.
        numpy2midi(np_midi, s_per_frame, time_first=False, random=True, instrument=inst).save(midi_name)
        s.midi2audio(midi_name, wav_name)
        if do_CQT:
            # Compute the CQT. This happens before the wav is trimmed below,
            # so it is computed on the full rendered audio.
            cqt_name = os.path.join(sub_dir, f"{midi_id}.cqt.npy")
            waveform, sample_rate = torchaudio.load(wav_name, normalize=True)
            waveform = waveform.unsqueeze(0)    # add a batch dimension
            # Drop the batch dimension again; the original author notes the
            # result as [2, 288, time].
            cqt_data = norm(cqt(waveform)).squeeze(0).numpy()
            np.save(cqt_name, cqt_data[:, :, :frames])  # keep only the first `frames` frames
        # Trim the audio, overwriting the wav file (input and output paths
        # are the same), using 0 and piece_len as the cut bounds.
        cutWave(wav_name, wav_name, 0, piece_len, mono=True)

In [None]:
# Inspect the data: pick one random generated piece and plot its saved array,
# the array re-read from its MIDI file and, when do_CQT is set, its CQT.
import matplotlib.pyplot as plt
from utils.midiarray import midi2numpy
import numpy as np

# Randomly choose an instrument and a piece index.
random_inst = np.random.choice(instrument)
random_midi_id = np.random.randint(midi_num)

# Load the matching .npy file.
random_np_name = os.path.join(output_dir, f"inst{random_inst}", f"{random_midi_id}.npy")
random_np_midi = np.load(random_np_name)

# Load the matching CQT file.
if do_CQT:
    random_cqt_name = os.path.join(output_dir, f"inst{random_inst}", f"{random_midi_id}.cqt.npy")
    random_cqt = np.load(random_cqt_name)
    noise = np.random.normal(0, 1, random_cqt.shape) * 0.01  # try out a noise level
    random_cqt += noise
    # Combine the two leading channels into one magnitude image.
    random_cqt = np.sqrt(random_cqt[0]**2 + random_cqt[1]**2)

# Read the data back from the MIDI file; it should match the .npy array.
random_midi_name = os.path.join(output_dir, f"inst{random_inst}", f"{random_midi_id}.mid")
frommidi = midi2numpy(random_midi_name, s_per_frame)
# Force exactly `frames` columns: zero-pad on the right if short, else crop.
if frommidi.shape[1] < frames:
    padding = np.zeros((frommidi.shape[0], frames - frommidi.shape[1]))
    frommidi = np.hstack((frommidi, padding))
else:
    frommidi = frommidi[:, :frames]

# One row per image. The figure size is set once here; previously the size
# given at figure creation was overridden by later set_size_inches calls, so
# these are the sizes that were actually rendered.
fig_num = 3 if do_CQT else 2
fig, axes = plt.subplots(fig_num, 1, figsize=(14, 13 if do_CQT else 12))
fig.suptitle(f'Instrument: {random_inst}, MIDI ID: {random_midi_id}', fontsize=16)

# Plot random_np_midi.
axes[0].imshow(random_np_midi, aspect='auto', origin='lower', cmap='gray')
axes[0].set_title('Random Piano Roll Data')
axes[0].set_xlabel('Time Frame')
axes[0].set_ylabel('MIDI Note')

# Plot the array reconstructed from the MIDI file.
axes[1].imshow(frommidi, aspect='auto', origin='lower', cmap='gray')
axes[1].set_title('from MIDI Data')
axes[1].set_xlabel('Time Frame')
axes[1].set_ylabel('MIDI Note')

# Plot random_cqt.
if do_CQT:
    cqt_image = axes[2].imshow(random_cqt, aspect='auto', origin='lower', cmap='hot')
    axes[2].set_title('Random CQT Data')
    axes[2].set_xlabel('Time Frame')
    axes[2].set_ylabel('Frequency Bin')
    fig.colorbar(cqt_image, ax=axes[2])

fig.tight_layout()
plt.show()