In [5]:
import glob

wav_files = sorted(glob.glob("../Data/MusicNet_Dataset/musicnet/musicnet/train_data/*.wav"))
csv_files = sorted(glob.glob("../Data/MusicNet_Dataset/musicnet/musicnet/train_labels/*.csv"))


In [6]:
import time
import torch.optim as optim
import torch.utils.data as data
from Model.CNN_3L_pro import *
import os

# start_file = "../Data/MusicNet_Dataset/musicnet/musicnet/train_data/2443.wav"
# start_index = wav_files.index(start_file) if start_file in wav_files else 0
# 
# wav_files = wav_files[start_index:]
# csv_files = csv_files[start_index:]
# 
# print(f"Bắt đầu train tiếp từ file: {wav_files[0]}")

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Khởi tạo mô hình, hàm mất mát và optimizer
model = CNN_Pro().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

if os.path.exists("CNN_Pro21.pth"):       
    checkpoint = torch.load("CNN_Pro21.pth", map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Huấn luyện từng file một
for wav_path, csv_path in zip(wav_files, csv_files):
    print(time.strftime("%H:%M:%S")+f"\nfile : {wav_path}")
    X_train, y_train = load_wav_csv(wav_path, csv_path)
    dataset = MusicDataset(X_train, y_train)
    train_loader = data.DataLoader(dataset, batch_size=64, shuffle=False)

    for epoch in range(20):  # Giảm số epoch cho từng file
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float()
            
            optimizer.zero_grad()
            outputs = model(inputs)
             # BCEWithLogitsLoss yêu cầu labels dạng float
            loss = criterion(outputs, labels)  
            loss.backward()
            optimizer.step()
        
            running_loss += loss.item()
            # Chuyển đầu ra thành nhị phân (multi-label classification)
            predicted = (outputs > 0.5).float()
        
            correct += (predicted == labels).sum().item() / 128  # Chia 128 để chuẩn hóa
            total += labels.size(0)

        if epoch >= 19:
            print(f"Epoch {epoch+1}/20, Loss: {running_loss/len(train_loader):.4f}, "
              f"Accuracy: {100 * correct / total:.2f}%")

    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, "CNN_Pro21.pth")
    print(f"Updated checkpoint sau file {wav_path}")

print("done training !")

16:57:11
file : ../Data/MusicNet_Dataset/musicnet/musicnet/train_data/1727.wav
Epoch 20/20, Loss: 0.1178, Accuracy: 96.43%
Updated checkpoint sau file ../Data/MusicNet_Dataset/musicnet/musicnet/train_data/1727.wav
17:04:06
file : ../Data/MusicNet_Dataset/musicnet/musicnet/train_data/1728.wav


KeyboardInterrupt: 

In [1]:
import glob
from Model.CNN_3L_pro import *

test_wav_files = sorted(glob.glob("../Data/MusicNet_Dataset/musicnet/musicnet/test_data/*.wav"))
test_csv_files = sorted(glob.glob("../Data/MusicNet_Dataset/musicnet/musicnet/test_labels/*.csv"))

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

model = CNN_Pro().to(device)
checkpoint = torch.load("../Trained/CNN_Pro.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # Đưa model vào chế độ đánh giá

import numpy as np
import torch.utils.data as data

total_correct = 0
total_samples = 0



for wav_path, csv_path in zip(test_wav_files, test_csv_files):
    print(f"\nĐang test file: {wav_path}")

    X_test, y_test = load_wav_csv(wav_path, csv_path)

    test_dataset = MusicDataset(X_test, y_test)
    test_loader = data.DataLoader(test_dataset, batch_size=64, shuffle=False)

    correct = 0
    total = 0

    with torch.no_grad():  # Không tính gradient khi test
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device).float()

            outputs = model(inputs)
            predicted = (outputs > 0.5).float()  # Chuyển thành 0 hoặc 1
            
            correct += (predicted == labels).sum().item() / 128  # Chia 128 để chuẩn hóa
            total += labels.size(0)

    accuracy = 100 * correct / total
    print(f"Accuracy: {accuracy:.2f}%")

    total_correct += correct
    total_samples += total

print(f"\n🔥 Accuracy chung trên tập test: {100 * total_correct / total_samples:.2f}%")

KeyboardInterrupt: 

In [3]:
from Model.CNN_3L_pro import *
import librosa.display
import gc

gc.collect()
torch.mps.empty_cache()
wav_path = "../Data/MusicNet_Dataset/musicnet/musicnet/test_data/1819.wav"
notes = predict_notes(wav_path, model, device=device)

print(notes)

[]


In [None]:
from music21 import stream, note, midi

def export_to_musicxml(note_list, output_file="output.musicxml"):
    s = stream.Stream()
    
    for pitch, duration in note_list:
        n = note.Note(pitch)
        n.quarterLength = duration  # Đặt độ dài nốt theo đơn vị quarter
        s.append(n)
    
    s.write('musicxml', fp=output_file)
    print(f"Xuất thành công: {output_file}")

# Ví dụ danh sách nốt (nốt, trường độ)
notes = [("C4", 1.0), ("D4", 0.5), ("E4", 0.5), ("F4", 1.0), ("G4", 2.0)]

# Xuất file MusicXML
export_to_musicxml(notes)

In [1]:
import numpy as np
import librosa
from Model.CNN_3L_pro import *
import gc
import torch

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

model = CNN_Pro().to(device)
checkpoint = torch.load("../Trained/CNN_Pro.pth", map_location=device, weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval() 


gc.collect()
torch.mps.empty_cache() 
# Dự đoán cho một file WAV
wav_path = "../Data/MusicNet_Dataset/musicnet/musicnet/test_data/2106.wav"
notes = CNN_predict(wav_path, model, device=device)


Đang dự đoán file: ../Data/MusicNet_Dataset/musicnet/musicnet/test_data/2106.wav
[{'note': 50, 'start': 0.4876190476190476, 'duration': 0.4876190476190476}, {'note': 66, 'start': 0.4876190476190476, 'duration': 0.4876190476190476}, {'note': 62, 'start': 0.2438095238095238, 'duration': 0.7314285714285714}, {'note': 69, 'start': 0.9752380952380952, 'duration': 0.2438095238095238}, {'note': 69, 'start': 1.9504761904761905, 'duration': 0.2438095238095238}, {'note': 62, 'start': 2.681904761904762, 'duration': 0.2438095238095238}, {'note': 66, 'start': 3.1695238095238096, 'duration': 0.2438095238095238}, {'note': 73, 'start': 3.900952380952381, 'duration': 0.2438095238095238}, {'note': 66, 'start': 4.144761904761904, 'duration': 0.2438095238095238}, {'note': 62, 'start': 4.388571428571429, 'duration': 0.2438095238095238}, {'note': 74, 'start': 4.388571428571429, 'duration': 0.4876190476190476}, {'note': 86, 'start': 4.388571428571429, 'duration': 0.4876190476190476}, {'note': 62, 'start': 5

In [3]:
from routes.sounds import *

tempo, _ = get_tempo_pulse(wav_path)
notes_beat = process_notes_sum(notes, tempo)

for idx, event in enumerate(sorted(notes_beat, key=lambda x: x["start_beat"])):
    print(f"{idx+1}. Note {event['note']} - Start: {event['start_beat']:.1f}, Duration: {event['duration_beat']:.3f}, value: {event['note_value']}")

1. Note 62 - Start: 0.5, Duration: 4.250, value: whole
2. Note 50 - Start: 1.0, Duration: 3.000, value: dotted half
3. Note 66 - Start: 1.0, Duration: 3.000, value: dotted half
4. Note 69 - Start: 2.5, Duration: 1.500, value: dotted quarter
5. Note 69 - Start: 5.0, Duration: 1.500, value: dotted quarter
6. Note 62 - Start: 6.5, Duration: 1.500, value: dotted quarter
7. Note 66 - Start: 8.0, Duration: 1.500, value: dotted quarter
8. Note 73 - Start: 9.5, Duration: 1.500, value: dotted quarter
9. Note 66 - Start: 10.0, Duration: 1.500, value: dotted quarter
10. Note 62 - Start: 11.0, Duration: 1.500, value: dotted quarter
11. Note 74 - Start: 11.0, Duration: 3.000, value: dotted half
12. Note 86 - Start: 11.0, Duration: 3.000, value: dotted half
13. Note 62 - Start: 12.5, Duration: 1.500, value: dotted quarter
14. Note 64 - Start: 13.0, Duration: 1.500, value: dotted quarter
15. Note 45 - Start: 13.0, Duration: 3.000, value: dotted half
16. Note 67 - Start: 15.5, Duration: 1.500, value: 

In [8]:
notes_beat = process_notes(notes, tempo)

for idx, event in enumerate(sorted(notes_beat, key=lambda x: x["start_beat"])):
    print(f"{idx+1}. 🎵 Note {event['note']} - Start: {event['start_beat']:.1f}, Duration: {event['duration_beat']:.3f}, value: {event['note_value']}")

1. 🎵 Note 62 - Start: 0.5, Duration: 1.750, value: half
2. 🎵 Note 50 - Start: 1.0, Duration: 1.250, value: tied quarter-sixteenth
3. 🎵 Note 66 - Start: 1.0, Duration: 1.250, value: tied quarter-sixteenth
4. 🎵 Note 69 - Start: 2.5, Duration: 0.625, value: quarter
5. 🎵 Note 69 - Start: 5.0, Duration: 0.625, value: quarter
6. 🎵 Note 62 - Start: 6.5, Duration: 0.625, value: quarter
7. 🎵 Note 66 - Start: 8.0, Duration: 0.625, value: quarter
8. 🎵 Note 73 - Start: 9.5, Duration: 0.625, value: quarter
9. 🎵 Note 66 - Start: 10.0, Duration: 0.625, value: quarter
10. 🎵 Note 62 - Start: 11.0, Duration: 0.625, value: quarter
11. 🎵 Note 74 - Start: 11.0, Duration: 1.250, value: tied quarter-sixteenth
12. 🎵 Note 86 - Start: 11.0, Duration: 1.250, value: tied quarter-sixteenth
13. 🎵 Note 62 - Start: 12.5, Duration: 0.625, value: quarter
14. 🎵 Note 64 - Start: 13.0, Duration: 0.625, value: quarter
15. 🎵 Note 45 - Start: 13.0, Duration: 1.250, value: tied quarter-sixteenth
16. 🎵 Note 67 - Start: 15.5, D

In [6]:
import json

tempo, pulse = get_tempo_pulse("../Data/MusicNet_Dataset/musicnet/musicnet/test_data/1819.wav")
notes_beat = process_notes_sum(notes, tempo)
musicxml_score = convert_to_musicxml(notes_beat, tempo, pulse)
musicxml_score.show()
musicxml_score.write("musicxml", "output.xml")



SubConverterException: Cannot find a path to the 'mscore' file at /Applications/MuseScore 3.app/Contents/MacOS/mscore -- download MuseScore

In [1]:
import soundfile as sf

print(sf.available_formats())

{'AIFF': 'AIFF (Apple/SGI)', 'AU': 'AU (Sun/NeXT)', 'AVR': 'AVR (Audio Visual Research)', 'CAF': 'CAF (Apple Core Audio File)', 'FLAC': 'FLAC (Free Lossless Audio Codec)', 'HTK': 'HTK (HMM Tool Kit)', 'SVX': 'IFF (Amiga IFF/SVX8/SV16)', 'MAT4': 'MAT4 (GNU Octave 2.0 / Matlab 4.2)', 'MAT5': 'MAT5 (GNU Octave 2.1 / Matlab 5.0)', 'MPC2K': 'MPC (Akai MPC 2k)', 'MP3': 'MPEG-1/2 Audio', 'OGG': 'OGG (OGG Container format)', 'PAF': 'PAF (Ensoniq PARIS)', 'PVF': 'PVF (Portable Voice Format)', 'RAW': 'RAW (header-less)', 'RF64': 'RF64 (RIFF 64)', 'SD2': 'SD2 (Sound Designer II)', 'SDS': 'SDS (Midi Sample Dump Standard)', 'IRCAM': 'SF (Berkeley/IRCAM/CARL)', 'VOC': 'VOC (Creative Labs)', 'W64': 'W64 (SoundFoundry WAVE 64)', 'WAV': 'WAV (Microsoft)', 'NIST': 'WAV (NIST Sphere)', 'WAVEX': 'WAVEX (Microsoft)', 'WVE': 'WVE (Psion Series 3)', 'XI': 'XI (FastTracker 2)'}
