<a href="https://colab.research.google.com/github/qwehoi/music_theory_code/blob/main/midi4_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import tensorflow as tf
from music21 import *

from tensorflow.keras.layers import (
    Input, Embedding, LSTM, Dense, Dropout,
    concatenate, Flatten, Layer
)

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, concatenate
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Embedding
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input


# Model and training hyperparameters
class Config:
    """Plain container for the hyperparameters used by this notebook cell."""

    def __init__(self):
        # Length of every input window (number of consecutive events).
        self.sequence_length = 50
        # Input dimension.
        self.d_x = 1
        # Hidden-layer dimension.
        self.d_h = 256
        # Mini-batch size passed to ``model.fit``.
        self.batch_size = 64
        # Number of training epochs.
        self.epochs = 2
        # Learning rate handed to the optimizer.
        self.learning_rate = 0.001
        # Dropout rate.
        self.dropout_rate = 0.3
        # Fraction of the samples held out for validation.
        self.validation_split = 0.2


config = Config()

def process_midi_files(midi_path):
    """Extract per-note features from every MIDI file in ``midi_path``.

    Each file is parsed with music21. When the score can be partitioned by
    instrument, only the part holding the most notes/chords is used; otherwise
    the flattened score is used. One entry per note (or chord, reduced to its
    root) is appended to every feature list.

    Args:
        midi_path: Directory containing ``.mid`` / ``.midi`` files (the
            extension check is case-insensitive).

    Returns:
        dict of equally long numpy arrays with the keys ``notes``,
        ``durations``, ``velocities``, ``offsets``, ``time_signatures``,
        ``key_signatures`` and ``tempos``.

    Raises:
        ValueError: If no note could be extracted from any file.
    """
    notes = []
    durations = []
    velocities = []
    offsets = []
    time_signatures = []
    key_signatures = []
    tempos = []

    # Sorted so the concatenation order (and therefore the training windows)
    # does not depend on the arbitrary order returned by os.listdir.
    midi_files = sorted(
        f for f in os.listdir(midi_path)
        if f.lower().endswith((".mid", ".midi"))
    )

    for file in midi_files:
        try:
            print(f"Processing {file}...")
            midi = converter.parse(os.path.join(midi_path, file))

            # File-level features, repeated for every note of the file.
            time_sigs = midi.getTimeSignatures()
            ts = time_sigs[0] if time_sigs else None
            ts_label = f"{ts.numerator}/{ts.denominator}" if ts else "4/4"
            key_label = str(midi.analyze('key'))

            # The value is kept in ``tempo_bpm`` (not ``tempo``) so the
            # music21 ``tempo`` module stays reachable for the isinstance check.
            tempo_bpm = 120.0
            try:
                mm = midi.metronomeMarkBoundaries()[0][2]
                if isinstance(mm, tempo.MetronomeMark) and mm.number is not None:
                    tempo_bpm = float(mm.number)
            except Exception:
                # Best effort: any failure while reading the tempo falls back
                # to the default of 120.
                tempo_bpm = 120.0

            # Pick the instrument part with the most notes/chords, if any.
            main_part = None
            parts = instrument.partitionByInstrument(midi)
            if parts:
                max_notes = 0
                for part in parts.parts:
                    notes_count = sum(
                        1 for n in part.recurse()
                        if isinstance(n, (note.Note, chord.Chord))
                    )
                    if notes_count > max_notes:
                        max_notes = notes_count
                        main_part = part

            if main_part is not None:
                notes_to_parse = main_part.recurse()
            else:
                # ``Stream.flat`` is the older spelling of ``Stream.flatten()``.
                flat_stream = midi.flatten() if hasattr(midi, "flatten") else midi.flat
                notes_to_parse = flat_stream.notes

            for element in notes_to_parse:
                if isinstance(element, note.Note):
                    pitch = element.pitch.midi
                elif isinstance(element, chord.Chord):
                    # Chords are reduced to their root pitch.
                    pitch = element.root().midi
                else:
                    continue

                velocity = element.volume.velocity
                notes.append(int(pitch))
                durations.append(float(element.duration.quarterLength))
                velocities.append(velocity if velocity is not None else 64)
                offsets.append(float(element.offset))
                time_signatures.append(ts_label)
                key_signatures.append(key_label)
                tempos.append(tempo_bpm)

        except Exception as e:
            # A broken file must not abort the whole directory scan.
            print(f"Error processing {file}: {str(e)}")
            continue

    print(f"Processed {len(notes)} notes")

    if not notes:
        raise ValueError("No notes were extracted from the MIDI files")

    return {
        'notes': np.array(notes),
        'durations': np.array(durations),
        'velocities': np.array(velocities),
        'offsets': np.array(offsets),
        'time_signatures': np.array(time_signatures),
        'key_signatures': np.array(key_signatures),
        'tempos': np.array(tempos)
    }




def prepare_sequences(music_data, config):
    """Build sliding-window model inputs and one-hot targets.

    Every distinct value of a feature is assigned the index of its position
    in the sorted list of distinct values. For each start position a window
    of ``config.sequence_length`` encoded values becomes one input row, and
    the value right after the window becomes the target.

    Args:
        music_data: dict with the array-like entries ``notes``, ``durations``,
            ``velocities``, ``time_signatures`` and ``key_signatures``.
        config: object exposing ``sequence_length``.

    Returns:
        Tuple ``(network_input, network_output, n_vocab, note_to_int)``:
        the dict of encoded input windows, the dict of one-hot targets, the
        dict of vocabulary sizes and the note-value -> index mapping.
    """
    feature_names = ('notes', 'durations', 'velocities', 'time_signatures', 'key_signatures')

    # Sorted distinct values per feature; position in the list == class index.
    vocabularies = {name: sorted(set(music_data[name])) for name in feature_names}

    print("數據統計：")
    print(f"Unique notes: {len(vocabularies['notes'])}")
    print(f"Unique durations: {len(vocabularies['durations'])}")
    print(f"Unique velocities: {len(vocabularies['velocities'])}")
    print(f"Unique time signatures: {len(vocabularies['time_signatures'])}")
    print(f"Unique keys: {len(vocabularies['key_signatures'])}")

    # value -> index lookup tables
    index_of = {
        name: {value: idx for idx, value in enumerate(values)}
        for name, values in vocabularies.items()
    }

    window = config.sequence_length
    window_starts = range(0, len(music_data['notes']) - window)

    def encode_windows(name):
        # One row of encoded values per window start.
        lookup, column = index_of[name], music_data[name]
        return np.array([
            [lookup[value] for value in column[start:start + window]]
            for start in window_starts
        ])

    def encode_targets(name):
        # One-hot encoding of the value that follows each window.
        lookup, column = index_of[name], music_data[name]
        following = [lookup[column[start + window]] for start in window_starts]
        return to_categorical(following, num_classes=len(vocabularies[name]))

    input_notes = encode_windows('notes')
    input_durations = encode_windows('durations')
    input_velocities = encode_windows('velocities')
    input_time_sigs = encode_windows('time_signatures')
    input_keys = encode_windows('key_signatures')

    output_notes = encode_targets('notes')
    output_durations = encode_targets('durations')
    output_velocities = encode_targets('velocities')

    print("\n數據形狀：")
    print(f"Input notes shape: {input_notes.shape}")
    print(f"Input durations shape: {input_durations.shape}")
    print(f"Input velocities shape: {input_velocities.shape}")
    print(f"Output notes shape: {output_notes.shape}")
    print(f"Output durations shape: {output_durations.shape}")
    print(f"Output velocities shape: {output_velocities.shape}")

    # Keys match the names of the model's Input / output layers.
    network_input = {
        'note_input': input_notes,
        'duration_input': input_durations,
        'velocity_input': input_velocities,
        'time_sig_input': input_time_sigs,
        'key_input': input_keys
    }

    network_output = {
        'note_output': output_notes,
        'duration_output': output_durations,
        'velocity_output': output_velocities
    }

    # Number of classes per feature.
    n_vocab = {name: len(values) for name, values in vocabularies.items()}

    print("\n確認輸出維度：")
    print(f"note_output shape: {output_notes.shape}")
    print(f"duration_output shape: {output_durations.shape}")
    print(f"velocity_output shape: {output_velocities.shape}")

    return network_input, network_output, n_vocab, index_of['notes']




def create_enhanced_model(config, n_vocab):
    """Build and compile the five-input / three-output sequence model.

    Each input sequence gets its own Embedding and LSTM; the five LSTM
    outputs are concatenated, passed through a shared Dense + Dropout layer
    and then through one Dense + Dropout + softmax branch per output.

    Args:
        config: object exposing ``sequence_length``, ``dropout_rate`` and
            ``learning_rate``.
        n_vocab: dict with the class counts for ``notes``, ``durations``,
            ``velocities``, ``time_signatures`` and ``key_signatures``.

    Returns:
        The compiled Keras ``Model``.
    """
    print("\n創建模型：")
    print(f"Vocabulary sizes: {n_vocab}")

    # (input layer name, vocabulary key) for every input stream, in model order.
    stream_specs = (
        ('note_input', 'notes'),
        ('duration_input', 'durations'),
        ('velocity_input', 'velocities'),
        ('time_sig_input', 'time_signatures'),
        ('key_input', 'key_signatures'),
    )

    # The layers are created stage by stage (all inputs, then all embeddings,
    # then all LSTMs) so the creation order stays the same as before.
    input_tensors = [
        Input(shape=(config.sequence_length,), name=layer_name)
        for layer_name, _ in stream_specs
    ]

    embedding_dim = 32
    embedded = [
        Embedding(n_vocab[vocab_key], embedding_dim)(tensor)
        for (_, vocab_key), tensor in zip(stream_specs, input_tensors)
    ]

    lstm_units = 128
    encoded = [LSTM(lstm_units)(sequence) for sequence in embedded]

    # Merge all streams and apply the shared layer.
    merged = concatenate(encoded)
    shared = Dense(512, activation='relu')(merged)
    shared = Dropout(config.dropout_rate)(shared)

    def build_branch(hidden_units, vocab_key, output_name):
        # Dense -> Dropout -> softmax head on top of the shared representation.
        hidden = Dense(hidden_units, activation='relu')(shared)
        hidden = Dropout(config.dropout_rate)(hidden)
        return Dense(n_vocab[vocab_key], activation='softmax', name=output_name)(hidden)

    note_output = build_branch(256, 'notes', 'note_output')
    duration_output = build_branch(128, 'durations', 'duration_output')
    velocity_output = build_branch(128, 'velocities', 'velocity_output')

    model = Model(
        inputs=input_tensors,
        outputs=[note_output, duration_output, velocity_output]
    )

    # Same loss and metric for every output, keyed by output layer name.
    output_names = ('note_output', 'duration_output', 'velocity_output')
    model.compile(
        optimizer=Adam(learning_rate=config.learning_rate),
        loss={name: 'categorical_crossentropy' for name in output_names},
        metrics={name: ['accuracy'] for name in output_names}
    )

    return model


def train_model(model, network_input, network_output, config):
    """Train ``model`` on a chronological train/validation split.

    The first ``1 - config.validation_split`` share of the samples is used
    for training and the remainder for validation.

    Args:
        model: compiled Keras model with the inputs ``note_input``,
            ``duration_input``, ``velocity_input``, ``time_sig_input``,
            ``key_input`` and the outputs named like the keys of
            ``network_output``.
        network_input: dict of input arrays keyed by input layer name.
        network_output: dict of one-hot target arrays keyed by output name.
        config: object exposing ``validation_split``, ``batch_size`` and
            ``epochs``.

    Returns:
        The ``History`` object returned by ``model.fit``.

    Raises:
        Exception: Whatever ``model.fit`` raises is re-raised after the
            model summary, losses and metric names have been printed.
    """
    print("\n開始訓練：")
    print("Input shapes:")
    for name, data in network_input.items():
        print(f"{name}: {data.shape}")
    print("\nOutput shapes:")
    for name, data in network_output.items():
        print(f"{name}: {data.shape}")

    train_size = int(len(network_input['note_input']) * (1 - config.validation_split))

    input_names = (
        'note_input', 'duration_input', 'velocity_input',
        'time_sig_input', 'key_input'
    )
    x_train = {name: network_input[name][:train_size] for name in input_names}
    x_val = {name: network_input[name][train_size:] for name in input_names}

    # The model is built with a *list* of outputs. Passing the targets as a
    # dict led to "target.shape=(None, 23), output.shape=(None, 76)": the
    # targets were paired with the wrong outputs (presumably because the dict
    # is flattened in key order, not output order - see the Keras fit docs).
    # Passing them as a list in the model's own output order avoids that.
    output_order = list(getattr(model, 'output_names', None) or network_output)
    y_train = [network_output[name][:train_size] for name in output_order]
    y_val = [network_output[name][train_size:] for name in output_order]

    callbacks = [
        ModelCheckpoint(
            "best_model.keras",
            monitor='val_loss',
            save_best_only=True,
            mode='min'
        ),
        EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=0.00001
        )
    ]

    try:
        history = model.fit(
            x_train,
            y_train,
            validation_data=(x_val, y_val),
            batch_size=config.batch_size,
            epochs=config.epochs,
            callbacks=callbacks,
            verbose=1
        )
        return history

    except Exception as e:
        # Print diagnostics, then re-raise with the original traceback.
        print(f"\n訓練錯誤：{str(e)}")
        print("\n模型配置：")
        model.summary()
        print("\n損失函數：", model.loss)
        print("\n度量：", model.metrics_names)
        raise





















def save_model_multiple_formats(model, base_name="enhanced_music_model"):
    """Save ``model`` as ``.keras``, as ``.h5`` (best effort) and as weights.

    Args:
        model: Keras model to persist.
        base_name: Path prefix; the suffixes ``.keras``, ``.h5`` and
            ``.weights.h5`` are appended to it.
    """
    # Native Keras format.
    model.save(f"{base_name}.keras")

    # Legacy HDF5 format. The format is inferred from the ``.h5`` suffix, so
    # no ``save_format`` argument is passed. This export is optional: a
    # failure is reported (with its reason) but does not stop the function.
    try:
        model.save(f"{base_name}.h5")
    except Exception as exc:
        print(f"無法保存為 HDF5 格式: {exc}")

    # Weights only.
    model.save_weights(f"{base_name}.weights.h5")

def plot_training_history(history):
    """Plot the training (non-validation) loss and accuracy curves.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``).
    """
    # (subplot index, substring selecting the curves, title, y-axis label)
    panels = (
        (1, 'loss', 'Training Loss', 'Loss'),
        (2, 'accuracy', 'Training Accuracy', 'Accuracy'),
    )

    plt.figure(figsize=(15, 5))

    for position, keyword, title, y_label in panels:
        plt.subplot(1, 2, position)
        for curve_name in history.history:
            # Skip validation curves; only training curves are drawn.
            if keyword in curve_name and 'val' not in curve_name:
                plt.plot(history.history[curve_name], label=curve_name)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

def generate_music(model, mappings, seed_sequence, n_vocab, sequence_length=50, num_steps=200):
    """Generate ``num_steps`` new events by repeatedly predicting the next one.

    The model is fed windows of class indices with shape
    ``(1, sequence_length)``. The running history therefore stays in index
    space; predicted indices are decoded only for the returned result.

    Args:
        model: object whose ``predict`` accepts a dict with the keys
            ``note_input``, ``duration_input``, ``velocity_input``,
            ``time_sig_input`` and ``key_input`` and returns the note,
            duration and velocity probabilities (as a list in that order, or
            as a dict keyed ``note_output`` / ``duration_output`` /
            ``velocity_output``).
        mappings: dict with ``int_to_note``, ``int_to_duration`` and
            ``int_to_velocity`` lookups (class index -> real value).
        seed_sequence: dict with ``notes``, ``durations``, ``velocities``,
            ``time_signatures`` and ``key_signatures``; expected to hold class
            indices in the encoding the model was trained on.
        n_vocab: unused; kept so existing callers keep working.
        sequence_length: number of most recent events fed to the model.
        num_steps: number of events to generate.

    Returns:
        dict with the decoded ``notes``, ``durations`` and ``velocities`` of
        the generated events only (the seed is not included).
    """
    # Running history in index space, seeded with the given sequence.
    note_ids = [int(v) for v in seed_sequence['notes']]
    duration_ids = [int(v) for v in seed_sequence['durations']]
    velocity_ids = [int(v) for v in seed_sequence['velocities']]

    # Time and key signature are never predicted, so their window is constant.
    time_sig_window = np.asarray(seed_sequence['time_signatures'][-sequence_length:])[np.newaxis, :]
    key_window = np.asarray(seed_sequence['key_signatures'][-sequence_length:])[np.newaxis, :]

    generated = {'notes': [], 'durations': [], 'velocities': []}

    for _ in range(num_steps):
        # Shape (1, sequence_length): a batch holding one window, with no
        # trailing feature axis.
        predictions = model.predict({
            'note_input': np.asarray(note_ids[-sequence_length:])[np.newaxis, :],
            'duration_input': np.asarray(duration_ids[-sequence_length:])[np.newaxis, :],
            'velocity_input': np.asarray(velocity_ids[-sequence_length:])[np.newaxis, :],
            'time_sig_input': time_sig_window,
            'key_input': key_window
        }, verbose=0)

        if isinstance(predictions, dict):
            predictions = [
                predictions['note_output'],
                predictions['duration_output'],
                predictions['velocity_output']
            ]

        # Greedy choice: most probable class for every output.
        next_note = int(np.argmax(predictions[0]))
        next_duration = int(np.argmax(predictions[1]))
        next_velocity = int(np.argmax(predictions[2]))

        # Feed the indices back into the history ...
        note_ids.append(next_note)
        duration_ids.append(next_duration)
        velocity_ids.append(next_velocity)

        # ... and keep the decoded values for the caller.
        generated['notes'].append(mappings['int_to_note'][next_note])
        generated['durations'].append(mappings['int_to_duration'][next_duration])
        generated['velocities'].append(mappings['int_to_velocity'][next_velocity])

    return generated

def create_midi_file(generated_sequence, output_file="generated_music.mid"):
    """Write a generated sequence to a MIDI file as consecutive notes.

    Args:
        generated_sequence: dict with equally long ``notes`` (MIDI pitch
            numbers), ``durations`` (quarter lengths) and ``velocities``.
        output_file: Path of the MIDI file to write.
    """
    midi_stream = stream.Stream()

    # The loop variables deliberately avoid the names ``note`` and
    # ``duration`` so the music21 modules of the same name stay usable.
    for pitch_number, quarter_length, velocity in zip(
        generated_sequence['notes'],
        generated_sequence['durations'],
        generated_sequence['velocities']
    ):
        # Cast to builtin types in case the values are numpy scalars.
        new_note = note.Note(int(pitch_number))
        new_note.duration = duration.Duration(float(quarter_length))
        new_note.volume.velocity = int(velocity)
        midi_stream.append(new_note)

    midi_stream.write('midi', fp=output_file)

def main():
    """Run the pipeline: load MIDI data, train, save, plot, generate a demo.

    Returns:
        ``(model, mappings, n_vocab)`` on success, or ``(None, None, None)``
        if loading, training, saving or plotting fails. A failure of the
        final music-generation demo is only reported; the trained model is
        still returned.
    """
    # Fix the random seeds.
    tf.random.set_seed(42)
    np.random.seed(42)
    try:
        print("處理MIDI文件...")
        music_data = process_midi_files('/content/midi_data2')

        print("\n準備序列...")
        network_input, network_output, n_vocab, mappings = prepare_sequences(music_data, config)

        print("\n創建模型...")
        model = create_enhanced_model(config, n_vocab)
        model.summary()

        print("\n訓練模型...")
        history = train_model(model, network_input, network_output, config)

        print("\n保存模型...")
        save_model_multiple_formats(model)

        print("\n繪製訓練歷史...")
        plot_training_history(history)

    except Exception as e:
        print(f"\n發生錯誤：{str(e)}")
        return None, None, None

    # Music-generation demo. It used to sit after the return statements and
    # was never executed. It has its own try block so that a failure here
    # does not discard the trained model.
    try:
        print("生成音樂示例...")

        # Index -> value lookups. Rebuilt with the same sorted(set(...))
        # enumeration that prepare_sequences uses for its encodings.
        generation_mappings = {
            'int_to_note': dict(enumerate(
                int(v) for v in sorted(set(music_data['notes'])))),
            'int_to_duration': dict(enumerate(
                float(v) for v in sorted(set(music_data['durations'])))),
            'int_to_velocity': dict(enumerate(
                int(v) for v in sorted(set(music_data['velocities']))))
        }

        # The first training window is the (already encoded) seed.
        seed_sequence = {
            'notes': network_input['note_input'][0],
            'durations': network_input['duration_input'][0],
            'velocities': network_input['velocity_input'][0],
            'time_signatures': network_input['time_sig_input'][0],
            'key_signatures': network_input['key_input'][0]
        }

        generated_sequence = generate_music(
            model, generation_mappings, seed_sequence, n_vocab,
            sequence_length=config.sequence_length
        )
        create_midi_file(generated_sequence, "generated_music.mid")

    except Exception as e:
        print(f"\n發生錯誤：{str(e)}")

    return model, mappings, n_vocab

if __name__ == '__main__':
    model, mappings, n_vocab = main()


處理MIDI文件...
Processing beethoven_hammerklavier_2_format0.mid...
Processing beethoven_hammerklavier_3_format0.mid...
Processing beethoven_hammerklavier_1_format0.mid...
Processing appass_2_format0.mid...
Processing appass_1_format0.mid...
Processing beethoven_hammerklavier_4_format0.mid...
Processing appass_3_format0.mid...
Processed 21142 notes

準備序列...
數據統計：
Unique notes: 76
Unique durations: 23
Unique velocities: 95
Unique time signatures: 6
Unique keys: 4

數據形狀：
Input notes shape: (21092, 50)
Input durations shape: (21092, 50)
Input velocities shape: (21092, 50)
Output notes shape: (21092, 76)
Output durations shape: (21092, 23)
Output velocities shape: (21092, 95)

確認輸出維度：
note_output shape: (21092, 76)
duration_output shape: (21092, 23)
velocity_output shape: (21092, 95)

創建模型...

創建模型：
Vocabulary sizes: {'notes': 76, 'durations': 23, 'velocities': 95, 'time_signatures': 6, 'key_signatures': 4}



訓練模型...

開始訓練：
Input shapes:
note_input: (21092, 50)
duration_input: (21092, 50)
velocity_input: (21092, 50)
time_sig_input: (21092, 50)
key_input: (21092, 50)

Output shapes:
note_output: (21092, 76)
duration_output: (21092, 23)
velocity_output: (21092, 95)
Epoch 1/2

訓練錯誤：Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 23), output.shape=(None, 76)

模型配置：



損失函數： {'note_output': 'categorical_crossentropy', 'duration_output': 'categorical_crossentropy', 'velocity_output': 'categorical_crossentropy'}

度量： ['loss', 'compile_metrics', 'note_output_loss', 'duration_output_loss', 'velocity_output_loss']

發生錯誤：Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 23), output.shape=(None, 76)


In [None]:
import os
import numpy as np
import tensorflow as tf
from music21 import *
from tensorflow.keras.layers import (
    Input, Embedding, LSTM, Dense, Dropout,
    concatenate, Flatten
)
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# Configuration parameters
class Config:
    """Plain container for the hyperparameters used by the cells below."""

    def __init__(self):
        # Length of every input window.
        self.sequence_length = 50
        # Mini-batch size and number of epochs passed to ``model.fit``.
        self.batch_size = 64
        self.epochs = 2
        # Learning rate handed to the optimizer.
        self.learning_rate = 0.001
        # Dropout rate of the shared layer.
        self.dropout_rate = 0.3
        # Fraction of the samples held out for validation.
        self.validation_split = 0.2
        # Output size of every Embedding layer.
        self.embedding_dim = 32
        # Number of units of every LSTM layer.
        self.lstm_units = 128


config = Config()

def process_midi_files(midi_path):
    """Parse the MIDI files in ``midi_path`` and extract per-note features.

    Every note of the flattened score (chords are reduced to their root)
    contributes one entry to each feature list.

    Args:
        midi_path: Directory containing ``.mid`` / ``.midi`` files (the
            extension check is case-insensitive).

    Returns:
        dict of equally long numpy arrays with the keys ``notes`` (int32),
        ``durations`` (float32), ``velocities`` (int32), ``time_signatures``
        and ``key_signatures``.

    Raises:
        ValueError: If the directory holds no MIDI file or no note could be
            extracted. Any other error is printed and re-raised.
    """
    notes = []
    durations = []
    velocities = []
    time_signatures = []
    key_signatures = []

    try:
        # Sorted so the concatenation order (and therefore the training
        # windows) does not depend on the arbitrary order of os.listdir.
        midi_files = sorted(
            f for f in os.listdir(midi_path)
            if f.lower().endswith(('.mid', '.midi'))
        )
        if not midi_files:
            raise ValueError(f"No MIDI files found in {midi_path}")

        for file in midi_files:
            try:
                print(f"Processing {file}...")
                midi = converter.parse(os.path.join(midi_path, file))

                # File-level time and key signature, repeated for every note.
                time_sigs = midi.getTimeSignatures()
                ts = time_sigs[0] if time_sigs else None
                ts_label = f"{ts.numerator}/{ts.denominator}" if ts else "4/4"
                key_label = str(midi.analyze('key'))

                # ``Stream.flat`` is the older spelling of ``Stream.flatten()``.
                flat_stream = midi.flatten() if hasattr(midi, 'flatten') else midi.flat

                for element in flat_stream.notes:
                    if isinstance(element, note.Note):
                        pitch = element.pitch.midi
                    elif isinstance(element, chord.Chord):
                        # Chords are reduced to their root pitch.
                        pitch = element.root().midi
                    else:
                        continue

                    velocity = element.volume.velocity
                    notes.append(int(pitch))
                    durations.append(float(element.duration.quarterLength))
                    # 64 is used when the note carries no velocity.
                    velocities.append(int(velocity if velocity is not None else 64))
                    time_signatures.append(ts_label)
                    key_signatures.append(key_label)

            except Exception as e:
                # A broken file must not abort the whole directory scan.
                print(f"Error processing {file}: {str(e)}")
                continue

        if not notes:
            raise ValueError("No notes were extracted from the MIDI files")

        print(f"Successfully processed {len(notes)} notes from {len(midi_files)} files")

        return {
            'notes': np.array(notes, dtype=np.int32),
            'durations': np.array(durations, dtype=np.float32),
            'velocities': np.array(velocities, dtype=np.int32),
            'time_signatures': np.array(time_signatures),
            'key_signatures': np.array(key_signatures)
        }

    except Exception as e:
        print(f"Error in process_midi_files: {str(e)}")
        raise

def validate_data(network_input, network_output, n_vocab, config):
    """Check the prepared data for structural problems.

    The checks run in this order and stop at the first failure: every input
    is 2-D with ``config.sequence_length`` columns; the output keys are
    exactly ``note_output``, ``duration_output`` and ``velocity_output``; the
    vocabulary keys are exactly the five feature names; every vocabulary
    size is a positive ``int``.

    Args:
        network_input: dict of input arrays.
        network_output: dict of target arrays (only the keys are checked).
        n_vocab: dict of vocabulary sizes.
        config: object exposing ``sequence_length``.

    Returns:
        ``True`` if all checks pass; ``False`` (after printing the reason) if
        a check fails or any exception is raised while checking.
    """
    def run_checks():
        # Input arrays: rank and window length.
        for key, value in network_input.items():
            if value.ndim != 2:
                raise ValueError(f"Input {key} should be 2D, got shape {value.shape}")
            if value.shape[1] != config.sequence_length:
                raise ValueError(f"Input {key} sequence length should be {config.sequence_length}, got {value.shape[1]}")

        # Targets: exactly the three expected heads.
        expected_outputs = {'note_output', 'duration_output', 'velocity_output'}
        if set(network_output.keys()) != expected_outputs:
            raise ValueError(f"Missing output keys. Expected {expected_outputs}, got {network_output.keys()}")

        # Vocabulary: exactly the five expected features.
        required_vocabs = {'notes', 'durations', 'velocities', 'time_signatures', 'key_signatures'}
        if set(n_vocab.keys()) != required_vocabs:
            raise ValueError(f"Missing vocabulary keys. Expected {required_vocabs}, got {n_vocab.keys()}")

        # Every size must be a positive int.
        if any(not isinstance(size, int) or size <= 0 for size in n_vocab.values()):
            raise ValueError("Invalid vocabulary sizes")

    try:
        run_checks()
        print("Data validation passed")
        return True
    except Exception as e:
        print(f"Data validation error: {e}")
        return False


In [None]:
def prepare_sequences(music_data, config):
    """Build sliding-window model inputs and one-hot targets.

    Every distinct value of a feature is assigned the index of its position
    in the sorted list of distinct values. Window ``i`` covers the encoded
    values ``i .. i + sequence_length - 1``; its target is the value at
    ``i + sequence_length``.

    Args:
        music_data: dict with the arrays ``notes``, ``durations``,
            ``velocities``, ``time_signatures`` and ``key_signatures``.
        config: object exposing ``sequence_length``.

    Returns:
        Tuple ``(network_input, network_output, n_vocab, mappings)``: int32
        input windows keyed by input layer name, one-hot targets keyed by
        output name, vocabulary sizes per feature, and the value -> index
        mapping per feature.

    Raises:
        ValueError: If the data is not longer than ``sequence_length``, so no
            window/target pair exists. Any error is printed and re-raised.
    """
    try:
        feature_names = ('notes', 'durations', 'velocities', 'time_signatures', 'key_signatures')
        seq_len = config.sequence_length

        # Without at least one full window plus a target the arrays would be
        # empty and mis-shaped, so fail early with a clear message.
        n_sequences = len(music_data['notes']) - seq_len
        if n_sequences <= 0:
            raise ValueError(
                f"Need more than {seq_len} notes to build a sequence, "
                f"got {len(music_data['notes'])}"
            )

        # value -> index mapping per feature
        mappings = {
            name: {value: idx for idx, value in enumerate(sorted(set(music_data[name])))}
            for name in feature_names
        }

        # Vocabulary size per feature
        n_vocab = {name: len(mapping) for name, mapping in mappings.items()}

        print("Vocabulary sizes:")
        for key, size in n_vocab.items():
            print(f"{key}: {size}")

        # Encode every feature once instead of once per window.
        encoded = {
            name: np.array([mappings[name][value] for value in music_data[name]], dtype=np.int32)
            for name in feature_names
        }

        # window_index[i, j] == i + j: row i selects the i-th window.
        window_index = np.arange(n_sequences)[:, None] + np.arange(seq_len)[None, :]

        input_features = {
            'note_input': 'notes',
            'duration_input': 'durations',
            'velocity_input': 'velocities',
            'time_sig_input': 'time_signatures',
            'key_input': 'key_signatures'
        }
        network_input = {
            layer_name: encoded[feature][window_index]
            for layer_name, feature in input_features.items()
        }

        # The target of window i is the element at i + seq_len, i.e. the
        # encoded array shifted by one window length; one-hot encoded.
        output_features = {
            'note_output': 'notes',
            'duration_output': 'durations',
            'velocity_output': 'velocities'
        }
        network_output = {
            layer_name: to_categorical(encoded[feature][seq_len:], num_classes=n_vocab[feature])
            for layer_name, feature in output_features.items()
        }

        print("\nInput shapes:")
        for key, value in network_input.items():
            print(f"{key}: {value.shape}")
        print("\nOutput shapes:")
        for key, value in network_output.items():
            print(f"{key}: {value.shape}")

        return network_input, network_output, n_vocab, mappings

    except Exception as e:
        print(f"Error in prepare_sequences: {str(e)}")
        raise

def create_model(config, n_vocab):
    """Build, compile and summarise the five-input / three-output model.

    Each input sequence gets its own Embedding and LSTM; the five LSTM
    outputs are concatenated, passed through a shared Dense + Dropout layer
    and then through one hidden Dense layer and a softmax layer per output.

    Args:
        config: object exposing ``sequence_length``, ``embedding_dim``,
            ``lstm_units``, ``dropout_rate`` and ``learning_rate``.
        n_vocab: dict with the class counts for ``notes``, ``durations``,
            ``velocities``, ``time_signatures`` and ``key_signatures``.

    Returns:
        The compiled Keras ``Model``.

    Raises:
        Exception: Any error raised while building is printed and re-raised.
    """
    try:
        # (input layer name, vocabulary key) per input stream, in model order.
        stream_specs = (
            ('note_input', 'notes'),
            ('duration_input', 'durations'),
            ('velocity_input', 'velocities'),
            ('time_sig_input', 'time_signatures'),
            ('key_input', 'key_signatures'),
        )

        # Layers are created stage by stage so the creation order is kept.
        input_tensors = [
            Input(shape=(config.sequence_length,), name=layer_name)
            for layer_name, _ in stream_specs
        ]
        embedded = [
            Embedding(n_vocab[vocab_key], config.embedding_dim)(tensor)
            for (_, vocab_key), tensor in zip(stream_specs, input_tensors)
        ]
        encoded = [LSTM(config.lstm_units)(sequence) for sequence in embedded]

        # Merge all streams and apply the shared layer.
        merged = concatenate(encoded)
        shared = Dense(512, activation='relu')(merged)
        shared = Dropout(config.dropout_rate)(shared)

        # (output name, vocabulary key, hidden units) per output branch.
        branch_specs = (
            ('note_output', 'notes', 256),
            ('duration_output', 'durations', 128),
            ('velocity_output', 'velocities', 128),
        )
        outputs = []
        for output_name, vocab_key, hidden_units in branch_specs:
            # The softmax layer is instantiated before the hidden layer so
            # the layers are constructed in the same order as before.
            softmax_layer = Dense(n_vocab[vocab_key], activation='softmax', name=output_name)
            hidden = Dense(hidden_units, activation='relu')(shared)
            outputs.append(softmax_layer(hidden))

        # Outputs are passed as a list.
        model = Model(inputs=input_tensors, outputs=outputs)

        output_names = [output_name for output_name, _, _ in branch_specs]
        model.compile(
            optimizer=Adam(learning_rate=config.learning_rate),
            loss={name: 'categorical_crossentropy' for name in output_names},
            metrics={name: 'accuracy' for name in output_names}
        )

        model.summary()
        return model

    except Exception as e:
        print(f"Error in create_model: {str(e)}")
        raise


In [None]:
def train_model(model, network_input, network_output, config):
    """Fit the model on an ordered (unshuffled) train/validation split.

    The leading ``1 - config.validation_split`` fraction of every array is
    used for training and the remainder for validation. The split point is
    computed from the length of the first array in ``network_input``.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        network_input: Dict mapping input names to sliceable arrays.
        network_output: Dict holding the 'note_output', 'duration_output'
            and 'velocity_output' target arrays.
        config: Object providing ``validation_split``, ``batch_size`` and
            ``epochs``.

    Returns:
        Whatever ``model.fit`` returns.

    Raises:
        Exception: Any error is printed and then re-raised unchanged.
    """
    # Targets are handed to fit() as a list in this fixed order.
    output_names = ('note_output', 'duration_output', 'velocity_output')

    try:
        # Size of the training portion, taken from the first input array.
        first_input = next(iter(network_input.values()))
        split_at = int(len(first_input) * (1 - config.validation_split))
        head = slice(None, split_at)
        tail = slice(split_at, None)

        # Split inputs and targets into training and validation parts.
        train_input = {name: arr[head] for name, arr in network_input.items()}
        val_input = {name: arr[tail] for name, arr in network_input.items()}
        train_output = [network_output[name][head] for name in output_names]
        val_output = [network_output[name][tail] for name in output_names]

        # Report the shapes of both splits, training first.
        splits = (
            ("\nTraining data shapes:", "Train", train_input, train_output),
            ("\nValidation shapes:", "Val", val_input, val_output),
        )
        for heading, label, split_inputs, split_outputs in splits:
            print(heading)
            print("Input shapes:")
            for name, arr in split_inputs.items():
                print(f"{label} {name}: {arr.shape}")
            print("\nOutput shapes:")
            for name, arr in zip(output_names, split_outputs):
                print(f"{label} {name}: {arr.shape}")

        # All three callbacks watch the validation loss.
        callbacks = [
            ModelCheckpoint("best_model.keras", monitor='val_loss', save_best_only=True),
            EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001),
        ]

        # Train the model.
        return model.fit(
            x=train_input,
            y=train_output,
            validation_data=(val_input, val_output),
            batch_size=config.batch_size,
            epochs=config.epochs,
            callbacks=callbacks,
            verbose=1,
        )

    except Exception as err:
        print(f"Error in train_model: {str(err)}")
        raise


def main(midi_path='/content/midi_data2'):
    """Run the full training pipeline: parse MIDI, build sequences, train.

    Args:
        midi_path: Directory handed to ``process_midi_files``. Defaults to
            the Colab path that was previously hard-coded, so existing
            ``main()`` calls behave as before.

    Returns:
        Tuple ``(model, history, mappings)``.

    Raises:
        ValueError: If ``validate_data`` rejects the prepared sequences.
        Exception: Any other error is printed and then re-raised unchanged.
    """
    try:
        print("Starting music generation model training...")

        # Parse the MIDI files.
        print("\nProcessing MIDI files...")
        music_data = process_midi_files(midi_path)

        # Build the input/target sequences and vocabularies.
        print("\nPreparing sequences...")
        network_input, network_output, n_vocab, mappings = prepare_sequences(music_data, config)

        # Validate the data before spending time on training.
        if not validate_data(network_input, network_output, n_vocab, config):
            raise ValueError("Data validation failed")

        # Build the model.
        print("\nCreating model...")
        model = create_model(config, n_vocab)

        # Save the mappings BEFORE training: train_model checkpoints
        # best_model.keras while fitting, so if training is interrupted the
        # checkpoint would otherwise be left without the vocabulary needed
        # to decode its predictions. np.save stores the dict as a pickled
        # object array.
        print("\nSaving mappings...")
        np.save('mappings.npy', mappings)

        # Train the model.
        print("\nTraining model...")
        history = train_model(model, network_input, network_output, config)

        print("\nTraining completed successfully!")
        return model, history, mappings

    except Exception as e:
        print(f"Error in main: {str(e)}")
        raise

# Entry point: run the training pipeline when executed as a script/cell.
if __name__ == '__main__':
    try:
        # The results are bound as module-level names.
        model, history, mappings = main()
    except Exception as e:
        # main() has already printed and re-raised the error; report it here
        # instead of letting the traceback propagate further.
        print(f"An unexpected error occurred: {e}")


Starting music generation model training...

Processing MIDI files...
Processing beethoven_hammerklavier_2_format0.mid...


  return self.iter().getElementsByClass(classFilterList)


Processing beethoven_hammerklavier_3_format0.mid...
Processing beethoven_hammerklavier_1_format0.mid...
Processing appass_2_format0.mid...
Processing appass_1_format0.mid...
Processing beethoven_hammerklavier_4_format0.mid...
Processing appass_3_format0.mid...
Successfully processed 21142 notes from 7 files

Preparing sequences...
Vocabulary sizes:
notes: 76
durations: 23
velocities: 95
time_signatures: 6
key_signatures: 4

Input shapes:
note_input: (21092, 50)
duration_input: (21092, 50)
velocity_input: (21092, 50)
time_sig_input: (21092, 50)
key_input: (21092, 50)

Output shapes:
note_output: (21092, 76)
duration_output: (21092, 23)
velocity_output: (21092, 95)
Data validation passed

Creating model...



Training model...

Training data shapes:
Input shapes:
Train note_input: (16873, 50)
Train duration_input: (16873, 50)
Train velocity_input: (16873, 50)
Train time_sig_input: (16873, 50)
Train key_input: (16873, 50)

Output shapes:
Train note_output: (16873, 76)
Train duration_output: (16873, 23)
Train velocity_output: (16873, 95)

Validation shapes:
Input shapes:
Val note_input: (4219, 50)
Val duration_input: (4219, 50)
Val velocity_input: (4219, 50)
Val time_sig_input: (4219, 50)
Val key_input: (4219, 50)

Output shapes:
Val note_output: (4219, 76)
Val duration_output: (4219, 23)
Val velocity_output: (4219, 95)
Epoch 1/2
[1m264/264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - duration_output_accuracy: 0.6461 - duration_output_loss: 1.3997 - loss: 9.6539 - note_output_accuracy: 0.0440 - note_output_loss: 4.0386 - velocity_output_accuracy: 0.0354 - velocity_output_loss: 4.2156 - val_duration_output_accuracy: 0.8547 - val_duration_output_loss: 0.6660 - val_loss: 

In [None]:
import numpy as np
import tensorflow as tf
from music21 import *
from tensorflow.keras.models import load_model

def _sample_with_temperature(preds, temperature=1.0):
    """Sample one class index from a probability vector rescaled by temperature.

    Args:
        preds: Array-like of probabilities for a single step; it is flattened
            to one vector (generate_music passes one prediction per output
            head for a batch of one).
        temperature: Positive scaling factor. Values below 1 sharpen the
            distribution, values above 1 flatten it.

    Returns:
        The sampled index as a Python int.
    """
    probs = np.asarray(preds, dtype='float64').ravel()
    # Floor exact zeros at the smallest normal float64 so np.log never yields
    # -inf (and no RuntimeWarning); the floored classes stay effectively
    # impossible to draw.
    floor = np.finfo(np.float64).tiny
    scaled = np.log(np.maximum(probs, floor)) / temperature
    # Subtract the maximum before exponentiating: at low temperatures the
    # raw values are so negative that every exp() would underflow to 0 and
    # the normalisation below would become 0/0.
    scaled -= scaled.max()
    weights = np.exp(scaled)
    weights /= weights.sum()
    return int(np.argmax(np.random.multinomial(1, weights)))


def generate_music(model_path, mappings_path, seed_length=50, num_steps=100, temperature=1.0):
    """Generate a note sequence with a trained model and write it to MIDI.

    Args:
        model_path: Path of the saved model file.
        mappings_path: Path of the saved mappings file (a pickled dict of
            value -> index vocabularies).
        seed_length: Length of the random seed sequence. Presumably this has
            to equal the sequence length the model was trained with; it is
            not checked here.
        num_steps: Number of notes to generate.
        temperature: Sampling temperature controlling randomness
            (higher = more random). Must be greater than 0.

    Returns:
        Tuple ``(output_path, generated_notes, generated_durations,
        generated_velocities)``.

    Raises:
        ValueError: If ``temperature`` is not positive.
        Exception: Any other error is printed and then re-raised unchanged.
    """
    try:
        # Fail fast, before the model is loaded, instead of dividing by zero
        # (or inverting the distribution) deep inside the sampling step.
        if temperature <= 0:
            raise ValueError(f"temperature must be > 0, got {temperature}")

        # Load the model and the mappings.
        model = load_model(model_path)
        # NOTE(security): allow_pickle=True unpickles arbitrary objects;
        # only load mapping files that come from a trusted source.
        mappings = np.load(mappings_path, allow_pickle=True).item()

        # Reverse mappings (index -> original value).
        vocab_names = ('notes', 'durations', 'velocities',
                       'time_signatures', 'key_signatures')
        reverse_mappings = {
            name: {idx: value for value, idx in mappings[name].items()}
            for name in vocab_names
        }

        # Random seed sequence for every model input (it could instead be
        # picked from the training data).
        input_vocab = {
            'note_input': 'notes',
            'duration_input': 'durations',
            'velocity_input': 'velocities',
            'time_sig_input': 'time_signatures',
            'key_input': 'key_signatures',
        }
        seed = {
            input_name: np.random.randint(0, len(mappings[vocab]), (1, seed_length))
            for input_name, vocab in input_vocab.items()
        }

        # Containers for the generated music.
        generated_notes = []
        generated_durations = []
        generated_velocities = []

        for _ in range(num_steps):
            # verbose=0: otherwise Keras prints one progress bar per
            # generated note.
            predictions = model.predict(seed, verbose=0)

            # Heads are indexed in the order note, duration, velocity.
            next_note = _sample_with_temperature(predictions[0], temperature)
            next_duration = _sample_with_temperature(predictions[1], temperature)
            next_velocity = _sample_with_temperature(predictions[2], temperature)

            # Decode the sampled indices and record them.
            generated_notes.append(reverse_mappings['notes'][next_note])
            generated_durations.append(float(reverse_mappings['durations'][next_duration]))
            generated_velocities.append(int(reverse_mappings['velocities'][next_velocity]))

            # Slide every window one step to the left; only the three
            # predicted features get a freshly sampled last element, the
            # other inputs keep the value that np.roll wrapped around.
            sampled = {
                'note_input': next_note,
                'duration_input': next_duration,
                'velocity_input': next_velocity,
            }
            for key in seed:
                seed[key] = np.roll(seed[key], -1, axis=1)
                if key in sampled:
                    seed[key][0, -1] = sampled[key]

        # Build the MIDI stream from the generated notes.
        midi_stream = stream.Stream()
        for note_val, duration_val, velocity_val in zip(
                generated_notes, generated_durations, generated_velocities):
            new_note = note.Note(pitch=note_val, quarterLength=duration_val)
            new_note.volume.velocity = velocity_val
            midi_stream.append(new_note)

        # Write the MIDI file.
        output_path = 'generated_music.mid'
        midi_stream.write('midi', fp=output_path)

        print(f"Generated music saved to {output_path}")
        return output_path, generated_notes, generated_durations, generated_velocities

    except Exception as e:
        print(f"Error generating music: {e}")
        raise

def play_midi(midi_path):
    """Play a generated MIDI file.

    The file is parsed and shown with the 'midi' format. This is best
    effort: any failure is printed rather than raised, and the function
    returns None either way.
    """
    try:
        converter.parse(midi_path).show('midi')
    except Exception as err:
        print(f"Error playing MIDI file: {err}")

# Usage example
if __name__ == '__main__':
    try:
        # Paths of the trained model and the saved mappings
        MODEL_PATH = '/content/best_model.keras'
        MAPPINGS_PATH = '/content/mappings.npy'

        # Generate music
        # NOTE(review): seed_length presumably has to match the sequence
        # length used for training (Config.sequence_length is 50) - confirm.
        output_path, notes, durations, velocities = generate_music(
            model_path=MODEL_PATH,
            mappings_path=MAPPINGS_PATH,
            seed_length=50,
            num_steps=200,  # generate 200 notes
            temperature=0.75  # adjust this value to control how adventurous the output is
        )

        # Show statistics about the generated music
        print("\nGeneration Statistics:")
        print(f"Number of notes generated: {len(notes)}")
        print(f"Average duration: {np.mean(durations):.2f}")
        print(f"Average velocity: {np.mean(velocities):.2f}")

        # Play the generated music
        play_midi(output_path)

    except Exception as e:
        # generate_music has already printed and re-raised the error;
        # report it here instead of letting the traceback propagate.
        print(f"An error occurred: {e}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 432ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2