In [476]:
import numpy as np
from hmmlearn import hmm
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


def midi_note_to_pitch(midi_note) -> str:
    """
    the function to convert midi note to pitch
    param midi_note: int
    return: str
    """

    # Equal temperament
    pitch_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    octave = (midi_note - 12) // 12 + 1
    pitch_class = midi_note % 12
    pitch_name = pitch_names[pitch_class]
    return f'{pitch_name}'


In [477]:
# Beat tracking example
import librosa

# 1. Get the file path to an included audio example
filename = "C:\\Users\\Hsieh\\Documents\\nccucs\\specialTopic\\special_topic\\src\\auto_accompany\\audio\\vocal\\input.9.wav"


# 2. Load the audio as a waveform `y`
#    Store the sampling rate as `sr`
y, sr = librosa.load(filename)


# 3. Run the default beat tracker
vocal_tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)

print('Estimated tempo: {:.2f} beats per minute'.format(vocal_tempo))

# 4. Convert the frame indices of beat events into timestamps
beat_times = librosa.frames_to_time(beat_frames, sr=sr)
beat_times 

#get A  element index multiple of 4 from 0
down_beat = beat_times[0::4]


#convert to time section group by 2 and the last elemnt cotinue to end
time_section = []
for i in range(0,len(down_beat)-1):
    time_section.append([down_beat[i],down_beat[i+1]])
time_section.append([down_beat[-1],librosa.get_duration(y=y, sr=sr)])

time_section

the_start_time = down_beat[0]
time_section

Estimated tempo: 89.10 beats per minute


[[0.13931972789115646, 2.9257142857142857],
 [2.9257142857142857, 5.712108843537415],
 [5.712108843537415, 8.475283446712018],
 [8.475283446712018, 11.00625850340136],
 [11.00625850340136, 13.49079365079365],
 [13.49079365079365, 16.32362811791383],
 [16.32362811791383, 19.086802721088436],
 [19.086802721088436, 22.076462585034015]]

In [478]:
import librosa
import numpy as np

def group_pitches_by_sections(pitch_values, time_sections):
    grouped_pitches = []

    # 遍歷每個時間區段
    for start, end in time_sections:
        # 獲取在當前時間區段內的音高
        section_pitches = [note for time, note in pitch_values if start <= time < end]
        #remove duplicate
        section_pitches = list(set(section_pitches))
        #repalce all number with ""
        section_pitches = [x.replace('1','').replace('2','').replace('3','').replace('4','').replace('5','').replace('6','').replace('7','').replace('8','').replace('9','').replace('0','') for x in section_pitches]
        #replace ♯ with #
        section_pitches = [x.replace('♯','#') for x in section_pitches]
        
        grouped_pitches.append(section_pitches)

    return grouped_pitches

def extract_pitch_and_time(y, sr):
    # 提取音高
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    
    # 轉換 frame indices 到時間戳
    times = librosa.frames_to_time(np.arange(pitches.shape[1]), sr=sr)
    
    pitch_values = []
    for t in range(pitches.shape[1]):
        index = magnitudes[:, t].argmax()
        pitch = pitches[index, t]
        
        # 只取非零的音高
        if pitch > 0:
            pitch_values.append((times[t], librosa.hz_to_note(pitch)))
            
    return pitch_values

y, sr = librosa.load(filename)
pitch_values = extract_pitch_and_time(y, sr)

grouped_pitches = group_pitches_by_sections(pitch_values, time_section)
grouped_pitches


[['B',
  'A#',
  'E',
  'A',
  'F#',
  'A',
  'C#',
  'G',
  'B',
  'F#',
  'F',
  'D#',
  'C',
  'A#',
  'F#',
  'G#',
  'F'],
 ['E',
  'A',
  'D',
  'G',
  'F#',
  'E',
  'A#',
  'F#',
  'G',
  'A#',
  'C',
  'D#',
  'G#',
  'A',
  'F',
  'D',
  'B',
  'F',
  'B'],
 ['E', 'A', 'G', 'F#', 'G', 'F', 'B', 'G', 'D#', 'A#', 'C', 'E', 'G#', 'F'],
 ['E',
  'B',
  'D',
  'A#',
  'E',
  'G#',
  'C#',
  'D#',
  'A#',
  'F#',
  'G',
  'D#',
  'A',
  'F',
  'D',
  'B',
  'F#',
  'F',
  'B'],
 ['E',
  'G',
  'B',
  'G#',
  'D',
  'C#',
  'G',
  'G#',
  'D',
  'D',
  'C#',
  'D#',
  'C#',
  'F#',
  'G',
  'C',
  'A#',
  'G#',
  'F',
  'A',
  'B'],
 ['B', 'A#', 'E', 'C', 'G#', 'F#', 'F#', 'F', 'G', 'E', 'G#', 'D#', 'F'],
 ['E',
  'A',
  'A',
  'G',
  'G#',
  'D',
  'F#',
  'E',
  'G#',
  'D',
  'D#',
  'C#',
  'G',
  'A#',
  'C',
  'D#',
  'G#',
  'A',
  'F',
  'D',
  'A',
  'F',
  'B',
  'B'],
 ['F#',
  'E',
  'G',
  'D',
  'D#',
  'G',
  'G#',
  'D',
  'A',
  'F#',
  'G',
  'D#',
  'G#',
  'F#',


In [479]:
import pretty_midi
midi_data = pretty_midi.PrettyMIDI('C:\\Users\\Hsieh\\Documents\\nccucs\\specialTopic\\special_topic\\src\\auto_accompany\\midi\\midi_output_voice.mid')
measure_list = []

  

#accroding to time section to find the note events
for i in range(0,len(time_section)):
    measure = []
    for note in midi_data.instruments[0].notes:
        time_gap = 1000
        if note.start >= time_section[i][0] and note.start < time_section[i][1]:
            loop_count = int(abs(note.start - note.end) * time_gap)

            for j in range(loop_count):
                measure.append(midi_note_to_pitch(note.pitch))
          
    measure_list.append(measure)

measure_list

split_notes_list = measure_list
split_notes_list

[['A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',
  'A#',


In [480]:
"""
to get the time singature helping setting the 
hmm model
"""

from music21 import converter

"""
convert the audio to midi then split the midi to measure
"""

midi_file = 'C:\\Users\\Hsieh\\Documents\\nccucs\\specialTopic\\special_topic\\src\\auto_accompany\\midi\\midi_output_voice.mid'

score = converter.parse(midi_file)
#get the key signature
key = score.analyze('key')
print(key.tonic.name, key.mode)
quality = ""
#replace - with b
adjust_key_tonic_name = key.tonic.name.replace('-','b')
if key.mode == 'major':
    quality = 'maj'
if key.mode == 'minor':
    quality = 'min'

key_signature = adjust_key_tonic_name +":"+ quality
key_signature

G minor


'G:min'

In [481]:
#read chord aka states
chord = pd.read_csv('transition__chord_matrix/csv_file/all_chord.csv')
chord_list = chord['chord'].unique()
chord_list.sort()
chord_list = list(chord_list[:len(chord_list)-2])
        
pitch_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
chord_list

['A:7',
 'A:dim',
 'A:maj',
 'A:maj6',
 'A:maj7',
 'A:min',
 'A:min7',
 'A:sus2',
 'A:sus4',
 'Ab:7',
 'Ab:dim',
 'Ab:maj',
 'Ab:maj6',
 'Ab:maj7',
 'Ab:min',
 'Ab:min7',
 'Ab:sus2',
 'Ab:sus4',
 'B:7',
 'B:dim',
 'B:maj',
 'B:maj6',
 'B:maj7',
 'B:min',
 'B:min7',
 'B:sus2',
 'B:sus4',
 'Bb:7',
 'Bb:dim',
 'Bb:maj',
 'Bb:maj6',
 'Bb:maj7',
 'Bb:min',
 'Bb:min7',
 'Bb:sus2',
 'Bb:sus4',
 'C#:7',
 'C#:dim',
 'C#:maj',
 'C#:maj6',
 'C#:maj7',
 'C#:min',
 'C#:min7',
 'C#:sus2',
 'C#:sus4',
 'C:7',
 'C:aug',
 'C:dim',
 'C:maj',
 'C:maj6',
 'C:maj7',
 'C:min',
 'C:min7',
 'C:sus2',
 'C:sus4',
 'D:7',
 'D:dim',
 'D:maj',
 'D:maj6',
 'D:maj7',
 'D:min',
 'D:min7',
 'D:sus2',
 'D:sus4',
 'E:7',
 'E:dim',
 'E:maj',
 'E:maj6',
 'E:maj7',
 'E:min',
 'E:min7',
 'E:sus2',
 'E:sus4',
 'Eb:7',
 'Eb:dim',
 'Eb:maj',
 'Eb:maj6',
 'Eb:maj7',
 'Eb:min',
 'Eb:min7',
 'Eb:sus2',
 'Eb:sus4',
 'F#:7',
 'F#:dim',
 'F#:maj',
 'F#:maj6',
 'F#:maj7',
 'F#:min',
 'F#:min7',
 'F#:sus2',
 'F#:sus4',
 'F:7',
 'F:dim

In [482]:
from music21 import pitch, scale

#enharmonic equivalent dictionary
enharmonic_equivalent = {'C#':'Db','D#':'Eb','F#':'Gb','G#':'Ab','A#':'Bb', 'F':'E#',
                         'Db':'C#','Eb':'D#','Gb':'F#','Ab':'G#','Bb':'A#', 'E#':'F'}

def get_scale_tones(key, scale_type):
    tonic = key.upper()
    tonic_pitch = pitch.Pitch(tonic)
    
    if scale_type.lower() == 'major':
        scales = scale.MajorScale(tonic_pitch)
    elif scale_type.lower() == 'minor':
        scales = scale.MinorScale(tonic_pitch)
    else:
        raise ValueError('Invalid scale type')
    
    return scales.getPitches()


key = score.analyze('key')

adjust_key_tonic_name = []

scale_tones = get_scale_tones(key.tonic.name, key.mode)
for pitch in scale_tones:
    #if pitch has - , replace it with b
    pitch = pitch.name.replace('-','b')
    #remove int
    pitch = pitch.replace('1','')
    adjust_key_tonic_name.append(pitch)

adjust_key_tonic_name

# Get the enharmonic equivalent, if yes append to the list
for i in range(len(adjust_key_tonic_name)):
    if adjust_key_tonic_name[i] in enharmonic_equivalent:
        adjust_key_tonic_name.append(enharmonic_equivalent[adjust_key_tonic_name[i]])

adjust_key_tonic_name

        


['G', 'A', 'Bb', 'C', 'D', 'Eb', 'F', 'G', 'A#', 'D#', 'E#']

In [483]:
"""
this stage is to check chord's component is in the scale or not
if not, remove it
"""
from pychord import Chord

chord_each_component = []
def convert_to_note_name(chord_str) -> str:
    """
    :param chord_name: str
    :return: str
    """
    chord_parts = chord_str.split(':')
    chord_name = chord_parts[0]  
    chord_type = chord_parts[1] 

    if 'min' in chord_type:
        #replace min with m
        chord_type = chord_type.replace('min', 'm')

    #if last character is 6
    if chord_type[-1] == '6':
        chord_type = chord_type.replace('6', '')
    #if last character is not num
    if chord_type[-1].isdigit() == False:
        if "maj" in chord_type:
            chord_type = chord_type.replace('maj', '')
    return chord_name + chord_type


for i in chord_list:
   
    c = Chord(convert_to_note_name(i))
    
    #if chord's component is not in the scale, add to chord_list
    if c.components() not in adjust_key_tonic_name:
      chord_list.remove(i)

print(len(chord_list))



    

54


In [484]:
"""
read the csv file and preprocess for emission probability 
in hmm model
"""
#read all_pitch_sorted.csv as dataframe
df_pitch = pd.read_csv('melody observation matrix\\csv_file\\all_pitch.csv')

#oreprocess dataframe
df_pitch.rename(columns={'Unnamed: 0':'chord'}, inplace=True)
#remove row with chord = 'start_chord' ro 'end_chord'
print(df_pitch)
df_pitch = df_pitch[df_pitch['chord'] != 'start_chord']
df_pitch = df_pitch[df_pitch['chord'] != 'end_chord']
#reset index
df_pitch.reset_index(drop=True, inplace=True)
#sort by chord column
df_pitch.sort_values(by=['chord'], inplace=True)


"""Since certain notes are very unlikely to appear when certain
chords are playing, many combinations of notes and chords
will have no observed data. We add a few “imaginary”
instances of every note observed for a short duration over
every chord"""

#add 1 to column except 'chord' column
df_pitch.iloc[:, 1:] = df_pitch.iloc[:, 1:].apply(lambda x: x + 1)

#remove the row with chord not in chord_list
df_pitch = df_pitch[df_pitch['chord'].isin(chord_list)]
df_pitch.shape
print(type(df_pitch))

           chord    C   C#   D   D#    E    F   F#    G   G#    A   A#    B
0    start_chord    3    1   9    3   20    3    6   15    8    5    5   13
1          B:maj   28  870  84  688  798   64  828  131  871  230  405  995
2         C#:maj  362  708  13  939   56  597  535  106  660   65  896  107
3         Bb:min  339  419   6  666    7  606  263   70  566   17  744   27
4         Eb:min   90  604  18  692   70  378  367    0  643   13  558  208
..           ...  ...  ...  ..  ...  ...  ...  ...  ...  ...  ...  ...  ...
106      F#:maj6    0    6   0    8    0    2   11    0   14    0    9    5
107       E:maj6    0    3   0    0    6    0    0    0    4    5    0   15
108        C:dim   11   21   0    2    0   12    6    0   13    0   16    2
109       Ab:dim    4    3   5    0   16   10    2    0    0    0    0   10
110       Bb:dim    9    8   0    7   11    1    1    0   25    4    0   26

[111 rows x 13 columns]
<class 'pandas.core.frame.DataFrame'>


In [485]:
"""
this section is to create emission matrix, and some preprocessing include 
log caculation and normalize


"""

#create 12 dim vector for each measure 
measure_list_vector = []
for measure in split_notes_list:
    temp_list = []
    for pitch in pitch_names:
        temp_list.append(measure.count(pitch)/len(measure)+ 1e-10)
    measure_list_vector.append(temp_list)
measure_list_vector

[[1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  0.09334763958497853,
  0.19206008593690987,
  1e-10,
  0.22424892713862662,
  0.1861587983832618,
  0.3041845494562232],
 [1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  0.21039354197689203,
  0.7028254289597377,
  0.08678102936337033,
  1e-10,
  1e-10,
  1e-10],
 [1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  0.11194429576148901,
  0.2929833959221746,
  0.12372790583111944,
  0.2051419390394751,
  0.2662024639457418],
 [1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  0.10586881482957422,
  0.806674338419908,
  1e-10,
  0.08745684705051783,
  1e-10,
  1e-10],
 [1e-10,
  0.2567049809429119,
  0.4109195403298851,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  0.33237547902720305,
  1e-10],
 [1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  1e-10,
  0.05103550305857988,
  0.8716715977331361,
  0.07729289950828402,
  1e-10,
  1e-10,
  1e-10],
 [0.09373203000224266,
  0.07935595179637722,
  0

In [486]:
"""
taking the dot product of the
observation vector x with the log of the appropriate row of
the melody observation matrix; this yields the loglikelihood for this chord. For each measure in the recorded
voice track, MySong stores a list containing all 60 of these
observation probabilities. 
"""

#calculate loglikelihood for each chord
loglikelihood_list = []
for measure_vector in measure_list_vector:
    temp_list = []

    for i in range(len(df_pitch)):
        temp_list.append(np.dot(measure_vector, np.log2(df_pitch.iloc[i,1:].to_numpy().astype(float))))
    loglikelihood_list.append(temp_list)




            
loglikelihood_list

[[0.6824034352565658,
  1.577977131660203,
  8.419185159483343,
  4.250860476286399,
  1.927911846153812,
  6.843214562739166,
  3.11217767195078,
  4.141511340097342,
  2.5006086670725933,
  2.967194609732191,
  0.909333411531684,
  8.557039066501074,
  3.8839053723341146,
  2.4686548905124286,
  7.494451392399989,
  3.432947335271694,
  2.76957419215428,
  1.797512872505393,
  2.1704310578659536,
  0.8275484184362776,
  7.590459041528758,
  2.6883654801014187,
  2.2067181082163727,
  1.505098232315334,
  2.103156957854997,
  6.538113864077877,
  4.999815638990252,
  5.099515831147302,
  9.307530815773678,
  2.910175469558956,
  4.839692830412884,
  5.01436757993713,
  2.320019180537412,
  1.7964132660181216,
  8.195798553235555,
  4.1229531937435695,
  3.0810043022946454,
  7.872590853232256,
  3.173249056383389,
  3.0833439243874667,
  1.7337558465499021,
  2.013188012829326,
  1.7393595846928216,
  7.4621192832567065,
  3.710494624131431,
  1.8952369265627593,
  8.509536830184517,


In [487]:
"""
emission matrix sample
[
    [0.7, 0.3]
    [0.2, 0.8]
]

matrix[0][0] stands for the probability about if chord is a, the probability of 
measure is 1 is 0.7, measure is 2 is 0.2
"""
from scipy.special import softmax




loglikelihood_list_matrix = np.array(loglikelihood_list)

emission_matrix = loglikelihood_list_matrix.transpose()

#normalize emission matrix each row to 1 ues softmax

for i in range(len(emission_matrix)):
    emission_matrix[i] = softmax(emission_matrix[i])
    
#convert nan to 0
emission_matrix = np.nan_to_num(emission_matrix)

#convert emission matrix to numpy array
emission_matrix = np.array(emission_matrix)


#to check if there is any elememt is nan
emission_matrix.shape




(54, 8)

In [488]:
transition_matrix = pd.read_csv('transition__chord_matrix/csv_file/transition_chord.csv')


#oreprocess dataframe
transition_matrix.rename(columns={'Unnamed: 0':'chord'}, inplace=True)
#remove the row with chord not in chord_list
transition_matrix = transition_matrix[transition_matrix['chord'].isin(chord_list)]
#remove the column's name not in chord_list but remain the column "chord"
transition_matrix = transition_matrix[transition_matrix.columns.intersection(chord_list)]

#remove the % in each element
transition_matrix = transition_matrix.apply(lambda x: x.str.replace('%', ''))

#convert to numpy array
transition_matrix = np.array(transition_matrix)


#normalize transition matrix each row to 1
for i in range(len(transition_matrix)):
    transition_matrix[i] = transition_matrix[i].astype(float)
    transition_matrix[i] = transition_matrix[i]/sum(transition_matrix[i])




#change dtype to float
transition_matrix = transition_matrix.astype(float)
transition_matrix.shape

print(type(transition_matrix))

<class 'numpy.ndarray'>


In [489]:
"""
to create the hmm model, we need to define the state, observation, start probability, transition probability, 
emission probability
"""

states = chord_list
n_states = len(states)

#observation that is note vector
observations_variable = measure_list_vector
n_observations = len(observations_variable)


"""
if key signature in chord_list, then set the start probability to 0.75, 0.25 left for others 
chord in chord_list
"""
if key_signature in chord_list:

        classic_factor = 0
        start_probability = np.full(n_states, 0, dtype=float)
        start_probability[chord_list.index(key_signature)] = classic_factor

        for i in range(len(start_probability)):
            if start_probability[i] == 0:
                start_probability[i] = (1-classic_factor)/(n_states-1)

        print("the key signature is in chord list")
else:
        start_probability = np.full(n_states, 1/n_states)


start_probability = np.full(n_states, 1/n_states)    


transition_probability = transition_matrix

emission_probability = emission_matrix


model = hmm.CategoricalHMM(n_components=n_states,verbose=True, n_iter=100)

model.startprob_ = start_probability
model.transmat_ = transition_probability
model.emissionprob_ = emission_probability
print(type(model))
#given the observation, predict the state

#user action
user_sing_action = np.array([[i for i in range(len(split_notes_list))]])
logprob, chord_sequence = model.decode(user_sing_action.transpose(), algorithm="viterbi")
print("logprob", logprob)
print("chord_sequence", chord_sequence)


#convert chord_sequence to chord name
chord_sequence_name = []
for i in chord_sequence:
    chord_sequence_name.append(chord_list[i])
chord_sequence_name


the key signature is in chord list
<class 'hmmlearn.hmm.CategoricalHMM'>
logprob -28.121884102990055
chord_sequence [44 43 11 34 46 14 37  5]


['F#:sus2', 'F#:min', 'B:min', 'E:min', 'F:maj', 'Bb:maj', 'Eb:maj', 'Ab:maj']

In [490]:

from pychord import Chord

chord_each_component = []
def convert_to_note_name(chord_str) -> str:
    """
    :param chord_name: str
    :return: str
    """
    chord_parts = chord_str.split(':')
    chord_name = chord_parts[0]  
    chord_type = chord_parts[1] 

    if 'min' in chord_type:
        #replace min with m
        chord_type = chord_type.replace('min', 'm')

    #if last character is 6
    if chord_type[-1] == '6':
        chord_type = chord_type.replace('6', '')
    #if last character is not num
    if chord_type[-1].isdigit() == False:
        if "maj" in chord_type:
            chord_type = chord_type.replace('maj', '')
    return chord_name + chord_type


for i in chord_sequence:
   
    c = Chord(convert_to_note_name(states[i]))
    
    chord_each_component.append(c.components())
chord_each_component

#get the first note in each chord
first_note_in_chord = []
for i in chord_each_component:
    first_note_in_chord.append(i[0])


chord_each_component

[['F#', 'G#', 'C#'],
 ['F#', 'A', 'C#'],
 ['B', 'D', 'F#'],
 ['E', 'G', 'B'],
 ['F', 'A', 'C'],
 ['Bb', 'D', 'F'],
 ['Eb', 'G', 'Bb'],
 ['Ab', 'C', 'Eb']]

In [491]:
'''
write  paino accompaniment pattern,
left hand play root note
right hand play chord as quarter note

'''

from music21 import *



def chords_to_midi(chords, file_name):
    # Create a stream object
    main_stream = stream.Stream()
   
    # Create an instrument
    piano = instrument.Piano()

    # Add the instrument to the stream
    left_hand = stream.Part()
    right_hand = stream.Part()
    
    left_hand.insert(0, instrument.Piano())  
    right_hand.insert(0, instrument.Piano())  
   
    # Create chord objects and add them to the stream
    for chord_notes in chords:
       
        root_note_str = chord_notes[0]
        third_note_str = chord_notes[1] 
        fifth_note_str = chord_notes[2] 
       

        #left hand
        for i in range(1):
            root_note = note.Note(root_note_str+'3')
            root_note.duration.type = 'whole'
           
            left_hand.append(root_note)

    

       
      
      
        #right hand
        for i in range(4):
                right_notes= chord.Chord([third_note_str+'4',fifth_note_str+'4'])    
                right_notes.duration.type = 'quarter'

                right_hand.append(right_notes)
           
    # Write the stream to a MIDI file
    main_stream.insert(0, left_hand)
    main_stream.insert(0, right_hand)

    midi_file_path = 'accompaniment_file/midi/'  +file_name + '.mid'
    #set bpm
    main_stream.insert(0, tempo.MetronomeMark(int(vocal_tempo)))
    # main_stream.timeSignature = meter.TimeSignature('4/4')
   
    main_stream.write('midi', fp=midi_file_path)


file_name = 'chords_output'

chords_to_midi(chord_each_component, file_name)


In [492]:
'''
write drum accompaniment pattern
each bar has 4 beat as following:

'''
from music21 import *

# Initialize stream and tempo
s = stream.Stream()
bpm = tempo.MetronomeMark(int(vocal_tempo))  # replace 120 with your desired BPM
s.append(bpm)

# Define the drum pattern
pattern = [
    (36, 36),  # Bass Drum + Closed Hi-hat
    ( 38,42 ),  # Snare Drum + Closed Hi-hat
    (36, 36),  # Bass Drum + Closed Hi-hat
     ( 38,42 )   # Snare Drum + Closed Hi-hat
]

# Define the number of measures
num_measures = len(chord_each_component)

# Create the drum track
for _ in range(num_measures):
    for p in pattern:
        for midi_pitch in p:
            n = note.Note()
            n.pitch.midi = midi_pitch
            n.duration = duration.Duration(1/2)
            s.append(n)

# Write to a MIDI file
s.write('midi', fp='accompaniment_file/midi/drum_output.mid')


'accompaniment_file/midi/drum_output.mid'

In [493]:
from midi2audio import FluidSynth

fs = FluidSynth('C:\\Users\\Hsieh\\Documents\\nccucs\\specialTopic\\special_topic\\src\\data_process\\soundfont\\Ultimate Acoustic Session Kit.sf2')
fs.midi_to_audio('C:\\Users\\Hsieh\\Documents\\nccucs\\specialTopic\\special_topic\\src\\data_process\\accompaniment_file\\midi\\drum_output.mid', 'output.wav')


In [494]:
'''
convert midi to wav
'''
import subprocess

midi_file = "C:\\Users\\Hsieh\\Documents\\nccucs\\specialTopic\\special_topic\\src\\data_process\\accompaniment_file\\midi\\chords_output.mid"
wav_file = "accompaniment_file/wav/chords_output.wav"




def convert_midi_to_wav(midi_path, wav_path):
    fluidsynth_cmd = ['fluidsynth', '-F', wav_path]
   
    fluidsynth_cmd += [midi_path]

    subprocess.run(fluidsynth_cmd)


convert_midi_to_wav(midi_file, wav_file)

print("complte")





complte


In [495]:
'''
adjust the chord file and the vocal file to the same length,
including the following steps:
'''
from pydub import AudioSegment

vocal_start_time_in_ms = the_start_time * 1000

# Load the audio files
sound1 = AudioSegment.from_mp3("C:\\Users\\Hsieh\\Documents\\nccucs\\specialTopic\\special_topic\\src\\auto_accompany\\audio\\vocal\\input.9.wav")[vocal_start_time_in_ms:]
sound2 = AudioSegment.from_file("C:\\Users\\Hsieh\\Documents\\nccucs\\specialTopic\\special_topic\\src\\data_process\\accompaniment_file\\wav\\chords_output.wav")
sound3 = AudioSegment.from_file("output.wav")


# Determine the shortest audio length
min_length = min(len(sound1), len(sound2))

# Trim the audio files to the shortest length
sound1 = sound1[:min_length]
sound2 = sound2[:min_length]
sound3 = sound3[:min_length]

#adjust the volume
sound1 = sound1 - 10
sound2 = sound2 -5
sound3 = sound3 +10


# Overlay the audio files
combined = sound1.overlay(sound2)
combined = combined.overlay(sound3)
# Save the result as MP3
combined.export("C:\\Users\\Hsieh\\Documents\\nccucs\\specialTopic\\special_topic\\src\\data_process\\accompaniment_file\\combined\\combined.wav", format='mp3')


<_io.BufferedRandom name='C:\\Users\\Hsieh\\Documents\\nccucs\\specialTopic\\special_topic\\src\\data_process\\accompaniment_file\\combined\\combined.wav'>