In [1]:
import os
import yaml
import random
import json
import librosa
import numpy as np
import soundfile as sf

from IPython.display import Audio, display

In [2]:
import os
from openai import OpenAI

import sys
import os
import torchaudio
import soundfile as sf

#The inner MU-LLaMA folder is appended to the import search path (presumably so that the `util.misc` and `llama` imports below resolve).
#The folder is given relative to the current working directory, so the notebook has to be started from the folder that contains the MU-LLaMA clone.
subfolder_path = os.path.abspath("MU-LLaMA/MU-LLaMA")
sys.path.append(subfolder_path)

from util.misc import *

import torch.cuda
import llama

import pretty_midi
import music21

In [6]:
#Root path of the dataset files.
#The cells below read the 'train', 'validation' and 'test' subfolders of this path; each track folder inside them
#is read for 'all_src.mid', 'mix.flac'/'mix.json' and the 'stems' and 'submixes' subfolders.
slakh_path = "/engram/naplab/shared/Slakh2100/slakh2100_flac_redux"

In [3]:
#Functions for getting the music description from MU-LLaMA.
#Both of these functions are adapted from the MU-LLaMA repo.
#For detailed information, refer to: https://github.com/shansongliu/MU-LLaMA

def load_and_transform_audio_data( #This function is to load and preprocess the audio for MU-LLaMA model
    audio_paths,
    sample_rate=24000
):
    """Load audio files and return them as one batch of mono waveforms.

    Every file is read with soundfile, resampled to `sample_rate` when its
    own sampling rate differs, and down-mixed to a single channel by
    averaging over the channels.

    Args:
        audio_paths: Iterable of paths to audio files readable by soundfile.
        sample_rate: Target sampling rate in Hz.

    Returns:
        A tensor of shape (number of paths, number of samples). The clips are
        combined with torch.stack, so they must all have the same length
        after resampling.

    Raises:
        Errors raised by sf.read are not handled here and propagate to the
        caller (the dataset loops in this notebook catch sf.LibsndfileError).
    """
    audios = []
    for path in audio_paths:
        #sf.read is used instead of torchaudio.load. With always_2d=True it returns
        #an array of shape (frames, channels), also for mono files.
        waveform, sr = sf.read(path, always_2d=True)
        #Transposed to (channels, frames) so that resampling runs along the time axis
        #and the mean below is taken over the channels.
        waveform = torch.tensor(waveform.T)
        if sample_rate != sr:
            waveform = torchaudio.functional.resample(waveform, orig_freq=sr, new_freq=sample_rate)
        waveform = torch.mean(waveform, 0) #Down-mix to mono: (channels, frames) -> (frames,)
        audios.append(waveform)
    return torch.stack(audios, dim=0)

def multimodal_generate( #This function is for generating music description using MU-LLaMA
        audio_path,
        audio_weight,
        prompt,
        cache_size,
        cache_t,
        cache_weight,
        max_gen_len,
        gen_t, top_p
):
    """Generate a text answer about one audio file with the MU-LLaMA model.

    The function uses the module-level `model` object (created by llama.load
    in a later cell) and the `llama` module, so the model has to be loaded
    before the first call.

    Args:
        audio_path: Path of the audio file to describe.
        audio_weight: Weight stored next to the audio tensor in the 'Audio' input.
        prompt: Instruction text; it is wrapped with llama.format_prompt before tokenization.
        cache_size: Forwarded unchanged to model.generate.
        cache_t: Forwarded unchanged to model.generate.
        cache_weight: Forwarded unchanged to model.generate.
        max_gen_len: Forwarded to model.generate as max_gen_len.
        gen_t: Forwarded to model.generate as temperature.
        top_p: Forwarded to model.generate as top_p.

    Returns:
        The first generated result with leading and trailing whitespace removed.
    """
    audio = load_and_transform_audio_data([audio_path])
    inputs = {'Audio': [audio, audio_weight]}
    #A single prompt is encoded with a begin-of-sequence token and without an end-of-sequence token
    prompts = [model.tokenizer.encode(llama.format_prompt(prompt), bos=True, eos=False)]
    with torch.cuda.amp.autocast():
        results = model.generate(inputs, prompts, max_gen_len=max_gen_len, temperature=gen_t, top_p=top_p,
                                 cache_size=cache_size, cache_t=cache_t, cache_weight=cache_weight)
    return results[0].strip()

In [4]:
#The MU-LLaMA model for high-level music description is defined
#All checkpoint files are under one folder, so the folder is written once and the individual paths are derived from it
mullama_ckpt_dir = "/home/sd3705/music_gen_2024f/music_gen_project/MU-LLaMA/MU-LLaMA/ckpts"
model = llama.load(
    f"{mullama_ckpt_dir}/checkpoint.pth",
    f"{mullama_ckpt_dir}/LLaMA",
    mert_path=f"{mullama_ckpt_dir}/MERT-v1-330M",
    knn=True,
    knn_dir=mullama_ckpt_dir,
    llama_type="7B",
)
model.eval()

Loading LLaMA-Adapter from /home/sd3705/music_gen_2024f/music_gen_project/MU-LLaMA/MU-LLaMA/ckpts/checkpoint.pth


  return self.fget.__get__(instance, owner)()


model args: ModelArgs(dim=4096, n_layers=32, n_heads=32, n_kv_heads=None, vocab_size=-1, multiple_of=256, ffn_dim_multiplier=None, norm_eps=1e-05, max_batch_size=1, max_seq_len=8192, w_bias=True, w_lora=True, lora_rank=16)


  _C._set_default_tensor_type(t)


LLaMA_adapter(
  (mert_model): MERTModel(
    (feature_extractor): HubertFeatureEncoder(
      (conv_layers): ModuleList(
        (0): HubertGroupNormConvLayer(
          (conv): Conv1d(1, 512, kernel_size=(10,), stride=(5,), bias=False)
          (activation): GELUActivation()
          (layer_norm): GroupNorm(512, 512, eps=1e-05, affine=True)
        )
        (1-4): 4 x HubertNoLayerNormConvLayer(
          (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,), bias=False)
          (activation): GELUActivation()
        )
        (5-6): 2 x HubertNoLayerNormConvLayer(
          (conv): Conv1d(512, 512, kernel_size=(2,), stride=(2,), bias=False)
          (activation): GELUActivation()
        )
      )
    )
    (feature_projection): MERTFeatureProjection(
      (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (projection): Linear(in_features=512, out_features=1024, bias=True)
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): HubertEncode

In [5]:
#ChatGPT API client is defined.
#The API key is read from the OPENAI_API_KEY environment variable so that it does not have to be saved in the notebook.
#If the variable is not set, the placeholder below is used and the API key should be inserted in its place.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", "{INSERT CHATGPT API KEY HERE}"),
)

In [24]:
#Train Set
#For every track of the train set the key and tempo are estimated from the MIDI file, and the .json file of
#every audio file of the track (mixture, stems, submixes) is updated with the key, tempo and a text description
train_dir = os.path.join(slakh_path,'train')
for track_id in os.listdir(train_dir):
    print(track_id) #Track ID is printed to follow the progress
    track_dir = os.path.join(train_dir,track_id)
    midi_file = os.path.join(track_dir,'all_src.mid')

    #Tempo is estimated with pretty_midi, key is estimated with music21
    track_tempo = pretty_midi.PrettyMIDI(midi_file).estimate_tempo()
    track_key = str(music21.converter.parse(midi_file).analyze('key'))

    #Extension-less paths of all audio files: full mixture first, then the stems, then the submixes
    path_list = [os.path.join(track_dir,'mix')]
    for audio_folder in ('stems','submixes'):
        audio_folder_dir = os.path.join(track_dir,audio_folder)
        path_list.extend(
            os.path.join(audio_folder_dir,os.path.splitext(flac_file)[0])
            for flac_file in os.listdir(audio_folder_dir)
            if flac_file.endswith('.flac')
        )

    for curr_path in path_list:
        json_path = curr_path+'.json'
        with open(json_path, 'r') as metadata_file:
            curr_json = json.load(metadata_file) #Previously created .json file is loaded

        instruments = curr_json['instrument']

        try:
            #High-level music description is created using MU-LLaMA
            mullama_description = multimodal_generate(
                audio_path=curr_path+'.flac',
                audio_weight=1,
                prompt='Describe the music.',
                cache_size=100,
                cache_t=20.0,
                cache_weight=0.0,
                max_gen_len=512,
                gen_t=0.6,
                top_p=0.8,
            )
        except sf.LibsndfileError:
            #The audio file could not be read, so the description only contains the instrument list
            print('.flac error occured.')
            new_description = f'Instruments: {instruments}.'
        else:
            #MU-LLaMA description is given to GPT together with key and tempo to generate an overall description
            gpt_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": f"Create a short simple overall musical description with just a sentence using the following information. Do not include any instrument name. General description:{mullama_description}, Tempo:{track_tempo}, Key:{track_key}",
                    }
                ],
                model="gpt-4o",
            )
            #The instrument list is put in front of the answer of GPT
            new_description = f'Instruments: {instruments}. {gpt_completion.choices[0].message.content}'

        #Key, tempo and description are updated and the .json file is saved
        curr_json['key'] = track_key
        curr_json['bpm'] = round(track_tempo,2)
        curr_json['description'] = new_description

        with open(json_path, 'w') as json_file:
            json.dump(curr_json, json_file, indent=4)
            

Track01279
Track01404
Track00813
Track00760
Track01449
Track00390
Track00918
Track00670
Track00202
Track01030
Track00128
Track00508
Track01268
Track00376




Track00628
Track00880
Track00231
.flac error occured.
Track00306
Track00475
Track00526
Track00514
.flac error occured.
Track00407
Track00290
Track01062
Track01198
Track01356
Track00696
Track00193
Track00872




Track00352
Track01208
Track00544
Track00874
Track01425
Track00428
Track00948
Track00434
Track01459
Track01168
Track00200
Track01421
Track00922
Track00552
Track00329
Track00314
Track00351
Track00040
Track00636
Track00635
Track01443
Track00003




Track00166
Track01043




Track00761
Track00913
Track00023
Track00007
Track00816
Track00056
Track00608
Track00669
Track00477
Track01021
Track00777
Track01463
Track01489
Track01259
Track01341
Track00589
Track00843
Track00062
Track00331
Track00027
Track01440
Track00582
Track00606
Track00722
Track01126
Track00814
Track00309
Track01271
Track00717
Track01483
Track01427
Track00921
Track00564
Track00693
Track00406
Track01039
Track01121
Track00641
Track00154
Track01174
Track00561




Track00257
Track00574
Track01058
Track00855
Track01195
Track01399
Track00463
Track01206
Track00426
Track00718
Track01397
Track01050
Track00269
Track01290
Track00234
Track00518
Track00983
Track01488
Track00612
Track00381
Track00469
Track00189
Track01138
Track00215
Track00523
Track00656
Track01381
Track01433
Track01335
Track00681
Track01487
Track00083
Track00671
Track00894
Track00835
Track00840
Track00897




Track01002
Track00432
Track00967
Track00362
Track00540
Track01266
Track00780
Track00516
Track00923
Track01213
Track00303
.flac error occured.
Track00618
Track00453
Track00268
.flac error occured.
Track01076
Track01245
Track00970
Track01150
Track01042
Track00621
Track00914
Track01114
Track00229
Track00050
Track01370
Track00392
Track00172




Track01010
Track01418
Track00099
Track00594
Track00974
Track01376
Track01115
Track00398
Track00789
Track00382
Track00349




Track00702
Track00157
Track00682
Track00098
.flac error occured.
Track01392
Track01249
Track00727
Track01430
Track00396
Track00625
Track00087
Track00139
Track00367
Track00304
Track00404
Track00541
Track00210




Track01492
.flac error occured.
Track00307
Track00347
Track01125
Track01074
Track00197
Track01361
Track01196
Track00610
.flac error occured.
Track00455
Track00080
Track00399
Track00467
Track01394
Track01334
Track00102
.flac error occured.
Track00069
Track01197
Track00143
Track01476
Track01360
Track00954
Track01420
Track00672
Track00515
Track00779
Track00038
Track00419
Track00685
Track00867
Track01014
Track00239
Track00340
Track00581
Track00153
Track01055
Track01060
Track00057
Track00198




Track00073
Track00863
Track01284
Track01411
Track01247
Track01088
Track00705
Track01223
Track00253
Track00395
Track01342
Track01089
Track01031
Track01170
Track01265
Track00912




Track00638
Track00976
Track00617
Track00205
Track00089
Track00947
Track01264
Track00680
Track01110
Track00052
Track01148
Track01204
Track01023
Track00631
Track01083
Track00650
Track00084
Track00479
Track01016
Track01481
Track01100
Track01080
Track01384
Track01075
Track01383
Track00420
Track00067
Track00391
Track00127
Track00565
Track01097
Track01347
Track00327
Track00545
Track00137
Track01393
Track01053
Track00975
Track00896
Track01260
Track00430
Track00086
Track00649
Track00026
Track00261
Track00159
Track00902
Track00968
Track00339
Track00091
Track00402
Track00201
Track01192
Track00061
Track00868
Track01203
Track00709
Track01380
Track00458
Track00460
Track00016
Track01398
Track01474
Track01310
Track00259
Track00838
Track01500
Track00449
Track00345
Track00415
Track01008
Track00878
Track01118
Track00465
Track00459




Track00221
Track00266
Track00939
Track00676
Track00041
Track01188
Track00924
Track00488
Track00547
Track01332
.flac error occured.
Track01479
Track00926
Track00342
Track00065
Track00746
Track00542
Track01321
Track00928
Track00940
Track00585
Track00149
Track00427
.flac error occured.
Track01408
Track00737
Track00064
Track01396
Track00298
Track00076
Track00483




Track00955
Track00332
Track00527
Track00664
Track00712
Track01494
Track00285
Track00213
Track01201
.flac error occured.
.flac error occured.
Track00686
Track00088
Track01251
Track00726
Track00247
Track00724
Track01318
Track01426
Track00597
Track01350
Track00323
Track00384
Track00951
Track00363
Track00242
Track01022
.flac error occured.
Track00416
Track00097
Track00216
Track00152
Track00973
Track01314
Track00950
Track00634
Track00754
Track00588
Track01362
Track00879
Track00907
Track00236
Track00888
Track01013
Track00267
.flac error occured.
Track00807
Track00122
Track00689
Track00320
Track01428
Track01164
Track00507
Track00444
.flac error occured.
Track00836
Track01122
Track00725
Track00397
Track01288
Track01178
Track00903
.flac error occured.
Track00435
.flac error occured.
Track00538
Track01199
Track01092
Track00305
Track00584
.flac error occured.
Track00112




Track00466
Track00494
Track00548




.flac error occured.
Track00517
Track01073
.flac error occured.
Track00093
Track00060
.flac error occured.
Track00831
Track00804
Track00377
Track00909
Track00770
Track00158




Track00148
Track00313
Track00249
.flac error occured.
Track00412
Track00772




Track00263
Track00375
Track00695
Track00033
Track01313
Track00529
Track00032
Track00721
Track00765
Track00828
Track00691




Track00311
.flac error occured.
Track00145
Track00870
Track01141
Track00203
Track01275
Track00499
Track01226
Track00573
Track00272
Track00022
Track00900
Track01142
Track00368
Track00123
Track01037
Track01155
Track00211
Track00844
Track00533
Track00663
Track00004
Track00413
Track01027
Track00771
Track00414
Track00596
Track01056
Track00254




.flac error occured.
Track00490
Track01051
Track01112
.flac error occured.
Track01385
Track00869
Track01108
Track01246
Track00115
Track01214
Track00767
Track00072
.flac error occured.
Track00373
Track01364
Track00857
Track01311
Track00230
Track00535
Track00383
Track00441
.flac error occured.
Track00785
Track00103
Track00324
Track00569
Track01095
Track00209




Track01466
Track00186
Track01025
Track00658
Track00587
Track01135
Track00673
Track01048
Track01163
Track00270
Track01462
Track01458
Track00577
Track00165
Track00667
Track01205
Track00853
Track00936
Track00028
Track01365
Track01064
Track01232
Track00509
Track00226
Track00659
Track00865
Track00937
Track01146
Track01323
Track00559
Track00334




Track00965
Track00776
Track01212
.flac error occured.
Track00743
Track00482
Track00795
Track00493
Track00243
Track00567
Track00839
Track01237
Track00018
Track01473
Track01007
.flac error occured.
Track01106
Track00446
Track00819
Track01177
.flac error occured.
Track01337
Track01346
Track00403
Track01491
Track00861
Track00995
.flac error occured.
Track00605
Track00882
Track00180
Track00114
Track00502
Track00805
.flac error occured.
Track00222
Track00046
Track01078
Track01405
Track01482
Track01085
Track00117
Track00811
Track01343
Track00456
Track00679
Track00369
Track01461
Track01453
Track00543
Track00066
Track01395
Track00977
Track01359
Track00002
Track00468
Track01069
Track00557
Track00992
Track01269
Track01486
Track00570
Track00017
Track00812
Track01047
.flac error occured.
Track01278
Track00151
Track01497
Track01303
Track00410
Track00300




Track00778
Track01336
Track00946
Track00841
Track00986
Track00549
.flac error occured.
Track00708
Track00258
Track00447
Track00522
Track01322
Track00111
Track00454




Track00337
Track01029
Track00957
Track00354
Track01243
Track01091
Track00167
Track00730
Track01270
Track00081
Track00768
Track00583
Track01454
Track01101
Track00187
Track00100
Track01415
.flac error occured.
Track00971
Track00837
.flac error occured.
.flac error occured.
Track00901
Track00822
Track01302
.flac error occured.
Track01273
Track00282
Track00763
Track00341
Track01026
Track00125
Track00043
Track00781
.flac error occured.
Track01012
Track01409
Track01298
Track00171




Track00586
Track00528
Track01183
Track00825
Track00471
.flac error occured.
Track00731
Track00162
Track00195
Track01250
.flac error occured.
Track00524
Track00745
Track01153
Track00739
Track00451
.flac error occured.
Track00330
Track00860
Track00491
Track00433
Track00834
Track00015
Track00274




Track00355
Track00481
.flac error occured.
Track00132
Track00603
Track00156
Track00194
Track00915
Track00047
Track00448
Track00353
Track00999
Track00472
Track00288
Track00511
Track00035
Track00442
.flac error occured.
Track01166
Track01300
Track00759
Track00312
Track00893
Track00006
Track00147
Track00358
Track01377
Track00364
Track00784
Track00118
Track00113
.flac error occured.
Track01236
Track00276
Track00830
Track00071
Track00684
Track00070
Track00134
Track00733
Track01104
Track00104
Track00654
Track01218
Track00626
Track01221
.flac error occured.
Track01363
Track00497
Track00241
Track00876
Track00889
Track00892
Track00461
Track01049
Track00054
Track00485
Track00852
Track00219
Track00338




Track00120
Track01255
Track00930
Track00735
Track01102
Track01103
Track00380
Track00616
Track00284
Track00627
Track00438
Track00815
Track00082
Track00129
.flac error occured.
Track00400
Track01238




Track01119
Track00464
Track00985
Track01253
Track01072
Track00360




Track00492
Track01468
Track00988
Track00265
Track00039
Track01038
Track00845
Track00135
Track01296
Track00106
Track00875
Track01123
Track00632
Track01390
Track00037
Track00793
Track01032
Track00728
Track00177
Track00059
Track01281
Track00480
Track00163
Track01149
Track00101
Track00929
Track00295
Track00489
Track01293
.flac error occured.
Track00496
Track00105
Track00619
Track00297
Track00668
Track00944
Track01258
Track00908
Track00911
Track00710
Track01354
Track00592
.flac error occured.
Track01340
Track01128
Track00978
Track00850
Track00025
Track01414
Track01215
Track00751
Track00980
Track00393
Track01019
.flac error occured.
Track01165
.flac error occured.
.flac error occured.
Track01262
Track01173
Track00530
Track00184
Track01355
Track01252
Track00001
Track00963
Track00563
Track01287
Track01158
Track00758
Track00108
Track00692
Track00662
Track00742
.flac error occured.
Track00657
Track00277
Track00173
.flac error occured.
Track00136
Track00645
Track00661
Track01386
Track00372
Track0



Track00953
Track00817
Track00178
.flac error occured.
Track01004
Track00794
Track00756
.flac error occured.
Track00866




Track00734
Track00609
Track01082
Track00053
Track00653
Track00294
Track00188
Track00335
Track00666
.flac error occured.
Track00887
.flac error occured.
Track00792




Track00604
Track00520
Track00011
Track00248
Track01277
Track01375
Track00600
Track00629
Track00905
Track00938
Track00579
Track00630
Track00998
Track00042
Track00898
Track00553
.flac error occured.
Track00637
Track00787
Track00503
Track00595
Track00301
Track01228
Track01225
Track00966
Track00551
Track01475
Track01389
Track01432
Track00677
.flac error occured.
Track00357
Track00615
Track00788
Track01333
.flac error occured.
Track01315
Track00289
Track00436
Track00325
Track01096
Track00279
Track00809
Track01439
Track01324
Track00227
Track00774
Track00429
.flac error occured.
Track00030
Track00512
Track00791
.flac error occured.
Track01113
Track00278
Track01369
Track00639
Track01374
Track01063
Track00984
Track00370
Track00374
.flac error occured.
Track00207
Track01193
Track00993
Track01134
Track00782
Track00055
Track00020




Track00260
Track01041
Track00958
Track00917
.flac error occured.
Track00823




Track00943
Track00633
Track00235
Track00849
Track00593
Track00095
Track00614
Track01480
Track00748
.flac error occured.
Track00379
Track01034
Track01320
Track00856
Track01434
Track00262
Track01465
Track00525
Track00423
Track01185
Track00169
Track01105
Track00832
Track01145
Track00036
Track01496
Track00972
Track01035
Track00487
Track00871
Track01216
Track01368
Track00131
Track00005


In [25]:
#Validation Set
#The same processing as the training set block, applied to the tracks in the validation set
validation_dir = os.path.join(slakh_path,'validation')
for track_id in os.listdir(validation_dir):
    print(track_id) #Track ID is printed to follow the progress
    track_dir = os.path.join(validation_dir,track_id)
    midi_file = os.path.join(track_dir,'all_src.mid')

    #Tempo is estimated with pretty_midi, key is estimated with music21
    track_tempo = pretty_midi.PrettyMIDI(midi_file).estimate_tempo()
    track_key = str(music21.converter.parse(midi_file).analyze('key'))

    #Extension-less paths of all audio files: full mixture first, then the stems, then the submixes
    path_list = [os.path.join(track_dir,'mix')]
    for audio_folder in ('stems','submixes'):
        audio_folder_dir = os.path.join(track_dir,audio_folder)
        path_list.extend(
            os.path.join(audio_folder_dir,os.path.splitext(flac_file)[0])
            for flac_file in os.listdir(audio_folder_dir)
            if flac_file.endswith('.flac')
        )

    for curr_path in path_list:
        json_path = curr_path+'.json'
        with open(json_path, 'r') as metadata_file:
            curr_json = json.load(metadata_file) #Previously created .json file is loaded

        instruments = curr_json['instrument']

        try:
            #High-level music description is created using MU-LLaMA
            mullama_description = multimodal_generate(
                audio_path=curr_path+'.flac',
                audio_weight=1,
                prompt='Describe the music.',
                cache_size=100,
                cache_t=20.0,
                cache_weight=0.0,
                max_gen_len=512,
                gen_t=0.6,
                top_p=0.8,
            )
        except sf.LibsndfileError:
            #The audio file could not be read, so the description only contains the instrument list
            print('.flac error occured.')
            new_description = f'Instruments: {instruments}.'
        else:
            #MU-LLaMA description is given to GPT together with key and tempo to generate an overall description
            gpt_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": f"Create a short simple overall musical description with just a sentence using the following information. Do not include any instrument name. General description:{mullama_description}, Tempo:{track_tempo}, Key:{track_key}",
                    }
                ],
                model="gpt-4o",
            )
            #The instrument list is put in front of the answer of GPT
            new_description = f'Instruments: {instruments}. {gpt_completion.choices[0].message.content}'

        #Key, tempo and description are updated and the .json file is saved
        curr_json['key'] = track_key
        curr_json['bpm'] = round(track_tempo,2)
        curr_json['description'] = new_description

        with open(json_path, 'w') as json_file:
            json.dump(curr_json, json_file, indent=4)
            

Track01675
Track01836
Track01628
Track01685
Track01620
Track01520
Track01679
Track01869




Track01703
Track01595
Track01751
Track01838
Track01618
Track01598
Track01689
Track01564
Track01849
Track01641
Track01802
Track01694
Track01580
Track01582
Track01754
Track01837
Track01516
Track01512
Track01615
Track01756
Track01746
Track01653
Track01611
Track01851
Track01630
Track01715
Track01526
Track01528
Track01586
Track01619
Track01643
Track01531
Track01691
Track01732
Track01696
Track01791
Track01731
Track01743
Track01646
Track01713
Track01688
Track01788




Track01747
Track01709
Track01783
Track01773
Track01613
Track01772
Track01739
Track01539
Track01813
Track01578
Track01702
Track01825
Track01660
Track01716
Track01815
Track01777
Track01645
Track01625
Track01793
Track01807
Track01766
Track01514
Track01656
Track01672
Track01769
Track01805
Track01556
Track01787
Track01738
Track01720
Track01644
Track01757
Track01820
Track01560
Track01511
Track01566
Track01760
Track01690
Track01770
Track01579
Track01649
Track01681
Track01843
Track01540
Track01742
Track01591
Track01551
Track01819
Track01848
Track01527
Track01606
Track01621
Track01509
Track01519
Track01782
Track01616
Track01695
Track01749
Track01568
Track01806
Track01605
Track01548




Track01730
Track01574
Track01626
Track01523
Track01642
Track01874
Track01521
Track01635
Track01552
Track01575
Track01707
Track01708
Track01796
Track01530
Track01529
Track01706
Track01510
Track01518
Track01657
Track01678




Track01557
Track01781
Track01697
Track01563
Track01830
Track01561
Track01852
Track01723
Track01698
Track01865
Track01604
Track01588
Track01554
Track01809
Track01570
Track01594
Track01639
Track01856
Track01632
Track01676
Track01779
Track01590
Track01826




Track01508
Track01744
Track01654
Track01875
Track01558
Track01624
Track01870




Track01804
Track01763
Track01785
Track01666
Track01524
Track01502
Track01821
Track01501
Track01537
Track01724
Track01814
Track01741
Track01542
Track01701
Track01602
Track01840
Track01776
Track01719
Track01790
Track01725
Track01758
Track01775
Track01828
Track01513
Track01559
Track01522
Track01827
Track01692
Track01733
Track01714
Track01664
Track01823
Track01855
Track01735
Track01677
Track01774
Track01867
Track01753
Track01799
Track01845
Track01718
Track01623
Track01670
Track01824
Track01812
Track01634
Track01607
Track01792
Track01532
Track01700
Track01571
Track01835
Track01762
Track01768
Track01592
Track01614
Track01797
Track01795
Track01717
Track01665
Track01599
Track01778
Track01693
Track01546
Track01803
Track01565
Track01652
Track01629
Track01789
Track01745
Track01576
Track01525




Track01662
Track01515




Track01674
Track01503
Track01710
Track01583
Track01612
Track01581
Track01705
Track01727
Track01750
Track01584
Track01841
Track01861
Track01505
Track01573
Track01545
Track01650
Track01873
Track01597
Track01771
Track01507
Track01517
Track01659
Track01647
Track01622
Track01860
Track01858
Track01631
Track01504
Track01533
Track01589
Track01761
Track01655
Track01661
Track01737


In [26]:
#Test Set
#The same processing as the training set block, applied to the tracks in the test set
test_dir = os.path.join(slakh_path,'test')
for track_id in os.listdir(test_dir):
    print(track_id) #Track ID is printed to follow the progress
    track_dir = os.path.join(test_dir,track_id)
    midi_file = os.path.join(track_dir,'all_src.mid')

    #Tempo is estimated with pretty_midi, key is estimated with music21
    track_tempo = pretty_midi.PrettyMIDI(midi_file).estimate_tempo()
    track_key = str(music21.converter.parse(midi_file).analyze('key'))

    #Extension-less paths of all audio files: full mixture first, then the stems, then the submixes
    path_list = [os.path.join(track_dir,'mix')]
    for audio_folder in ('stems','submixes'):
        audio_folder_dir = os.path.join(track_dir,audio_folder)
        path_list.extend(
            os.path.join(audio_folder_dir,os.path.splitext(flac_file)[0])
            for flac_file in os.listdir(audio_folder_dir)
            if flac_file.endswith('.flac')
        )

    for curr_path in path_list:
        json_path = curr_path+'.json'
        with open(json_path, 'r') as metadata_file:
            curr_json = json.load(metadata_file) #Previously created .json file is loaded

        instruments = curr_json['instrument']

        try:
            #High-level music description is created using MU-LLaMA
            mullama_description = multimodal_generate(
                audio_path=curr_path+'.flac',
                audio_weight=1,
                prompt='Describe the music.',
                cache_size=100,
                cache_t=20.0,
                cache_weight=0.0,
                max_gen_len=512,
                gen_t=0.6,
                top_p=0.8,
            )
        except sf.LibsndfileError:
            #The audio file could not be read, so the description only contains the instrument list
            print('.flac error occured.')
            new_description = f'Instruments: {instruments}.'
        else:
            #MU-LLaMA description is given to GPT together with key and tempo to generate an overall description
            gpt_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": f"Create a short simple overall musical description with just a sentence using the following information. Do not include any instrument name. General description:{mullama_description}, Tempo:{track_tempo}, Key:{track_key}",
                    }
                ],
                model="gpt-4o",
            )
            #The instrument list is put in front of the answer of GPT
            new_description = f'Instruments: {instruments}. {gpt_completion.choices[0].message.content}'

        #Key, tempo and description are updated and the .json file is saved.
        #The description is assigned once (the earlier version of this block assigned it twice in one statement).
        curr_json['key'] = track_key
        curr_json['bpm'] = round(track_tempo,2)
        curr_json['description'] = new_description

        with open(json_path, 'w') as json_file:
            json.dump(curr_json, json_file, indent=4)
            

Track02061
Track02098
Track01980
Track02016
Track02040
Track01905
Track02003
Track01881
Track02010
Track02079
Track01960
Track01943
Track02018
Track02005
Track02082
Track01882
Track02013
Track01952
Track01932
Track02042
Track01945
Track02023
Track02048
Track01985
Track01990
Track02050
Track02049
Track02008
Track01961
Track01878
Track02026
Track01937
Track01951
Track01986




Track01877
Track02069
Track01954
Track02007
Track01896
Track01955
Track01981
Track01947
Track01897
Track01904
Track01887
Track02047
Track02053
Track01977
Track02096
Track01927
Track01913
Track01899
Track01891
Track01893
Track01901
Track02086
Track01883
Track01917
Track01959
Track01880
Track02038
Track01889
Track02095
Track02000
Track02062
Track02001
Track01998
Track01931
Track02084
Track02046
Track01900
Track01934
Track01907
Track01994
Track02014
Track01902
Track01925
Track01987




Track02051
Track02019
Track01930
Track01911
Track01935
Track02054
Track01995




Track01916
Track01968
Track01982
Track01929
Track01920
Track01895
Track02070
Track01918
Track01948
Track01903
Track01884
Track02063
Track01906
Track02032
Track02020
Track01972
Track02024
Track01957
Track01965
Track02088
Track01963
Track01936
Track01974
Track02081
Track01976
Track02090
Track01975




Track02087
Track02083
Track02064
Track02074
Track01928
Track02093
Track01898




Track01888
Track01949
Track01956
Track02029
Track01908
Track01950
Track01989
Track02044
Track01996
Track01876
Track02052
Track01886
Track02037
Track02017
Track02094
Track01997
Track02002
Track02045
Track01892
Track01962
Track01978
Track02031




Track02036
Track01973
Track01993
Track02004
Track01967
Track02056
Track01940
Track02030
Track02067
Track02092
