# **Create Freesound Corpora**



# **Setup Collab**

In [1]:
import sys
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    #pip install git+git://github.com/mtg/freesound-python.git#egg=freesound
    from google.colab import drive
    drive.mount('/content/drive',force_remount=True)

# **Imports**

In [2]:
import os
import pandas as pd
import numpy as np
import freesound
from IPython.display import display
import re
import time
from os import listdir

# **Setup project**

In [3]:

ROOT_PATH = "./"
if IN_COLAB:
    ROOT_PATH = "/content/drive/My Drive/Colab Notebooks/MIR"

FILES_DIR = os.path.join(ROOT_PATH, 'audio_corpora_freesound')
FILES_DIR_ANNOTATIONS = os.path.join(ROOT_PATH, 'annotations_corpora_freesound') 
DATAFRAME_FILENAME = os.path.join(ROOT_PATH, 'corpora_dataframe.csv')  


#FREESOUND
FREESOUND_API_KEY = 'z3BjEL4rNKquyDVOMCO8AIUx9HTQ74j0IfUvSLNF' 
FREESOUND_STORE_METADATA_FIELDS = ['id', 'name', 'username', 'previews', 'license', 'description']  
freesound_client = freesound.FreesoundClient()
freesound_client.set_token(FREESOUND_API_KEY)


#PROJECT OPTION 
MAX_ITEMS_FOR_BPM_CLASS = 100
MIN_BPM_CLASS = 40
MAX_BPM_CLASS = 240
MAX_LENGHT_IN_S = 180
DOWNLOAD = True

# **Define some util functions**

In [4]:
# Define some util functions

def get_ogg_filename(sound):
    return sound.previews.preview_hq_ogg.split("/")[-1]

def query_freesound(query, filter, num_results=10):
    """Queries freesound with the given query and filter values.
    If no filter is given, a default filter is added to only get sounds shorter than 30 seconds.
    """
    if filter is None:
        filter = 'duration:[0 TO 30]'  # Set default filter
    pager = freesound_client.text_search(
        query = query,
        filter = filter,
        fields = ','.join(FREESOUND_STORE_METADATA_FIELDS),
        group_by_pack = 1,
        page_size = num_results
    )
    return [sound for sound in pager]

def retrieve_sound_preview(sound, directory):
    """Download the high-quality OGG sound preview of a given Freesound sound object to the given directory.
    """
    return freesound.FSRequest.retrieve(
        sound.previews.preview_hq_ogg,
        freesound_client,
        os.path.join(directory, get_ogg_filename(sound))
    )

def make_pandas_record(sound, bpm):
    """Create a dictionary with the metadata that we want to store for each sound.
    """
    record = {key: sound.as_dict()[key] for key in FREESOUND_STORE_METADATA_FIELDS}
    del record['previews']  # Don't store previews dict in record
    record['file_id'] = record['id']  # Rename 'id' to 'file_id'
    del record['id']
    record['path'] =  os.path.join(FILES_DIR, get_ogg_filename(sound)) 
    record['bpm'] = bpm
    return record

def prepare_query_for_bpm(bpm):
    
    str_filter_1 = "tag:{} duration:[10 TO {}]".format(bpm,MAX_LENGHT_IN_S)
    str_filter_2 = "duration:[10 TO {}]".format(MAX_LENGHT_IN_S)
    str_query = "{} bpm".format(bpm)
    freesound_queries = [
    {
        'query': 'bpm',
        'filter': str_filter_1,
        'num_results': MAX_ITEMS_FOR_BPM_CLASS,
    },
    {
        'query': str_query,
        'filter': str_filter_2,
        'num_results': MAX_ITEMS_FOR_BPM_CLASS,
    },
    ]
    
    return freesound_queries

def bpm_check(source, bpm):
    
    name = source.as_dict()['name']
    description = source.as_dict()['description']
    
    regex = "(?<!\d){}+\s?bpm".format(bpm)   
    pattern = re.compile(regex)
    matched = False
    
    if pattern.search(name) or pattern.search(description):
        matched = True
        
    return matched

def process_query(freesound_queries):
    
    sounds = sum([query_freesound(query['query'], query['filter'], query['num_results']) for query in freesound_queries],[])
    
    if DOWNLOAD :
        # Download the sounds and save them to FILES_DIR folder
        for count, sound in enumerate(sounds):
            if  get_ogg_filename(sound) not in file_already_in_folder:
                print('Downloading sound with id {0} [{1}/{2}]'.format(sound.id, count + 1, len(sounds)))
                retrieve_sound_preview(sound, FILES_DIR)

    # Make a Pandas DataFrame with the metadata of our sound collection and save it
    sounds_filtered = []
    for s in sounds:
        if bpm_check(s, bpm):
            sounds_filtered.append(s)
    
    return sounds_filtered

# **Check for files already downloaded**

In [11]:
file_already_in_folder = []
if not os.path.exists(FILES_DIR): 
    os.mkdir(FILES_DIR)
else:
    for f in listdir(FILES_DIR): file_already_in_folder.append(f)
        

# **Create Freesound Corpora**

In [12]:
panda_records = []

for bpm in range(MIN_BPM_CLASS,MAX_BPM_CLASS):
    
    print("Query bpm:{}".format(bpm))
    time.sleep(2)
    
    freesound_queries = prepare_query_for_bpm(bpm)
    sounds_filtered = process_query(freesound_queries)
    [panda_records.append(make_pandas_record(s,bpm)) for s in sounds_filtered]
    
   
df = pd.DataFrame(panda_records)
df.sort_values("file_id", inplace = True) 
df.drop_duplicates(subset ="file_id", keep = "first", inplace = True) 
df.reset_index(drop=True, inplace=True)
df.to_csv(DATAFRAME_FILENAME)
print('Saved DataFrame with {0} entries! {1}'.format(len(df), DATAFRAME_FILENAME))

# Show the contents of our DataFrame (the metadata of our source collection)
display(df)

Query bpm:40
Downloading sound with id 361352 [1/24]
Downloading sound with id 321758 [2/24]
Downloading sound with id 477267 [3/24]
Downloading sound with id 332816 [4/24]
Downloading sound with id 437642 [5/24]
Downloading sound with id 64378 [6/24]
Downloading sound with id 477123 [7/24]
Downloading sound with id 294366 [8/24]
Downloading sound with id 294801 [9/24]
Downloading sound with id 295515 [10/24]
Downloading sound with id 302495 [11/24]
Downloading sound with id 467920 [12/24]
Downloading sound with id 292982 [13/24]
Downloading sound with id 238875 [14/24]
Downloading sound with id 505 [15/24]
Downloading sound with id 430860 [16/24]
Downloading sound with id 504688 [17/24]
Downloading sound with id 53583 [18/24]
Downloading sound with id 435639 [19/24]
Downloading sound with id 15363 [20/24]
Downloading sound with id 34154 [21/24]
Downloading sound with id 169056 [22/24]
Downloading sound with id 86570 [23/24]
Downloading sound with id 414547 [24/24]
Query bpm:41
Downloa

Downloading sound with id 249484 [7/17]
Downloading sound with id 477080 [8/17]
Downloading sound with id 292990 [9/17]
Downloading sound with id 497570 [10/17]
Downloading sound with id 256619 [11/17]
Downloading sound with id 483094 [12/17]
Downloading sound with id 405059 [13/17]
Downloading sound with id 456121 [14/17]
Downloading sound with id 507666 [15/17]
Downloading sound with id 472235 [16/17]
Downloading sound with id 482925 [17/17]
Query bpm:49
Downloading sound with id 107525 [1/12]
Downloading sound with id 107525 [2/12]
Downloading sound with id 477272 [3/12]
Downloading sound with id 477087 [4/12]
Downloading sound with id 302504 [5/12]
Downloading sound with id 294810 [6/12]
Downloading sound with id 295524 [7/12]
Downloading sound with id 294375 [8/12]
Downloading sound with id 249483 [9/12]
Downloading sound with id 292991 [10/12]
Downloading sound with id 496832 [11/12]
Downloading sound with id 239517 [12/12]
Query bpm:50
Downloading sound with id 215227 [1/17]
Dow

Downloading sound with id 352549 [3/9]
Downloading sound with id 302516 [4/9]
Downloading sound with id 294387 [5/9]
Downloading sound with id 294822 [6/9]
Downloading sound with id 295536 [7/9]
Downloading sound with id 108841 [8/9]
Downloading sound with id 293014 [9/9]
Query bpm:62
Downloading sound with id 44910 [1/11]
Downloading sound with id 428142 [2/11]
Downloading sound with id 477304 [3/11]
Downloading sound with id 294388 [4/11]
Downloading sound with id 294823 [5/11]
Downloading sound with id 295537 [6/11]
Downloading sound with id 302517 [7/11]
Downloading sound with id 404854 [8/11]
Downloading sound with id 344082 [9/11]
Downloading sound with id 342947 [10/11]
Downloading sound with id 456121 [11/11]
Query bpm:63
Downloading sound with id 48949 [1/12]
Downloading sound with id 215269 [2/12]
Downloading sound with id 477248 [3/12]
Downloading sound with id 302518 [4/12]
Downloading sound with id 294389 [5/12]
Downloading sound with id 295538 [6/12]
Downloading sound wit

Downloading sound with id 302527 [7/15]
Downloading sound with id 294833 [8/15]
Downloading sound with id 295547 [9/15]
Downloading sound with id 294399 [10/15]
Downloading sound with id 293008 [11/15]
Downloading sound with id 416797 [12/15]
Downloading sound with id 428301 [13/15]
Downloading sound with id 489761 [14/15]
Downloading sound with id 428358 [15/15]
Query bpm:73
Downloading sound with id 126061 [1/13]
Downloading sound with id 371945 [2/13]
Downloading sound with id 126061 [3/13]
Downloading sound with id 371945 [4/13]
Downloading sound with id 477300 [5/13]
Downloading sound with id 352837 [6/13]
Downloading sound with id 415328 [7/13]
Downloading sound with id 476962 [8/13]
Downloading sound with id 302528 [9/13]
Downloading sound with id 294400 [10/13]
Downloading sound with id 294834 [11/13]
Downloading sound with id 293009 [12/13]
Downloading sound with id 39304 [13/13]
Query bpm:74
Downloading sound with id 242482 [1/18]
Downloading sound with id 414997 [2/18]
Downl

Downloading sound with id 427513 [42/121]
Downloading sound with id 360353 [43/121]
Downloading sound with id 178602 [44/121]
Downloading sound with id 78587 [45/121]
Downloading sound with id 78689 [46/121]
Downloading sound with id 160549 [47/121]
Downloading sound with id 160724 [48/121]
Downloading sound with id 216067 [49/121]
Downloading sound with id 376024 [50/121]
Downloading sound with id 462873 [51/121]
Downloading sound with id 32098 [52/121]
Downloading sound with id 353678 [53/121]
Downloading sound with id 390878 [54/121]
Downloading sound with id 333631 [55/121]
Downloading sound with id 491285 [56/121]
Downloading sound with id 349228 [57/121]
Downloading sound with id 255169 [58/121]
Downloading sound with id 437642 [59/121]
Downloading sound with id 342431 [60/121]
Downloading sound with id 221687 [61/121]
Downloading sound with id 167182 [62/121]
Downloading sound with id 496421 [63/121]
Downloading sound with id 493727 [64/121]
Downloading sound with id 151378 [65/

Downloading sound with id 320123 [34/50]
Downloading sound with id 320238 [35/50]
Downloading sound with id 316708 [36/50]
Downloading sound with id 479385 [37/50]
Downloading sound with id 497539 [38/50]
Downloading sound with id 474780 [39/50]
Downloading sound with id 506927 [40/50]
Downloading sound with id 320116 [41/50]
Downloading sound with id 506925 [42/50]
Downloading sound with id 112810 [43/50]
Downloading sound with id 271098 [44/50]
Downloading sound with id 293021 [45/50]
Downloading sound with id 506926 [46/50]
Downloading sound with id 389521 [47/50]
Downloading sound with id 476924 [48/50]
Downloading sound with id 496744 [49/50]
Downloading sound with id 166177 [50/50]
Query bpm:86
Downloading sound with id 69611 [1/29]
Downloading sound with id 69606 [2/29]
Downloading sound with id 57873 [3/29]
Downloading sound with id 491327 [4/29]
Downloading sound with id 69611 [5/29]
Downloading sound with id 69606 [6/29]
Downloading sound with id 57873 [7/29]
Downloading soun

Downloading sound with id 40725 [79/123]
Downloading sound with id 165095 [80/123]
Downloading sound with id 444869 [81/123]
Downloading sound with id 49115 [82/123]
Downloading sound with id 161249 [83/123]
Downloading sound with id 331465 [84/123]
Downloading sound with id 424847 [85/123]
Downloading sound with id 248298 [86/123]
Downloading sound with id 428346 [87/123]
Downloading sound with id 416244 [88/123]
Downloading sound with id 440985 [89/123]
Downloading sound with id 419413 [90/123]
Downloading sound with id 506375 [91/123]
Downloading sound with id 20510 [92/123]
Downloading sound with id 412901 [93/123]
Downloading sound with id 404008 [94/123]
Downloading sound with id 165918 [95/123]
Downloading sound with id 48930 [96/123]
Downloading sound with id 368661 [97/123]
Downloading sound with id 455587 [98/123]
Downloading sound with id 165916 [99/123]
Downloading sound with id 165917 [100/123]
Downloading sound with id 44909 [101/123]
Downloading sound with id 387369 [102

Downloading sound with id 316706 [60/70]
Downloading sound with id 316697 [61/70]
Downloading sound with id 477055 [62/70]
Downloading sound with id 490926 [63/70]
Downloading sound with id 490930 [64/70]
Downloading sound with id 490931 [65/70]
Downloading sound with id 510401 [66/70]
Downloading sound with id 293031 [67/70]
Downloading sound with id 504770 [68/70]
Downloading sound with id 179358 [69/70]
Downloading sound with id 217606 [70/70]
Query bpm:96
Downloading sound with id 171256 [1/32]
Downloading sound with id 171256 [2/32]
Downloading sound with id 377158 [3/32]
Downloading sound with id 414038 [4/32]
Downloading sound with id 191961 [5/32]
Downloading sound with id 416246 [6/32]
Downloading sound with id 417960 [7/32]
Downloading sound with id 89070 [8/32]
Downloading sound with id 507140 [9/32]
Downloading sound with id 238867 [10/32]
Downloading sound with id 400508 [11/32]
Downloading sound with id 387343 [12/32]
Downloading sound with id 104115 [13/32]
Downloading s

Downloading sound with id 320801 [96/118]
Downloading sound with id 83318 [97/118]
Downloading sound with id 324906 [98/118]
Downloading sound with id 510038 [99/118]
Downloading sound with id 118373 [100/118]
Downloading sound with id 372031 [101/118]
Downloading sound with id 372138 [102/118]
Downloading sound with id 376250 [103/118]
Downloading sound with id 105234 [104/118]
Downloading sound with id 455090 [105/118]
Downloading sound with id 426150 [106/118]
Downloading sound with id 465809 [107/118]
Downloading sound with id 395212 [108/118]
Downloading sound with id 441942 [109/118]
Downloading sound with id 86662 [110/118]
Downloading sound with id 167393 [111/118]
Downloading sound with id 240102 [112/118]
Downloading sound with id 86661 [113/118]
Downloading sound with id 86660 [114/118]
Downloading sound with id 86652 [115/118]
Downloading sound with id 500352 [116/118]
Downloading sound with id 398326 [117/118]
Downloading sound with id 440462 [118/118]
Query bpm:101
Downlo

Downloading sound with id 484054 [18/109]
Downloading sound with id 323406 [19/109]
Downloading sound with id 412617 [20/109]
Downloading sound with id 347743 [21/109]
Downloading sound with id 414994 [22/109]
Downloading sound with id 431182 [23/109]
Downloading sound with id 435384 [24/109]
Downloading sound with id 331175 [25/109]
Downloading sound with id 331189 [26/109]
Downloading sound with id 331178 [27/109]
Downloading sound with id 331187 [28/109]
Downloading sound with id 331176 [29/109]
Downloading sound with id 331186 [30/109]
Downloading sound with id 331181 [31/109]
Downloading sound with id 331182 [32/109]
Downloading sound with id 331179 [33/109]
Downloading sound with id 102010 [34/109]
Downloading sound with id 423278 [35/109]
Downloading sound with id 146688 [36/109]
Downloading sound with id 434121 [37/109]
Downloading sound with id 507995 [38/109]
Downloading sound with id 331180 [39/109]
Downloading sound with id 331185 [40/109]
Downloading sound with id 55393 [4

Query bpm:116
Downloading sound with id 433551 [1/11]
Downloading sound with id 26204 [2/11]
Downloading sound with id 126567 [3/11]
Downloading sound with id 425007 [4/11]
Downloading sound with id 302536 [5/11]
Downloading sound with id 213381 [6/11]
Downloading sound with id 362102 [7/11]
Downloading sound with id 238299 [8/11]
Downloading sound with id 44209 [9/11]
Downloading sound with id 476925 [10/11]
Downloading sound with id 456121 [11/11]
Query bpm:117
Downloading sound with id 117856 [1/9]
Downloading sound with id 13645 [2/9]
Downloading sound with id 117856 [3/9]
Downloading sound with id 13645 [4/9]
Downloading sound with id 500742 [5/9]
Downloading sound with id 270273 [6/9]
Downloading sound with id 200321 [7/9]
Downloading sound with id 276699 [8/9]
Downloading sound with id 316681 [9/9]
Query bpm:118
Downloading sound with id 202248 [1/11]
Downloading sound with id 467875 [2/11]
Downloading sound with id 202248 [3/11]
Downloading sound with id 467875 [4/11]
Downloadi

Downloading sound with id 495629 [148/167]
Downloading sound with id 495636 [149/167]
Downloading sound with id 418630 [150/167]
Downloading sound with id 129282 [151/167]
Downloading sound with id 399907 [152/167]
Downloading sound with id 28434 [153/167]
Downloading sound with id 73229 [154/167]
Downloading sound with id 64608 [155/167]
Downloading sound with id 44014 [156/167]
Downloading sound with id 170446 [157/167]
Downloading sound with id 16190 [158/167]
Downloading sound with id 418951 [159/167]
Downloading sound with id 418566 [160/167]
Downloading sound with id 455100 [161/167]
Downloading sound with id 99182 [162/167]
Downloading sound with id 465005 [163/167]
Downloading sound with id 43531 [164/167]
Downloading sound with id 43539 [165/167]
Downloading sound with id 42023 [166/167]
Downloading sound with id 168747 [167/167]
Query bpm:121
Downloading sound with id 41304 [1/4]
Downloading sound with id 414130 [2/4]
Downloading sound with id 322471 [3/4]
Downloading sound w

Downloading sound with id 331147 [6/33]
Downloading sound with id 271021 [7/33]
Downloading sound with id 169939 [8/33]
Downloading sound with id 352145 [9/33]
Downloading sound with id 136892 [10/33]
Downloading sound with id 115599 [11/33]
Downloading sound with id 331147 [12/33]
Downloading sound with id 474877 [13/33]
Downloading sound with id 455025 [14/33]
Downloading sound with id 432776 [15/33]
Downloading sound with id 509994 [16/33]
Downloading sound with id 365639 [17/33]
Downloading sound with id 190602 [18/33]
Downloading sound with id 187689 [19/33]
Downloading sound with id 405580 [20/33]
Downloading sound with id 277456 [21/33]
Downloading sound with id 302492 [22/33]
Downloading sound with id 294362 [23/33]
Downloading sound with id 294800 [24/33]
Downloading sound with id 295513 [25/33]
Downloading sound with id 36871 [26/33]
Downloading sound with id 36873 [27/33]
Downloading sound with id 36857 [28/33]
Downloading sound with id 484774 [29/33]
Downloading sound with 

Downloading sound with id 336305 [73/131]
Downloading sound with id 361027 [74/131]
Downloading sound with id 329825 [75/131]
Downloading sound with id 381278 [76/131]
Downloading sound with id 324793 [77/131]
Downloading sound with id 434118 [78/131]
Downloading sound with id 255886 [79/131]
Downloading sound with id 466109 [80/131]
Downloading sound with id 348384 [81/131]
Downloading sound with id 348406 [82/131]
Downloading sound with id 39993 [83/131]
Downloading sound with id 294800 [84/131]
Downloading sound with id 403904 [85/131]
Downloading sound with id 262039 [86/131]
Downloading sound with id 122304 [87/131]
Downloading sound with id 490790 [88/131]
Downloading sound with id 490791 [89/131]
Downloading sound with id 274109 [90/131]
Downloading sound with id 403905 [91/131]
Downloading sound with id 353608 [92/131]
Downloading sound with id 223664 [93/131]
Downloading sound with id 364987 [94/131]
Downloading sound with id 133438 [95/131]
Downloading sound with id 86313 [96

Downloading sound with id 413498 [5/11]
Downloading sound with id 166003 [6/11]
Downloading sound with id 507102 [7/11]
Downloading sound with id 40941 [8/11]
Downloading sound with id 107525 [9/11]
Downloading sound with id 176395 [10/11]
Downloading sound with id 143248 [11/11]
Query bpm:139
Downloading sound with id 431191 [1/2]
Downloading sound with id 423079 [2/2]
Query bpm:140
Downloading sound with id 132178 [1/163]
Downloading sound with id 269546 [2/163]
Downloading sound with id 166652 [3/163]
Downloading sound with id 495533 [4/163]
Downloading sound with id 222465 [5/163]
Downloading sound with id 176312 [6/163]
Downloading sound with id 188685 [7/163]
Downloading sound with id 132909 [8/163]
Downloading sound with id 176313 [9/163]
Downloading sound with id 132905 [10/163]
Downloading sound with id 495794 [11/163]
Downloading sound with id 170452 [12/163]
Downloading sound with id 137979 [13/163]
Downloading sound with id 222458 [14/163]
Downloading sound with id 436479 [

Downloading sound with id 123295 [3/21]
Downloading sound with id 449636 [4/21]
Downloading sound with id 415007 [5/21]
Downloading sound with id 111255 [6/21]
Downloading sound with id 107846 [7/21]
Downloading sound with id 47249 [8/21]
Downloading sound with id 391891 [9/21]
Downloading sound with id 322316 [10/21]
Downloading sound with id 382328 [11/21]
Downloading sound with id 351979 [12/21]
Downloading sound with id 479382 [13/21]
Downloading sound with id 209550 [14/21]
Downloading sound with id 161826 [15/21]
Downloading sound with id 202078 [16/21]
Downloading sound with id 44615 [17/21]
Downloading sound with id 490922 [18/21]
Downloading sound with id 490923 [19/21]
Downloading sound with id 449722 [20/21]
Downloading sound with id 166085 [21/21]
Query bpm:146
Downloading sound with id 490696 [1/4]
Downloading sound with id 213852 [2/4]
Downloading sound with id 368409 [3/4]
Downloading sound with id 469513 [4/4]
Query bpm:147
Downloading sound with id 490939 [1/5]
Downloa

Downloading sound with id 474991 [36/73]
Downloading sound with id 258344 [37/73]
Downloading sound with id 249452 [38/73]
Downloading sound with id 222901 [39/73]
Downloading sound with id 217345 [40/73]
Downloading sound with id 389773 [41/73]
Downloading sound with id 145607 [42/73]
Downloading sound with id 32873 [43/73]
Downloading sound with id 322315 [44/73]
Downloading sound with id 507030 [45/73]
Downloading sound with id 468226 [46/73]
Downloading sound with id 110605 [47/73]
Downloading sound with id 245271 [48/73]
Downloading sound with id 434785 [49/73]
Downloading sound with id 274961 [50/73]
Downloading sound with id 179219 [51/73]
Downloading sound with id 386965 [52/73]
Downloading sound with id 474332 [53/73]
Downloading sound with id 434351 [54/73]
Downloading sound with id 86209 [55/73]
Downloading sound with id 27506 [56/73]
Downloading sound with id 21573 [57/73]
Downloading sound with id 125365 [58/73]
Downloading sound with id 471037 [59/73]
Downloading sound wi

Downloading sound with id 372270 [27/40]
Downloading sound with id 348653 [28/40]
Downloading sound with id 401612 [29/40]
Downloading sound with id 151330 [30/40]
Downloading sound with id 347836 [31/40]
Downloading sound with id 500648 [32/40]
Downloading sound with id 436808 [33/40]
Downloading sound with id 336216 [34/40]
Downloading sound with id 398327 [35/40]
Downloading sound with id 411769 [36/40]
Downloading sound with id 475151 [37/40]
Downloading sound with id 54153 [38/40]
Downloading sound with id 223196 [39/40]
Downloading sound with id 334940 [40/40]
Query bpm:181
Downloading sound with id 456121 [1/1]
Query bpm:182
Query bpm:183
Query bpm:184
Downloading sound with id 111301 [1/2]
Downloading sound with id 443135 [2/2]
Query bpm:185
Downloading sound with id 123293 [1/10]
Downloading sound with id 163665 [2/10]
Downloading sound with id 65186 [3/10]
Downloading sound with id 123293 [4/10]
Downloading sound with id 65186 [5/10]
Downloading sound with id 163665 [6/10]
Do

Unnamed: 0,bpm,description,file_id,license,name,path,username
0,90,Tenor sax sample recorded at 44.1khz by myself...,670,http://creativecommons.org/licenses/by-nc/3.0/,too damn funky 16 beat Dm 90.wav,./audio_corpora_freesound/670_586-hq.ogg,simondsouza
1,123,make music this beats\nprocessed beat loop,872,http://creativecommons.org/licenses/by/3.0/,beats_123bpm.aif,./audio_corpora_freesound/872_872-hq.ogg,kotatsu
2,90,90 bpm noisy distorted guitar loop recorded wi...,1376,http://creativecommons.org/licenses/sampling+/...,90_bpm_parker.wav,./audio_corpora_freesound/1376_838-hq.ogg,sleep
3,132,132bpm loop made with reaktor,1886,http://creativecommons.org/licenses/by-nc/3.0/,machine1.aif,./audio_corpora_freesound/1886_176-hq.ogg,virotic
4,126,126bpm 4/4. 5+ measures with a fill. recorded ...,1979,http://creativecommons.org/licenses/by/3.0/,rbh 126bpm Tabla 06.wav,./audio_corpora_freesound/1979_1112-hq.ogg,RHumphries
5,133,This is loop 1 in a series of 16 variations. T...,2615,http://creativecommons.org/licenses/by/3.0/,133 bpm ATTACK LOOP 04 electrified analog kit ...,./audio_corpora_freesound/2615_2518-hq.ogg,Jovica
6,150,"Smokin'\r\nharmonica from Austin, Texas. These...",2631,http://creativecommons.org/licenses/by-nc/3.0/,Harp1.wav,./audio_corpora_freesound/2631_5150-hq.ogg,TexasMusicForge
7,83,"ambientloop, recorded with real rhodes!!",4048,http://creativecommons.org/licenses/by/3.0/,ambientloop83bpm.wav,./audio_corpora_freesound/4048_7740-hq.ogg,RealRhodesSounds
8,100,layered waves arpegiated @100bpm in c,8139,http://creativecommons.org/licenses/sampling+/...,layerarp100bpm in c.wav,./audio_corpora_freesound/8139_18307-hq.ogg,natmoon
9,100,layered waves arpegiated @100bpm in c,8140,http://creativecommons.org/licenses/sampling+/...,layerdarp 100bpm in c.wav,./audio_corpora_freesound/8140_18307-hq.ogg,natmoon


## Write annotations on files

In [13]:
import ntpath


file_already_in_folder = []
if not os.path.exists(FILES_DIR_ANNOTATIONS): 
    os.mkdir(FILES_DIR_ANNOTATIONS)

df_data = pd.read_csv(open(DATAFRAME_FILENAME), index_col=0)

for index, row in df_data.iterrows():
            bpm = row['bpm']
            file_id_path = row['path']
            name = ntpath.basename(file_id_path).split('.')[0]
            file_name = str(name) + ".bpm"
            full_path = os.path.join(FILES_DIR_ANNOTATIONS, file_name)
            
            f = open(full_path, "a")
            f.write(str(bpm))
            f.close()
            print("write file: {}".format(file_name))


write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id

write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id

write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id

write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id 510823
write file_id

## Clean files 

In [22]:
files_in_folder =  listdir(FILES_DIR)
files_in_folder = [os.path.join(FILES_DIR, name) for name in files_in_folder]

df_data = pd.read_csv(open(DATAFRAME_FILENAME), index_col=0)
files_in_csv = df_data['path'].tolist()


count = 0
for file in files_in_folder:
    if file not in files_in_csv:
        os.remove(file)
        count = count+1
print("file deleted: {}".format(count))

file deleted: 0
