In [None]:
import os
import codecs
import compmusic
from compmusic import dunya as dn
from compmusic.dunya import hindustani as hi
from compmusic.dunya import carnatic as ca
from compmusic.dunya import docserver as ds
from compmusic import musicbrainz
import json
import numpy as np

In [None]:
# Dunya API token; get yours from: https://dunya.compmusic.upf.edu/user/profile/
token = ""

# Ids of the CC collections for the Carnatic and the Hindustani music traditions
carnatic_cc = {'id': 'a163c8f2-b75f-4655-86be-1504ea2944c2', 'name': 'carnatic'}
hindustani_cc = {'id': '6adc54c6-6605-4e57-8230-b85f1de5be2b', 'name': 'hindustani'}
collections = [carnatic_cc, hindustani_cc]

# Every kind of file to download, keyed by a short feature name. Each entry holds
# the parameters used to request the file ('thetype', 'subtype') and the extension
# to store it under ('file_type'). Note that some files, like pitch-vocal, might be
# irrelevant for the Hindustani tradition.
files_download = {
    'audio': {'thetype': 'mp3', 'subtype': 'source', 'file_type': 'mp3'},
    'pitch': {'thetype': 'pitch', 'subtype': 'pitch', 'file_type': 'txt'},
    'pitch_vocal': {'thetype': 'pitch-vocal', 'subtype': 'source', 'file_type': 'txt'},
    'tonic': {'thetype': 'ctonic', 'subtype': 'tonic', 'file_type': 'txt'},
    'sama': {'thetype': 'sama-manual', 'subtype': 'source', 'file_type': 'txt'},
    'bpm': {'thetype': 'bpm-manual', 'subtype': 'source', 'file_type': 'txt'},
    'tempo': {'thetype': 'tempo-manual', 'subtype': 'source', 'file_type': 'txt'},
    'sections': {'thetype': 'sections-manual-p', 'subtype': 'source', 'file_type': 'txt'},
    'phrases': {'thetype': 'mphrases-manual', 'subtype': 'source', 'file_type': 'txt'},
}

In [None]:
# Concept mapping: the same concept is stored under a different key depending on
# the tradition, so record, per tradition name, which key holds the release-level
# item and which key holds the recording title.
mapp = {}
for collection in collections:
    tradition_name = collection['name']
    if tradition_name == 'carnatic':
        # in carnatic, album level items are referred to as 'concerts'
        release, recording = 'concert', 'title'
    elif tradition_name == 'hindustani':
        release, recording = 'release', 'title'
    mapp[tradition_name] = dict(release=release, recording=recording)

In [None]:
def get_mbids_in_collection(collection_id, music_tradition, token):
    """
    Fetch the MBIDs of all the recordings in a collection.

    Parameters
    ----------
    collection_id : id of the collection to query; it is passed to the
        tradition module's ``set_collections`` as a one-element list.
    music_tradition : either 'hindustani' or 'carnatic'; selects which
        tradition module is queried.
    token : API token, passed to ``dn.set_token``.

    Returns
    -------
    list with the 'mbid' entry of every recording returned by the
    tradition module's ``get_recordings``.

    Raises
    ------
    ValueError : if ``music_tradition`` is not one of the supported names.
    """
    # Validate first: otherwise an unknown name would leave `tradition` unbound
    # and fail later with an unrelated-looking UnboundLocalError.
    if music_tradition == 'hindustani':
        tradition = hi
    elif music_tradition == 'carnatic':
        tradition = ca
    else:
        raise ValueError(
            "unknown music tradition %r (expected 'hindustani' or 'carnatic')"
            % (music_tradition,))

    dn.set_token(token)
    tradition.set_collections([collection_id])
    recs = tradition.get_recordings()
    return [r['mbid'] for r in recs]

def get_recording_info(music_tradition, token,  mbid):
    """
    Fetch the information of a single recording.

    Parameters
    ----------
    music_tradition : either 'hindustani' or 'carnatic'; selects which
        tradition module is queried.
    token : API token, passed to ``dn.set_token``.
    mbid : id of the recording, passed to the tradition module's
        ``get_recording``.

    Returns
    -------
    Whatever the tradition module's ``get_recording(mbid)`` returns.

    Raises
    ------
    ValueError : if ``music_tradition`` is not one of the supported names.
    """
    # Validate first: otherwise an unknown name would leave `tradition` unbound
    # and fail later with an unrelated-looking UnboundLocalError.
    if music_tradition == 'hindustani':
        tradition = hi
    elif music_tradition == 'carnatic':
        tradition = ca
    else:
        raise ValueError(
            "unknown music tradition %r (expected 'hindustani' or 'carnatic')"
            % (music_tradition,))

    dn.set_token(token)
    return tradition.get_recording(mbid)

def download_file(mbid, thetype, subtype, file_type, file_path):
    """
    Utility function (wrapper) to download a file and store it at file_path.

    Parameters
    ----------
    mbid : id of the document, passed to ``dn.file_for_document``.
    thetype, subtype : type and subtype of the file, passed to
        ``dn.file_for_document``.
    file_type : extension of the stored file. It is not used in this
        function; it is kept so that existing callers keep working.
    file_path : where the downloaded content is written.

    Returns
    -------
    True once the file has been written.

    Raises
    ------
    Any exception raised while fetching, decoding or writing the content
    is propagated to the caller.
    """
    content = dn.file_for_document(mbid, thetype, subtype)

    # Decide on the function's own argument, not on a variable that happens to
    # exist in the calling notebook cell.
    if thetype == 'pitch':
        # The pitch content is decoded as JSON and stored as tab-separated text.
        content = json.loads(content)
        np.savetxt(file_path, content, delimiter='\t')
    else:
        # Everything else is written unchanged, in binary mode
        # (presumably the server returns bytes -- TODO confirm).
        with open(file_path, 'wb') as fid:
            fid.write(content)
    return True

# Download all the relevant files within a music tradition

In [None]:
root_dir = ""  # folder where you want to download the collection
# An empty root_dir resolves to the current working directory when joined with
# os.path.join, and os.makedirs("") raises an error, so only create the folder
# when a path was actually given.
if root_dir and not os.path.isdir(root_dir):
    os.makedirs(root_dir)

In [None]:
# Start every run with an empty error log; failures below are appended to it.
errors_path = os.path.join(root_dir, 'errors.txt')
with open(errors_path, 'w'):
    pass

dn.set_token(token)
# One entry per (recording, feature) saying whether that file is on disk after this run.
status = []
# NOTE: the [1:] slice skips the first entry of `collections` (carnatic), so only
# the hindustani collection is processed here.
for collection in collections[1:]:
    mbid_count = 0
    # setting the collection
    if collection['name'] == 'hindustani':
        tradition = hi
    elif collection['name'] == 'carnatic':
        tradition = ca

    try:
        tradition.set_collections([collection['id']])

        # fetching all recordings in that collection
        recs = tradition.get_recordings()
    except Exception as e:
        # Without the list of recordings nothing can be done for this collection.
        with open(errors_path, 'a') as fid_errors:
            fid_errors.write("%s: %s\n" % (collection['name'], e))
        continue

    for rec in recs:
        mbid = None
        try:
            mbid = rec['mbid']
            rec_info = tradition.get_recording(mbid)
            release = rec_info[mapp[collection['name']]['release']][0]['title']
            recording = rec_info['title']
            artist = rec_info['album_artists'][0]['name']

            # sometimes release and recording names contain strange chars like "/"
            release = release.replace("/", "_")
            recording = recording.replace("/", "_")
            artist = artist.replace("/", "_")

            file_dir = os.path.join(root_dir, collection['name'], "%s by %s" % (release, artist), recording)
            if not os.path.isdir(file_dir):
                os.makedirs(file_dir)
            # dumping json file for the metadata
            with open(os.path.join(file_dir, recording + '.json'), 'w') as meta_file:
                json.dump(rec_info, meta_file)

            # downloading stuff from the server
            for feature_type, feature_info in files_download.items():
                if feature_info['thetype'] == 'mp3':
                    ext = ".%s" % (feature_info['thetype'])
                else:
                    ext = ".%s.%s" % (feature_info['thetype'], feature_info['file_type'])
                file_path = os.path.join(file_dir, recording + ext)
                if not os.path.isfile(file_path):
                    try:
                        download_status = download_file(mbid, feature_info['thetype'], feature_info['subtype'], feature_info['file_type'], file_path)
                    except Exception:
                        # Best effort: a feature that cannot be downloaded for
                        # this recording is recorded as not present.
                        download_status = False
                else:
                    # already downloaded by a previous run
                    download_status = True
                status.append(dict(release=release,
                                   recording=recording,
                                   feature_type=feature_type,
                                   present=download_status))
            print(collection['name'], mbid_count)
            mbid_count += 1
        except Exception as e:
            # Log the failing recording and carry on, so that one bad recording
            # does not abort the rest of the collection.
            with open(errors_path, 'a') as fid_errors:
                fid_errors.write("%s %s: %s\n" % (collection['name'], mbid, e))
        