## Download freedb dataset

In [None]:
# Delete any earlier download first; the trailing * in the pattern also matches
# leftover files whose names continue after ".tar.bz2". Then fetch the complete
# freedb archive stamped 20161101.
!rm -f freedb-complete-*.tar.bz2*
!wget http://ftp.freedb.org/pub/freedb/freedb-complete-20161101.tar.bz2

In [None]:
# Unpack the downloaded archive into the current directory. The upload cell
# further down reads ./rock, which presumably comes from this archive.
!tar -xf freedb-complete-*.tar.bz2

---
## Set up Cloud Foundry utilities

In [1]:
# Install the nb_utils package straight from GitHub into the user site-packages;
# the cf_utils module imported in a later cell presumably comes from this package.
!pip install --user --upgrade --quiet git+https://github.com/snowch/nb_utils

In [2]:
from getpass import getpass

# Organization and space that the Cloud Foundry helper will operate in.
bluemix_organization_name = 'chris.snow@uk.ibm.com'
bluemix_space_name = 'dev'

# API endpoint of the region to target. Known endpoints:
#   US South  - https://api.ng.bluemix.net
#   UK        - https://api.eu-gb.bluemix.net
#   Australia - https://api.au-syd.bluemix.net
target_endpoint = 'https://api.ng.bluemix.net'

# Ask for the IBM id and password interactively (input is not echoed) instead
# of writing them into the notebook source.
ibm_id = getpass("ibm id: ")
ibm_id_password = getpass("ibm id password: ")

ibm id: ········
ibm id password: ········


In [59]:
from cf_utils import cf_utils

# Build the helper object used by every cf.* call in this notebook from the
# endpoint, IBM id credentials, organization and space configured earlier.
cf = cf_utils.CloudFoundryUtil(
    target_endpoint,
    ibm_id,
    ibm_id_password,
    bluemix_organization_name,
    bluemix_space_name
)

# The plan GUID passed to create_service_instance was looked up with:
# cf.search_plans('cloudant')
# >> cloudant lite service_guide = 14c83ad2-6fd4-439a-8c3a-d1a20f8a2381

## Create cloudant service instance and database

In [11]:
# GUID of the Cloudant "lite" plan, as recorded in the commented-out
# cf.search_plans('cloudant') lookup in the setup cell.
cloudant_lite_plan_guid = '14c83ad2-6fd4-439a-8c3a-d1a20f8a2381'

# Create a service instance named 'my_cloudant' on that plan and ask for
# default credentials to be created alongside it.
cf.create_service_instance(
    cloudant_lite_plan_guid,
    'my_cloudant',
    create_default_credentials=True
)

In [12]:
# print(cf.get_service_credentials('my_cloudant'))

In [58]:
# Look the service credentials up once and reuse the result, rather than
# repeating the same lookup for every field. This also guarantees that all
# four values come from the same credentials record.
cloudant_credentials = cf.get_service_credentials('my_cloudant')

cl_url  = cloudant_credentials['url']       # base URL used for all database requests
cl_host = cloudant_credentials['host']
cl_user = cloudant_credentials['username']  # used with cl_pass as HTTP basic auth
cl_pass = cloudant_credentials['password']

Delete database

In [52]:
import requests

# Issue an HTTP DELETE for the musicdb database and show the raw server reply.
musicdb_url = cl_url + '/musicdb'
response = requests.delete(musicdb_url, auth=(cl_user, cl_pass))
print(response.text)

{"ok":true}



Create database

In [53]:
import requests

# Issue an HTTP PUT for the musicdb database and show the raw server reply.
musicdb_url = cl_url + '/musicdb'
response = requests.put(musicdb_url, auth=(cl_user, cl_pass))
print(response.text)

{"ok":true}



## Parse freedb dataset and upload to cloudant

In [None]:
# Install the freedb parser straight from GitHub into the user site-packages;
# the arubomu.parsers.freedb module imported in the next cell presumably comes from it.
!pip install --user --upgrade git+https://github.com/snowch/freedb_parser

In [54]:
from arubomu.parsers import freedb
import os, sys, json

# Upload freedb "rock" entries to the musicdb database, one document per album
# file. At most IMPORT_NUM files are processed.
IMPORT_NUM = 10000 # set to -1 to load everything

num_imported = 0  # number of album files processed so far (successful or not)
for album_id in os.listdir("./rock"):

    # Check the limit before doing any work so that exactly IMPORT_NUM files
    # are processed; checking after the upload sent one record too many.
    if IMPORT_NUM > -1 and num_imported >= IMPORT_NUM:
        break

    # Read and parse the entry; the file is closed again before the HTTP call.
    with open("./rock/" + album_id, "rb") as f:
        album = freedb.parseText(f.read())

    # Reset both fields on every iteration so that an entry without a title or
    # artist cannot inherit the previous album's values (or raise a NameError
    # when it happens to be the first entry).
    title = ''
    artist = ''

    # Python 2 byte strings: invalid UTF-8 bytes are dropped, presumably so that
    # json.dumps does not fail on badly encoded entries.
    if album.title:
        title = album.title.strip().decode('utf-8','ignore').encode("utf-8")

    if album.artist:
        artist = album.artist.strip().decode('utf-8','ignore').encode("utf-8")

    album_json = {
        '_id'      : album_id,  # the file name doubles as the document id
        'title'    : title,
        'artist'   : artist,
    }
    try:
        # TODO batch these requests
        response = requests.post(cl_url + '/musicdb', auth=(cl_user, cl_pass), data=json.dumps(album_json), headers={'Content-Type':'application/json'})
        # requests does not raise on 4xx/5xx responses by itself; turn them into
        # exceptions so a rejected document is reported instead of shown as a dot.
        response.raise_for_status()
        # print status using a dot for each record loaded
        sys.stdout.write('.')
        sys.stdout.flush()
    except requests.exceptions.RequestException as upload_error:
        # Only request/HTTP failures are handled here; anything else (including
        # KeyboardInterrupt) propagates instead of being silently swallowed.
        print("Unexpected error: {0} processing {1}".format(upload_error, album_id))

    num_imported = num_imported + 1

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

## Create a search index on title and artist

In [55]:
# Index definition of type "text" named "title-artist-text", covering the
# title and artist fields as strings.
indexed_fields = [
    {"name": field_name, "type": "string"}
    for field_name in ("title", "artist")
]
idx = {
    "index": {"fields": indexed_fields},
    "name": "title-artist-text",
    "type": "text"
}

# POST the definition to the database's _index endpoint and show the raw reply.
response = requests.post(
    cl_url + '/musicdb/_index',
    auth=(cl_user, cl_pass),
    data=json.dumps(idx),
    headers={'Content-Type': 'application/json'}
)
print(response.text)

{"result":"created","id":"_design/51b2ceee808dabf40825d9792ee8e929e9f4102c","name":"title-artist-text"}



## Test the search index

Try a few different searches

In [56]:
# Query using the $text selector with the search term "Floyd".
qry = {"selector": {"$text": "Floyd"}}

# POST the query to the database's _find endpoint and show the raw reply.
response = requests.post(
    cl_url + '/musicdb/_find',
    auth=(cl_user, cl_pass),
    data=json.dumps(qry),
    headers={'Content-Type': 'application/json'}
)
print(response.text)

{"docs":[
{"_id":"380e3a05","_rev":"1-121dd602ec035afd09a2f10ecc9f3e69","artist":"Pink Floyd","title":"Interstellar Encore"},
{"_id":"3909c305","_rev":"1-cca4369c582ee56d1f526463f0cf6c1b","artist":"Pink Floyd","title":"Animals"},
{"_id":"8211d70b","_rev":"1-4acf5d109f322628405122b4492db5a4","artist":"Pink Floyd","title":"Pulse Disc 1"},
{"_id":"65120c09","_rev":"1-54315b397fc6aa815e87c28f93c702b9","artist":"PINK FLOYD","title":"Stranger Than Fiction"},
{"_id":"c912ae0d","_rev":"1-c8e4bc72c13d2b7f2c8cd9189040d123","artist":"Pink Floyd","title":"Echoes Collection"},
{"_id":"15024302","_rev":"1-251f61543ea80682226a90a6a2ba2aaf","artist":"Pink Floyd","title":"On The Turning Away"},
{"_id":"1a103213","_rev":"1-7300f522581f118f0fd872fd176ebcdd","artist":"Pretty Boy Floyd","title":"The Vault"},
{"_id":"c70bc80f","_rev":"1-e3f51b2d542acfe122c86cb43a6623fb","artist":"Pink Floyd","title":"Every Brick In The Wall - CD 1"},
{"_id":"cc11060e","_rev":"1-1c34882ce95ce35d2a16cbddbac25612","ar

In [57]:
# Query using the $text selector with the search term "Wall".
qry = {"selector": {"$text": "Wall"}}

# POST the query to the database's _find endpoint and show the raw reply.
response = requests.post(
    cl_url + '/musicdb/_find',
    auth=(cl_user, cl_pass),
    data=json.dumps(qry),
    headers={'Content-Type': 'application/json'}
)
print(response.text)

{"docs":[
{"_id":"c70bc80f","_rev":"1-e3f51b2d542acfe122c86cb43a6623fb","artist":"Pink Floyd","title":"Every Brick In The Wall - CD 1"},
{"_id":"cc11060e","_rev":"1-1c34882ce95ce35d2a16cbddbac25612","artist":"Pink Floyd","title":"Behind the Wall (CD 2)"}
],
"bookmark": "g1AAAACaeJzLYWBgYMpgTmFQSUlKzi9KdUhJMtVLytVNTtZNSczRNTDUS87JL01JzCvRy0styQGpzmMBkgwHgNT____PZ2UwuTkwuHk4AMUSO1FNMiTKpAcQk95DTGLmAokl1mcBAIs4LRY"}




## Delete cloudant service

In [9]:
# Clean up: delete the 'my_cloudant' service instance created earlier in this
# notebook. NOTE(review): force=True presumably also removes the instance's
# credentials/bindings - confirm against cf_utils before relying on it.
cf.delete_service('my_cloudant', force=True)


