## Download freedb dataset

In [10]:
%%bash
# Download and unpack the freedb snapshot unless a verified copy is already present.
archive='freedb-complete-20161101.tar.bz2'
expected_md5='2182ec11b8cd61eea43d13169f28c894'

# Print the MD5 digest of the archive, or nothing when the file is missing/unreadable.
archive_md5() {
    md5sum "$archive" 2>/dev/null | cut -d' ' -f1
}

if [[ "$(archive_md5)" == "$expected_md5" ]]
then
    echo 'Dataset already downloaded.'
else
    # -O overwrites a partial/corrupt copy; without it wget would store the new
    # download as "<name>.1" and tar would unpack the stale file instead.
    wget -O "$archive" "http://ftp.freedb.org/pub/freedb/$archive" || exit 1

    # Never unpack an archive that fails verification.
    if [[ "$(archive_md5)" != "$expected_md5" ]]
    then
        echo "Checksum mismatch for $archive; not extracting." >&2
        exit 1
    fi
    tar -xf "$archive"
fi

Dataset already downloaded.


---
## Set up Cloud Foundry utilities

In [None]:
# Install (or upgrade) the nb_utils package straight from GitHub into the user
# site-packages; presumably this is what provides the `cf_utils` module imported later.
!pip install --user --upgrade --quiet git+https://github.com/snowch/nb_utils

In [11]:
from getpass import getpass

# Cloud Foundry API endpoint to target. Known regional endpoints:
#   https://api.ng.bluemix.net     - for the US South Region
#   https://api.eu-gb.bluemix.net  - for the UK
#   https://api.au-syd.bluemix.net - for Australia
target_endpoint = 'https://api.ng.bluemix.net'

# Bluemix organization and space the service instance is created in.
bluemix_organization_name = 'chris.snow@uk.ibm.com'
bluemix_space_name = 'dev'

# Credentials are prompted for interactively so they are never saved in the notebook.
ibm_id = getpass("ibm id: ")
ibm_id_password = getpass("ibm id password: ")

ibm id: ········
ibm id password: ········


In [22]:
from cf_utils import cf_utils

# Build the Cloud Foundry helper from the endpoint, login and org/space chosen above.
cf = cf_utils.CloudFoundryUtil(
    target_endpoint,
    ibm_id,
    ibm_id_password,
    bluemix_organization_name,
    bluemix_space_name
)

# Plan identifiers can be looked up with:
#   cf.search_plans('cloudant')
# which reported, for the Cloudant "lite" plan:
#   service_guid = 14c83ad2-6fd4-439a-8c3a-d1a20f8a2381

## Create cloudant service instance and database

In [40]:
# Install the `cloudant` Python client library into the user site-packages.
!pip install --user cloudant



In [23]:
# Create a service instance called 'my_cloudant' and request default credentials.
# The first argument is presumably the plan GUID of the Cloudant "lite" plan
# (it matches the value noted from cf.search_plans) - confirm before changing plans.
cf.create_service_instance(
    '14c83ad2-6fd4-439a-8c3a-d1a20f8a2381',
    'my_cloudant',
    create_default_credentials=True
)

In [24]:
import json

# Wrap the 'my_cloudant' credentials under a "cloudantNoSQLDB" key
# (presumably mirroring the VCAP_SERVICES layout an app would receive).
vcap = {"cloudantNoSQLDB": [{"credentials": cf.get_service_credentials('my_cloudant')}]}

# Uncomment to dump the structure as JSON (note: this prints the password).
#print(json.dumps(vcap))

In [25]:
# Look the credentials up once and reuse the result: the original repeated the
# lookup (presumably a remote API call) for every field, which is wasteful and
# could mix values from different responses if the credentials changed in between.
cl_credentials = cf.get_service_credentials('my_cloudant')

cl_url  = cl_credentials['url']       # base URL used for the REST calls in this notebook
cl_host = cl_credentials['host']
cl_user = cl_credentials['username']
cl_pass = cl_credentials['password']

Delete database

In [45]:
import requests

# Issue an HTTP DELETE against the 'musicdb' database URL and show the server's reply.
musicdb_url = cl_url + '/musicdb'
response = requests.delete(musicdb_url, auth=(cl_user, cl_pass))
print(response.text)

{"ok":true}



Create database

In [46]:
import requests

# Issue an HTTP PUT against the 'musicdb' database URL and show the server's reply.
musicdb_url = cl_url + '/musicdb'
response = requests.put(musicdb_url, auth=(cl_user, cl_pass))
print(response.text)

{"ok":true}



In [35]:
# List the working directory, e.g. to check which category folders
# (blues, classical, ...) exist alongside the downloaded archive.
!ls

COPYING				  jazz			 nohup.out
README				  messagehub.properties  ratings.dat
blues				  misc			 recommender_model
classical			  ml-1m			 recommender_model.tgz
country				  ml-1m.zip		 reggae
data				  ml-1m.zip.1		 rock
demo_2710			  ml-1m.zip.2		 soundtrack
folk				  ml-1m.zip.3
freedb-complete-20161101.tar.bz2  newage


## Parse freedb dataset and upload to cloudant

In [28]:
# Install (or upgrade) the freedb_parser package straight from GitHub into the
# user site-packages; presumably this provides `arubomu.parsers.freedb` used later.
!pip install --user --upgrade git+https://github.com/snowch/freedb_parser

Collecting git+https://github.com/snowch/freedb_parser
  Cloning https://github.com/snowch/freedb_parser to /gpfs/fs01/user/s85d-88ebffb000cc3e-39ca506ba762/notebook/tmp/pip-nj16Pe-build
Installing collected packages: freedb-parser
  Found existing installation: freedb-parser 0.1
    Uninstalling freedb-parser-0.1:
      Successfully uninstalled freedb-parser-0.1
  Running setup.py install for freedb-parser ... [?25l- done
[?25hSuccessfully installed freedb-parser-0.1


In [None]:
from arubomu.parsers import freedb
import os, sys, json

from cloudant.client import Cloudant
from cloudant.adapters import Replay429Adapter

# Connect to Cloudant; the Replay429Adapter is configured with up to 10 retries
# (per the python-cloudant docs it replays requests answered with HTTP 429).
client = Cloudant(cl_user, cl_pass, url=cl_url, adapter=Replay429Adapter(retries=10))
client.connect()
my_database = client['musicdb']

# Maximum number of files to process per category; set to -1 to load everything.
IMPORT_NUM = -1

# One directory per category in the working directory.
categories = ['blues', 'classical', 'country', 'folk', 'jazz', 'misc', 'newage', 'reggae', 'rock', 'soundtrack']


def _clean_text(value):
    """Return `value` stripped and with invalid UTF-8 bytes dropped, or '' if it is empty.

    Assumes `value` is a byte string (Python 2 `str`), as the original code did -
    TODO confirm against the freedb_parser return types.
    """
    if not value:
        return ''
    return value.strip().decode('utf-8', 'ignore').encode("utf-8")


for category in categories:
    num_imported = 0
    for album_id in os.listdir(os.path.join(".", category)):

        # Check the limit *before* processing so exactly IMPORT_NUM files are
        # handled per category (the original processed one file too many).
        if IMPORT_NUM > -1 and num_imported >= IMPORT_NUM:
            break
        num_imported += 1

        # Read and parse, then release the file handle before any network I/O.
        with open(os.path.join(".", category, album_id), "rb") as f:
            album = freedb.parseText(f.read())

        # Missing fields become '' instead of raising NameError or silently
        # reusing the previous album's value.
        album_json = {
            '_id'      : album_id,
            'title'    : _clean_text(album.title),
            'artist'   : _clean_text(album.artist),
        }

        try:
            # TODO batch these requests
            my_document = my_database.create_document(album_json)
            # '+' marks a document confirmed to exist after creation, '-' otherwise.
            if my_document.exists():
                sys.stdout.write('+')
            else:
                sys.stdout.write('-')
            sys.stdout.flush()
        except Exception:
            # Best-effort import: report and carry on, but let KeyboardInterrupt /
            # SystemExit propagate so a long-running import can still be stopped.
            print("\nUnexpected error: {0} processing {1}".format(sys.exc_info()[0], album_id))

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

## Create a search index on title and artist

In [30]:
# Fields covered by the text index; both are indexed as strings.
indexed_fields = [
    {"name": "title", "type": "string"},
    {"name": "artist", "type": "string"}
]

# Index definition: a "text" index named "title-artist-text" over the fields above.
idx = {
    "index": {"fields": indexed_fields},
    "name": "title-artist-text",
    "type": "text"
}

# POST the definition to the database's _index endpoint and show the server's reply.
response = requests.post(
    cl_url + '/musicdb/_index',
    auth=(cl_user, cl_pass),
    data=json.dumps(idx),
    headers={'Content-Type':'application/json'}
)
print(response.text)

{"result":"created","id":"_design/51b2ceee808dabf40825d9792ee8e929e9f4102c","name":"title-artist-text"}



## Test the search index

Try a few different searches

In [31]:
# Text-search selector for the term "Floyd".
qry = {"selector": {"$text": "Floyd"}}

# POST the query to the database's _find endpoint and show the raw JSON reply.
find_url = cl_url + '/musicdb/_find'
json_headers = {'Content-Type':'application/json'}
response = requests.post(find_url, auth=(cl_user, cl_pass), data=json.dumps(qry), headers=json_headers)
print(response.text)

{"docs":[
{"_id":"bf129f0d","_rev":"1-20a95c944f5c4cf8875028d19005a0e2","artist":"Pink Floyd","title":"Echoes - Best of Pink Floyd CD2"},
{"_id":"380e3a05","_rev":"1-121dd602ec035afd09a2f10ecc9f3e69","artist":"Pink Floyd","title":"Interstellar Encore"},
{"_id":"3d09ca05","_rev":"1-cca4369c582ee56d1f526463f0cf6c1b","artist":"Pink Floyd","title":"Animals"},
{"_id":"c912ae0d","_rev":"1-c8e4bc72c13d2b7f2c8cd9189040d123","artist":"Pink Floyd","title":"Echoes Collection"},
{"_id":"6b096208","_rev":"1-43ce5fa1064e55bf8c0000797cd7f139","artist":"Pink Floyd","title":"Concertgebouw - The Man"},
{"_id":"65120c09","_rev":"1-54315b397fc6aa815e87c28f93c702b9","artist":"PINK FLOYD","title":"Stranger Than Fiction"},
{"_id":"3709c305","_rev":"1-cca4369c582ee56d1f526463f0cf6c1b","artist":"Pink Floyd","title":"Animals"},
{"_id":"890b0b0b","_rev":"1-a9627a202dfc2e79630cf6092a89cb44","artist":"Pink Floyd","title":"Vol. 1"},
{"_id":"ae10d40d","_rev":"1-a0e8a0f4d274d6cef69a4ecfd616584c","artist":"Pi

In [32]:
# Text-search selector for the term "Wall".
qry = {"selector": {"$text": "Wall"}}

# POST the query to the database's _find endpoint and show the raw JSON reply.
find_url = cl_url + '/musicdb/_find'
json_headers = {'Content-Type':'application/json'}
response = requests.post(find_url, auth=(cl_user, cl_pass), data=json.dumps(qry), headers=json_headers)
print(response.text)

{"docs":[
{"_id":"f8107022","_rev":"1-6bc21b9716347634041fe2e563c63b30","artist":"Paul Wall","title":"How To Be A Player"},
{"_id":"c412900f","_rev":"1-3d74b9aabb6f1faedfd837021a11ad1c","artist":"Roger Waters","title":"The Wall, Santiago 5.3 2002 cd2"},
{"_id":"c70bc80f","_rev":"1-e3f51b2d542acfe122c86cb43a6623fb","artist":"Pink Floyd","title":"Every Brick In The Wall - CD 1"},
{"_id":"cc11060e","_rev":"1-1c34882ce95ce35d2a16cbddbac25612","artist":"Pink Floyd","title":"Behind the Wall (CD 2)"},
{"_id":"d40c110e","_rev":"1-c520f895c71326091dd2ba89415e8c3a","artist":"Pink Floyd","title":"The Wall Live Earl's Court 1981 Disc 2"},
{"_id":"b5114f1e","_rev":"1-19610b6fb98e3acdb657b82cba8ad162","artist":"Basement Wall - New Breed","title":"The New Breed - Want Ad Reader"},
{"_id":"4a04b706","_rev":"1-e54c18557eccb63c703d4f7224ceb3a3","artist":"Bottomed Out - Before I Go","title":"Your Picture's On My Wall"}
],
"bookmark": "g1AAAAIIeJzLYWBgYM9gTmFQSUlKzi9KdUhJMtNLytVNTtZNSczRNTDUS87JL

## Delete cloudant service

In [None]:
# Clean up: remove the 'my_cloudant' service instance that this notebook created.
cf.delete_service('my_cloudant', force=True)

In [None]:
# Clone the demo_2710 repository into the working directory.
!git clone https://github.com/snowch/demo_2710

In [None]:
# Pull the latest upstream changes into the existing demo_2710 checkout.
!cd demo_2710 && git pull

In [None]:
# Install the web app's Python requirements and, if that succeeds, launch it via run.sh.
!cd demo_2710/web_app && pip install -r requirements.txt && ./run.sh