diff --git a/README.md b/README.md index 4838565..5243628 100644 --- a/README.md +++ b/README.md @@ -59,9 +59,17 @@ The makefile builds an example code generator that uses libcodegen, called "code Will take 30 seconds of audio from 10 seconds into the file and output JSON suitable for querying: - {"metadata":{"artist":"Michael jackson", "release":"800 chansons des annes 80", "title":"Billie jean", "genre":"", "bitrate":192, "sample_rate":44100, "seconds":294, "filename":"billie_jean.mp3", "samples_decoded":220598, "given_duration":30, "start_offset":10, "version":4.00}, "code_count":846, "code":"JxVlIuNwzAMQ1fxCDL133+xo1rnGqNAEcWy/ERa2aKeZmW... + [ + {"metadata":{"artist":"Michael Jackson", "release":"Thriller", "title":"Billie Jean", "genre":"", "bitrate":128,"sample_rate":44100, "duration":294, "filename":"billie_jean.mp3", "samples_decoded":330902, "given_duration":30, "start_offset":10, "version":4.12, "codegen_time":0.087329, "decode_time":0.297166}, "code_count":906, "code":"eJztmm2OZacORafEt2E4YGD-Q8jCt1UnXdKlIlVa ..." + ] -You can host your own [Echoprint server](http://github.com/echonest/echoprint-server "echoprint-server") and ingest or query to that. +You can POST this JSON directly to the Echo Nest's [song/identify](http://developer.echonest.com/docs/v4/song.html#identify "song/identify") (the Echo Nest runs an Echoprint server), for example: + + curl -F "query=@post_string" http://developer.echonest.com/api/v4/song/identify?api_key=YOUR_KEY + {"response": {"status": {"version": "4.2", "code": 0, "message": "Success"}, "songs": [{"tag": 0, "score": 273, "title": "Billie Jean", "message": "OK (match type 6)", "artist_id": "ARXPPEY1187FB51DF4", "artist_name": "Michael Jackson", "id": "SOJIZLV12A58A78309"}]}} + (You can also use GET; see the API description.) + +Or you can host your own [Echoprint server](http://github.com/echonest/echoprint-server "echoprint-server") and ingest or query to that. 
Codegen also runs in a multithreaded mode for bulk resolving: @@ -69,6 +77,29 @@ Codegen also runs in a multithreaded mode for bulk resolving: Will compute codes for every file in file_list for 30 seconds starting at 10 seconds. (It tries to be smart about the number of threads to use.) It will output a JSON list. +## Integration with Scala and Java via JNI +You can use echoprint-codegen inside a JVM. For this you need to create a class named Echoprint in package com.playax.fingerprint. Here is a Scala example of this class: + + package com.playax.fingerprint + + class Echoprint { + @native def code(fileName: String): String + } + + object Echoprint { + val EP = new Echoprint + + System.load("/path/to/libcodegen.4.1.2.dylib") + + def code(fileName: String) = EP.code(fileName) + } + +Then call the static method: + + Echoprint.code("/path/to/file.mp3") + +It returns the JSON containing the fingerprint data. + ## Statistics ### Speed diff --git a/build-debian-buster.sh b/build-debian-buster.sh new file mode 100755 index 0000000..fec6e97 --- /dev/null +++ b/build-debian-buster.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# +# Simple recipe for building echoprint-codegen on Debian Buster. Called +# from our Cloudbuild config. +set -e + +apt update +apt install -y build-essential ffmpeg libboost1.67-dev libtag1-dev zlib1g-dev + +# This thing needs JNI headers to be built, even if you're building only the CLI. +# Fortunately for us it compiles with the version in Buster. +apt install -y openjdk-11-jdk +export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64/" + +cd src +make \ No newline at end of file diff --git a/cloudbuild.yml b/cloudbuild.yml new file mode 100644 index 0000000..11f8bc9 --- /dev/null +++ b/cloudbuild.yml @@ -0,0 +1,30 @@ +# This config builds echoprint-codegen for a Linux distro and uploads it +# into GCS. This is NOT a good solution, as the binary will probably break +# down if libraries in the OS change even by an inch. 
+# +# Ideally, this should be repackaged as a microservice and built as a regular +# container image deployed as a sidecar, and the main Playax app should access +# this as an API instead of expecting to have the binary available locally. +# +# Other options would include building an APT or AppImage package, but clearly a +# microservice would provide better isolation for dependencies. +options: + dynamic_substitutions: true + +substitutions: + _DISTRO: 'debian' + _RELEASE: 'buster' + _BINARIES_BUCKET: 'gs://files.playax.com/binaries' + _ARTIFACT_URL: '${_BINARIES_BUCKET}/echoprint-codegen/${_DISTRO}-${_RELEASE}/echoprint-codegen' + +steps: + - name: '${_DISTRO}:${_RELEASE}' + args: ['bash', '-c', './build-${_DISTRO}-${_RELEASE}.sh'] + + # We unfortunately cannot use the cloudbuild artifact copy because + # we need to set the ACLs. + - name: 'gcr.io/cloud-builders/gsutil' + args: ['cp', './echoprint-codegen', '${_ARTIFACT_URL}'] + + - name: 'gcr.io/cloud-builders/gsutil' + args: ['acl', 'ch', '-u', 'AllUsers:r', '${_ARTIFACT_URL}'] \ No newline at end of file diff --git a/codegen.sh b/codegen.sh new file mode 100755 index 0000000..6094d51 --- /dev/null +++ b/codegen.sh @@ -0,0 +1,6 @@ +#!/bin/sh +# +# To make it easy to use with GNU Parallel, e.g., +# parallel codegen.sh ::: *.mp3 +# +echoprint-codegen -h "$1" > "$1.json" diff --git a/examples/lookup.py b/examples/lookup.py index d325dc0..54527ec 100755 --- a/examples/lookup.py +++ b/examples/lookup.py @@ -21,7 +21,7 @@ def lookup(file): fp = song.util.codegen(file) if len(fp) and "code" in fp[0]: # The version parameter to song/identify indicates the use of echoprint - result = song.identify(query_obj=fp, version="4.11") + result = song.identify(query_obj=fp, version="4.12") print "Got result:", result if len(result): print "Artist: %s (%s)" % (result[0].artist_name, result[0].artist_id) @@ -36,4 +36,4 @@ def lookup(file): if len(sys.argv) < 2: print >>sys.stderr, "Usage: %s