In [1]:
%load_ext nb_black
%load_ext Cython

<IPython.core.display.Javascript object>

In [2]:
%%cython

from pyhpfw import ParallelCollector
import glob
import pickle
import numpy as np
cimport numpy as np
cimport cython
from libc.stdint cimport uint64_t

cdef extern int __builtin_popcountll(unsigned long long) nogil
        
@cython.boundscheck(False)
@cython.wraparound(False)
cdef unsigned long long _xor(uint64_t[:] array1, uint64_t[:] array2) nogil:
    cdef int i
    cdef unsigned long long cnt = 0
    for i in xrange(len(array1)):
        cnt += __builtin_popcountll(array1[i] ^ array2[i])
    return cnt

@cython.boundscheck(False)
@cython.wraparound(False)
cdef unsigned long long _match_to_ref(uint64_t[:] query, uint64_t[:] ref) nogil:
    cdef unsigned long long bestDistance = 4294967294
    cdef int bestIndex = 0
    cdef int k = len(query)
    cdef int n = len(ref)
    
    cdef int startCol
    cdef int endCol
    cdef unsigned long long distance
    for i in range(n - k + 1):
        startCol = i
        endCol = startCol + k - 1
        distance = _xor(query, ref[startCol : endCol + 1])

        if distance < bestDistance:
            bestDistance = distance
            bestIndex = i

    return bestDistance

def match_to_ref(query, ref):
    return _match_to_ref(query, ref)

def fast_xor(array1, array2):
    return _xor(array1, array2)

<IPython.core.display.Javascript object>

In [3]:
collector = ParallelCollector()

<IPython.core.display.Javascript object>

In [53]:
filenames = glob.glob("/Users/chingachgook/dev/QtProjects/hpfw/original/*")

<IPython.core.display.Javascript object>

In [5]:
# %%time

# hashprints = collector.prepare(filenames)

<IPython.core.display.Javascript object>

In [6]:
# with open('dump', 'wb') as fp:
#     pickle.dump(hashprints, fp)

<IPython.core.display.Javascript object>

In [7]:
hashprints = []
with open("dump", "rb") as fp:
    hashprints = pickle.load(fp)

<IPython.core.display.Javascript object>

In [8]:
slices = glob.glob("/Users/chingachgook/dev/QtProjects/hpfw/slices/*")
slices

['/Users/chingachgook/dev/QtProjects/hpfw/slices/slice008_The Killers - Mr.mp3',
 '/Users/chingachgook/dev/QtProjects/hpfw/slices/slice007_Буерак - На старых сидениях кинотеатра live.mp3',
 '/Users/chingachgook/dev/QtProjects/hpfw/slices/slice023_Joy Division - New Dawn Fades live.mp3',
 '/Users/chingachgook/dev/QtProjects/hpfw/slices/slice007_The Smiths - The Boy with the Thorn in His Side live.mp3',
 '/Users/chingachgook/dev/QtProjects/hpfw/slices/slice015_пасош - каждый день live.mp3',
 '/Users/chingachgook/dev/QtProjects/hpfw/slices/slice003_Буерак - На старых сидениях кинотеатра live.mp3',
 '/Users/chingachgook/dev/QtProjects/hpfw/slices/slice016_Joy Division - New Dawn Fades live.mp3',
 '/Users/chingachgook/dev/QtProjects/hpfw/slices/slice002_Joy Division - Day Of The Lords live.mp3',
 '/Users/chingachgook/dev/QtProjects/hpfw/slices/slice008_The Smiths - Cemetry Gates live.mp3',
 '/Users/chingachgook/dev/QtProjects/hpfw/slices/slice005_Joy Division - New Dawn Fades live.mp3',
 '/

<IPython.core.display.Javascript object>

In [9]:
collector.load("")

<IPython.core.display.Javascript object>

In [54]:
%%time

from collections import defaultdict
import multiprocessing


def search(label):
    global collector
    queryFeatures = collector.calc_hashprint(label)
    if len(queryFeatures) == 0:
        return ("INVALID QUERY", -1, -1)

    top = []
    print("Finding ", label)
    global hashprints
    k = len(queryFeatures)

    for f in hashprints:
        refFeatures = f[0]
        refLabel = f[1]
        bestDistance = match_to_ref(queryFeatures, refFeatures)
        top.append((bestDistance, refLabel))

    top = sorted(top)
    return (label, top[:10])


pool = multiprocessing.Pool()
ans = pool.map(search, slices)
pool.close()
pool.join()

Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice012_Joy Division - Day Of The Lords live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice008_The Killers - Mr.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice023_Joy Division - Day Of The Lords live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice000_Joy Division - New Dawn Fades live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice027_Joy Division - Day Of The Lords live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice003_Joy Division - New Dawn Fades live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice015_Joy Division - New Dawn Fades live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice007_Буерак - На старых сидениях кинотеатра live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice002_Буерак - На старых сидениях кинотеатра live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/sl

Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice014_пасош - каждый день live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice002_The Smiths - Cemetry Gates live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice002_пасош - каждый день live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice017_The Killers - Mr.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice001_Буерак - На старых сидениях кинотеатра live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice011_The Killers - Mr.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice024_Буерак - На старых сидениях кинотеатра live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice017_Буерак - На старых сидениях кинотеатра live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice001_The Smiths - Cemetry Gates live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice019_Буерак - На старых сидениях киноте

Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice018_The Smiths - The Boy with the Thorn in His Side live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice012_The Doors - People Are Strange live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice000_The Smiths - There Is a Light That Never Goes Out live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice019_The Killers - Mr.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice024_The Smiths - There Is a Light That Never Goes Out live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice013_Joy Division - Day Of The Lords live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice013_The Smiths - Cemetry Gates live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice010_Joy Division - Day Of The Lords live.mp3
Finding  /Users/chingachgook/dev/QtProjects/hpfw/slices/slice017_пасош - каждый день live.mp3
Finding  /Users/chingachgook/dev

<IPython.core.display.Javascript object>

In [55]:
ans[0]

('/Users/chingachgook/dev/QtProjects/hpfw/slices/slice008_The Killers - Mr.mp3',
 [(15741, 'The Killers - Mr'),
  (18050, 'Chernikovskaya Hata - Spektakl Okonchen'),
  (18164, 'Neil Young - Old Man'),
  (18183, 'Розенбаум Александр - Глухари'),
  (18241, 'Céline Dion - My Heart Will Go On'),
  (18273, 'Tom Waits - Martha'),
  (18286, 'Nelly Furtado - Promiscious'),
  (18303, 'Metallica - Master Of Puppets'),
  (18320, 'Михаил Шуфутинский - 3-е сентября'),
  (18321, 'Peter Gabriel - Games Without Frontiers')])

<IPython.core.display.Javascript object>

In [56]:
from collections import Counter

counter = Counter([a[0].find(a[1][0][1]) != -1 for a in ans])

<IPython.core.display.Javascript object>

In [57]:
1 - counter[False] / counter[True]

0.8520408163265306

<IPython.core.display.Javascript object>