In [1]:
from query.datasets.prelude import *
from query.datasets.tvnews.shot_detect import shot_detect, shot_stitch
from query.datasets.tvnews.face_detect import face_detect
from query.datasets.tvnews.face_embed import face_embed
from query.datasets.tvnews.pose_detect import pose_detect
from query.datasets.tvnews.identity_detect import identity_detect
from query.datasets.tvnews.animatedness import shot_frame_to_detect

In [None]:
dims = json.load(open('/app/dims.json'))
videos = list(tqdm(Video.objects.all()))
def foo(video):
    item = video.path.split('/')[-1].replace('.mp4', '')
    if item in dims:
        video.height = dims[item]
        video.save()
par_for(foo, videos)

In [2]:
def get_videos():
    log.debug('Fetching videos')
    return list(Video.objects.annotate(
        c=Subquery(
            Shot.objects.filter(video=OuterRef('pk')).values('video') \
            .annotate(c=Count('video')).values('c')
        )).filter(c__isnull=False).order_by('id'))

videos_with_shots = pcache.get('videos_with_shots', fn=get_videos, method='pickle')
videos = videos_with_shots

In [3]:
with Timer('Detecting shots'):
    all_shots = shot_detect(videos)
    face_frame_per_shot = [[shot_frame_to_detect(shot) for shot in vid_shots]
                           for vid_shots in all_shots]

D 18-01-11 11:34:29 prelude.py:218] -- START: Detecting shots
D 18-01-11 11:35:46 prelude.py:224] -- END: Detecting shots -- 77.069s


In [4]:
with Timer('Detecting sparse face'):
    all_faces = face_detect(videos, face_frame_per_shot)
    filtered_videos, filtered_frames, filtered_faces, filtered_shots = unzip(
        [(video, vid_frames, vid_faces, vid_shots)
         for video, vid_frames, vid_faces, vid_shots in zip(videos, face_frame_per_shot, all_faces, all_shots)
         if vid_faces is not None])

D 18-01-11 11:35:46 prelude.py:218] -- START: Detecting sparse face
D 18-01-11 11:36:36 prelude.py:224] -- END: Detecting sparse face -- 50.067s


In [None]:
#all_bboxes = pickle.load(open('/app/bboxes.pkl', 'rb'))

In [None]:
#print(filtered_videos[2].path)

In [None]:
#with Timer('Embedding faces'):

In [5]:
def output_name(video, frames):
    return video.path + '_embeddings_' + str(hash(tuple(frames)))

with make_scanner_db() as db:
    db._load_db_metadata()
    
filter2_videos, filter2_frames, filter2_faces, filter2_shots, embed_tables = unzip(
    [(video, frames, faces, shots, db.table(output_name(video, frames)))
          for (video, frames, faces, shots) in tqdm(zip(filtered_videos, filtered_frames, filtered_faces, filtered_shots))
          if db.has_table(output_name(video, frames)) and db.table(output_name(video, frames)).committed()])


100%|██████████| 30471/30471 [00:01<00:00, 23368.46it/s]


In [6]:
with Timer("Embedding faces"):
    def load_embs():
        log.debug('Loading embs')
        EMBEDDING_SIZE = 128
        def load((t, vid_faces)):
            embs = list(t.column('embeddings').load())
            arrays = [np.frombuffer(emb, dtype=np.float32) if emb is not None else [] for _, emb in embs]
            return [np.split(a, len(a) / 128) if len(a) > 0 else [] for a in arrays]
        return par_for(load, zip(embed_tables, filter2_faces), workers=8)

    all_embs = pcache.get('all_embs', load_embs, method='pickle')

# from query.datasets.tvnews.embed_kernel import EmbedFaceKernel
# import query.datasets.tvnews.embed_kernel
# reload(query.datasets.tvnews.embed_kernel)

# from scannerpy.stdlib import writers
# import cv2
# img = cv2.imread("/app/test.jpg")
# bboxes = [
#     db.protobufs.BoundingBox(x1=0, y1=0, x2=img.shape[1] - 1, y2=img.shape[0] - 1)
# ]

# kernel = query.datasets.tvnews.embed_kernel.EmbedFaceKernel(None, db.protobufs)
# [emb] = kernel.execute(
#     [cv2.cvtColor(img, cv2.COLOR_RGB2BGR),
#      writers.bboxes([bboxes], db.protobufs)[0]])
# exemplar_vector = np.frombuffer(emb, dtype=np.float32)
# print(exemplar_vector.shape)



D 18-01-11 11:38:57 prelude.py:218] -- START: Embedding faces
D 18-01-11 11:57:16 prelude.py:224] -- END: Embedding faces -- 1098.577s


In [51]:
import struct

serializers = {}
type_hashes = {}
def serializer(ty):
    def register_class(cls):
        serializers[ty] = cls
        type_hashes[hash(ty)] = ty
        return cls
    return register_class

def serialize(v):
    ty = type(v)
    if ty in serializers:
        serializer = serializers[ty]
        hsh = hash(ty)
    else:
        serializer = DefaultSerializer
        hsh = 0
    return '{}{}'.format(struct.pack('=i', hsh), serializer.serialize(v))

def _deserialize(s):
    hsh, s = struct.unpack('=i', s[:4])[0], s[4:]
    serializer = serializers[type_hashes[hsh]]
    return serializer.deserialize(s)

def deserialize(s):
    return _deserialize(s)[0]
    
class DefaultSerializer:
    @staticmethod
    def serialize(v):
        return serialize(pickle.dumps(v))
    
    @staticmethod
    def deserialize(s):
        ps, s = _deserialize(s)
        return pickle.loads(ps), s
    
type_hashes[0] = 0    
serializers[0] = DefaultSerializer    

@serializer(int)
class IntSerializer:
    @staticmethod
    def serialize(v):
        return struct.pack('=q', v)
    
    @staticmethod
    def deserialize(s):
        return struct.unpack('=q', s[:8])[0], s[8:]
    
@serializer(str)    
class StringSerializer:
    @staticmethod
    def serialize(v):
        return '{}{}'.format(serialize(len(v)), v)
    
    @staticmethod
    def deserialize(s):
        n, s = _deserialize(s)
        return s[:n], s[n:]

@serializer(np.array)    
class NumpySerializer:
    @staticmethod
    def serialize(v):
        dtype = serialize(pi)
        n = serialize(len(v))
        return '{}{}{}'.format(dtype, n, v.tobytes())
    
    @staticmethod
    def deserialize(s):
        dtype, s = _deserialize(s)
        n, s = _deserialize(s)
        return np.frombuffer(s[:n], dtype=dtype), s[n:]

@serializer(list)    
class ListSerializer:
    @staticmethod
    def serialize(v):
        n = serialize(len(v))
        return '{}{}'.format(
            n, ''.join([serialize(x) for x in v])
        )
    
    @staticmethod
    def deserialize(s):
        n, s = _deserialize(s)
        l = []
        for _ in range(n):
            x, s = _deserialize(s)
            l.append(x)
        return l, s

def test(x):
    assert(deserialize(serialize(x)) == x)

test(1)
test("hello")
test([1, "hello"])

# with Timer('myser'):
#     x = serialize(all_embs[:10])
# with Timer('mydeser'):
#     deserialize(x)
# with Timer('pickleser'):
#     x = pickle.dumps(all_embs[:10])
# with Timer('pickledeser'):
#     pickle.loads(x)

#print(len(pickle.dumps(all_embs[0])))
#print(sum([sum([e.nbytes for e in l]) for l in all_embs[0]]))


In [77]:
with Timer("Stitching shots"):
    def load_stitches():
        log.debug('Computing stitches')
        import query.datasets.tvnews.shot_detect
        reload(query.datasets.tvnews.shot_detect)
        return query.datasets.tvnews.shot_detect.shot_stitch(filter2_videos, filter2_shots, filter2_frames, filter2_faces, all_embs)
    (stitched_shots, indices) = pcache.get('stitched_shots', load_stitches, method='marshal', force=True)

D 18-01-11 15:38:58 prelude.py:218] -- START: Stitching shots
DEBUG:esper:-- START: Stitching shots
D 18-01-11 15:38:58 <ipython-input-77-f885bd83f224>:003] Computing stitches
DEBUG:esper:Computing stitches
D 18-01-11 15:38:58 shot_detect.py:384] Doing shot stitch
DEBUG:esper:Doing shot stitch
100%|██████████| 30046/30046 [09:09<00:00, 54.71it/s]
D 18-01-11 15:51:30 prelude.py:224] -- END: Stitching shots -- 752.512s
DEBUG:esper:-- END: Stitching shots -- 752.512s


In [65]:
all_shot_models = []
for (video, shots) in tqdm(zip(filter2_videos[1:], stitched_shots)):
    all_shot_models.extend([
        Shot(min_frame=s['min_frame'], max_frame=s['max_frame'], labeler_id=s['labeler'], video_id=s['video__id'])
        for s in shots
    ])


100%|██████████| 30045/30045 [09:22<00:00, 53.38it/s]


IntegrityError: duplicate key value violates unique constraint "query_tvnews_shot_pkey"
DETAIL:  Key (id)=(43061205) already exists.


In [71]:
for i in tqdm(range(0, len(all_shot_models), 100000)):
    Shot.objects.bulk_create(all_shot_models[i:i+100000])

100%|██████████| 293/293 [51:34<00:00, 10.56s/it]
