### Prerequisites
You need training.json, which tells the notebook where to download the data from. You also need *ffmpeg*, *git*, *patch* and C build tools installed. This notebook has been tested only on Linux.

In [2]:
from glob import glob
import json
from subprocess import call
import os
import shutil
from urllib import request
import matplotlib.pyplot as plt
import numpy as np
from scipy import ndimage
from scipy.misc import imsave
from scipy.ndimage import measurements

### Download darknet

In [None]:
# Fetch and build darknet, then copy the pieces this notebook needs next to it.
# Paths are given relative to the notebook directory (no os.chdir), so a failure
# part-way through cannot leave the kernel in the wrong working directory.
if not os.path.isdir('darknet'): # a re-run reuses the existing checkout
	if call(['git', 'clone', 'https://github.com/pjreddie/darknet']) != 0: # Download darknet
		raise RuntimeError('git clone of darknet failed')
if call(['make'], cwd='darknet') != 0: # Compile darknet
	raise RuntimeError('building darknet failed')
shutil.copyfile('darknet/libdarknet.so', 'libdarknet.so')
shutil.copyfile('darknet/python/darknet.py', 'darknet.py')
for folder in ('cfg', 'data'):
	if not os.path.isdir(folder): # copytree raises if the destination already exists
		shutil.copytree('darknet/%s' % folder, folder)
# darknet.py was freshly copied above, so the patch always applies to a pristine file
if call(['patch', 'darknet.py', 'darknet.patch']) != 0: # Patch for Python 3
	raise RuntimeError('patching darknet.py failed')

#### Download weights, it will take some time

In [None]:
# Download the weights only once. The file is fetched under a temporary name
# and renamed afterwards, so an interrupted download is never mistaken for a
# complete yolo.weights on the next run.
if not os.path.isfile('yolo.weights'):
	request.urlretrieve('https://pjreddie.com/media/files/yolo.weights', 'yolo.weights.part')
	os.replace('yolo.weights.part', 'yolo.weights')

### Load darknet

In [3]:
import darknet as dn

dn.set_gpu(0)
# Paths are handed over as byte strings; these literals are the same bytes
# that UTF-8 encoding the text paths produces.
net = dn.load_net(b"cfg/yolo.cfg", b"yolo.weights", 0)
meta = dn.load_meta(b"cfg/coco.data")

### Download the data
Based on *training.json*, this downloads the first frame of up to *max_samples* sample videos for each of up to *matches_todo* matches, considering only matches that have at least *min_samples* samples, and downscales the frames by a factor of *scale*.

<span style="color:red">It's not necessary to run this, just unpack data.7z to get a subset of validation data.</span>

In [7]:
# Settings for the download cell.
matches_todo = 3 # stop after this many matches have been processed
min_samples = 5 # matches with fewer samples than this are skipped
max_samples = 5 # at most this many frames are taken per match
scale = 1 # frame width and height (and ball coordinates) are divided by this

In [8]:
# Group the samples listed in training.json by match hash, then save the first
# video frame of each annotated sample as match_<hash>/out_<sample>_<x>_<y>.png,
# where <x>, <y> is the annotated ball position divided by scale.
with open('training.json') as training_file: # closed as soon as it is parsed
	data = json.load(training_file)['samples']

# matches maps a match hash to the indices (into data) of its samples
matches = {}
for i, sample in enumerate(data):
	matches.setdefault(sample['hash'], []).append(i)

matches_done = 0
for match_hash, items in matches.items(): # not called "hash": that would shadow the builtin
	if len(items) >= min_samples:
		samples_done = 0
		os.makedirs('match_%s' % match_hash, exist_ok=True)
		for item in items:
			annotation_url = data[item]['human_annotation']['s3annotation']
			with request.urlopen(annotation_url) as annotation_file: # release the connection after reading
				annotation_json = json.load(annotation_file)
			try:
				ball = annotation_json['balls']['0']
			except KeyError:
				continue # no ball annotated for this sample
			out_filename = 'match_%s/out_%d_%d_%d.png' % (match_hash, item, ball['x'] / scale, ball['y'] / scale)
			if os.path.isfile(out_filename):
				print ('%s already exists' % out_filename)
			else:
				video_url = data[item]['s3video']
				ffmpeg_status = call(['ffmpeg', '-i', video_url, '-vf', 'scale=iw/%d:ih/%d' % (scale, scale), '-vframes', '1', out_filename])
				if ffmpeg_status != 0:
					# A frame that could not be extracted must not count towards max_samples
					print ('%s could not be extracted (ffmpeg exit code %d)' % (out_filename, ffmpeg_status))
					continue
			samples_done += 1
			if samples_done >= max_samples:
				print ('samples limit reached')
				break
		print ('%s done' % match_hash)
		matches_done += 1
	if matches_done >= matches_todo:
		print ('match limit reached')
		break
print ('done')

samples limit reached
4bde8108eaaba0f8a543cdf669eef8c1 done
samples limit reached
0ec428510ab0bb3a5a68e3bb1271bf0f done
samples limit reached
5662eacc3d67a59bd4aef0b4887f4b54 done
match limit reached
done


### Function to run the detection
To get the nice images, where found objects are shown, run
`darknet/darknet detect cfg/yolo.cfg yolo.weights (image filename)`,
which creates predictions.png

In [26]:
def detect(filename, use_cut):
	"""Run the detector on one image and score the result for the ball.

	filename -- path of an image named out_<sample>_<ballx>_<bally>.png or, when
	            use_cut is true, cut_<sample>_<ballx>_<bally>_<xa>_<ya>_<isball>.png
	use_cut  -- true if the image is a cut-out: its offset (xa, ya) is then added
	            to the detected position and <isball> tells whether a ball is
	            expected; for full frames a ball is always expected

	Prints every detection plus a one-line verdict and returns the tuple
	(correct_yes, correct_no, false_positive, false_negative), in which exactly
	one entry is 1. Only the presence of a ball is scored; the position is
	printed but not compared with the expected one.
	"""
	# Parse the file name only, so '_' or '.' in the directory part cannot
	# shift the fields.
	params = os.path.splitext(os.path.basename(filename))[0].split('_')
	ballx = int(params[2])
	bally = int(params[3])
	if use_cut:
		xa = int(params[4])
		ya = int(params[5])
		isball = int(params[6])
	else:
		isball = 1

	r = dn.detect(net, meta, filename.encode('utf-8'))
	best_prob = 0
	best_coords = None
	best_name = ''
	for name_bytes, prob, coords in r:
		name = name_bytes.decode('utf-8')
		print ('   ', name, round(prob * 100), '%')
		# endswith rather than a substring test: the COCO labels "baseball bat"
		# and "baseball glove" contain "ball" too, but are not balls.
		if name.endswith('ball') and prob > best_prob:
			best_coords = coords
			best_name = name
			best_prob = prob

	correct_yes = 0
	correct_no = 0
	false_positive = 0
	false_negative = 0

	if best_prob > 0:
		# presumably (x, y) is the centre of the detected box -- TODO confirm against darknet.py
		best_x, best_y, best_w, best_h = best_coords
		if use_cut:
			# Translate from cut-out coordinates back to full-frame coordinates
			best_x += xa
			best_y += ya
		if isball:
			correct_yes += 1
			print (filename, 'found', best_name, '(', round(best_prob * 100), '%)', best_x, best_y, 'expected', ballx, bally)
		else:
			false_positive += 1
			print (filename, 'found', best_name, '(', round(best_prob * 100), '%)', best_x, best_y, 'expected not to find')
	else:
		if isball:
			false_negative += 1
			print (filename, 'didn\'t find, expected', ballx, bally)
		else:
			correct_no += 1
			print (filename, 'didn\'t find, as expected')

	return correct_yes, correct_no, false_positive, false_negative

### Run without pre-processing

In [13]:
# Baseline: hand every full-size frame of every match straight to the detector.
for match in glob('match_*'):
	frame_pattern = '%s/out_*.png' % match
	for filename in glob(frame_pattern):
		detect(filename, use_cut=False)

    person 59 %
match_5662eacc3d67a59bd4aef0b4887f4b54/out_105_1724_431.png didn't find, expected 1724 431
    person 63 %
match_5662eacc3d67a59bd4aef0b4887f4b54/out_110_1689_594.png didn't find, expected 1689 594
    person 64 %
match_5662eacc3d67a59bd4aef0b4887f4b54/out_109_1368_379.png didn't find, expected 1368 379
match_5662eacc3d67a59bd4aef0b4887f4b54/out_106_995_238.png didn't find, expected 995 238
    person 75 %
    person 67 %
    person 51 %
match_5662eacc3d67a59bd4aef0b4887f4b54/out_112_526_538.png didn't find, expected 526 538
match_4bde8108eaaba0f8a543cdf669eef8c1/out_2_2049_532.png didn't find, expected 2049 532
    person 77 %
    person 52 %
match_4bde8108eaaba0f8a543cdf669eef8c1/out_9_2496_1081.png didn't find, expected 2496 1081
    person 54 %
match_4bde8108eaaba0f8a543cdf669eef8c1/out_6_502_950.png didn't find, expected 502 950
match_4bde8108eaaba0f8a543cdf669eef8c1/out_4_3485_700.png didn't find, expected 3485 700
match_4bde8108eaaba0f8a543cdf669eef8c1/out_7_2139

### Pre-processing

#### Find median image for each match
If this gives MemoryError, use median.cpp instead, which does the same, but uses less memory.

In [None]:
# For every match, save the per-pixel median of up to 5 evenly spaced frames
# as match_<hash>/median.png.
for match in glob('match_*'):
	filenames = glob('%s/out_*.png' % match)
	if not filenames:
		print ('%s has no frames, skipped' % match)
		continue
	image = ndimage.imread(filenames[0])
	Nimage = min(len(filenames), 5)
	choices = np.linspace(0, len(filenames) - 1, Nimage, dtype=int)

	# Keep the stack in the frames' own dtype instead of float64: for 8-bit
	# frames that is an eighth of the memory. Taking the shape as a whole also
	# lets greyscale (2-D) frames through.
	images = np.empty((Nimage,) + image.shape, dtype=image.dtype)
	for i in range(Nimage):
		images[i] = ndimage.imread(filenames[choices[i]])
	# np.median returns floats, and scipy.misc.imsave stretches non-uint8 input
	# to the full 0..255 range; casting back keeps the original intensities.
	out = np.rint(np.median(images, axis = 0)).astype(image.dtype)

	imsave('%s/median.png' % match, out)
	print ('%s done' % match)

#### Find difference between frame and the median image

In [15]:
# Pixels whose difference is below this share of the largest difference are treated as background.
treshold_factor = 0.2
# Largest shift (in pixels, both directions, both axes) tried when aligning a frame with the median image.
pm = 0

In [16]:
# For every frame, find the shift (within +-pm pixels) that leaves the fewest
# pixels differing from the match's median image, then save
#   diff_*.png -- the absolute grey-level difference at that shift
#   bg_*.png   -- the shifted frame with all below-threshold pixels blacked out
for match in glob('match_*'):
	background = ndimage.imread('%s/median.png' % match, flatten=True)
	for filename in glob('%s/out_*.png' % match):
		image = ndimage.imread(filename, flatten=False)
		image_grey = ndimage.imread(filename, flatten=True)
		min_diff = -1 # -1 marks "no candidate evaluated yet"
		for dx in range(-pm, pm + 1):
			for dy in range(-pm, pm + 1):
				cur_diff = np.abs(np.roll(image_grey, [dx, dy], [0, 1]) - background)
				# Number of pixels at or above the threshold, i.e. counted as changed
				total_diff = np.count_nonzero(cur_diff >= np.max(cur_diff) * treshold_factor)
				if min_diff == -1 or total_diff < min_diff:
					diff = cur_diff
					min_diff = total_diff
					best_delta = [dx, dy]
		image = np.roll(image, best_delta, [0, 1])
		image[diff < np.max(diff) * treshold_factor] = 0
		imsave(filename.replace('/out_', '/diff_'), diff)
		imsave(filename.replace('/out_', '/bg_'), image)
		# filename already starts with the match directory, so it is printed on its own
		print ('%s done (delta %d, %d)' % (filename, best_delta[0], best_delta[1]))
	print ('%s done' % match)

`imread` is deprecated in SciPy 1.0.0.
Use ``matplotlib.pyplot.imread`` instead.
  
`imread` is deprecated in SciPy 1.0.0.
Use ``matplotlib.pyplot.imread`` instead.
  after removing the cwd from sys.path.
`imread` is deprecated in SciPy 1.0.0.
Use ``matplotlib.pyplot.imread`` instead.
  """
`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.
`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


match_5662eacc3d67a59bd4aef0b4887f4b54/match_5662eacc3d67a59bd4aef0b4887f4b54/out_105_1724_431.png done (delta 0, 0)
match_5662eacc3d67a59bd4aef0b4887f4b54/match_5662eacc3d67a59bd4aef0b4887f4b54/out_110_1689_594.png done (delta 0, 0)
match_5662eacc3d67a59bd4aef0b4887f4b54/match_5662eacc3d67a59bd4aef0b4887f4b54/out_109_1368_379.png done (delta 0, 0)
match_5662eacc3d67a59bd4aef0b4887f4b54/match_5662eacc3d67a59bd4aef0b4887f4b54/out_106_995_238.png done (delta 0, 0)
match_5662eacc3d67a59bd4aef0b4887f4b54/match_5662eacc3d67a59bd4aef0b4887f4b54/out_112_526_538.png done (delta 0, 0)
match_5662eacc3d67a59bd4aef0b4887f4b54 done
match_4bde8108eaaba0f8a543cdf669eef8c1/match_4bde8108eaaba0f8a543cdf669eef8c1/out_2_2049_532.png done (delta 0, 0)
match_4bde8108eaaba0f8a543cdf669eef8c1/match_4bde8108eaaba0f8a543cdf669eef8c1/out_9_2496_1081.png done (delta 0, 0)
match_4bde8108eaaba0f8a543cdf669eef8c1/match_4bde8108eaaba0f8a543cdf669eef8c1/out_6_502_950.png done (delta 0, 0)
match_4bde8108eaaba0f8a543cd

#### Cut out at least 224x224 regions that differ from median image

In [17]:
# Changed regions narrower or lower than this (in pixels) are ignored.
min_width, min_height = 4, 4
# Every saved cut-out is grown to at least this size (in pixels).
min_out_width, min_out_height = 224, 224

In [18]:
# For every bg_*.png, label the connected non-black regions and save a cut-out
# of the original frame around each sufficiently large one as
# cut_<sample>_<ballx>_<bally>_<xa>_<ya>_<isball>.png, where (xa, ya) is the
# cut-out's top-left corner and <isball> is 1 if the annotated ball lies inside.
for match in glob('match_*'):
	for filename in glob('%s/bg_*.png' % match):
		# Parse the file name only (bg_<sample>_<ballx>_<bally>.png), so '_' or
		# '.' in the directory part cannot shift the fields.
		params = os.path.splitext(os.path.basename(filename))[0].split('_')
		samplenr = int(params[1])
		ballx = int(params[2])
		bally = int(params[3])

		image = ndimage.imread(filename.replace('/bg_', '/out_'), flatten=False)
		image_grey = ndimage.imread(filename, flatten=True)
		image_height, image_width = image.shape[0], image.shape[1]
		notsaved = np.ones_like(image_grey, dtype=int)

		labels, num_features = measurements.label(image_grey)
		# find_objects yields the bounding box of every label in one pass,
		# instead of building a full-image mask per label.
		for feature, (rows, cols) in enumerate(measurements.find_objects(labels), start=1):
			xa = cols.start
			xb = cols.stop - 1
			ya = rows.start
			yb = rows.stop - 1
			orig_width = xb - xa + 1
			orig_height = yb - ya + 1
			if orig_width < min_width or orig_height < min_height:
				continue
			# Save only, if there are some pixels not already saved
			if not np.any(notsaved[rows, cols][labels[rows, cols] == feature]):
				continue

			out_width = max(min_out_width, orig_width)
			out_height = max(min_out_height, orig_height)
			# Centre the window on the region, then push it back inside the image.
			# The final max(0, ...) matters when the image is smaller than the
			# window: a negative start would make the slice wrap from the far edge.
			xa = max(0, xa - (out_width - orig_width) // 2)
			xb = min(image_width, xa + out_width)
			xa = max(0, xb - out_width)
			ya = max(0, ya - (out_height - orig_height) // 2)
			yb = min(image_height, ya + out_height)
			ya = max(0, yb - out_height)

			cur = image[ya:yb+1, xa:xb+1]
			notsaved[ya:yb+1, xa:xb+1] = 0
			isball = int(xa <= ballx <= xb and ya <= bally <= yb)
			imsave('%s/cut_%d_%d_%d_%d_%d_%d.png' % (match, samplenr, ballx, bally, xa, ya, isball), cur)
		print ('%s/%d done' % (match, samplenr))
	print ('%s done' % match)

`imread` is deprecated in SciPy 1.0.0.
Use ``matplotlib.pyplot.imread`` instead.
  
`imread` is deprecated in SciPy 1.0.0.
Use ``matplotlib.pyplot.imread`` instead.
  if __name__ == '__main__':
`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


match_5662eacc3d67a59bd4aef0b4887f4b54/106 done
match_5662eacc3d67a59bd4aef0b4887f4b54/110 done
match_5662eacc3d67a59bd4aef0b4887f4b54/109 done
match_5662eacc3d67a59bd4aef0b4887f4b54/112 done
match_5662eacc3d67a59bd4aef0b4887f4b54/105 done
match_5662eacc3d67a59bd4aef0b4887f4b54 done
match_4bde8108eaaba0f8a543cdf669eef8c1/9 done
match_4bde8108eaaba0f8a543cdf669eef8c1/7 done
match_4bde8108eaaba0f8a543cdf669eef8c1/2 done
match_4bde8108eaaba0f8a543cdf669eef8c1/6 done
match_4bde8108eaaba0f8a543cdf669eef8c1/4 done
match_4bde8108eaaba0f8a543cdf669eef8c1 done
match_0ec428510ab0bb3a5a68e3bb1271bf0f/49 done
match_0ec428510ab0bb3a5a68e3bb1271bf0f/52 done
match_0ec428510ab0bb3a5a68e3bb1271bf0f/48 done
match_0ec428510ab0bb3a5a68e3bb1271bf0f/56 done
match_0ec428510ab0bb3a5a68e3bb1271bf0f/54 done
match_0ec428510ab0bb3a5a68e3bb1271bf0f done


### Run with pre-processing

In [32]:
# Run the detector on the cut-outs that contain the ball and report, per frame
# and overall, how often it was found.
found = 0
total = 0
false_positive = 0
false_negative = 0
for match in glob('match_*'):
	for frame_file in glob('%s/out_*.png' % match):
		# out_<sample>_<ballx>_<bally>.png: take the sample number from the file
		# name only, so the directory name cannot shift the fields
		frame = os.path.basename(frame_file).split('_')[1]
		frame_yes = 0
		frame_no = 0
		frame_fp = 0
		frame_fn = 0
		for filename in glob('%s/cut_%s_*_1.png' % (match, frame)):
			# _1 means that ball is on the frame
			cy, cn, fp, fn = detect(filename, True)
			frame_yes += cy
			frame_no += cn
			frame_fp += fp
			frame_fn += fn
		print ('FRAME %s %s: correct yes %d, correct no %d, false positive %d, false negative %d' % (match, frame, frame_yes, frame_no, frame_fp, frame_fn))
		# Carry the per-frame error counts into the overall totals
		false_positive += frame_fp
		false_negative += frame_fn
		if frame_yes > 0:
			found += 1
		total += 1
print ('SUMMARY: found %d out of %d' % (found, total))
print ('SUMMARY: false positive %d, false negative %d' % (false_positive, false_negative))

match_5662eacc3d67a59bd4aef0b4887f4b54/cut_105_1724_431_1612_317_1.png didn't find, expected 1724 431
FRAME match_5662eacc3d67a59bd4aef0b4887f4b54 105: correct yes 0, correct no 0, false positive 0, false negative 1
    person 85 %
    sports ball 76 %
    person 69 %
match_5662eacc3d67a59bd4aef0b4887f4b54/cut_110_1689_594_1564_388_1.png found sports ball ( 76 %) 1690.3329467773438 591.7768707275391 expected 1689 594
    sports ball 83 %
    person 78 %
    person 64 %
    skateboard 56 %
match_5662eacc3d67a59bd4aef0b4887f4b54/cut_110_1689_594_1600_477_1.png found sports ball ( 83 %) 1690.078598022461 593.1318054199219 expected 1689 594
FRAME match_5662eacc3d67a59bd4aef0b4887f4b54 110: correct yes 2, correct no 0, false positive 0, false negative 0
    person 90 %
    sports ball 52 %
match_5662eacc3d67a59bd4aef0b4887f4b54/cut_109_1368_379_1148_276_1.png found sports ball ( 52 %) 1367.7813262939453 377.1566619873047 expected 1368 379
    person 78 %
match_5662eacc3d67a59bd4aef0b4887f4b