Skip to content
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
121 lines (99 sloc) 3.87 KB
"""Verifies that a given RecSys challenge submission is properly constructed.

Usage:

    python <script>.py challenge_set.json submission.csv
"""
import sys
import json
# Number of recommended tracks required for every playlist in a submission.
# NOTE(review): verify_submission reads NTRACKS, but no definition is visible
# in this file.  500 is the count the RecSys Challenge 2018 rules ask for --
# confirm against the official rules before relying on it.
NTRACKS = 500

# Number of playlists a well-formed challenge set must contain.
NPLAYLISTS = 10000

# Challenge tracks a team may enter.
valid_tracks = set(['main', 'creative'])


def _report(*parts):
    """Print one diagnostic line, joining the parts with single spaces."""
    # A single pre-joined string prints identically on Python 2 and Python 3.
    print(" ".join(str(part) for part in parts))


def _check_team_info(line, line_no):
    """Validate a ``team_info`` header line and return the errors found (0 or 1).

    The second or third comma-separated field must name a valid challenge
    track; the other of the two is taken to be the team name.
    """
    tinfo = line.split(',')
    if len(tinfo) < 3:
        # Guard the tinfo[1] / tinfo[2] lookups below against short lines.
        _report("bad team_info, expected a challenge track and a team name "
                "at line", line_no)
        return 1
    if tinfo[1] in valid_tracks or tinfo[2] in valid_tracks:
        return 0
    _report("unknown challenge track", tinfo[1],
            "should be 'main' or 'creative' at line", line_no)
    return 1


def _check_playlist_line(line, line_no, pids, seed_tracks):
    """Validate one ``pid,uri,uri,...`` line.

    Returns a ``(pid, error_count)`` pair; ``pid`` is None when the first
    field is not an integer.
    """
    fields = [field.strip() for field in line.split(',')]
    try:
        pid = int(fields[0])
    except ValueError:
        _report("bad pid (should be an integer)", fields[0],
                "at line", line_no)
        return None, 1

    if pid not in pids:
        # Unknown playlists have no seed tracks to compare against, so the
        # remaining checks are skipped.
        _report("bad pid", pid, "at line", line_no)
        return pid, 1

    tracks = fields[1:]
    unique_tracks = set(tracks)
    errors = 0
    if len(tracks) != NTRACKS:
        _report("wrong number of tracks, found", len(tracks),
                "should have", NTRACKS, "at", line_no)
        errors += 1
    if len(unique_tracks) != NTRACKS:
        _report("wrong number of unique tracks, found", len(unique_tracks),
                "should have", NTRACKS, "at", line_no)
        errors += 1
    if seed_tracks[pid].intersection(unique_tracks):
        _report("found seed tracks in the submission for playlist", pid,
                "at", line_no)
        errors += 1
    for uri in tracks:
        if not is_track_uri(uri):
            _report("bad track uri", uri, "at", line_no)
            errors += 1
    return pid, errors


def verify_submission(challenge_path, submission_path):
    """Check a submission file against a challenge set and count its errors.

    Every problem found is printed as one line on standard output.  Line
    numbers in the messages are zero-based positions in the submission file.

    Args:
        challenge_path: path of the challenge-set JSON file.  It must hold a
            ``playlists`` list of NPLAYLISTS entries, each with a ``pid`` and
            a ``tracks`` list whose items carry a ``track_uri``.
        submission_path: path of the CSV submission.  Blank lines and lines
            starting with ``#`` are ignored.  The first remaining line must
            be a ``team_info`` line; every other line is
            ``pid,uri,uri,...``.

    Returns:
        The number of errors found; 0 means the submission is acceptable.
        An unreadable or malformed challenge set yields 1 without the
        submission being read.

    Raises:
        IOError / OSError: if the submission file cannot be opened.
    """
    try:
        # Binary mode lets json detect the encoding itself instead of
        # depending on the locale's default text encoding.
        with open(challenge_path, 'rb') as challenge_file:
            challenge = json.load(challenge_file)
    except (IOError, OSError, ValueError):
        _report("Can't read the challenge set")
        return 1

    try:
        playlists = challenge['playlists']
        # seed_tracks maps each challenge playlist id to its seed track URIs.
        seed_tracks = dict(
            (playlist['pid'],
             set(track['track_uri'] for track in playlist['tracks']))
            for playlist in playlists
        )
        playlist_count = len(playlists)
    except (KeyError, TypeError):
        _report("Bad challenge set")
        return 1
    if playlist_count != NPLAYLISTS:
        _report("Bad challenge set")
        return 1
    pids = set(seed_tracks)

    error_count = 0
    found_pids = set()
    awaiting_team_info = True
    with open(submission_path) as submission_file:
        for line_no, raw_line in enumerate(submission_file):
            line = raw_line.strip()
            if not line or line[0] == '#':
                continue  # blank lines and comments carry no data

            if awaiting_team_info:
                # Only the first data line is expected to be team_info; a
                # missing header is reported once and the line is then
                # validated as an ordinary playlist line.
                awaiting_team_info = False
                if line.startswith("team_info"):
                    error_count += _check_team_info(line, line_no)
                    continue
                _report("missing team_info at line", line_no)
                error_count += 1

            pid, line_errors = _check_playlist_line(
                line, line_no, pids, seed_tracks)
            error_count += line_errors
            if pid is not None:
                found_pids.add(pid)

    if len(found_pids) != len(pids):
        _report("wrong number of playlists, found", len(found_pids),
                "expected", len(pids))
        error_count += 1
    return error_count
def is_track_uri(uri):
    """Return True if *uri* has the shape ``spotify:track:<22-character id>``.

    Only the shape is checked: exactly three colon-separated fields, the
    first two being ``spotify`` and ``track``, and a third field of length
    22.  The characters of the id itself are not inspected.
    """
    fields = uri.split(':')
    return (
        len(fields) == 3
        and fields[0] == 'spotify'
        and fields[1] == 'track'
        and len(fields[2]) == 22
    )
if __name__ == '__main__':
    # Command-line entry point: expects exactly two arguments, the challenge
    # set path and the submission path, in that order.
    if len(sys.argv) != 3:
        print("usage: python %s challenge_set.json submission.csv"
              % sys.argv[0])
        # Stop here; without both paths there is nothing to verify.
        sys.exit(2)

    errors = verify_submission(sys.argv[1], sys.argv[2])
    if errors == 0:
        print("Submission is OK! It is ready to submit to the Recsys "
              "challenge.")
    else:
        print("Your submission has %s errors. If you submit it, it will be "
              "rejected." % errors)
You can’t perform that action at this time.