In [None]:
%pylab notebook

In [None]:
import re
from datetime import datetime, timedelta
from collections import namedtuple

In [None]:
# One set of metrics parsed from a "-Iter ..." line. Every field holds the
# matched text exactly as it appears in the log (i.e. a string).
Result = namedtuple('Result', 'iter loss top1 top5')
# One complete record: the header timestamp plus its training and validation
# Result tuples.
Info = namedtuple('Info', 'timestamp training validation')

# The captured stdout contains records made of three consecutive lines:
#
#   [2016-11-23 20:50:26]:
#   -Iter 250, Training Loss= 4.2972, Accuracy Top1 = 0.04, Top5 = 0.13
#   -Iter 250, Validation Loss= 4.2765, Accuracy Top1 = 0.03, Top5 = 0.16
#
# Lines that come before the timestamp line opening a record are skipped.
def parse_next_record(stream):
    """Read the next (timestamp, training, validation) record from `stream`.

    Lines are consumed one at a time until one matches the ``[<timestamp>]:``
    header; every skipped line is echoed to stderr. The two lines following
    the header must be the training and the validation metric lines, in that
    order.

    Args:
        stream: object with a ``readline()`` method that returns ``''`` at
            EOF (for example an open text file).

    Returns:
        An ``Info`` whose ``timestamp`` is a ``datetime`` and whose
        ``training`` / ``validation`` are ``Result`` tuples of the matched
        strings, or ``None`` if EOF is reached before a complete record has
        been read.

    Raises:
        AssertionError: if a metric line appears without a preceding header,
            or if a header is not followed by a training line and then a
            validation line.
        ValueError: if the bracketed header text is not in
            ``%Y-%m-%d %H:%M:%S`` format (raised by ``datetime.strptime``).
    """
    # Imported locally so the function does not rely on `sys` having been
    # pulled into the notebook namespace by some other cell or magic.
    import sys

    # Raw strings keep the regex escapes (\. \[ \n) away from Python's own
    # string-escape processing.
    template = (r"-Iter (?P<iter>[0-9]*), %s Loss= (?P<loss>[\.0-9]*), "
                r"Accuracy Top1 = (?P<top1>[\.0-9]*), Top5 = (?P<top5>[\.0-9]*)\n")
    tr_regex = template % "Training"
    val_regex = template % "Validation"
    ptime_regex = r'^\[(.*)\]:\n'

    while True:
        ln = stream.readline()
        if ln == '':
            return None

        ptime = re.search(ptime_regex, ln)
        if ptime:
            break

        # Inspect the skipped line itself. Reading further lines here would
        # swallow the header (and metrics) of the record that follows.
        assert not re.search(tr_regex, ln), \
            "training line without a timestamp header: %r" % ln
        assert not re.search(val_regex, ln), \
            "validation line without a timestamp header: %r" % ln
        print("skipping: %s" % ln, file=sys.stderr)

    dt = datetime.strptime(ptime.group(1), '%Y-%m-%d %H:%M:%S')

    ln = stream.readline()
    if ln == '':
        # EOF in the middle of a record: report "no more records".
        return None

    training = re.search(tr_regex, ln)
    assert training, "expected a training line after the header, got: %r" % ln

    ln = stream.readline()
    if ln == '':
        return None

    validation = re.search(val_regex, ln)
    assert validation, "expected a validation line after the training line, got: %r" % ln

    return Info(timestamp=dt,
                training=Result(**training.groupdict()),
                validation=Result(**validation.groupdict()))

def parse_records(filename):
    """Parse every record found in the log file `filename`.

    Args:
        filename: path of the text file to read.

    Returns:
        A list with each value returned by ``parse_next_record`` for the
        file, in file order, up to (not including) the first ``None``.
    """
    recs = []
    # The context manager closes the file even if parsing raises.
    with open(filename, 'r') as st:
        while True:
            el = parse_next_record(st)
            if el is None:
                return recs
            recs.append(el)
        

In [None]:
# Plot accuracy and loss curves from the parsed training log.
pylab.rcParams['figure.figsize'] = (13, 10)
data = parse_records('baseline_numbers.txt')

# The parser yields the matched log text, so convert to numbers before
# plotting: matplotlib treats string data as categorical, which would give
# unordered, label-based axes instead of a numeric scale.
ts = [d.timestamp for d in data]
iters = [int(d.training.iter) for d in data]
training_losses = [float(d.training.loss) for d in data]
validation_losses = [float(d.validation.loss) for d in data]
training_top5 = [float(d.training.top5) for d in data]
validation_top5 = [float(d.validation.top5) for d in data]
training_top1 = [float(d.training.top1) for d in data]
validation_top1 = [float(d.validation.top1) for d in data]
# Seconds elapsed since the first record.
timestamps = [(d.timestamp - data[0].timestamp).total_seconds() for d in data]

f, (ax0,ax1) = plt.subplots(2, sharex=True)
plt.suptitle('baseline plot for TF alex net provided by course staff, trained on full training input (random order)')

# Top panel: top-1 / top-5 accuracy for both splits.
ax0.plot(iters, training_top1, label='training top1')
ax0.plot(iters, validation_top1, label='validation top1')
ax0.plot(iters, training_top5, label='training top5')
ax0.plot(iters, validation_top5, label='validation top5')
# NOTE(review): the sample log lines show values such as 0.04, which look
# like fractions rather than percentages -- confirm the "(%)" unit.
ax0.set_ylabel('accuracy rate (%)')
ax0.legend(loc='lower center', ncol=2)

# Bottom panel: loss for both splits, sharing the iteration axis.
ax1.plot(iters, training_losses, label='training loss')
ax1.plot(iters, validation_losses, label='validation loss')
ax1.set_ybound(0,5)
ax1.set_ylabel('loss value')
ax1.legend(loc='upper center', ncol=2)
ax1.set_xlabel('# iterations (1 iter ~1.25 seconds for this GPU)')