In [1]:
# Load the Cython IPython extension so the %%cython cell magic used below is available.
%load_ext cython

# Load some data

In [2]:
# Shell escape: list the JSON files in the working directory, one per line.
!ls -1 *.json

[1m[32mall-world-cup-players.json[m[m
[1m[32mworld_cup_summary.json[m[m


In [3]:
import json
# Name the encoding explicitly: without it, open() falls back to the
# platform's locale encoding, so the same file could decode differently
# (or fail) on another machine. JSON interchange text is UTF-8.
with open('world_cup_summary.json', encoding='utf-8') as f:
    d = json.load(f)

In [4]:
# Report the container type of the parsed JSON and how many entries it has.
print(
    'd is a {} of {} items'.format(
        type(d),
        len(d),
    )
)

d is a <class 'list'> of 20 items


In [5]:
# Display the first two records to show which keys each entry carries.
d[0:2]

[{'average attendance': '52,918',
  'goals per game': '2.7',
  'host': 'Brazil',
  'number of matches': '64',
  'number of teams': '32',
  'total goals': '171',
  'year': '2014'},
 {'average attendance': '49,669',
  'goals per game': '2.3',
  'host': 'South Africa',
  'number of matches': '64',
  'number of teams': '32',
  'total goals': '145',
  'year': '2010'}]

# A Python class for an event

In [6]:
class pyEvent:
    """Plain-Python record for one entry of the World Cup summary data.

    Instances start with no attributes; ``host``, ``year`` and
    ``attendance`` are created by :meth:`from_dict`.
    """

    def from_dict(self, d):
        """Populate this instance from a single summary dict.

        Reads the ``'host'``, ``'year'`` and ``'average attendance'`` keys
        of ``d``. ``year`` is converted with ``int``; the attendance string
        has its commas stripped before conversion
        (e.g. ``'52,918'`` becomes ``52918``).
        """
        self.host = d['host']
        self.year = int(d['year'])
        # Attendance arrives with thousands separators; drop them first.
        digits_only = d['average attendance'].replace(',', '')
        self.attendance = int(digits_only)
        
def find_max_py1(events: list):
    """Return the event with the largest ``attendance`` via builtin ``max``.

    The comparison key is each element's ``attendance`` attribute.
    """
    def attendance_of(event):
        return event.attendance

    return max(events, key=attendance_of)
        
def find_max_py2(events: list):
    """Return the event with the largest ``attendance`` using an explicit loop.

    Starts from the first element and replaces the running best only when a
    later element's ``attendance`` is strictly greater, so the earliest of
    several equal maxima is returned.
    """
    best = events[0]
    for candidate in events:
        if candidate.attendance > best.attendance:
            best = candidate
    return best

# A Cython class for an event

In [7]:
%%cython
cdef class cyEvent:
    """Cython extension-type record for one World Cup summary entry.

    All attributes are declared ``public`` so they can be read and written
    from Python. ``matches``, ``teams`` and ``goals`` are declared here but
    are not assigned by :meth:`from_dict`.
    """
    cdef public str host
    cdef public int attendance
    cdef public int matches
    cdef public int teams
    cdef public int goals
    cdef public int year

    def from_dict(self, d):
        """Populate ``host``, ``year`` and ``attendance`` from a summary dict.

        Reads the ``'host'``, ``'year'`` and ``'average attendance'`` keys
        of ``d``; the attendance string has its commas removed before being
        converted with ``int``.
        """
        self.host = d['host']
        self.year = int(d['year'])
        # Attendance arrives with thousands separators; drop them first.
        digits_only = d['average attendance'].replace(',', '')
        self.attendance = int(digits_only)
    
cdef int keyfunc(cyEvent e):
    """Key function for ``max``: the attendance of ``e`` as a C int."""
    cdef int attendance = e.attendance
    return attendance
    
def find_max_cy1(list events):
    """Return the event with the largest attendance via builtin ``max``.

    Uses the module's ``keyfunc`` as the comparison key.
    """
    winner = max(events, key=keyfunc)
    return winner
    
def find_max_cy2(list events):
    """Return the event with the largest attendance using an explicit loop.

    Both the running best and the loop variable are declared as ``cyEvent``.
    The running best starts at the first element and is replaced only by a
    strictly greater attendance, so the earliest of equal maxima is returned.
    """
    cdef cyEvent best = events[0]
    cdef cyEvent candidate
    for candidate in events:
        if candidate.attendance > best.attendance:
            best = candidate
    return best

# Populating the lists

In [8]:
# Build two parallel lists from the same records: one of pyEvent objects and
# one of cyEvent objects, in the order the records appear in `d`.
py_events = []
cy_events = []

# The loop variable is named `record` rather than `_`: the value is actually
# used, and in IPython `_` is the last-output shortcut, which rebinding in
# the user namespace would shadow.
for record in d:
    # Populate the Python objects
    py_instance = pyEvent()
    py_instance.from_dict(record)
    py_events.append(py_instance)

    # Populate the Cython objects
    cy_instance = cyEvent()
    cy_instance.from_dict(record)
    cy_events.append(cy_instance)

# Test that all lookups work

In [9]:
# Run each finder against its matching list and print the winner's fields;
# the four printed lines are expected to agree.
lookups = (
    ('Python lookup 1:', find_max_py1, py_events),
    ('Python lookup 2:', find_max_py2, py_events),
    ('Cython lookup 1:', find_max_cy1, cy_events),
    ('Cython lookup 2:', find_max_cy2, cy_events),
)
for label, find_max, candidates in lookups:
    e = find_max(candidates)
    print(label, e.host, e.attendance, e.year)

Python lookup 1: USA 68991 1994
Python lookup 2: USA 68991 1994
Cython lookup 1: USA 68991 1994
Cython lookup 2: USA 68991 1994


# Compare speed

In [10]:
# Both using the "max" builtin function: find_max_py1 passes a Python lambda
# as the key, find_max_cy1 passes the cdef keyfunc.
# -n 100000 fixes the loop count so both timings use the same number of runs.
%timeit -n 100000 e = find_max_py1(py_events)
%timeit -n 100000 e = find_max_cy1(cy_events)

100000 loops, best of 3: 3.79 µs per loop
100000 loops, best of 3: 2.07 µs per loop


In [11]:
# Both using manual iteration: an explicit for-loop that tracks the largest
# element seen so far. find_max_cy2 declares its loop variables as cyEvent.
# -n 100000 fixes the loop count so both timings use the same number of runs.
%timeit -n 100000 e = find_max_py2(py_events)
%timeit -n 100000 e = find_max_cy2(cy_events)

100000 loops, best of 3: 2.38 µs per loop
100000 loops, best of 3: 89 ns per loop
