**Note:** Here we compare three implementations of [Conway's game of life](https://en.wikipedia.org/wiki/Conway%27s_Game_of_Life). 

 * The first and second methods apply Conway's rules iteratively to each element of a 2D grid. This is done first in Python, then again in Cython. 
 * The third method makes use of NumPy vectorization to apply Conway's rules to the entire 2D grid at once. This is typically not possible in reaction-diffusion equations, but it is with Conway's rules.

In [1]:
%load_ext Cython
import os, sys, copy
from collections import namedtuple
import numpy as np
from numpy import log, exp, minimum
from matplotlib import pylab as plt
%alias_magic t timeit

Created `%t` as an alias for `%timeit`.
Created `%%t` as an alias for `%%timeit`.


In [2]:
def iterate_v1(Z, c):
    '''Advance the grid by one generation, visiting one cell at a time.

    Only interior cells are updated; the outermost rows and columns of Z are
    read as neighbors but never written. Z is modified in place.

    Args:
        Z (ndarray) - Represents 2D space
        c (namedtuple) - Container for constants (rows and cols are read)
    Returns:
        Z (ndarray) - the same array object that was passed in
    '''

    interior_rows = range(1, c.rows - 1)
    interior_cols = range(1, c.cols - 1)

    # Neighbor totals, indexed like Z. The largest index written is
    # (rows-2, cols-2), so a (rows-1, cols-1) array is large enough.
    neighbors = np.zeros((c.rows - 1, c.cols - 1), dtype=np.int32)

    # First pass: count the eight neighbors of every interior cell while Z
    # still holds the current generation.
    for r in interior_rows:
        up, down = r - 1, r + 1
        for k in interior_cols:
            left, right = k - 1, k + 1
            neighbors[r, k] = (Z[up, left] + Z[up, k] + Z[up, right] +
                               Z[r, left] + Z[r, right] +
                               Z[down, left] + Z[down, k] + Z[down, right])

    # Second pass: apply the rules using the counts gathered above.
    for r in interior_rows:
        for k in interior_cols:
            count = neighbors[r, k]
            if Z[r, k] == 1:
                # A live cell dies unless it has two or three neighbors.
                if not 2 <= count <= 3:
                    Z[r, k] = 0
            elif Z[r, k] == 0 and count == 3:
                # A dead cell with exactly three neighbors comes alive.
                Z[r, k] = 1

    return Z

In [3]:
%%cython
#cython: wraparound=False, boundscheck=False, cdivision=True
#cython: profile=False, nonecheck=False, overflowcheck=False
#cython: cdivision_warnings=False, unraisable_tracebacks=False
import numpy as np
cimport numpy as np

cpdef iterate_v2(Z, c):
    '''Element by element iteration, compiled with typed memoryviews.

    Applies one generation of Conway's rules to the interior cells of Z.
    Z is written through a memoryview of itself, so the caller's array is
    updated in place; the return value is an ndarray built from that view.

    Args:
        Z (ndarray - int) - Represents 2D space. It is bound to a
            ``long[:, :]`` memoryview, so its buffer must hold C longs.
        c (namedtuple) - Container for constants (rows and cols are read)
    Returns:
        Z (ndarray - int)
    '''

    # Holds the sum of neighbors for each element in Z.
    # Only indices up to (rows-2, cols-2) are written below, so a
    # (rows-1, cols-1) array is large enough.
    # NOTE(review): dtype=int must correspond to C long for the
    # ``long[:, :]`` binding of N below - platform dependent, confirm.
    N = np.zeros((c.rows-1, c.cols-1), dtype=int)

    # Copy the sizes into C ints and bind typed views so the loops below
    # need no Python objects and can run without the GIL.
    cdef int rows = c.rows
    cdef int cols = c.cols
    cdef long [:, :] N_ = N
    cdef long [:, :] Z_ = Z
    cdef int x, y

    with nogil:
        
        # Count neighbors (interior cells only; border cells are read but
        # never written). Bounds checking is disabled by the cell
        # directives, so rows/cols must not exceed the real shape of Z.
        for x in range(1, rows-1):
            for y in range(1, cols-1):
                N_[x, y] = (Z_[x-1, y-1] + Z_[x-1, y] + Z_[x-1, y+1] +
                            Z_[x,   y-1]              + Z_[x,   y+1] +
                            Z_[x+1, y-1] + Z_[x+1, y] + Z_[x+1, y+1])
                
        # Apply rules, using the counts gathered from the previous state
        for x in range(1, rows-1):
            for y in range(1, cols-1):
                if Z_[x, y] == 1 and (N_[x, y] < 2 or N_[x, y] > 3):
                    Z_[x, y] = 0
                elif Z_[x, y] == 0 and N_[x, y] == 3:
                     Z_[x, y] = 1

    # NOTE(review): np.array copies by default, so the returned array is
    # presumably a separate copy of the (already updated) input - confirm.
    return np.array(Z_)

In [4]:
def iterate_v3(Z, c):
    '''Advance the grid by one generation using NumPy vectorization.

    Neighbor counts for all interior cells are computed at once from eight
    shifted views of the grid, and the rules are applied with boolean masks.
    Z is modified in place: every cell is reset to 0 and the interior cells
    that are born or survive are set to 1, so the border ends up empty.

    Args:
        Z (ndarray) - Represents 2D space; integer (0/1) or boolean grid
        c (namedtuple) - Container for constants. Not used here; kept so all
            iterate_* functions share the same signature.
    Returns:
        Z (ndarray) - the same array object that was passed in
    '''

    # `+` between boolean arrays is a logical OR, which would cap every
    # neighbor count at True. Count through an integer copy in that case;
    # eight neighbors always fit in uint8.
    cells = Z.astype(np.uint8) if Z.dtype == np.bool_ else Z

    # Count neighbors: one shifted view per direction, summed elementwise
    N = (cells[0:-2, 0:-2] + cells[0:-2, 1:-1] + cells[0:-2, 2:] +
         cells[1:-1, 0:-2]                     + cells[1:-1, 2:] +
         cells[2:,   0:-2] + cells[2:,   1:-1] + cells[2:,   2:])

    # Apply rules. The masks must be built before Z is cleared, because
    # `interior` is a view into Z.
    interior = Z[1:-1, 1:-1]
    birth = (N == 3) & (interior == 0)
    survive = ((N == 2) | (N == 3)) & (interior == 1)
    alive = birth | survive

    Z[...] = 0
    interior[alive] = 1

    return Z

In [5]:
def init_grid(c):
    '''Initialize the 2D grid with random 0/1 values and an empty border.

    Args:
        c (namedtuple) - Container for constants (rows and cols are read)
    Returns:
        Z (ndarray) - grid of shape (c.rows, c.cols)
    '''

    shape = (c.rows, c.cols)
    grid = np.random.randint(0, 2, shape)

    # Clear the first and last row, then the first and last column, so the
    # outer ring of cells starts out dead.
    edges = [0, -1]
    grid[edges, :] = 0
    grid[:, edges] = 0

    return grid

In [6]:
# Bundle the grid dimensions (and an iteration count) into a namedtuple.
# NOTE(review): n_iterations is not read by any code shown in this notebook.
Const = namedtuple('c', ['rows', 'cols', 'n_iterations'])
c = Const(rows=100, cols=100, n_iterations=100)

# Time each implementation on its own freshly initialized random grid.
for iterate in [iterate_v1, iterate_v2, iterate_v3]:
    
    # NOTE: the iterate functions update Z in place, so the repeated timing
    # calls below act on a grid that keeps evolving between calls.
    Z = init_grid(c)
    # %t is the alias for %timeit: 1000 loops per run, best of 3 runs.
    %t -n1000 -r3 iterate(Z, c)

1000 loops, best of 3: 51.3 ms per loop
1000 loops, best of 3: 62.6 µs per loop
1000 loops, best of 3: 208 µs per loop


Results
--------

Iteration in Python was between two and three orders of magnitude slower than either iteration in Cython or NumPy vectorization. Cython was a little over three times faster than NumPy vectorization.

*References*

 * Nicolas P. Rougier: [Numpy tutorial](http://www.labri.fr/perso/nrougier/teaching/numpy/numpy.html)

 * From StackExchange and StackOverflow: [Fastest way to iterate over NumPy arrays](http://codereview.stackexchange.com/questions/38580/fastest-way-to-iterate-over-numpy-array), [Python List Comprehension Vs. Map](http://stackoverflow.com/questions/1247486/python-list-comprehension-vs-map), [Optimizing ... Game of Life [in] NumPy](http://codereview.stackexchange.com/questions/46011/optimizing-very-simple-piece-of-game-of-life-code-by-taking-advantage-of-numpy)

 * From reddit's /r/learnpython: [Cython optimization ...](https://www.reddit.com/r/learnpython/comments/3g73ux/cython_optimization_can_i_get_any_closer/)