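# Performance comparison

This notebook times the local Moran's I computation in `esda.Moran_Local` (called without Numba, with Numba on a single thread, and with Numba on all available threads) against `pygeoda`'s `local_moran`, using the same data and number of permutations for each.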

In [19]:
%matplotlib inline

import os
import tempfile
from time import perf_counter, time

import esda
import geopandas
import libpysal as ps
import pandas
import pygeoda
from geosnap import datasets

PERMS = 9999

from libpysal.examples import get_path, load_example

## Data

In [2]:
%%time
# One-off data preparation step; `%%time` reports how long it takes.
# NOTE(review): presumably this stores the census data that the
# `geosnap.datasets` accessors used further down read from — confirm
# against the geosnap documentation for the installed version.
from geosnap.io import store_census
store_census()

Copying: 0.00B [00:00, ?B/s]
Copying: 0.00B [00:00, ?B/s]

CPU times: user 904 ms, sys: 165 ms, total: 1.07 s
Wall time: 10 s





- Counties

In [3]:
# Load the "NCOVR" libpysal example; the return value is not needed, so it
# is bound to `_` to keep it out of the cell output.
# NOTE(review): presumably this is what makes `get_path("NAT.shp")` below
# resolvable on a fresh machine — confirm.
_ = load_example("NCOVR")

# County polygons and the Queen spatial weights built from them; both are
# passed to `workbench` in the "All counties" section.
counties = geopandas.read_file(get_path("NAT.shp"))
w_counties = ps.weights.Queen.from_dataframe(counties)

- Tracts

In [None]:
%%time
tracts = datasets.tracts_2010()\
                 [["geoid", "median_household_income", "geometry"]]\
                 .dropna()

In [None]:
%%time
# Queen spatial weights for the tract table built in the previous cell;
# timed because the construction cost is part of the picture.
w_tracts = ps.weights.Queen.from_dataframe(tracts)

- Blocks

## Setup

In [20]:
def workbench(db, y_name, w, perms=PERMS, runs=5, save_last=False):
    """
    Time local Moran's I for one variable under four configurations.

    Every repetition times, in this order:

    - ``esda.Moran_Local`` called without the ``numba``/``n_jobs`` arguments
      (reported as "No Numba"),
    - ``esda.Moran_Local`` with ``numba=True, n_jobs=1``,
    - ``esda.Moran_Local`` with ``numba=True, n_jobs=-1``,
    - ``pygeoda.local_moran(...).Run()`` after ``SetPermutations(perms)``.

    Only the ``Run()`` call is timed for PyGeoda; building the LISA object
    and setting the permutations happen before the clock starts.

    Parameters
    ----------
    db : table with geometries
        Must support ``db[y_name]`` and ``db.to_file(path)``; it is written
        to a temporary shapefile so that PyGeoda can open it.
    y_name : str
        Name of the column to analyse.
    w : spatial weights
        Weights object handed to ``esda.Moran_Local``. PyGeoda builds its
        own queen weights from the exported shapefile.
    perms : int
        Number of permutations used in every timed call.
    runs : int
        Number of repetitions of the four timed calls.
    save_last : bool
        If True, also return the ``p_sim`` values of the three
        ``esda.Moran_Local`` results from the last repetition.

    Returns
    -------
    tuple
        ``(times, pvals)``: ``times`` is a ``pandas.DataFrame`` with one row
        per repetition and one column per configuration, holding elapsed
        seconds. ``pvals`` is a ``pandas.DataFrame`` of ``p_sim`` values, or
        ``None`` when ``save_last`` is False or no repetition was run.
    """
    print(f"Workbench working on a {os.cpu_count()}-core machine")
    times = {
        "No Numba": [],
        "Numba - Single Thread": [],
        "Numba - Multi Thread": [],
        "PyGeoda": [],
    }
    # Warm-up: call both Numba configurations once with a single permutation
    # so that compilation does not end up inside the timed region.
    for warmup_jobs in (1, -1):
        esda.Moran_Local(db[y_name],
                         w,
                         permutations=1,
                         numba=True,
                         n_jobs=warmup_jobs
                        )
    # Results of the most recent repetition; stay None when runs == 0.
    old = one = mul = None
    # A private temporary directory replaces the former fixed "tmp" folder:
    # nothing in the working directory is overwritten or deleted, and the
    # files are removed even if one of the benchmarks raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        # PyGeoda setup
        shp_path = os.path.join(scratch_dir, "tmp.shp")
        db.to_file(shp_path)
        db_geoda = pygeoda.open(shp_path)
        # NOTE(review): the column is read through the *integer* accessor;
        # if `y_name` holds real values (e.g. a rate) PyGeoda may be working
        # on different numbers than esda — confirm and switch accessor.
        y_geoda = db_geoda.GetIntegerCol(y_name)
        w_geoda = pygeoda.weights.queen(db_geoda)
        for _ in range(runs):
            # Pre-numba
            t0 = perf_counter()
            old = esda.Moran_Local(db[y_name],
                                   w,
                                   permutations=perms
                                  )
            times["No Numba"].append(perf_counter() - t0)
            # Numba single thread
            t0 = perf_counter()
            one = esda.Moran_Local(db[y_name],
                                   w,
                                   permutations=perms,
                                   numba=True,
                                   n_jobs=1
                                  )
            times["Numba - Single Thread"].append(perf_counter() - t0)
            # Numba multi thread
            t0 = perf_counter()
            mul = esda.Moran_Local(db[y_name],
                                   w,
                                   permutations=perms,
                                   numba=True,
                                   n_jobs=-1
                                  )
            times["Numba - Multi Thread"].append(perf_counter() - t0)
            # PyGeoDa: object creation and configuration stay untimed
            lisa_geoda = pygeoda.local_moran(w_geoda, y_geoda)
            lisa_geoda.SetPermutations(perms)
            t0 = perf_counter()
            lisa_geoda.Run()
            times["PyGeoda"].append(perf_counter() - t0)
    if save_last and runs > 0:
        pvals = pandas.DataFrame({
            "No Numba": old.p_sim,
            "Numba - Single Thread": one.p_sim,
            "Numba - Multi Thread": mul.p_sim
        })
    else:
        pvals = None
    return pandas.DataFrame(times), pvals

## All counties

In [21]:
%%time
res_counties = workbench(counties, 
                         "HR70", 
                         w_counties, 
                         perms=99, 
                         runs=7,
                         save_last=True
                        )
timing_counties, pvals_counties = res_counties

Workbench working on a 2-core machine
CPU times: user 33.8 s, sys: 227 ms, total: 34 s
Wall time: 29.6 s


In [22]:
# Mean elapsed seconds per configuration, averaged over the repetitions.
timing_counties.mean()

No Numba                 1.009174
Numba - Single Thread    0.126301
Numba - Multi Thread     0.110175
PyGeoda                  0.068683
dtype: float64

## All tracts

## All blocks

**TODO:** this section has no benchmark yet.