In [1]:
import atomtypes2 as atomtypes
import MDAnalysis as mda
import numpy as np
import pandas as pd

In [3]:
# Load the benchmark system; every atom group below is derived from this universe.
gro_path = 'big.gro'
u = mda.Universe(gro_path)

In [4]:
# Build the new-style container once from every atom in the universe.
master = atomtypes.convert(u.atoms)

# The size of AtomGroup to test
natoms = 250000

# Draw the sample from a locally seeded generator so a fresh kernel always
# benchmarks the same atoms, without touching NumPy's global RNG state.
# Indices are drawn independently, so an atom may be picked more than once.
rng = np.random.RandomState(0)
idx = rng.randint(0, len(u.atoms), natoms)

# Apply the identical index array to both implementations so the two groups
# being timed refer to the same atoms in the same order.
oldag = u.atoms[idx]
newag = master[idx]

In [5]:
# Full-system counterparts of the two groups: all atoms, no random sub-selection.
oldag_full = u.atoms
# Convert that same full group so both styles cover the same set of atoms.
newag_full = atomtypes.convert(oldag_full)

In [6]:
# Repeated access is affected by the processor cache, so the slowest runs of
# each benchmark are discarded and only the faster (cached) ones are averaged.
# This is the quantile cut-off, expressed as a fraction between 0 and 1
# despite the name; using one value everywhere keeps the comparisons consistent.
percentile = 0.75

# Let the games begin 

## Let's get some attributes

Fetch the names for our atomgroups

In [7]:
a_new = %timeit -n1 -r50 -o newag.names()
s_new = pd.Series(a_new.all_runs)
t_new = s_new[s_new < s_new.quantile(percentile)].mean()

a_old = %timeit -n1 -r50 -o oldag.names()
s_old = pd.Series(a_old.all_runs)
t_old = s_old[s_old < s_old.quantile(percentile)].mean()

print "New style took: {}".format(t_new)
print "Old style took: {}".format(t_old)
print ""
print "Speed up of new: {}".format(t_old / t_new)

1 loops, best of 50: 7.57 ms per loop
1 loops, best of 50: 173 ms per loop
New style took: 0.00763145652977
Old style took: 0.174694370579

Speed up of new: 22.8913536882


Fetch the charges for our atomgroups

In [8]:
a_new = %timeit -n1 -r50 -o newag.charges()
s_new = pd.Series(a_new.all_runs)
t_new = s_new[s_new < s_new.quantile(percentile)].mean()

a_old = %timeit -n1 -r50 -o oldag.charges()
s_old = pd.Series(a_old.all_runs)
t_old = s_old[s_old < s_old.quantile(percentile)].mean()

print "New style took: {}".format(t_new)
print "Old style took: {}".format(t_old)
print ""
print "Speed up of new: {}".format(t_old / t_new)

1 loops, best of 50: 4.96 ms per loop
1 loops, best of 50: 74.7 ms per loop
New style took: 0.00501285372554
Old style took: 0.0753132394842

Speed up of new: 15.0240249582


## Let's set some attributes

In [9]:
charges = np.random.random(len(oldag))

a_new = %timeit -n1 -r50 -o newag.set_charges(charges)
s_new = pd.Series(a_new.all_runs)
t_new = s_new[s_new < s_new.quantile(percentile)].mean()

a_old = %timeit -n1 -r50 -o oldag.set_charge(charges)
s_old = pd.Series(a_old.all_runs)
t_old = s_old[s_old < s_old.quantile(percentile)].mean()

print "New style took: {}".format(t_new)
print "Old style took: {}".format(t_old)
print ""
print "Speed up of new: {}".format(t_old / t_new)

1 loops, best of 50: 3.99 ms per loop
1 loops, best of 50: 151 ms per loop
New style took: 0.00402729575698
Old style took: 0.158945309149

Speed up of new: 39.4670068305


## Let's try some fancy indexing

In [10]:
idx2 = np.random.randint(0, len(oldag), size=25000)

a_new = %timeit -n1 -r50 -o newag[idx2]
s_new = pd.Series(a_new.all_runs)
t_new = s_new[s_new < s_new.quantile(percentile)].mean()

a_old = %timeit -n1 -r50 -o oldag[idx2]
s_old = pd.Series(a_old.all_runs)
t_old = s_old[s_old < s_old.quantile(percentile)].mean()

print "New style took: {}".format(t_new)
print "Old style took: {}".format(t_old)
print ""
print "Speed up of new: {}".format(t_old / t_new)

The slowest run took 9.00 times longer than the fastest. This could mean that an intermediate result is being cached 
1 loops, best of 50: 102 µs per loop
1 loops, best of 50: 5.79 ms per loop
New style took: 0.000199026531643
Old style took: 0.00595336347013

Speed up of new: 29.9124112799


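What about fancy indexing the full set of atoms? Does the relative speedup depend on system size? Note that, unlike the previous benchmark, the timed statement here also fetches the names of the indexed group, so the numbers cover indexing plus attribute access.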

In [11]:
idx2 = np.random.randint(0, len(oldag_full), size=25000)

a_new = %timeit -n1 -r50 -o newag_full[idx2].names()
s_new = pd.Series(a_new.all_runs)
t_new = s_new[s_new < s_new.quantile(percentile)].mean()

a_old = %timeit -n1 -r50 -o oldag_full[idx2].names()
s_old = pd.Series(a_old.all_runs)
t_old = s_old[s_old < s_old.quantile(percentile)].mean()

print "New style took: {}".format(t_new)
print "Old style took: {}".format(t_old)
print ""
print "Speed up of new: {}".format(t_old / t_new)

1 loops, best of 50: 1.05 ms per loop
1 loops, best of 50: 22.7 ms per loop
New style took: 0.00110406488986
Old style took: 0.0233755240569

Speed up of new: 21.1722374941


## Making selections based on names

Pull out all atoms called 'OW' from the atomgroup

In [12]:
a_new = %timeit -n1 -r50 -o newag[newag.names() == 'OW']
s_new = pd.Series(a_new.all_runs)
t_new = s_new[s_new < s_new.quantile(percentile)].mean()

a_old = %timeit -n1 -r50 -o oldag[oldag.names() == 'OW']
s_old = pd.Series(a_old.all_runs)
t_old = s_old[s_old < s_old.quantile(percentile)].mean()

print "New style took :{}".format(t_new)
print "Old style took :{}".format(t_old)
print ""
print "Speed up of new: {}".format(t_old/t_new)

1 loops, best of 50: 12.6 ms per loop
1 loops, best of 50: 198 ms per loop
New style took :0.0126413976824
Old style took :0.198858480196

Speed up of new: 15.7307352551
