# N Body BLAS

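An attempt at implementing N-body gravitational acceleration calculations with BLAS routines (via `scipy.linalg.blas`), timed against a NumPy/Numba reference implementation.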

In [10]:
import numpy as np
import scipy.linalg
import timeit
import numba

## Simulation Constants

In [2]:
# Mass definitions (SI units)
n = 10**3               # Number of bodies in disk
mE = 5.9722e24          # Mass of Earth (kg)
rE = 6371.0e3           # Radius of Earth (m)
mDisk = 90              # Total disk mass, expressed in Earth masses
mass = (mE * mDisk) / n # Mass of a single body (kg): disk mass split evenly over n bodies

# Simulation constants
# Written as scientific-notation literals so each value is the correctly rounded
# double, rather than the product of two separately rounded factors.
G = 6.67430e-11         # Gravitational constant (m^3 kg^-1 s^-2)
E = 1e-5                # Softening factor (m), smaller when average distances are small

In [3]:
# Fixed seed so a fresh "Restart & Run All" reproduces the same initial conditions
SEED = 42
rng = np.random.default_rng(SEED)

# Every body carries the same mass: 1-D float vector of length n
masses = np.full(n, mass)

# One row per body, three columns per row, each entry drawn from a standard normal.
# Velocities are drawn before positions; keep this order, since swapping it would
# change both arrays for a given seed.
velocities = rng.standard_normal((n, 3))
positions = rng.standard_normal((n, 3))

## Accelerations

In [11]:
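# Reference implementation adapted, with minor modifications, from the GitHub repo referenced below (TODO: make sure the repo link is actually present in this notebook)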

@numba.jit()
def acc1(positions, masses, G, E):
    """Softened gravitational acceleration on every body from all bodies.

    For body i the result is

        a_i = G * sum_j masses[j] * (r_j - r_i) / (|r_j - r_i|**2 + E**2)**1.5

    evaluated for all pairs at once through dense (n, n) intermediates.

    Parameters
    ----------
    positions : 2-D array, one row per body; columns 0, 1, 2 are read as x, y, z.
    masses : 1-D array with one mass per body.
    G : gravitational constant (scalar).
    E : softening length (scalar), added in quadrature to every pair separation.
        It should be non-zero: with E == 0 the i == j term is 0 * 0**(-1.5),
        which is not finite.

    Returns
    -------
    Array of shape (n, 3); row i holds the (x, y, z) acceleration of body i.
    """
    # Keep the columns 2-D, shape (n, 1), so `col.T - col` broadcasts to (n, n)
    x = positions[:, 0:1]
    y = positions[:, 1:2]
    z = positions[:, 2:3]

    # dx[i, j] = x[j] - x[i]: displacement from body i towards body j
    dx = x.T - x
    dy = y.T - y
    dz = z.T - z

    # (r^2 + E^2)^(-3/2) for every pair of bodies
    rInvCubed = (dx**2 + dy**2 + dz**2 + E**2)**(-1.5)

    # `*` and `@` share precedence and group left to right, so without the extra
    # parentheses G would rescale the whole (n, n) matrix before the product.
    # Reduce against the masses first and scale the length-n result instead.
    ax = G * ((dx * rInvCubed) @ masses)
    ay = G * ((dy * rInvCubed) @ masses)
    az = G * ((dz * rInvCubed) @ masses)

    # Stack the three length-n components as rows, then transpose to (n, 3)
    return np.vstack((ax, ay, az)).T

In [14]:
# Module-level copy of acc1's body, so that the intermediates (dx, rInvCubed, ...)
# exist as globals for the exploration and timing cells that follow.
n = masses.size

# Keep the columns 2-D, shape (n, 1), so `col.T - col` broadcasts to (n, n)
x = positions[:, 0:1]
y = positions[:, 1:2]
z = positions[:, 2:3]

# dx[i, j] = x[j] - x[i]: displacement from body i towards body j
dx = x.T - x
dy = y.T - y
dz = z.T - z

# (r^2 + E^2)^(-3/2) for every pair of bodies
rInvCubed = (dx**2 + dy**2 + dz**2 + E**2)**(-1.5)

# `*` and `@` share precedence and group left to right, so `G * (M) @ masses`
# would rescale the whole (n, n) matrix first. Reduce against the masses, then
# scale the length-n result.
ax = G * ((dx * rInvCubed) @ masses)
ay = G * ((dy * rInvCubed) @ masses)
az = G * ((dz * rInvCubed) @ masses)

# Rows of `a` are per-body accelerations, shape (n, 3)
a = np.vstack((ax, ay, az)).T

In [16]:
# Sanity check: dx should be the full n x n pairwise-difference matrix
dx.shape

(1000, 1000)

In [41]:
# Baseline: NumPy's own matrix-matrix product of the (n, n) matrix with itself
%timeit dx @ dx

21.3 ms ± 1.16 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [42]:
# Same product through the BLAS wrapper.
# NOTE(review): sgemm is the single-precision GEMM, while dx is presumably float64
# (rng.standard_normal's default dtype), so this timing likely includes a
# float64 -> float32 conversion and is not like-for-like with `dx @ dx`.
# dgemm would be the double-precision counterpart — TODO confirm and re-time.
%timeit scipy.linalg.blas.sgemm(1, dx, dx)

20.7 ms ± 981 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [43]:
# Baseline matrix-vector reduction: elementwise square of dx, then a product with masses
%timeit (dx*dx) @ masses

4.87 ms ± 69.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [44]:
# dx*dx is symmetric (dx is antisymmetric), hence the symmetric routine.
# NOTE(review): ssymm is the single-precision symmetric matrix-MATRIX product, but
# `masses` is a 1-D vector and the data are presumably float64. A double-precision
# matrix-vector routine (dsymv / dgemv) looks like the fairer comparison — TODO confirm.
%timeit scipy.linalg.blas.ssymm(1, dx*dx, masses)

9.33 ms ± 87.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [54]:
# Baseline for the actual acceleration kernel: the x-component expression from acc1, without G
%timeit (dx * rInvCubed) @ masses

4.92 ms ± 168 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [55]:
# NOTE(review): this is not the same quantity as `(dx * rInvCubed) @ masses`.
# `dx * rInvCubed` is an elementwise product, whereas ssymm computes a matrix product
# of rInvCubed with dx; the equality check in the In [58] cell shows the two differ.
# BLAS has no elementwise-product routine, so only the final reduction against
# `masses` maps onto BLAS (a matrix-vector product, not sgemm).
%timeit scipy.linalg.blas.sgemm(1, scipy.linalg.blas.ssymm(1, rInvCubed, dx), masses)

20.7 ms ± 881 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [58]:
# Checks whether ssymm reproduces the elementwise product; it does not (all False below).
# NOTE(review): that is expected, since ssymm performs a matrix multiplication.
# Exact `==` on floats against a single-precision result would be fragile in any
# case; np.allclose(...) is the usual way to compare.
(rInvCubed * dx) == scipy.linalg.blas.ssymm(1, rInvCubed, dx)

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [52]:
# Antisymmetry check: dx[i, j] = x[j] - x[i], so dx.T should equal -dx elementwise.
# np.array_equal(dx.T, -dx) would give a single bool instead of an n x n array.
dx.T == -dx

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ...,
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]])

In [36]:
# Inspect the pairwise x-difference matrix (zero diagonal, antisymmetric off-diagonal)
dx

array([[ 0.        , -0.62986302,  1.64152867, ...,  0.06090329,
         1.12779764, -0.25096639],
       [ 0.62986302,  0.        ,  2.2713917 , ...,  0.69076632,
         1.75766066,  0.37889663],
       [-1.64152867, -2.2713917 ,  0.        , ..., -1.58062538,
        -0.51373104, -1.89249507],
       ...,
       [-0.06090329, -0.69076632,  1.58062538, ...,  0.        ,
         1.06689435, -0.31186968],
       [-1.12779764, -1.75766066,  0.51373104, ..., -1.06689435,
         0.        , -1.37876403],
       [ 0.25096639, -0.37889663,  1.89249507, ...,  0.31186968,
         1.37876403,  0.        ]])

In [38]:
# dsyrk is BLAS's double-precision symmetric rank-k update (alpha * A @ A.T by default).
# Only one triangle is filled in: the output below has zeros under the diagonal.
scipy.linalg.blas.dsyrk(1, dx)

array([[1044.58996086, 1017.43347697, 1115.36431225, ..., 1047.21580042,
        1093.21484783, 1033.76956889],
       [   0.        , 1387.00442257,   54.26961259, ...,  981.69858544,
         355.70033335, 1164.68753641],
       [   0.        ,    0.        , 3880.75505409, ..., 1217.96465043,
        3015.30135995,  692.57538949],
       ...,
       [   0.        ,    0.        ,    0.        , ..., 1053.55085083,
        1164.5272751 , 1021.11072916],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
        2413.767245  ,  799.35515067],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        , 1085.93330741]])

In [13]:
def acc1blas(positions, masses, G, E):
    """BLAS-backed counterpart of `acc1`: softened gravitational accelerations.

    For body i the result is

        a_i = G * sum_j masses[j] * (r_j - r_i) / (|r_j - r_i|**2 + E**2)**1.5

    The pairwise weights are built with NumPy (BLAS has no elementwise-product
    routine); the reduction against `masses` and the scaling by G are a single
    double-precision `dgemv` call per component.

    Parameters
    ----------
    positions : array-like of shape (n, 3); columns 0, 1, 2 are read as x, y, z.
    masses : array-like of shape (n,), one mass per body.
    G : gravitational constant (scalar).
    E : softening length (scalar), added in quadrature to every pair separation.
        It should be non-zero: with E == 0 the i == j term is 0 * inf, which is NaN.

    Returns
    -------
    numpy.ndarray of shape (n, 3); row i holds the (x, y, z) acceleration of body i.
    """
    positions = np.asarray(positions, dtype=np.float64)
    masses = np.asarray(masses, dtype=np.float64)
    if masses.size == 0:
        # Nothing to reduce; avoid handing zero-sized operands to BLAS
        return np.zeros((0, 3))

    # Keep the columns 2-D, shape (n, 1), so `col.T - col` broadcasts to (n, n)
    x = positions[:, 0:1]
    y = positions[:, 1:2]
    z = positions[:, 2:3]

    # dx[i, j] = x[j] - x[i]: displacement from body i towards body j
    dx = x.T - x
    dy = y.T - y
    dz = z.T - z

    # (r^2 + E^2)^(-3/2) for every pair of bodies
    rInvCubed = (dx**2 + dy**2 + dz**2 + E**2)**(-1.5)

    dgemv = scipy.linalg.blas.dgemv

    def weighted_sum(delta):
        # G * (delta * rInvCubed) @ masses, with alpha = G folded into the BLAS call.
        # The transposed view is Fortran-ordered, which is the layout the wrapper
        # expects; trans=1 undoes the transpose inside BLAS.
        weights = delta * rInvCubed
        return dgemv(G, weights.T, masses, trans=1)

    return np.column_stack((weighted_sum(dx), weighted_sum(dy), weighted_sum(dz)))

## Performance Comparison

In [12]:
# Time the numba-jitted reference implementation on the n-body system defined above
%timeit acc1(positions, masses, G, E)

89.7 ms ± 14.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
