In [1]:
from lennard_jones_potential import *
from multiprocessing import Pool
import numpy as np
import time

## Parallel calculation of the LJ energy for 2000 points using 1, 2, 4 CPUs.

In a first attempt we use the same algorithm that we used when calculating the energy in serial.
Only the evaluation of `v_LJ(d)` is executed in parallel.

**From: `LJ_multi_1.py`:**

In [2]:
def main_1():
    """Calculate the LJ energy of 2000 points using 1, 2 and 4 CPUs.

    For every pool size the complete pipeline is timed as a single unit:
    coordinate generation, the (serial) call to ``calc_distances`` and the
    parallel evaluation of ``v_LJ`` over its result.  The summed energy and
    the total runtime are printed for each pool size.
    """

    n_points = 2000

    for n_CPUs in [1, 2, 4]:

        print("Parallel execution of {} points using {} processes".format(n_points, n_CPUs))
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring intervals; time.time() may jump if the system clock changes.
        start = time.perf_counter()         # start timer

        coords = generate_coodinates(n_points, 3, upper=100, seed=5)

        distances = calc_distances(coords)

        # The context manager shuts the worker processes down when the block
        # is left -- also when map() raises -- so no workers are leaked from
        # one loop iteration to the next.
        with Pool(n_CPUs) as pool:
            results = pool.map(v_LJ, distances) # calculate results IN PARALLEL

        v_total = np.sum(results)           # sum up total
        print("v_Total: {:.2f}".format(v_total))# print result

        stop = time.perf_counter()          # stop timer
        print("Total Runtime: {:.3f} seconds".format(stop-start))
        print(20*"-"+"\n")



# Run the benchmark once for each pool size (1, 2 and 4 worker processes).
main_1()

Parallel execution of 2000 points using 1 processes
v_Total: 23630468668.50
Total Runtime: 26.584 seconds
--------------------

Parallel execution of 2000 points using 2 processes
v_Total: 23630468668.50
Total Runtime: 27.638 seconds
--------------------

Parallel execution of 2000 points using 4 processes
v_Total: 23630468668.50
Total Runtime: 24.943 seconds
--------------------



----

### A poor man's Profiler: ;-)

We now measure the time for the following steps individually:

* generating coordinates
* calculating distances
* calculating LJ energies


**From: `LJ_multi_2.py`:**

In [3]:
def main_2():
    """Same as ``main_1`` but with separate time-keeping for each step.

    For every pool size (1, 2, 4) the three stages -- generating the
    coordinates, calling ``calc_distances`` and mapping ``v_LJ`` over its
    result in a process pool -- are timed and reported individually,
    followed by the summed energy.
    """

    n_points = 2000

    for n_CPUs in [1, 2, 4]:
        print("Start LJ-Potential on {} points using {} processes:".format(n_points, n_CPUs))

        # perf_counter() is a monotonic clock meant for measuring intervals.
        print("Generating coordinates...", end='', flush=True)
        start = time.perf_counter()
        coords = generate_coodinates(n_points, 3, upper=100, seed=5)
        stop = time.perf_counter()
        print("done!\t\tRuntime: {:6.3f} seconds".format(stop-start))

        print("Calculating distances...", end='', flush=True)
        start = time.perf_counter()
        distances = calc_distances(coords)
        stop = time.perf_counter()
        print("done!\t\tRuntime: {:6.3f} seconds".format(stop-start))

        print("Calculating LJ on {} CPUs ...".format(n_CPUs), end='', flush=True)
        start = time.perf_counter()
        # Pool start-up is deliberately part of the measured interval.  The
        # context manager shuts the workers down even if map() raises, so no
        # processes are leaked between loop iterations.
        with Pool(n_CPUs) as pool:
            results = pool.map(v_LJ, distances) # calculate results IN PARALLEL
            stop = time.perf_counter()
        print("done!\tRuntime: {:6.3f} seconds".format(stop-start))

        v_total = np.sum(results)           # sum up total

        print("v_Total: {:.2f}".format(v_total))# print result
        print(20*"-"+"\n")

# Run the benchmark with per-step timing for 1, 2 and 4 worker processes.
main_2()

Start LJ-Potential on 2000 points using 1 processes:
Generating coordinates...done!		Runtime:  0.002 seconds
Calculating distances...done!		Runtime: 11.148 seconds
Calculating LJ on 1 CPUs ...done!	Runtime: 14.544 seconds
v_Total: 23630468668.50
--------------------

Start LJ-Potential on 2000 points using 2 processes:
Generating coordinates...done!		Runtime:  0.000 seconds
Calculating distances...done!		Runtime: 11.380 seconds
Calculating LJ on 2 CPUs ...done!	Runtime: 12.770 seconds
v_Total: 23630468668.50
--------------------

Start LJ-Potential on 2000 points using 4 processes:
Generating coordinates...done!		Runtime:  0.000 seconds
Calculating distances...done!		Runtime: 11.392 seconds
Calculating LJ on 4 CPUs ...done!	Runtime: 12.197 seconds
v_Total: 23630468668.50
--------------------



----

### There is no need to calculate the distances three times...

Instead of generating the coordinates for each number of CPUs and
re-calculating the distances, we can pull them out of the loop:

**From: `LJ_multi_3.py`:**

In [4]:
def main_3():
    """Same as ``main_2`` but calculate the distances only once (single CPU).

    Coordinates and distances are produced (and timed) a single time before
    the loop; only the parallel ``v_LJ`` evaluation is repeated and timed
    for pool sizes 1, 2 and 4.
    """

    n_points = 2000

    # perf_counter() is a monotonic clock meant for measuring intervals.
    print("Generating coordinates...", end='', flush=True)
    start = time.perf_counter()
    coords = generate_coodinates(n_points, 3, upper=100, seed=5)
    stop = time.perf_counter()
    print("done!\t\tRuntime: {:6.3f} seconds".format(stop-start))

    print("Calculating distances...", end='', flush=True)
    start = time.perf_counter()
    distances = calc_distances(coords)
    stop = time.perf_counter()
    print("done!\t\tRuntime: {:6.3f} seconds".format(stop-start))
    print(20*"-"+"\n")

    for n_CPUs in [1, 2, 4]:
        print("Start LJ-Potential on {} points using {} processes:".format(n_points, n_CPUs))

        print("Calculating LJ on {} CPUs ...".format(n_CPUs), end='', flush=True)
        start = time.perf_counter()
        # Pool start-up is deliberately part of the measured interval.  The
        # context manager shuts the workers down even if map() raises, so no
        # processes are leaked between loop iterations.
        with Pool(n_CPUs) as pool:
            results = pool.map(v_LJ, distances) # calculate results IN PARALLEL
            stop = time.perf_counter()
        print("done!\tRuntime: {:6.3f} seconds".format(stop-start))

        v_total = np.sum(results)           # sum up total

        print("v_Total: {:.2f}".format(v_total))# print result
        print(20*"-"+"\n")


# Run the benchmark: distances are computed once, then v_LJ is mapped with 1, 2 and 4 workers.
main_3()

Generating coordinates...done!		Runtime:  0.000 seconds
Calculating distances...done!		Runtime: 11.319 seconds
--------------------

Start LJ-Potential on 2000 points using 1 processes:
Calculating LJ on 1 CPUs ...done!	Runtime: 15.330 seconds
v_Total: 23630468668.50
--------------------

Start LJ-Potential on 2000 points using 2 processes:
Calculating LJ on 2 CPUs ...done!	Runtime: 14.021 seconds
v_Total: 23630468668.50
--------------------

Start LJ-Potential on 2000 points using 4 processes:
Calculating LJ on 4 CPUs ...done!	Runtime: 14.836 seconds
v_Total: 23630468668.50
--------------------



----

### Calculating the distances in parallel

Calculating the distances of points takes a significant amount of 
the total run-time and so far this part was only executed in serial.

In order to be able to distribute the distance calculation evenly across
multiple CPUs, we must know the point-pairs, before dispatching the parallel work.

**From `LJ_multi_4.py`:**

In [5]:
def generate_pairs(n):
    """Generate all unique index pairs ``(i, j)`` with ``0 <= i < j < n``.

    The pairs are produced in lexicographic order.  For ``n < 2`` there are
    no pairs and an empty list is returned.

    returns:: list of tuples with pair indices

    >>> generate_pairs(3)
    [(0, 1), (0, 2), (1, 2)]
    """
    # Start the inner range at i+1 instead of scanning every j and
    # discarding the half with j <= i.
    return [(i, j) for i in range(n) for j in range(i + 1, n)]

def calc_pot(args):
    """Return the LJ potential for a single pair of points.

    args:: tuple ``(i, j, coords)`` -- two indices into ``coords`` plus the
           coordinates themselves, packed into one tuple so the function
           can be handed to ``Pool.map``, which passes a single argument

    returns:: ``v_LJ(r)``, where ``r`` is the square root of the summed
              squared differences between ``coords[i]`` and ``coords[j]``

    >>> coords = np.array(
    ...   [[  6.25533007e-01,   1.08048674e+00,   1.71562226e-04],
    ...    [  4.53498859e-01,   2.20133836e-01,   1.38507892e-01],
    ...    [  2.79390317e-01,   5.18341091e-01,   5.95151211e-01]])
    >>> args = (0, 1, coords)
    >>> calc_pot(args)
    21402303.167507425
    """
    idx_a, idx_b, points = args         # unpack the argument tuple

    # Euclidean distance between the two selected points, step by step.
    diff = np.subtract(points[idx_a], points[idx_b])
    dist = np.sqrt(np.sum(np.square(diff)))

    return v_LJ(dist)

def main_4():
    """Calculate the distances in parallel as well.

    Coordinates and the list of index pairs are generated (and timed) once.
    For pool sizes 1, 2 and 4 a task tuple ``(i, j, coords)`` is built for
    every pair and ``calc_pot`` is mapped over the tasks in a process pool;
    task construction and the parallel map are timed together, pool
    start-up is not.
    """
    n_points = 2000

    # perf_counter() is a monotonic clock meant for measuring intervals.
    print("Generating coordinates...", end='', flush=True)
    start = time.perf_counter()
    coords = generate_coodinates(n_points, 3, upper=100, seed=5)
    stop = time.perf_counter()
    print("done!\t\tRuntime: {:6.3f} seconds".format(stop-start))

    print("Generating pairs...", end='', flush=True)
    start = time.perf_counter()
    pairs = generate_pairs(len(coords))
    stop = time.perf_counter()
    print("done!\t\tRuntime: {:6.3f} seconds".format(stop-start))
    print(20*"-"+"\n")

    for n_CPUs in [1, 2, 4]:
        print("Parallel LJ-Potential on {} points using {} processes:".format(n_points, n_CPUs))

        # The context manager shuts the workers down even if map() raises,
        # so no processes are leaked between loop iterations.
        with Pool(n_CPUs) as pool:
            print("Calculating LJ on {} CPUs ...".format(n_CPUs), end='', flush=True)
            start = time.perf_counter()

            tasks = []                              # prepare tasks
            for i, j in pairs:
                args = (i, j, coords)               # pack arguments tuple
                tasks.append(args)

            results = pool.map(calc_pot, tasks)     # <-- in parallel

            stop = time.perf_counter()
        print("done!\tRuntime: {:6.3f} seconds".format(stop-start))

        v_total = np.sum(results)               # sum up total
        print("v_Total: {:.2f}".format(v_total))    # print result
        print(20*"-"+"\n")

# Run the benchmark: distance and potential are both computed inside the worker processes.
main_4()

Generating coordinates...done!		Runtime:  0.000 seconds
Generating pairs...done!		Runtime:  0.707 seconds
--------------------

Parallel LJ-Potential on 2000 points using 1 processes:
Calculating LJ on 1 CPUs ...done!	Runtime: 24.739 seconds
v_Total: 23630468668.50
--------------------

Parallel LJ-Potential on 2000 points using 2 processes:
Calculating LJ on 2 CPUs ...done!	Runtime: 16.139 seconds
v_Total: 23630468668.50
--------------------

Parallel LJ-Potential on 2000 points using 4 processes:
Calculating LJ on 4 CPUs ...done!	Runtime: 14.906 seconds
v_Total: 23630468668.50
--------------------



----

### Building the task list with a list comprehension

Instead of creating the `tasks` list with a Python loop,
we can use a list comprehension.

instead of:

```python
tasks = []                            # prepare tasks
for p in pairs:
    args = (p[0], p[1], coords)       # pack arguments tupel
    tasks.append(args)
```

we can write:

```python
tasks = [ (p[0], p[1], coords) for p in pairs ]
```

**From `LJ_multi_5.py`:**

In [6]:
def main_5():
    """Same as ``main_4`` but building the task list with a list comprehension.

    Coordinates and the list of index pairs are generated (and timed) once.
    For pool sizes 1, 2 and 4 the task tuples ``(i, j, coords)`` are built
    and ``calc_pot`` is mapped over them in a process pool; task
    construction and the parallel map are timed together, pool start-up is
    not.
    """
    n_points = 2000

    # perf_counter() is a monotonic clock meant for measuring intervals.
    print("Generating coordinates...", end='', flush=True)
    start = time.perf_counter()
    coords = generate_coodinates(n_points, 3, upper=100, seed=5)
    stop = time.perf_counter()
    print("done!\t\tRuntime: {:6.3f} seconds".format(stop-start))

    print("Generating pairs...", end='', flush=True)
    start = time.perf_counter()
    pairs = generate_pairs(len(coords))
    stop = time.perf_counter()
    print("done!\t\tRuntime: {:6.3f} seconds".format(stop-start))
    print(20*"-"+"\n")

    for n_CPUs in [1, 2, 4]:
        print("Parallel LJ-Potential on {} points using {} processes:".format(n_points, n_CPUs))

        # The context manager shuts the workers down even if map() raises,
        # so no processes are leaked between loop iterations.
        with Pool(n_CPUs) as pool:
            print("Calculating LJ on {} CPUs ...".format(n_CPUs), end='', flush=True)
            start = time.perf_counter()

            tasks = [(i, j, coords) for i, j in pairs]

            results = pool.map(calc_pot, tasks)     # <-- in parallel

            stop = time.perf_counter()
        print("done!\tRuntime: {:6.3f} seconds".format(stop-start))

        v_total = np.sum(results)               # sum up total
        print("v_Total: {:.2f}".format(v_total))    # print result
        print(20*"-"+"\n")

# Run the list-comprehension variant of the benchmark for 1, 2 and 4 worker processes.
main_5()

Generating coordinates...done!		Runtime:  0.000 seconds
Generating pairs...done!		Runtime:  0.548 seconds
--------------------

Parallel LJ-Potential on 2000 points using 1 processes:
Calculating LJ on 1 CPUs ...done!	Runtime: 23.936 seconds
v_Total: 23630468668.50
--------------------

Parallel LJ-Potential on 2000 points using 2 processes:
Calculating LJ on 2 CPUs ...done!	Runtime: 15.956 seconds
v_Total: 23630468668.50
--------------------

Parallel LJ-Potential on 2000 points using 4 processes:
Calculating LJ on 4 CPUs ...done!	Runtime: 15.565 seconds
v_Total: 23630468668.50
--------------------

