In [1]:
import os
import time
import threading
import multiprocessing
 
NUM_WORKERS = 4
 
def only_sleep():
    """ Do nothing, wait for a timer to expire """
    print("PID: %s, Process Name: %s, Thread Name: %s" % (
        os.getpid(),
        multiprocessing.current_process().name,
        threading.current_thread().name)
    )
    time.sleep(1)
 
 
def crunch_numbers():
    """ Do some computations """
    print("PID: %s, Process Name: %s, Thread Name: %s" % (
        os.getpid(),
        multiprocessing.current_process().name,
        threading.current_thread().name)
    )
    x = 0
    while x < 10000000:
        x += 1

In [2]:
## Run tasks serially
start_time = time.time()
for _ in range(NUM_WORKERS):
    only_sleep()
end_time = time.time()
print("Serial time=", end_time - start_time)

PID: 30642, Process Name: MainProcess, Thread Name: MainThread
PID: 30642, Process Name: MainProcess, Thread Name: MainThread
PID: 30642, Process Name: MainProcess, Thread Name: MainThread
PID: 30642, Process Name: MainProcess, Thread Name: MainThread
Serial time= 4.005331039428711


In [4]:
# Run tasks using threads
start_time = time.time()
threads = [threading.Thread(target=only_sleep) for _ in range(NUM_WORKERS)]
[thread.start() for thread in threads]
[thread.join() for thread in threads]
end_time = time.time()
 
print("Threads time=", end_time - start_time)

PID: 27866, Process Name: MainProcess, Thread Name: Thread-4
PID: 27866, Process Name: MainProcess, Thread Name: Thread-5
PID: 27866, Process Name: MainProcess, Thread Name: Thread-6
PID: 27866, Process Name: MainProcess, Thread Name: Thread-7
Threads time= 1.0042507648468018


In [7]:
# Run tasks using processes
start_time = time.time()
processes = [multiprocessing.Process(target=only_sleep) for _ in range(NUM_WORKERS)]
[process.start() for process in processes]
[process.join() for process in processes]
end_time = time.time()
 
print("Parallel time=", end_time - start_time)

PID: 28139, Process Name: Process-6, Thread Name: MainThread
PID: 28140, Process Name: Process-7, Thread Name: MainThread
PID: 28142, Process Name: Process-8, Thread Name: MainThread
PID: 28138, Process Name: Process-5, Thread Name: MainThread
Parallel time= 1.0349993705749512


In [9]:
start_time = time.time()
for _ in range(NUM_WORKERS):
    crunch_numbers()
end_time = time.time()
 
print("Serial time=", end_time - start_time)
print()

start_time = time.time()
threads = [threading.Thread(target=crunch_numbers) for _ in range(NUM_WORKERS)]
[thread.start() for thread in threads]
[thread.join() for thread in threads]
end_time = time.time()
 
print("Threads time=", end_time - start_time)
print() 
 
start_time = time.time()
processes = [multiprocessing.Process(target=crunch_numbers) for _ in range(NUM_WORKERS)]
[process.start() for process in processes]
[process.join() for process in processes]
end_time = time.time()
 
print("Parallel time=", end_time - start_time)
print()

PID: 27866, Process Name: MainProcess, Thread Name: MainThread
PID: 27866, Process Name: MainProcess, Thread Name: MainThread
PID: 27866, Process Name: MainProcess, Thread Name: MainThread
PID: 27866, Process Name: MainProcess, Thread Name: MainThread
Serial time= 1.8378472328186035

PID: 27866, Process Name: MainProcess, Thread Name: Thread-12
PID: 27866, Process Name: MainProcess, Thread Name: Thread-13
PID: 27866, Process Name: MainProcess, Thread Name: Thread-14
PID: 27866, Process Name: MainProcess, Thread Name: Thread-15
Threads time= 2.7522053718566895

PID: 28260, Process Name: Process-15, Thread Name: MainThread
PID: 28262, Process Name: Process-16, Thread Name: MainThread
PID: 28257, Process Name: Process-13, Thread Name: MainThread
PID: 28258, Process Name: Process-14, Thread Name: MainThread
Parallel time= 0.6293623447418213



In [2]:
import numpy as np
import mdtraj as md
import matplotlib.pyplot as plt
import queue
traj = {}

In [43]:
start_time = time.time()
#Only the new ones
for nmolec in (28,14):
    traj[nmolec]=md.load('/home/ramon/perdiu51/ariadna/{}/prod_140/output.xtc'.format(nmolec), 
            top='/home/ramon/perdiu51/ariadna/{}/prod_140/structure.pdb'.format(nmolec),
                        stride=5)
    # This is slow
    #traj[nmolec] = traj[nmolec].remove_solvent()
    #This is faster and also removes hydrogens
    protein_atoms = traj[nmolec].top.select("protein and not type H")
    traj[nmolec] = traj[nmolec].atom_slice(protein_atoms)

for nmolec in (56,113):
    traj[nmolec]=md.load('/home/ramon/perdiu50/polyQ/{}/prod_140/output.xtc'.format(nmolec), 
            top='/home/ramon/perdiu50/polyQ/{}/prod_140/structure.pdb'.format(nmolec),
                        stride=5)
    # This is slow
    #traj[nmolec] = traj[nmolec].remove_solvent()
    #This is faster and also removes hydrogens
    protein_atoms = traj[nmolec].top.select("protein and not type H")
    traj[nmolec] = traj[nmolec].atom_slice(protein_atoms)
end_time = time.time()
print("Serial time=", end_time - start_time)
traj

Serial time= 132.1322774887085


{14: <mdtraj.Trajectory with 36 frames, 4368 atoms, 518 residues, and unitcells at 0x7f1649239ef0>,
 28: <mdtraj.Trajectory with 32 frames, 8731 atoms, 1036 residues, and unitcells at 0x7f163eb6c828>,
 56: <mdtraj.Trajectory with 185 frames, 17462 atoms, 2072 residues, and unitcells at 0x7f163c4b4978>,
 113: <mdtraj.Trajectory with 203 frames, 35251 atoms, 4181 residues, and unitcells at 0x7f16afb36128>}

In [23]:
def load_traj(dirname, nmol, queue):
    """
    Load a trajectory and add it to a queue (so that it can run concurrently)
    """
    print("PID: %s, Process Name: %s, Thread Name: %s" % (
    os.getpid(),
    multiprocessing.current_process().name,
    threading.current_thread().name))

    t = md.load(dirname+'/{}/prod_140/output.xtc'.format(nmol),
            top=dirname+'/{}/prod_140/structure.pdb'.format(nmol), stride=5)
    protein_atoms = t.top.select("protein and not type H")
    t = t.atom_slice(protein_atoms)

    q.put({nmol:t})
    


In [None]:
traj = {}
q = queue.Queue()
threads= []
start_time = time.time()
threads.append(threading.Thread(target=load_traj, 
                 args=('/home/ramon/perdiu51/ariadna/',28, q)))
threads.append(threading.Thread(target=load_traj, 
                 args=('/home/ramon/perdiu51/ariadna/',14, q)))
threads.append(threading.Thread(target=load_traj, 
                 args=('/home/ramon/perdiu50/polyQ/',56, q)))
threads.append(threading.Thread(target=load_traj, 
                 args=('/home/ramon/perdiu50/polyQ/',113, q)))
[thread.start() for thread in threads]
[thread.join() for thread in threads]
end_time = time.time()
print("Threaded time=", end_time - start_time)
while q:
    traj.update(q.get())
traj

In [10]:
traj = {}
q = multiprocessing.Queue()
processes= []
start_time = time.time()

processes.append(multiprocessing.Process(target=load_traj, 
                 args=('/home/ramon/perdiu51/ariadna/',28, q)))
processes.append(multiprocessing.Process(target=load_traj, 
                 args=('/home/ramon/perdiu51/ariadna/',14, q)))
processes.append(multiprocessing.Process(target=load_traj, 
                 args=('/home/ramon/perdiu50/polyQ/',56, q)))
processes.append(multiprocessing.Process(target=load_traj, 
                 args=('/home/ramon/perdiu50/polyQ/',113, q)))
[process.start() for process in processes]
[process.join() for process in processes]
end_time = time.time()
print("Processes time=", end_time - start_time)
traj.update(q.get())
traj

PID: 30857, Process Name: Process-1, Thread Name: MainThread
PID: 30860, Process Name: Process-2, Thread Name: MainThread
PID: 30863, Process Name: Process-3, Thread Name: MainThread
PID: 30866, Process Name: Process-4, Thread Name: MainThread


Process Process-3:
Process Process-4:
Process Process-2:
Process Process-1:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ramon/anaconda3/lib/python3.6/multiprocessing/process.py", line 261, in _bootstrap
    util._exit_function()
  File "/home/ramon/anaconda3/lib/python3.6/multiprocessing/process.py", line 261, in _bootstrap
    util._exit_function()
  File "/home/ramon/anaconda3/lib/python3.6/multiprocessing/process.py", line 261, in _bootstrap
    util._exit_function()
  File "/home/ramon/anaconda3/lib/python3.6/multiprocessing/process.py", line 261, in _bootstrap
    util._exit_function()
  File "/home/ramon/anaconda3/lib/python3.6/multiprocessing/util.py", line 322, in _exit_function
    _run_finalizers()
  File "/home/ramon/anaconda3/lib/python3.6/multiprocessing/util.py", line 322, in _exit_function
    _run_finalizers()
  File "/home/ramon/anaconda3/lib/python3.6/multipro

KeyboardInterrupt: 

In [5]:
traj = {}
start_time = time.time()
argslist = [('/home/ramon/perdiu51/ariadna/',28),
           ('/home/ramon/perdiu51/ariadna/',14),
           ('/home/ramon/perdiu50/polyQ/',56),
           ('/home/ramon/perdiu50/polyQ/',113)]
with multiprocessing.Pool(processes=4) as pool:
    results = pool.map_async(load_traj2, argslist)

while results:
    traj.update(results.get())
end_time = time.time()
print("Processes time=", end_time - start_time)
traj

KeyboardInterrupt: 

In [11]:
traj = {}
start_time = time.time()
argslist = [('/home/ramon/perdiu51/ariadna/',28),
           ('/home/ramon/perdiu51/ariadna/',14),
           ('/home/ramon/perdiu50/polyQ/',56),
           ('/home/ramon/perdiu50/polyQ/',113)]
with multiprocessing.pool.ThreadPool(processes=1) as pool:
    results = pool.map_async(load_traj2, argslist)

while results:
    traj.update(results.get())
end_time = time.time()
print("Processes time=", end_time - start_time)
traj

KeyboardInterrupt: 

In [6]:
import concurrent.futures

In [38]:
traj = {}
start_time = time.time()
argslist = [('/home/ramon/perdiu51/ariadna/',28),
           ('/home/ramon/perdiu51/ariadna/',14),
           ('/home/ramon/perdiu50/polyQ/',56),
           ('/home/ramon/perdiu50/polyQ/',113)]
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = {executor.submit(load_traj2, *val) for val in argslist}
    #concurrent.futures.wait(futures)

[traj.update(f.result()) for f in futures]
end_time = time.time()
print("Processes time=", end_time - start_time)
traj

PID: 31427, Process Name: MainProcess, Thread Name: ThreadPoolExecutor-9_0
PID: 31427, Process Name: MainProcess, Thread Name: ThreadPoolExecutor-9_1
PID: 31427, Process Name: MainProcess, Thread Name: ThreadPoolExecutor-9_2
PID: 31427, Process Name: MainProcess, Thread Name: ThreadPoolExecutor-9_3
Processes time= 183.9627230167389


{14: <mdtraj.Trajectory with 36 frames, 4368 atoms, 518 residues, and unitcells at 0x7f16300446a0>,
 28: <mdtraj.Trajectory with 32 frames, 8731 atoms, 1036 residues, and unitcells at 0x7f16608707b8>,
 56: <mdtraj.Trajectory with 185 frames, 17462 atoms, 2072 residues, and unitcells at 0x7f15e7a0ab38>,
 113: <mdtraj.Trajectory with 203 frames, 35251 atoms, 4181 residues, and unitcells at 0x7f16a0ab9828>}

In [39]:
traj = {}
start_time = time.time()
argslist = [('/home/ramon/perdiu51/ariadna/',28),
           ('/home/ramon/perdiu51/ariadna/',14),
           ('/home/ramon/perdiu50/polyQ/',56),
           ('/home/ramon/perdiu50/polyQ/',113)]
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = {executor.submit(load_traj2, *val) for val in argslist}
    concurrent.futures.wait(futures)

[traj.update(f.result()) for f in futures]
end_time = time.time()
print("Processes time=", end_time - start_time)
traj

PID: 31427, Process Name: MainProcess, Thread Name: ThreadPoolExecutor-10_0
PID: 31427, Process Name: MainProcess, Thread Name: ThreadPoolExecutor-10_1
PID: 31427, Process Name: MainProcess, Thread Name: ThreadPoolExecutor-10_2
PID: 31427, Process Name: MainProcess, Thread Name: ThreadPoolExecutor-10_3
Processes time= 179.27884244918823


{14: <mdtraj.Trajectory with 36 frames, 4368 atoms, 518 residues, and unitcells at 0x7f16453bc630>,
 28: <mdtraj.Trajectory with 32 frames, 8731 atoms, 1036 residues, and unitcells at 0x7f166b76dcc0>,
 56: <mdtraj.Trajectory with 185 frames, 17462 atoms, 2072 residues, and unitcells at 0x7f16468f05f8>,
 113: <mdtraj.Trajectory with 203 frames, 35251 atoms, 4181 residues, and unitcells at 0x7f16468f0668>}

In [40]:
traj = {}
start_time = time.time()
argslist = [('/home/ramon/perdiu51/ariadna/',28),
           ('/home/ramon/perdiu51/ariadna/',14),
           ('/home/ramon/perdiu50/polyQ/',56),
           ('/home/ramon/perdiu50/polyQ/',113)]
with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
    futures = {executor.submit(load_traj2, *val) for val in argslist}
    #concurrent.futures.wait(futures)

[traj.update(f.result()) for f in futures]
end_time = time.time()
print("Processes time=", end_time - start_time)
traj

PID: 32360, Process Name: Process-5, Thread Name: MainThread
PID: 32362, Process Name: Process-7, Thread Name: MainThread
PID: 32361, Process Name: Process-6, Thread Name: MainThread
PID: 32363, Process Name: Process-8, Thread Name: MainThread
Processes time= 59.400649547576904


{14: <mdtraj.Trajectory with 36 frames, 4368 atoms, 518 residues, and unitcells at 0x7f1642d9ef28>,
 28: <mdtraj.Trajectory with 32 frames, 8731 atoms, 1036 residues, and unitcells at 0x7f1607cfdd68>,
 56: <mdtraj.Trajectory with 185 frames, 17462 atoms, 2072 residues, and unitcells at 0x7f15ebb659e8>,
 113: <mdtraj.Trajectory with 203 frames, 35251 atoms, 4181 residues, and unitcells at 0x7f1607cfdf28>}

In [41]:
traj = {}
start_time = time.time()
argslist = [('/home/ramon/perdiu51/ariadna/',28),
           ('/home/ramon/perdiu51/ariadna/',14),
           ('/home/ramon/perdiu50/polyQ/',56),
           ('/home/ramon/perdiu50/polyQ/',113)]
with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
    futures = {executor.submit(load_traj2, *val) for val in argslist}
    concurrent.futures.wait(futures)

[traj.update(f.result()) for f in futures]
end_time = time.time()
print("Processes time=", end_time - start_time)
traj

PID: 32391, Process Name: Process-9, Thread Name: MainThread
PID: 32394, Process Name: Process-12, Thread Name: MainThread
PID: 32392, Process Name: Process-10, Thread Name: MainThread
PID: 32393, Process Name: Process-11, Thread Name: MainThread
Processes time= 61.89566206932068


{14: <mdtraj.Trajectory with 36 frames, 4368 atoms, 518 residues, and unitcells at 0x7f1609573b00>,
 28: <mdtraj.Trajectory with 32 frames, 8731 atoms, 1036 residues, and unitcells at 0x7f1641dea4e0>,
 56: <mdtraj.Trajectory with 185 frames, 17462 atoms, 2072 residues, and unitcells at 0x7f1667ead940>,
 113: <mdtraj.Trajectory with 203 frames, 35251 atoms, 4181 residues, and unitcells at 0x7f1667eadc18>}

In [42]:
4+4

8