# Threading
* Simple notebook to illustrate several mechanics of threads in python
* A thread is an execution path within a process. 

# Setup

In [1]:
import threading
import time

from threading import Thread
from threading import Lock
from threading import Barrier
from threading import Semaphore

from queue import Queue

from concurrent.futures import ThreadPoolExecutor

# Logging
* Using a logger in this notebook because stdout will get mangled as multiple threads write to it

In [2]:
import logging
import sys
# Route log records to stdout so they show up in the notebook cell output,
# and emit everything from DEBUG level upwards.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# getLogger() with no name returns the root logger (records are tagged "root").
# The level is already set by basicConfig above, so no separate setLevel call is needed.
logger = logging.getLogger()
logger.debug('Hello')

DEBUG:root:Hello


# Current Thread
* We can get a reference to the running thread and report its name

In [3]:
# current_thread() returns the Thread object for the caller; this cell runs on the main thread
print(threading.current_thread().name)

MainThread


# Thread Count

In [4]:
# Counts every live thread in the kernel process, not only ones this notebook created,
# so the number is usually greater than 1 and depends on the environment
print(threading.active_count())

5


# Enumerating Active Threads

In [5]:
# enumerate() yields a Thread object for each currently live thread;
# report each one's name, identifier and liveness
for thread in threading.enumerate():
    print(f'Thread: {thread.name} Id: {thread.ident} Alive: {thread.is_alive()}')

Thread: MainThread Id: 140735275814912 Alive: True
Thread: Thread-2 Id: 123145308631040 Alive: True
Thread: Thread-3 Id: 123145313886208 Alive: True
Thread: IPythonHistorySavingThread Id: 123145320214528 Alive: True
Thread: Thread-1 Id: 123145325469696 Alive: True


# Thread Class
* We subclass the Thread class
* We must provide a run method
* The run method is called when our thread is started

In [6]:
class MyThread(Thread):
    """Thread subclass whose run() logs a single message tagged with an id.

    Args:
        id: Label included in the log message; stored as ``self.id``.
    """

    def __init__(self, id):
        # Initialise the base Thread first (via super(), so the call stays
        # correct if the base class changes) before attaching our own state.
        super().__init__()
        self.id = id

    def run(self):
        """Body of the thread; invoked in the new thread after start() is called."""
        logger.debug(f'  Running Thread {self.id}')
        
# We have defined the thread class; now create a thread object.
# It doesn't do anything until we tell it to start
thread1 = MyThread(1)

# We start the thread; its run() method executes in the new thread
logger.debug('Starting Thread')
thread1.start()

# The main thread blocks here until the
# worker thread has completed and exited
logger.debug('Joining Thread')
thread1.join()

logger.debug('Main completed')

DEBUG:root:Starting Thread
DEBUG:root:  Running Thread 1
DEBUG:root:Joining Thread
DEBUG:root:Main completed


# Threads & Functions
* In addition to subclassing thread we can create a thread to execute an arbitrary function

In [7]:
def work(thread_name, seconds=5):
    """Simulate a blocking task: log a start message, sleep, log a completion message.

    Args:
        thread_name: Label used in the log messages.
        seconds: How long to sleep, in seconds (default 5).
    """
    started = f'  Thread {thread_name} is working for {seconds} seconds.'
    logging.debug(started)

    # Stand-in for real work
    time.sleep(seconds)

    finished = f'  Thread {thread_name} is complete.'
    logging.debug(finished)

## Creating & Joining a Thread

In [8]:
# Create a thread object that will call work(1, 10).
# It doesn't do anything until we tell it to start
logger.debug('Main creating thread')
t = threading.Thread(target=work, args=(1,10))

# We start the thread
logger.debug('Starting thread')
t.start()

# The main thread blocks here until the
# worker thread has completed and exited
logger.debug('Waiting to complete')
t.join()

logger.debug('Main finished')

DEBUG:root:Main creating thread
DEBUG:root:Starting thread
DEBUG:root:  Thread 1 is working for 10 seconds.
DEBUG:root:Waiting to complete
DEBUG:root:  Thread 1 is complete.
DEBUG:root:Main finished


## Creating Multiple Worker Threads

In [9]:
# Create ten threads; thread x will call work(x, 10)
logger.debug('Main creating threads')
threads = [Thread(target=work, args=(x, 10)) for x in range(10)]

# Start every thread. start() is called purely for its side effect, so a
# plain loop is used instead of building a throwaway list of None values.
logger.debug('Starting all threads')
for worker_thread in threads:
    worker_thread.start()

# Block until every thread has finished
logger.debug('Waiting for all threads to complete')
for worker_thread in threads:
    worker_thread.join()

logger.debug('Main finished')

DEBUG:root:Main creating threads
DEBUG:root:Starting all threads
DEBUG:root:  Thread 0 is working for 10 seconds.
DEBUG:root:  Thread 1 is working for 10 seconds.
DEBUG:root:  Thread 2 is working for 10 seconds.
DEBUG:root:  Thread 3 is working for 10 seconds.
DEBUG:root:  Thread 4 is working for 10 seconds.
DEBUG:root:  Thread 5 is working for 10 seconds.
DEBUG:root:  Thread 6 is working for 10 seconds.
DEBUG:root:  Thread 7 is working for 10 seconds.
DEBUG:root:  Thread 8 is working for 10 seconds.
DEBUG:root:  Thread 9 is working for 10 seconds.
DEBUG:root:Waiting for all threads to complete
DEBUG:root:  Thread 0 is complete.
DEBUG:root:  Thread 1 is complete.
DEBUG:root:  Thread 2 is complete.
DEBUG:root:  Thread 3 is complete.
DEBUG:root:  Thread 4 is complete.
DEBUG:root:  Thread 5 is complete.
DEBUG:root:  Thread 6 is complete.
DEBUG:root:  Thread 7 is complete.
DEBUG:root:  Thread 8 is complete.
DEBUG:root:  Thread 9 is complete.
DEBUG:root:Main finished


# Using a Thread Pool
* This is a safe and convenient way to launch threads
* Because we create the pool in a with block, it will join all threads on exit

In [10]:
workers=5
seconds=10  # not passed to work() in this cell, so each task uses work's default duration

logger.debug('Starting Pool')
with ThreadPoolExecutor(max_workers=workers) as pool:
    # map() returns a lazy iterator of results. Draining it with list()
    # re-raises any exception thrown inside a worker; without this a
    # failing task would go unnoticed.
    list(pool.map(work, range(workers)))
# Leaving the with block waits for all submitted tasks to finish
logger.debug('Finishing Pool')

DEBUG:root:Starting Pool
DEBUG:root:  Thread 0 is working for 5 seconds.
DEBUG:root:  Thread 1 is working for 5 seconds.
DEBUG:root:  Thread 2 is working for 5 seconds.
DEBUG:root:  Thread 3 is working for 5 seconds.
DEBUG:root:  Thread 4 is working for 5 seconds.
DEBUG:root:  Thread 0 is complete.
DEBUG:root:  Thread 1 is complete.
DEBUG:root:  Thread 2 is complete.
DEBUG:root:  Thread 3 is complete.
DEBUG:root:  Thread 4 is complete.
DEBUG:root:Finishing Pool


## Passing Multiple Values to Work
* Our work function takes both the thread name (number) & seconds of work
* Use a lambda to wrap the function & unpack our arguments

In [11]:
workers=5
seconds=10

logger.debug('Starting Pool')
with ThreadPoolExecutor(max_workers=workers) as pool:
    # One (thread_name, seconds) tuple per task
    args = ((i, seconds) for i in range(workers))
    # The lambda unpacks each tuple into work's two positional parameters.
    # list() drains the lazy result iterator so that an exception raised
    # inside a worker is re-raised here instead of being silently dropped.
    list(pool.map(lambda p: work(*p), args))
logger.debug('Finishing Pool')

DEBUG:root:Starting Pool
DEBUG:root:  Thread 0 is working for 10 seconds.
DEBUG:root:  Thread 1 is working for 10 seconds.
DEBUG:root:  Thread 2 is working for 10 seconds.
DEBUG:root:  Thread 3 is working for 10 seconds.
DEBUG:root:  Thread 4 is working for 10 seconds.
DEBUG:root:  Thread 0 is complete.
DEBUG:root:  Thread 1 is complete.
DEBUG:root:  Thread 2 is complete.
DEBUG:root:  Thread 4 is complete.
DEBUG:root:  Thread 3 is complete.
DEBUG:root:Finishing Pool


# Threads & Shared State
* In this simple example:
    * We have some shared state in a class
    * We create three threads 
    * Each thread reads the current value, performs work and reads the current value again
    * Finally it updates the current value
* Ideally there is no consistency problem in the shared state, meaning:
    * We expect the value read before starting work and the value read after working to be the same
    * Without synchronization another thread can change the value in between, as the output below shows

In [12]:
class Shared:
    """Holder for one integer counter, with no synchronisation of its own."""

    def __init__(self):
        # The counter starts at zero
        self.shared = 0

    def get(self):
        """Return the counter's current value."""
        current = self.shared
        return current

    def increment(self):
        """Add one to the counter using a plain read-modify-write."""
        self.shared = self.shared + 1
        
# Single instance that every worker thread reads and updates
shared = Shared()

def update(thread_name, seconds=1):
    """Read the shared value, sleep, read it again, log the comparison, then increment.

    Args:
        thread_name: Label used in the log message.
        seconds: How long to sleep between the two reads (default 1).
    """
    start = shared.get()
    time.sleep(seconds)
    end = shared.get()

    # True only when no other thread changed the value during the sleep
    valid = (start == end)
    logging.debug(f'  Thread {thread_name} : Starting {start} Ending {end} ... Valid {valid}' )

    shared.increment()
    
workers = 5
logger.debug('Starting Pool')
with ThreadPoolExecutor(max_workers=workers) as pool:
    # Drain the lazy result iterator so an exception raised inside a worker
    # is re-raised here rather than being silently discarded.
    list(pool.map(update, range(workers)))
logger.debug('Finishing Pool')

DEBUG:root:Starting Pool
DEBUG:root:  Thread 0 : Starting 0 Ending 0 ... Valid True
DEBUG:root:  Thread 4 : Starting 0 Ending 0 ... Valid True
DEBUG:root:  Thread 2 : Starting 0 Ending 0 ... Valid True
DEBUG:root:  Thread 3 : Starting 0 Ending 1 ... Valid False
DEBUG:root:  Thread 1 : Starting 0 Ending 1 ... Valid False
DEBUG:root:Finishing Pool


## Synchronizing Access
* We fix our shared state class using a Lock
* Requires the coordination of our threads to:
    * Obtain the lock
    * Perform work with the shared state
    * Release the lock
* We'll use a with block to avoid having to explicitly obtain and release

In [13]:
class Shared:
    """Integer counter bundled with a Lock that callers use to guard access.

    The methods do not take the lock themselves; a caller is expected to
    hold ``self.lock`` around the whole sequence of get()/increment() calls.
    """

    def __init__(self):
        # Counter starts at zero; the lock starts out unlocked
        self.lock = Lock()
        self.shared = 0

    def get(self):
        """Return the counter's current value."""
        current = self.shared
        return current

    def increment(self):
        """Add one to the counter using a plain read-modify-write."""
        self.shared = self.shared + 1
        
# Single instance that every worker thread reads and updates
shared = Shared()

def update(thread_name, seconds=1):
    """Do the read / sleep / read / increment sequence while holding shared.lock.

    Args:
        thread_name: Label used in the log messages.
        seconds: How long to sleep between the two reads (default 1).
    """
    logging.debug(f'  Thread {thread_name} : Trying to obtain lock')

    # The with block acquires the lock on entry and releases it on exit
    with shared.lock:
        logging.debug(f'  Thread {thread_name} : Obtained lock')

        start = shared.get()
        time.sleep(seconds)
        end = shared.get()

        # True only when the value did not change during the sleep
        valid = (start == end)
        logging.debug(f'  Thread {thread_name} : Starting {start} Ending {end} Valid {valid}' )

        shared.increment()
        logging.debug(f'  Thread {thread_name} : Releasing lock')
    
workers = 5
logger.debug('Starting Pool')
with ThreadPoolExecutor(max_workers=workers) as pool:
    # Drain the lazy result iterator so an exception raised inside a worker
    # is re-raised here rather than being silently discarded.
    list(pool.map(update, range(workers)))
logger.debug('Finishing Pool')

DEBUG:root:Starting Pool
DEBUG:root:  Thread 0 : Trying to obtain lock
DEBUG:root:  Thread 1 : Trying to obtain lock
DEBUG:root:  Thread 0 : Obtained lock
DEBUG:root:  Thread 2 : Trying to obtain lock
DEBUG:root:  Thread 3 : Trying to obtain lock
DEBUG:root:  Thread 4 : Trying to obtain lock
DEBUG:root:  Thread 0 : Starting 0 Ending 0 Valid True
DEBUG:root:  Thread 0 : Releasing lock
DEBUG:root:  Thread 1 : Obtained lock
DEBUG:root:  Thread 1 : Starting 1 Ending 1 Valid True
DEBUG:root:  Thread 1 : Releasing lock
DEBUG:root:  Thread 2 : Obtained lock
DEBUG:root:  Thread 2 : Starting 2 Ending 2 Valid True
DEBUG:root:  Thread 2 : Releasing lock
DEBUG:root:  Thread 3 : Obtained lock
DEBUG:root:  Thread 3 : Starting 3 Ending 3 Valid True
DEBUG:root:  Thread 3 : Releasing lock
DEBUG:root:  Thread 4 : Obtained lock
DEBUG:root:  Thread 4 : Starting 4 Ending 4 Valid True
DEBUG:root:  Thread 4 : Releasing lock
DEBUG:root:Finishing Pool


# Barrier
* A barrier blocks each waiting thread until the required number of threads have called wait() (or a timeout occurs)
* Useful in compiling results on multiple threads

In [15]:
workers = 3
# One party per worker thread plus one more for the main thread,
# which also calls barrier.wait() later in this cell
barrier = Barrier(workers + 1)

class MyThread(threading.Thread):
    """Thread that sleeps for five seconds and then waits at the shared barrier.

    Args:
        id: Label included in the log messages; stored as ``self.id``.
    """

    def __init__(self, id):
        # Initialise the base Thread first (via super(), so the call matches
        # the declared base class) before attaching our own state.
        super().__init__()
        self.id = id

    def run(self):
        """Body of the thread; invoked in the new thread after start() is called."""
        logger.debug(f'  Running Thread {self.id}')
        # Stand-in for real work
        time.sleep(5)
        logger.debug(f'  Work Completed {self.id}')
        # Signal completion by waiting at the module-level barrier
        barrier.wait()
        
# We create one thread per worker
logger.debug('Main creating threads')
threads = [MyThread(x) for x in range(workers)]

# Start every thread. start() is called purely for its side effect, so a
# plain loop is used instead of building a throwaway list of None values.
logger.debug('Starting all threads')
for worker_thread in threads:
    worker_thread.start()

# The main thread is the final party at the barrier: this call returns
# once every worker thread has also called barrier.wait()
logger.debug('Main Waiting for barrier to be reached')
barrier.wait()
logger.debug('Main Barrier reached')

# We join the completed threads
logger.debug('Joining all threads')
for worker_thread in threads:
    worker_thread.join()

DEBUG:root:Main creating threads
DEBUG:root:Starting all threads
DEBUG:root:  Running Thread 0
DEBUG:root:  Running Thread 1
DEBUG:root:  Running Thread 2
DEBUG:root:Main Waiting for barrier to be reached
DEBUG:root:  Work Completed 0
DEBUG:root:  Work Completed 1
DEBUG:root:  Work Completed 2
DEBUG:root:Main Barrier reached
DEBUG:root:Joining all threads


# Semaphore
* A semaphore is often used to guard scarce resources

In [16]:
# At most this many threads may hold the resource at the same time
max_resources = 2
sema = Semaphore(value = max_resources)

def sema_work(thread_name, seconds=5):
    """Simulate work on a scarce resource guarded by the module-level semaphore.

    Args:
        thread_name: Label used in the log messages.
        seconds: How long to sleep while holding the resource (default 5).

    Raises:
        ValueError: If ``seconds`` is negative (raised by ``time.sleep``).
            The semaphore is still released in that case.
    """
    logging.debug(f'  Thread {thread_name} is attempting to acquire resource.')
    # The with block acquires on entry and always releases on exit, even if
    # the body raises, so a failing task cannot leak a permit and starve
    # the threads still waiting.
    with sema:
        logging.debug(f'  Thread {thread_name} is working for {seconds} seconds.')
        time.sleep(seconds)
        logging.debug(f'  Thread {thread_name} is complete.')

workers = 5
logger.debug('Starting Pool')
with ThreadPoolExecutor(max_workers=workers) as pool:
    # Drain the lazy result iterator so an exception raised inside a worker
    # is re-raised here rather than being silently discarded.
    list(pool.map(sema_work, range(workers)))
logger.debug('Finishing Pool')

DEBUG:root:Starting Pool
DEBUG:root:  Thread 0 is attempting to acquire resource.
DEBUG:root:  Thread 0 is working for 5 seconds.
DEBUG:root:  Thread 1 is attempting to acquire resource.
DEBUG:root:  Thread 1 is working for 5 seconds.
DEBUG:root:  Thread 2 is attempting to acquire resource.
DEBUG:root:  Thread 3 is attempting to acquire resource.
DEBUG:root:  Thread 4 is attempting to acquire resource.
DEBUG:root:  Thread 0 is complete.
DEBUG:root:  Thread 2 is working for 5 seconds.
DEBUG:root:  Thread 1 is complete.
DEBUG:root:  Thread 3 is working for 5 seconds.
DEBUG:root:  Thread 2 is complete.
DEBUG:root:  Thread 4 is working for 5 seconds.
DEBUG:root:  Thread 3 is complete.
DEBUG:root:  Thread 4 is complete.
DEBUG:root:Finishing Pool


# Timers
* We can easily create a timer that starts a thread and executes a function after a delay

In [17]:
def timer_event(): 
    """Callback passed to the timer; logs a message when the timer fires."""
    logger.debug('Our timer fired')
  
# Schedule timer_event to be called after 5.0 seconds
timer = threading.Timer(5.0, timer_event) 
# start() returns immediately, so the message below is logged before the timer fires
timer.start() 
logger.debug('Main completed')

DEBUG:root:Main completed
DEBUG:root:Our timer fired


# Limitations of Threads in Python
* GIL - Global Interpreter Lock
* Only one thread can be running python code at a time
* That dramatically reduces the value of the threading modules for many use cases
* It can still be valuable for I/O-bound work, where threads spend most of their time blocked waiting on external resources (the GIL is released during blocking I/O)