# Threading
 Threading is the ability to have python perform one or more function at the same time independently from the rest of the program. 
 Python threads are OS threads therefore they run cooperatively (runs in a way that involves mutual assistance in working towards a common goal).
 
## Advantages
- Well understood so easy to implement
- Increase speed

## Disadvantages
- not designed for CPU intensive tasks
    - due to the cooperative nature of the tasks the CPU will need to pause the runtime to switch to another tread that is a higher priority then return to the python task. 
- The programer is responsible for managing states between treads.
    - Think performing actions on the data being processed in the treads. Ie saving the object after the threads run. 

## The basic setup of a threading

**start()**: Start the thread.
``` python
thread = threading.Thread(target=<FUNCTION>)
thread.start()
```


In [1]:
import threading
import time

def print_numbers():
    for i in range(5):
        print(f"Number: {i}")
        time.sleep(2)

thread = threading.Thread(target=print_numbers)
thread.start()
for i in range(10):
    print("Main thread continues to run...")
    time.sleep(1)

Number: 0
Main thread continues to run...
Main thread continues to run...
Number: 1
Main thread continues to run...
Main thread continues to run...
Number: 2
Main thread continues to run...
Main thread continues to run...
Number: 3
Main thread continues to run...
Main thread continues to run...
Number: 4
Main thread continues to run...
Main thread continues to run...


**join()**: Wait for the thread to complete.

``` python
for thread in threads:
    thread.join()
```

In [3]:
import threading
import time
import random

def worker(task_id):
    print(f"Thread-{task_id} starting")
    time.sleep(random.randint(1,4)) # run for a random time to illistrate threads can end at different times
    print(f"Thread-{task_id} finished")

# Creating threads
thread_list = []
for i in range(5):
    thread = threading.Thread(target=worker, args=(i,))
    thread_list.append(thread)
    thread.start()

#this will run
for i in range(5):
    print("Main thread continues to run...")
    time.sleep(1)

# Joining threads tells the code to wait 
for thread in thread_list:
    thread.join()

#will run after 
print("All threads have completed.")


Thread-0 starting
Thread-1 starting
Thread-2 starting
Thread-3 starting
Thread-4 starting
Main thread continues to run...
Thread-4 finished
Main thread continues to run...
Thread-2 finished
Thread-3 finished
Main thread continues to run...
Main thread continues to run...
Thread-0 finished
Thread-1 finished
Main thread continues to run...
All threads have completed.


## Thread safety

**lock()**: Used when threads access shared resources, synchronization is essential to prevent race conditions. The Lock object is a common tool for achieving thread safety.

``` python
lock = threading.Lock()

def function():
    with lock:
        #code
```

In [None]:
lock = threading.Lock()

def safe_print(lock):
    with lock:
        print("This is thread-safe.")

safe_print(lock)

In [None]:
lock = threading.Lock()
counter = 0
task = 0

def increment():
    global counter
    global task
    with lock:  # Lock ensures only one thread modifies the counter at a time
        print(thread.getName)
        task+=1
        print(task)
        time.sleep(0.1)  # Simulate some work
        counter += task

def increment_Unsafe():
    global counter
    global task
    print(thread.getName)
    task+=1
    print(task)
    time.sleep(0.1)  # Simulate some work
    counter += task
    

#create 10 threads
threads = [threading.Thread(target=increment_Unsafe) for _ in range(10)]
for thread in threads:
    thread.start()
#hold untill all the threads are done
for thread in threads:
    thread.join()

print(f"Final counter value: {counter}")


## Lets look at an example to see performance
The following code will read a json file from the web page floatrates

### The old way

In [None]:
import threading
import requests
import json
import time

URL = "https://www.floatrates.com/daily/usd.json"
response = requests.get(URL)
print(json.dumps(response.json()['jpy'], indent = 2))
threading.active_count()

In [None]:
rates = ['eur','jpy','usd','rub','cad']
bases = ['eur','jpy','usd','rub','cad']


In [1]:
#define a function for the rest of the notbook to use
def fetch_rate(bases, symbols =['eur','jpy','usd'], timing = False, output = False):
    """
    A function to fetch the conversion rate of a given input
    Takes in the desired base currentcey and outputs the given
    conversion rate to supplied symbol"""
    
    #setup up time for deminstration
    if timing:
        a = time.time()
    if output:
        print("Current active threads: "+str(threading.active_count()))

    # Run the main part of the function to get the rates
    web = "http://www.floatrates.com/daily/"+str(bases)+".json"
    response = requests.get(web)
    rate = response.json()
    rate[bases]= {'rate':1}
    
    #create a line to output the rate
    if output: 
        print_rates(bases,rate, symbols)
    if timing:
        print("Time Elapsed in Thread:  {:.02f}s\n".format((time.time()-a)))
    


def print_rates(base, rate, symbols):
    rates_line = ", ".join(
        [f"{symbol}{float(rate[symbol]['rate']):10.04}" 
         for symbol in symbols]
    )
    print(f"{base} = {rates_line}")

In [2]:
# running the function without threads
a= time.time()

for base in bases:
    fetch_rate(base, rates)#, True, True)
    
print("Total Time without threads Elapsed:  {:.02f}s".format(time.time()-a))


NameError: name 'time' is not defined

    Notice the above time it took to get the "current active threads is the total number of threads my computer is running

### Now lets run it as a threaded process:

In [None]:
def threaded(debug=False):
    threads = []
    for base in bases:
        thread = threading.Thread(target=fetch_rate, args=(base, rates, debug, debug))
        thread.start()
        threads.append(thread)

    for thread in threads:
        thread.join()    

In [None]:

a= time.time()
threaded(True)
print("Total Time with threads Elapsed:  {:.02f}s".format(time.time()-a))

    Running it combined

In [None]:
rates = ['eur','jpy','usd','rub','cad']
#bases = ['eur','jpy','usd','rub','cad']
bases = ['eur', 'gbp', 'aud', 'chf', 'cad', 'kes', 'bhd', 'egp', 'krw', 'cop', 'bbd', 'djf', 'hnl', 'ugx']

In [None]:

a= time.time()

for base in bases:
    fetch_rate(base, rates)
    
print("Total Time without threads Elapsed:  {:.02f}s".format(time.time()-a))

a= time.time()
threaded(True)
print("----------------")
print("Total Time with threads Elapsed:  {:.02f}s".format(time.time()-a))


## Thread Pool
A thread pool is an approach to solve some problems within threads and make it safer to work with.

Thread pools start with a predefined number of threads and then queue the remaining threads. 


Why do we implement Thread Pools?

thread pools helps us to solve 2 major problems with threading those problems are

- If you notice we pass bases an list of data. What if the list is a huge number of items?
- It is very hard to control the rate that data is flowing into the system. It is able to handle harge amounds of data fast but there are hard limits on your processing. 

A minor problem is while unlikely in an acutal application Print() or any other slower outputing application (print is consitered a very slow application) because of this outputs can be corupted or malformed. You could see how this could lead to order of storing data. 

In [None]:
from queue import Queue, Empty
from threading import Thread
def worker(work_queue):
    while not work_queue.empty():
        try:
            item = work_queue.get(block=False)
        except Empty:
            break
        else:
            fetch_rate(base, rates,False, True)
            work_queue.task_done()

def threaded_pool():            
    work_queue = Queue()

    for base in bases:
        work_queue.put(base)
    threads = [
        Thread(target=worker, args=(work_queue,)) 
        for _ in range(THREAD_POOL_SIZE)
    ]
    
    for thread in threads:
        thread.start()

    work_queue.join()

    while threads:    #used to delay the time output lines
        threads.pop().join
 


In [None]:
rates = ['eur','jpy','usd','rub','cad']
#rates = ['eur','jpy']
#bases = ['eur','jpy','usd','rub','cad']
#bases = ['eur','jpy']
bases = ['eur', 'gbp', 'aud', 'chf', 'cad', 'kes', 'bhd', 'egp', 'krw', 'cop', 'bbd', 'djf', 'hnl', 'ugx']

In [4]:
print(threads)

NameError: name 'threads' is not defined

In [6]:
THREAD_POOL_SIZE = 4

a = time.time()
print("Number of threads before start: "+ str(threading.active_count())+"\n\n\n")

threaded_pool()

b=time.time()-a
print("----------------\nTotal Time:  {:.2f}s".format(b))
print("Number of Threads after running: "+ str(threading.active_count())+"\n\n\n")

Exception in thread Thread-9 (worker):
Traceback (most recent call last):
  File "/usr/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
Exception in thread Thread-10 (worker):
Traceback (most recent call last):
  File "/usr/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3/dist-packages/ipykernel/ipkernel.py", line 761, in run_closure
    self.run()
  File "/usr/lib/python3/dist-packages/ipykernel/ipkernel.py", line 761, in run_closure
Exception in thread Thread-11 (worker):
Traceback (most recent call last):
  File "/usr/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    _threading_Thread_run(self)
  File "/usr/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_57760/2363631298.py", line 10, in worker
NameError: name 'base' is not defined. Did you mean: 'bases'?
Exception in thread Thread-12 (worker):
Traceback (most recent call last):
  File "

Number of threads before start: 7





KeyboardInterrupt: 

## Daemon Threads
Daemon Threads run in the background and are often used for tasks that should not block the program from exiting, such as monitoring or cleanup tasks.

Daemon threads with continue to run in the background and can be exited abrutptly when the main thread exits

Daemon threads are used on OS systems for large complex projects C and Java are recomended for a Daemon but for a simple easy deployable Python is the choice. 

In [None]:

def background_task():
    while True:
        print("Running in background...")
        time.sleep(1)

daemon_thread = threading.Thread(target=background_task, daemon=True)
daemon_thread.start()

print("Main thread will exit soon.")
time.sleep(3)
#The daemon=True flag specifies that the thread should not block the program from exiting.

In [None]:
import threading
import os
import time

def watch_log_file(filename):
    with open(filename, 'r') as file:
        file.seek(0, os.SEEK_END)

        while True:
            line = file.readline()
            if line:
                print(f"New data: {line.strip()}")
            time.sleep(1)

def start_watching():
    log_filename = 'log.txt'
    watch_thread = threading.Thread(target=watch_log_file, args=(log_filename,))
    watch_thread.daemon = True  # This makes the thread exit when the main program exits
    watch_thread.start()

if __name__ == "__main__":
    start_watching()
    
    # Keep the main program running to allow the thread to work
    while True:
        time.sleep(1)
        print("sleeping")


# Multiprocessing
Multiprocessing allows python to run CPU-intensive tasks side by side. This is done by launching multiple and independent copies of the python runtime. 

## Advantages
- True multi tasking by giving each python its own cpu core
    - IE amazing at CPU intensive tasks 
- Better Resorce Utilization
- Doese not work with the same data
- Do not need to manage resorces as intensive
- Easyer code to implement 
- No Threaded pools
- No Queues
- No Exseptions
- No Rate Limitations

        
       In other words a lot less work


## Disadvantages
- Additional overhead on the CPU
- Each subprocess needs to have a copy of the data
    - Due to this each Multiprocess increases the resorces needed.
    - Need to pass resorces between tasks




# Forking - Linux, macOS, Unix

PID - Process identifier is a number used by kernels based OS to uniquely identify an active process.

On a POSIX System (Linux, macOS, Unix) a fork is a system call in python, using os.fork() you create a Child process. 

The program will continue with each independed python of its own. 

You can perform the tasks under Cygwin but it is a bit slow. 
Currently Windows Linux Subsystem can no perform fork 

    All of the following set of code comes from Expert Python Programing 4th edition


In [None]:
import os #OS Manages the CPU tasks

pid_list = [] #List to store the CPU PID's 

tmp = 1 # setup a variable to track the PID
pid_list.append(os.getpid())
child_pid = os.fork() #create 1 fork

if child_pid == 0:
    
    pid_list.append(os.getpid())  
    print("CHLD: TMP value is: %d " %tmp)
    print("CHLD: pids are %s" % pid_list)

else:
    pid_list.append(os.getpid())
    tmp+=1
    print()
    print("PRNT: Child pid: %d" % child_pid)
    print("PRNT: pids are %s" % pid_list)
    print("PRNT: TMP value is: %d " %tmp)
    

Notice that the parrent only has 1 PID during exicution 

Notice that the Child has 2 PID's during exicution

The PID is the address space of the program and deals with its own set of memory independently

To communicate between each of the processes we need to use signals a low level system wide resource

In [None]:
"""
"Multiprocessing" section example showing how
to create new processes with `multiprocessing` module
"""

from multiprocessing import Process
import os


def work(identifier):
    print(
        'hey, i am a process {}, pid: {}'
        ''.format(identifier, os.getpid())
    )


def main():
    processes = [
        Process(target=work, args=(number,))
        for number in range(5)
    ]
    for process in processes:
        process.start()

    while processes:
        processes.pop().join()


if __name__ == "__main__":
    main()
from multiprocessing import Process
import os


def work(identifier):
    print(
        'hey, i am a process {}, pid: {}'
        ''.format(identifier, os.getpid())
    )


def main():
    processes = [
        Process(target=work, args=(number,))
        for number in range(5)
    ]
    for process in processes:
        process.start()

    while processes:
        processes.pop().join()


if __name__ == "__main__":
    main()

In [None]:
"""
!!!!THIS CODE MAY NOT RUN IN JUPYTER!!!
You may need to run this in terminal or another IDE
"""
from multiprocessing import Process, Pipe


class CustomClass:
    pass


def work(connection):
    while True:
        instance = connection.recv()

        if instance:
            print(
                "CHLD: recv: {}".format(instance)
            )

        else:
            return


def main():
    parent_conn, child_conn = Pipe()

    child = Process(target=work, args=(child_conn,))

    for item in (
        42,
        'some string',
        {'one': 1},
        CustomClass(),
        None,
    ):
        print(
            "PRNT: send: {}".format(item)
        )
        parent_conn.send(item)

    child.start()
    child.join()


if __name__ == "__main__":
    main()

In [None]:
import multiprocessing as mp
import random

val = random.random()

def simple_func():
    print(val)


if __name__ == '__main__':
    print('Before multiprocessing: ')
    simple_func()
    print('After multiprocessing:')
    p = mp.Process(target=simple_func)
    p.start()
    p.join()