# Threading
 Threading is the ability to have Python perform one or more functions at the same time, independently from the rest of the program. 
 Python threads are real OS threads, but in CPython the Global Interpreter Lock (GIL) allows only one thread to execute Python code at a time, so in practice they run cooperatively (taking turns and yielding to one another rather than running truly in parallel).
 
## Advantages
- Well understood so easy to implement
- Increase speed

## Disadvantages
- Not designed for CPU-intensive tasks
    - Due to the cooperative nature of the tasks, the CPU will need to pause the runtime to switch to another thread that is a higher priority and then return to the Python task. 
- The programmer is responsible for managing state between threads.
    - Think of performing actions on the data being processed in the threads, i.e. saving the object after the threads run. 

## The basic setup of a thread

**start()**: Start the thread.
``` python
thread = threading.Thread(target=<FUNCTION>)
thread.start()
```


In [None]:
import threading
import time

def print_numbers():
    """Print the numbers 0 through 4, pausing one second after each.

    Intended as a thread target so its output interleaves with whatever
    the main thread prints in the meantime.
    """
    for number in range(5):
        print(f"Number: {number}")
        time.sleep(1)

# Run print_numbers on its own thread; start() returns immediately.
thread = threading.Thread(target=print_numbers)
thread.start()
# The main thread keeps doing its own work while print_numbers runs,
# so the two sets of output interleave.
for i in range(5):
    print("Main thread continues to run...")
    time.sleep(1)

**join()**: Wait for the thread to complete.

``` python
for thread in threads:
    thread.join()
```

In [None]:
import threading
import time
import random

def worker(task_id):
    """Announce the start, sleep a random 1-4 seconds, then announce the end."""
    print(f"Thread-{task_id} starting")
    # A random duration illustrates that threads can end at different times.
    delay = random.randint(1, 4)
    time.sleep(delay)
    print(f"Thread-{task_id} finished")

# Create and start five worker threads, keeping each one so it can be joined later
threads = []
for i in range(5):
    thread = threading.Thread(target=worker, args=(i,))
    threads.append(thread)
    thread.start()

# This loop runs on the main thread while the workers are still running
for i in range(5):
    print("Main thread continues to run...")
    time.sleep(1)

# join() makes the main thread wait until the given thread has finished
for thread in threads:
    thread.join()

# Runs only after every worker thread has been joined
print("All threads have completed.")


## Thread safety

**Lock()**: When threads access shared resources, synchronization is essential to prevent race conditions. The `Lock` object is a common tool for achieving thread safety.

``` python
lock = threading.Lock()

def function():
    with lock:
        #code
```

In [None]:
lock = threading.Lock()


def safe_print(lock):
    """Print a fixed message while holding ``lock``.

    The ``with`` statement acquires the lock on entry and releases it on
    exit, so only one thread at a time can be inside the print call.
    """
    message = "This is thread-safe."
    with lock:
        print(message)


safe_print(lock)

In [None]:
# Shared state for the counter demo: one lock guarding two module-level integers.
lock = threading.Lock()
counter = 0
task = 0


def increment():
    """Bump ``task`` and add its new value to ``counter`` while holding ``lock``."""
    global counter, task
    # Holding the lock across the whole update means only one thread at a
    # time can modify task and counter.
    with lock:
        task = task + 1
        time.sleep(0.1)  # Simulate some work
        counter = counter + task

def increment_Unsafe():
    """Perform the same update as ``increment`` but WITHOUT taking the lock.

    Demonstrates a race: each thread bumps ``task``, sleeps, and only then
    reads ``task`` again to add it to ``counter``. While one thread sleeps,
    other threads can change ``task``, so the value added is typically not
    the one this thread produced.
    """
    global counter
    global task
    task+=1
    time.sleep(0.1)  # Simulate some work
    counter += task
    

# Create 10 threads that all run the unsafe version
threads = [threading.Thread(target=increment_Unsafe) for _ in range(10)]
for thread in threads:
    thread.start()
# Hold until all the threads are done
for thread in threads:
    thread.join()

# Report the shared counter once every thread has finished
print(f"Final counter value: {counter}")


## Lets look at an example to see performance
The following code reads a JSON file from the floatrates website.

### The old way

In [None]:
import threading
import requests
import json
import time

# Download the USD-based rates and pretty-print the entry for JPY
URL = "https://www.floatrates.com/daily/usd.json"
response = requests.get(URL)
print(json.dumps(response.json()['jpy'], indent = 2))
# Last expression in the cell: displays how many threads are currently alive
threading.active_count()

In [None]:
# rates: currency codes to display; bases: currency codes to use as the conversion base
rates = ['eur','jpy','usd','rub','cad']
bases = ['eur','jpy','usd','rub','cad']


In [None]:
# Define a function for the rest of the notebook to use
def fetch_rate(bases, symbols=('eur', 'jpy', 'usd'), timing=False, output=False):
    """Fetch the floatrates.com conversion table for one base currency.

    Args:
        bases: Currency code of the base currency (a single code such as
            ``'usd'``, despite the plural name).
        symbols: Currency codes whose rates are printed when ``output`` is
            true.
        timing: When true, print how long this call took.
        output: When true, print the active thread count and the rates.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Set up timing for the demonstration
    if timing:
        start = time.time()
    if output:
        print("Current active threads: "+str(threading.active_count()))

    # Run the main part of the function to get the rates
    web = "https://www.floatrates.com/daily/"+str(bases)+".json"
    # Without a timeout a stalled connection would block this thread, and
    # anything that joins it, forever.
    response = requests.get(web, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    rate = response.json()
    # The table has no entry for the base itself, so add the identity rate.
    rate[bases] = {'rate': 1}

    # Create a line to output the rate
    if output:
        print_rates(bases, rate, symbols)
    if timing:
        print("Time Elapsed in Thread:  {:.02f}s\n".format((time.time()-start)))
    


def print_rates(base, rate, symbols):
    """Print one line showing ``base`` followed by the rate for each symbol.

    ``rate`` maps a currency code to a dict whose ``'rate'`` entry is
    convertible to ``float``.
    """
    formatted = []
    for symbol in symbols:
        value = float(rate[symbol]['rate'])
        formatted.append(f"{symbol}{value:10.04}")
    rates_line = ", ".join(formatted)
    print(f"{base} = {rates_line}")

In [None]:
# Running the function without threads
a= time.time()

# Fetch each base one after another; each call finishes before the next starts
for base in bases:
    fetch_rate(base, rates, True, True)

# Total wall-clock time for all sequential requests
print("Total Time without threads Elapsed:  {:.02f}s".format(time.time()-a))


    Notice the time it took above. "Current active threads" is the total number of threads this Python process is running.

### Now lets run it as a threaded process:

In [None]:
def threaded(debug=False):
    """Fetch every base currency concurrently, one thread per base.

    ``debug`` is forwarded to ``fetch_rate`` as both its ``timing`` and
    ``output`` flags. Returns once all threads have finished.
    """
    fetchers = [
        threading.Thread(target=fetch_rate, args=(code, rates, debug, debug))
        for code in bases
    ]
    for fetcher in fetchers:
        fetcher.start()

    # Wait for every fetch to complete before returning to the caller
    for fetcher in fetchers:
        fetcher.join()

In [None]:

# Time the threaded version; passing True turns on fetch_rate's timing and output
a= time.time()
threaded(True)
print("Total Time with threads Elapsed:  {:.02f}s".format(time.time()-a))

    Running it combined

In [None]:
# Inputs for the side-by-side comparison below
rates = ['eur','jpy','usd','rub','cad']
bases = ['eur','jpy','usd','rub','cad']
# Alternative, longer list of bases to try:
#bases = ['eur', 'gbp', 'aud', 'chf', 'cad', 'kes', 'bhd', 'egp', 'krw', 'cop', 'bbd', 'djf', 'hnl', 'ugx']

In [None]:

# Sequential run: one request after another, with no per-call output
a= time.time()

for base in bases:
    fetch_rate(base, rates)

print("Total Time without threads Elapsed:  {:.02f}s".format(time.time()-a))

# Threaded run over the same bases, timed the same way for comparison
a= time.time()
threaded()
print("----------------")
print("Total Time with threads Elapsed:  {:.02f}s".format(time.time()-a))


## Thread Pool
A thread pool is an approach to solve some problems within threads and make it safer to work with.

Thread pools start with a predefined number of threads and then queue the remaining work for those threads to pick up. 


Why do we implement Thread Pools?

Thread pools help us solve two major problems with threading:

- Notice that we pass `bases`, a list of data. What if the list has a huge number of items?
- It is very hard to control the rate at which data flows into the system. It is able to handle large amounts of data fast, but there are hard limits on your processing. 

A minor problem, while unlikely in an actual application, is output: `print()` and other slow output calls (print is considered a very slow operation) can produce corrupted or malformed output when several threads write at once. You can see how this could also affect the order in which data is stored. 

In [None]:
from queue import Queue, Empty
from threading import Thread
def worker(work_queue):
    """Drain ``work_queue``, fetching the rates for each queued base currency.

    Runs until the queue is empty. Every item taken from the queue is marked
    done, even when fetching it fails, so ``work_queue.join()`` can always
    return.

    Args:
        work_queue: A ``queue.Queue`` of base-currency codes.
    """
    while True:
        try:
            item = work_queue.get(block=False)
        except Empty:
            # Queue drained: nothing left for this worker to do.
            break
        try:
            # Process the item taken from the queue, not a module-level variable.
            fetch_rate(item, rates, False, True)
        except Exception as exc:
            # Report the failure and keep going so one bad item does not
            # kill the worker and strand the rest of the queue.
            print(f"Failed to fetch rates for {item}: {exc}")
        finally:
            work_queue.task_done()

def threaded_pool():
    """Fetch every base currency using a fixed-size pool of worker threads.

    All entries of ``bases`` are queued, then ``THREAD_POOL_SIZE`` threads
    running ``worker`` pull from that queue. Returns after the queue has been
    fully processed and the worker threads have been joined.
    """
    work_queue = Queue()
    for base in bases:
        work_queue.put(base)

    # Build the pool once, after the queue is filled, so the list also
    # exists when ``bases`` is empty.
    threads = [
        Thread(target=worker, args=(work_queue,))
        for _ in range(THREAD_POOL_SIZE)
    ]

    for thread in threads:
        thread.start()

    # Block until every queued item has been marked done
    work_queue.join()

    # Join the workers so the caller's timing and thread-count output is
    # printed only after they have exited.
    for thread in threads:
        thread.join()
 


In [None]:
# Inputs for the thread-pool run; the commented-out lines are smaller alternatives
rates = ['eur','jpy','usd','rub','cad']
#rates = ['eur','jpy']
#bases = ['eur','jpy','usd','rub','cad']
#bases = ['eur','jpy']
bases = ['eur', 'gbp', 'aud', 'chf', 'cad', 'kes', 'bhd', 'egp', 'krw', 'cop', 'bbd', 'djf', 'hnl', 'ugx']

In [None]:
# Number of worker threads threaded_pool() creates
THREAD_POOL_SIZE = 4

a = time.time()
# Thread count before the pool is created, for comparison with the count afterwards
print("Number of threads before start: "+ str(threading.active_count())+"\n\n\n")

threaded_pool()

# Elapsed wall-clock time for the whole pool run
b=time.time()-a
print("----------------\nTotal Time:  {:.2f}s".format(b))
print("Number of Threads after running: "+ str(threading.active_count())+"\n\n\n")

## Daemon Threads
Daemon Threads run in the background and are often used for tasks that should not block the program from exiting, such as monitoring or cleanup tasks.

Daemon threads will continue to run in the background and can be stopped abruptly when the main thread exits.

Daemon threads are used on operating systems. For large, complex projects C and Java are recommended for a daemon, but for a simple, easily deployable one Python is the choice. 

In [None]:

def background_task():
    """Print a heartbeat message once per second, forever.

    Never returns; intended to run on a daemon thread.
    """
    interval_seconds = 1
    while True:
        print("Running in background...")
        time.sleep(interval_seconds)

# Start background_task on a daemon thread
daemon_thread = threading.Thread(target=background_task, daemon=True)
daemon_thread.start()

# The main thread does a little work of its own while the daemon prints
print("Main thread will exit soon.")
time.sleep(3)
# The daemon=True flag specifies that the thread should not block the program from exiting.

Running in background...
Running in background...


In [1]:
import threading
import os
import time

def watch_log_file(filename, poll_interval=1, stop_event=None):
    """Print lines appended to ``filename`` after this call starts.

    Content already in the file is skipped. Each new line is printed as
    ``New data: <line>`` with surrounding whitespace stripped.

    Args:
        filename: Path of the text file to watch; it must already exist.
        poll_interval: Seconds to wait before checking again when no new
            line is available.
        stop_event: Optional ``threading.Event``. Once it is set the watcher
            returns. With the default ``None`` the function loops forever.
    """
    with open(filename, 'r') as file:
        # Jump to the end so only lines written from now on are reported
        file.seek(0, os.SEEK_END)

        while stop_event is None or not stop_event.is_set():
            line = file.readline()
            if line:
                print(f"New data: {line.strip()}")
                # More lines may already be waiting; read them without pausing
                continue
            # Nothing new yet: wait before polling again
            if stop_event is None:
                time.sleep(poll_interval)
            else:
                # wait() returns early if the event is set during the pause
                stop_event.wait(poll_interval)

def start_watching():
    """Start a daemon thread that runs ``watch_log_file`` on ``log.txt``."""
    # daemon=True makes the thread exit when the main program exits
    watcher = threading.Thread(
        target=watch_log_file,
        args=('log.txt',),
        daemon=True,
    )
    watcher.start()

if __name__ == "__main__":
    # Start the daemon watcher thread, then return to the main thread
    start_watching()

    # Keep the main program running to allow the thread to work
    # (this loop never ends on its own; interrupt the program to stop it)
    while True:
        time.sleep(1)
        print("sleeping")


sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
New data: 
sleeping
New data: hkh
sleeping
New data: hjjlk
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping


KeyboardInterrupt: 