**Assignment 1: Parallel Vector Addition**

In [7]:
import numpy  as np
import time
from numba import njit,prange

def serial_vector_op(A,B ,alpha):
  C = np.empty_like(A)
  for i in range (len(A)):
    C[i]= A[i] + B[i]
  return C

@njit(parallel=True)
def paralled_vector_op(A,B ,alpha):
  C = np.empty_like(A)
  for i in prange (len(A)):
    C[i]=  A[i] + B[i] # Corrected line: A[i] instead of [i]
  return C

def main():
  sizes = [10_000,100_000,1_000_000,2_000_000]
  alpha = 2.5

  print(f"{'size':>10} {'serial':>10} {'parallel':>12} | speedup")
  print("-"*55)

  for N in sizes:
    A = np.random.rand(N)
    B = np.random.rand(N)

    # Warm-up call for Numba JIT
    paralled_vector_op(A,B,alpha)

    #serial timing
    start = time.time()
    serial_vector_op(A,B,alpha)
    end = time.time()
    t_serial = end - start

    #parallel timing
    start = time.time()
    paralled_vector_op(A,B,alpha)
    t_parallel = time.time() - start

    speedup = t_serial / t_parallel

    print(f"{N:10d} | {t_serial:10.4f} | {t_parallel:12.4f} | {speedup:6.2f} x")

if __name__=="__main__":
  main()

      size     serial     parallel | speedup
-------------------------------------------------------
     10000 |     0.0038 |       0.0001 |  71.36 x
    100000 |     0.0409 |       0.0003 | 133.63 x
   1000000 |     0.3817 |       0.0021 | 183.10 x
   2000000 |     0.7787 |       0.0040 | 195.61 x


**Assignment 2: Static Scheduling in Parallel Loop**

In [14]:
import numpy as np
import time
from numba import njit,prange,get_num_threads,get_thread_id

#serial computation
def serial_computation(A,B):
  N = len(A)
  C =np.zeros(N)

  for i in range(N):
    C[i] = A[i] * B[i] + A[i]

  return C

#static scheduling
@njit(parallel=True)
def parallel_static_compute(A,B,thread_work):
  N = len(A)
  C =np.zeros(N)

  for i in prange(N):
      tid = get_thread_id()
      thread_work[tid]+= 1
      C[i] = A[i] * B[i] + A[i]
  return C

def main():
  #vector size
  N = 10_000_000

  #initilize arrays
  A = np.random.rand(N)
  B = np.random.rand(N)

  #serial execution
  start= time.time()
  C_serial = serial_computation(A,B)
  serial_time = time.time() - start # Corrected variable name from 'serial' to 'serial_time'


  print("serial execution time :",serial_time)

  #parallel exection
  num_threads = get_num_threads() # Corrected variable and function names
  thread_work = np.zeros(num_threads)

  start_time_parallel = time.time() # Added new variable for parallel start time
  C_parallel = parallel_static_compute(A,B,thread_work)
  parallel_time = time.time() - start_time_parallel # Corrected variable name

  print ("parallel execution time:",parallel_time)

  print("result match:",np.allclose(C_serial,C_parallel))

  #work
  print("\nWork distribution among threads:")
  for i in range(num_threads):
    print(f"Thread {i}  handled {int(thread_work[i])} iterations") # Corrected indexing of thread_work

if __name__=="__main__": # Corrected 'if__name__'
  main()

serial execution time : 7.781860589981079
parallel execution time: 0.9708921909332275
result match: True

Work distribution among threads:
Thread 0  handled 5000000 iterations
Thread 1  handled 5000000 iterations


**Assignment 3: Load Imbalance in Parallel Execution**

In [20]:
from numba.np.ufunc import parallel
import numpy as np
import time
from numba import njit,prange,get_thread_id,get_num_threads

#serial computition
def serial_imbalance(arr):
  N = len(arr)
  result = np.zeros(N)

  for i in range(N):

    #light computation
    if arr[i]< 0.5:
      s=0
      for j in range (100):
        s+= j* arr[i]

    #heavy computation
    else:
      s = 0
      for j in range(5000):
        s += j* arr[i]

    result[i]= s

  return result

#parallel computation
@njit(parallel=True)
def parallel_imbalance(arr,thread_work):
  N = len (arr)
  result = np.zeros(N)

  for i in prange(N):

    tid = get_thread_id()
    thread_work[tid] += 1

    #light computation
    if arr[i]< 0.5:
      s=0
      for j in range(100):
        s += j*arr[i]

    #heavy computation
    else:
      s = 0
      for j in range(5000):
        s += j * arr[i]

    result[i] = s

  return result

def main():

  #array
  N = 200000

  #createe random array
  arr = np.random.rand(N)

  #serial exe
  start= time.time()
  serial_result = serial_imbalance(arr)
  serial_time = time.time() - start

  print("serial execution time:",serial_time)

  #parallel exe
  num_threads = get_num_threads()
  thread_work = np.zeros(num_threads)

  start=time.time()
  parallel_result = parallel_imbalance(arr,thread_work)
  parallel_time = time.time() - start

  print("parallel execution time:",parallel_time)

  #verfifcation
  print("result match:",np.allclose(serial_result,parallel_result))
  #work
  print("\nwork distribution among threads:")
  for i in range(num_threads):
    print(f"thread {i}: {thread_work[i]} iterations")

if __name__ == "__main__":
  main()

serial execution time: 168.17438006401062
parallel execution time: 1.4204299449920654
result match: True

work distribution among threads:
thread 0: 100000.0 iterations
thread 1: 100000.0 iterations


**Assignment 4: Parallel Reduction (Synchronization)**

In [24]:
import numpy as np
import time
from numba import njit,prange,get_num_threads,get_thread_id

#serial computation
def serial_computation(A,B):
  N = len(A)
  C =0# Initialize C as a NumPy array of zeros

  for i in range(N):
    C += A[i]

  return C

#static scheduling
@njit(parallel=True)
def parallel_static_compute(A,B,thread_work):
  N = len(A)
  C = 0  # Initialize C as a NumPy array of zeros

  for i in prange(N):
      tid = get_thread_id()
      thread_work[tid]+= 1
      C+= A[i]
  return C

def main():
  #vector size
  N = 10_000_000

  #initilize arrays
  A = np.random.rand(N)
  B = np.random.rand(N)

  #serial execution
  start= time.time()
  C_serial = serial_computation(A,B)
  serial_time = time.time() - start # Corrected variable name from 'serial' to 'serial_time'


  print("serial execution time :",serial_time)

  #parallel exection
  num_threads = get_num_threads() # Corrected variable and function names
  thread_work = np.zeros(num_threads)

  start_time_parallel = time.time() # Added new variable for parallel start time
  C_parallel = parallel_static_compute(A,B,thread_work)
  parallel_time = time.time() - start_time_parallel # Corrected variable name

  print ("parallel execution time:",parallel_time)

  print("result match:",np.allclose(C_serial,C_parallel))

  #work
  print("\nWork distribution among threads:")
  for i in range(num_threads):
    print(f"Thread {i}  handled {int(thread_work[i])} iterations") # Corrected indexing of thread_work

if __name__=="__main__": # Corrected 'if__name__'
  main()

serial execution time : 1.8818433284759521
parallel execution time: 0.5545713901519775
result match: True

Work distribution among threads:
Thread 0  handled 5000000 iterations
Thread 1  handled 5000000 iterations


**Assignment 5: Barrier Synchronization (Two-Phase
Computation)**

In [27]:
import numpy as np
import time
from numba import njit,prange

# phase 1
@njit(parallel=True)
def phase1(A,B,c):
  N = len(A)

  for i in prange(N):
    c[i]=A[i]+B[i]

#phase 2
@njit(parallel=True)
def phase2(C,D):
  N = len(C)
  for i in prange(N):
    C[i]=C[i] * 2

def main():

  #vector size
  N = 5_000_000

  #initilaze arrays
  A = np.random.rand(N)
  B = np.random.rand(N)
  C = np.zeros(N)
  D =np.zeros(N)

  print("starting phase 1")

  #phase 1
  start = time.time()
  phase1(A,B,C)

  #barrier
  print("barrier reached: all threads completed phase 1 ")

  #phase 2
  phase2(C,D)
  end = time.time()
  print("total exe time:",end-start)

if __name__ == "__main__":
  main()

starting phase 1
barrier reached: all threads completed phase 1 
total exe time: 1.0851411819458008
