<a href="https://colab.research.google.com/github/techieAKR/Masters-ParallelComputing/blob/main/sheet-1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# scalar-products

In [None]:
# Created by Akshay Kumar


import numpy as np
import time
from multiprocessing import Pool, cpu_count
import matplotlib.pyplot as plt
from typing import List, Tuple

def generate_vectors(size: int = 160) -> Tuple[List[float], List[float]]:
    """Build a pair of random vectors, each holding ``size`` entries.

    Both vectors are drawn with ``np.random.rand`` and converted to plain
    Python lists before being returned as ``(A, B)``.
    """
    first, second = (list(np.random.rand(size)) for _ in range(2))
    return first, second

def sequential_scalar_product(A: List[float], B: List[float]) -> float:
    """Return the dot product of ``A`` and ``B`` from one ``np.dot`` call."""
    product = np.dot(A, B)
    return product

def parallel_chunk_multiply(args: Tuple[np.ndarray, np.ndarray]) -> float:
    """Pool worker: dot product of one pair of vector chunks.

    ``args`` is a 2-tuple ``(chunk_A, chunk_B)``; the pair arrives as a
    single argument because the function is dispatched through ``pool.map``.
    """
    left, right = args
    partial = np.dot(left, right)
    return partial

def parallel_scalar_product(A, B, num_processors):
    """Compute the scalar product of ``A`` and ``B`` with a process pool.

    The vectors are split into at most ``num_processors`` contiguous chunks
    of near-equal size, each chunk pair is reduced by
    ``parallel_chunk_multiply`` in a worker process, and the partial
    results are summed.

    Args:
        A: First vector (a sequence of numbers or a NumPy array).
        B: Second vector, same length as ``A``.
        num_processors: Maximum number of worker processes / chunks.

    Returns:
        The sum of the per-chunk dot products; ``0.0`` for empty vectors.

    Raises:
        ValueError: If ``num_processors`` is less than 1 or the vectors
            differ in length.
    """
    if num_processors < 1:
        raise ValueError("num_processors must be at least 1")
    if len(A) != len(B):
        raise ValueError("A and B must have the same length")
    if len(A) == 0:
        # Nothing to distribute; agrees with np.dot on empty vectors.
        return 0.0

    # Never create more chunks than elements, otherwise some would be empty
    # (the previous integer-division chunk size became 0 in that case).
    num_chunks = min(num_processors, len(A))

    # array_split copes with lengths that are not a multiple of num_chunks
    # and always yields exactly num_chunks pieces.
    chunks_A = np.array_split(np.asarray(A), num_chunks)
    chunks_B = np.array_split(np.asarray(B), num_chunks)
    chunks = list(zip(chunks_A, chunks_B))

    with Pool(processes=num_chunks) as pool:
        results = pool.map(parallel_chunk_multiply, chunks)

    return sum(results)


def measure_performance(vector_size: int = 160, num_processors: int = 8,
                       num_trials: int = 1) -> Tuple[float, float]:
    """Time the sequential and the parallel scalar product on the same data.

    One pair of random vectors is generated up front and reused for every
    trial. Each parallel measurement covers the whole call to
    ``parallel_scalar_product``, including any setup it performs.

    Args:
        vector_size: Number of elements in each generated vector.
        num_processors: Worker count passed to ``parallel_scalar_product``.
        num_trials: How many times each implementation is timed (>= 1).

    Returns:
        ``(mean_sequential_seconds, mean_parallel_seconds)`` over all trials.

    Raises:
        ValueError: If ``num_trials`` is less than 1.
    """
    if num_trials < 1:
        raise ValueError("num_trials must be at least 1")

    seq_times = []
    par_times = []

    A, B = generate_vectors(vector_size)

    for _ in range(num_trials):
        # perf_counter is the clock intended for measuring short intervals;
        # time.time() can jump when the system clock is adjusted.
        start = time.perf_counter()
        seq_result = sequential_scalar_product(A, B)
        seq_times.append(time.perf_counter() - start)

        start = time.perf_counter()
        par_result = parallel_scalar_product(A, B, num_processors)
        par_times.append(time.perf_counter() - start)

    # Print results only once after the loop (values from the last trial)
    print(f"Sequential result: {seq_result}")
    print(f"Parallel result: {par_result}")
    print(f"Difference: {abs(seq_result - par_result)}")

    return np.mean(seq_times), np.mean(par_times)

def calculate_metrics(seq_time: float, par_time: float,
                     num_processors: int) -> Tuple[float, float]:
    """Derive ``(speedup, efficiency)`` from a pair of timings.

    Speedup is ``seq_time / par_time``; efficiency is that speedup divided
    by ``num_processors``.
    """
    ratio = seq_time / par_time
    return ratio, ratio / num_processors

def plot_speedup(sizes: List[int], num_processors: int = 8):
    """Benchmark each vector size, plot speedup/efficiency, print a table.

    For every entry in ``sizes`` the sequential and parallel timings are
    taken with ``measure_performance`` and converted by
    ``calculate_metrics``. Two side-by-side panels are drawn (speedup and
    efficiency against vector size, each with a dashed ideal line), then a
    tab-separated summary is printed.

    Returns:
        A list with one dict per size, keyed by ``size``,
        ``sequential_time``, ``parallel_time``, ``speedup`` and
        ``efficiency``.
    """
    records = []
    for n in sizes:
        print(f"\nTesting vector size: {n}")
        t_seq, t_par = measure_performance(n, num_processors)
        gain, eff = calculate_metrics(t_seq, t_par, num_processors)
        records.append(dict(
            size=n,
            sequential_time=t_seq,
            parallel_time=t_par,
            speedup=gain,
            efficiency=eff,
        ))

    # Shared x axis for both panels
    x_values = [rec['size'] for rec in records]

    _fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # One row per panel:
    # (record key, line format, ideal level, ideal label, y label, title)
    panel_specs = (
        ('speedup', 'b-', num_processors,
         f'Ideal Speedup ({num_processors}x)',
         'Speedup', 'Speedup vs Vector Size'),
        ('efficiency', 'g-', 1,
         'Ideal Efficiency (100%)',
         'Efficiency', 'Efficiency vs Vector Size'),
    )
    for axis, (key, fmt, ideal, ideal_label, y_label, title) in zip(axes, panel_specs):
        axis.plot(x_values, [rec[key] for rec in records], fmt, marker='o')
        axis.axhline(y=ideal, color='r', linestyle='--', label=ideal_label)
        axis.set_xlabel('Vector Size')
        axis.set_ylabel(y_label)
        axis.set_title(title)
        axis.grid(True)
        axis.legend()

    plt.tight_layout()
    plt.show()

    # Tabular summary of every measurement
    print("\nDetailed Results:")
    print("Size\tSeq Time\tPar Time\tSpeedup\tEfficiency")
    print("-" * 60)
    for rec in records:
        columns = (
            f"{rec['size']}",
            f"{rec['sequential_time']:.6f}",
            f"{rec['parallel_time']:.6f}",
            f"{rec['speedup']:.2f}",
            f"{rec['efficiency']:.2f}",
        )
        print("\t".join(columns))

    return records

# Main demonstration
if __name__ == "__main__":
    # Use as many workers as the machine reports cores
    num_processors = cpu_count()
    print(num_processors)
    # Vector lengths to benchmark; append more entries to test other sizes.
    sizes = [1_000_000, 5_000_000, 10_000_000]
    results = plot_speedup(sizes, num_processors)




# Steps required for the sequential and parallel implementations

2

Testing vector size: 1000000
Sequential result: 249772.4726325275
Parallel result: 249772.47263252758
Difference: 8.731149137020111e-11

Testing vector size: 5000000
