In [None]:
import time
import threading
import os
import multiprocessing
from collections import Counter

def count_words(text):
    """Tally how often each whitespace-separated token occurs in ``text``.

    Returns a ``collections.Counter`` keyed by token; an empty or
    whitespace-only string yields an empty counter.
    """
    tokens = text.split()
    frequencies = Counter()
    frequencies.update(tokens)
    return frequencies

def run_serial(text):
    """Time a single-threaded word count of ``text``.

    The counting result is discarded; only the elapsed time in seconds
    (a float) is returned.
    """
    # perf_counter is the highest-resolution clock available for measuring
    # short durations and is unaffected by system clock adjustments,
    # unlike time.time().
    t0 = time.perf_counter()
    _ = count_words(text)
    return time.perf_counter() - t0

def run_parallel(text, num_threads):
    """Time a threaded word count of ``text`` split into ``num_threads`` parts.

    The words of ``text`` are divided into ``num_threads`` contiguous
    chunks (the last chunk absorbs any remainder), each chunk is counted
    by its own thread, and the wall time from creating the first thread
    to joining the last is returned in seconds. Splitting the text into
    chunks is done before the clock starts, and the per-chunk counts are
    collected but not merged or returned.

    NOTE: on CPython builds with the GIL, threads do not execute Python
    bytecode in parallel, so little or no speedup should be expected for
    this CPU-bound workload.

    Raises:
        ValueError: if ``num_threads`` is less than 1.
    """
    # Zero would divide by zero below; a negative count would silently run
    # no threads and report a meaningless near-zero time.
    if num_threads < 1:
        raise ValueError(f"num_threads must be at least 1, got {num_threads}")

    words = text.split()
    n = len(words)
    chunk = n // num_threads
    parts = []
    for i in range(num_threads):
        start = i * chunk
        # The final part takes everything left so no words are dropped
        # when n is not a multiple of num_threads.
        end = (i + 1) * chunk if i != num_threads - 1 else n
        parts.append(" ".join(words[start:end]))

    results = [None] * num_threads

    def worker(idx, part):
        # Each thread writes only to its own slot, so no lock is needed.
        results[idx] = count_words(part)

    threads = []
    # perf_counter is the appropriate clock for short benchmark intervals.
    t0 = time.perf_counter()
    for i, part in enumerate(parts):
        th = threading.Thread(target=worker, args=(i, part))
        threads.append(th)
        th.start()
    for th in threads:
        th.join()
    return time.perf_counter() - t0

def main():
    """Benchmark serial vs. threaded word counting on the sample file.

    Reads the default text file, optionally keeps only a leading fraction
    of its words, times the serial run, a one-thread run and a run with
    one thread per reported CPU, then prints the timings together with
    speedup, relative speedup and efficiency. Prints a message and
    returns early when the file does not exist.
    """
    path = "wordcount_sample_2MB.txt"  # default file
    file_is_present = os.path.exists(path)
    if not file_is_present:
        print("File not found.")
        return

    with open(path, "r", encoding="utf-8") as handle:
        text = handle.read()

    # --- adjust `fraction` to benchmark only a leading part of the data ---
    fraction = 1.0  # 1.0 = 100%, 0.5 = 50%, 0.25 = 25%, etc.
    tokens = text.split()
    keep = int(len(tokens) * fraction)
    text = " ".join(tokens[:keep])

    num_threads = multiprocessing.cpu_count()

    t_serial = run_serial(text)
    t_p1 = run_parallel(text, 1)
    t_pN = run_parallel(text, num_threads)

    # A zero-length parallel timing would divide by zero; report infinity.
    if t_pN > 0:
        speedup = t_serial / t_pN
        rel_speedup = t_p1 / t_pN
    else:
        speedup = float('inf')
        rel_speedup = float('inf')

    if num_threads > 0:
        efficiency = speedup / num_threads
    else:
        efficiency = 0.0

    report_lines = [
        f"T_serial: {t_serial:.4f}s",
        f"T_p1: {t_p1:.4f}s",
        f"T_pN ({num_threads} threads): {t_pN:.4f}s",
        f"Speedup: {speedup:.2f}",
        f"Relative Speedup: {rel_speedup:.2f}",
        f"Efficiency: {efficiency:.2f}",
    ]
    for line in report_lines:
        print(line)

# Run the benchmark only when this module is executed as the main program,
# not when it is imported.
if __name__ == "__main__":
    main()


T_serial: 0.0506s
T_p1: 0.0516s
T_pN (2 threads): 0.0532s
Speedup: 0.95
Relative Speedup: 0.97
Efficiency: 0.48
