In [None]:
'''
let's do a3c in pure torch + mp on cpus
share the global params across workers via torch's share_memory() / share_memory_()
nstates = 3; the action is a single scalar in [-2, 2]
we want to train a policy to maximize reward
since we are doing actor-critic, we also need a value net 
and the loss on each batch is like ppo but without clipping? 
'''

In [None]:
# let's first just split workers, make each do independent matmuls and write to a shared counter 
import multiprocessing as mp 
import torch 
import torch.nn as nn 
import os 
import time
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm 

def worker_matmul(counter, n=50):
    """Multiply two random ``n x n`` matrices, then bump the shared counter.

    The product is thrown away; the multiplication only serves as one unit
    of CPU work to be counted.

    Args:
        counter: Shared integer exposing ``get_lock()`` and ``value``
            (e.g. ``mp.Value('i', 0)``). Incremented by exactly one.
        n: Side length of the two square operands.
    """
    lhs = torch.randn(n, n)
    rhs = torch.randn(n, n)
    torch.matmul(lhs, rhs)

    # Take the counter's lock so this read-modify-write cannot interleave
    # with an increment from another process.
    with counter.get_lock():
        counter.value = counter.value + 1

def worker_loop(counter, nops, n=16):
    """Call ``worker_matmul`` repeatedly until the shared counter reaches ``nops``.

    The check happens after each multiplication, so at least one
    multiplication is always performed, even if the counter already meets
    the target on entry.

    Args:
        counter: Shared integer exposing ``get_lock()`` and ``value``.
        nops: Counter value at (or above) which the loop stops.
        n: Matrix side length forwarded to ``worker_matmul``.
    """
    target_reached = False
    while not target_reached:
        worker_matmul(counter, n)
        # Read the shared total under its lock before deciding to stop.
        with counter.get_lock():
            target_reached = counter.value >= nops

def run_mp(nops, n=16):
    """Time how long a group of worker processes takes to reach ``nops`` matmuls.

    Starts ``max(1, cpu_count // 2)`` daemon processes running
    ``worker_loop`` against one shared counter, polls the counter until it
    reaches ``nops``, then terminates and joins the workers.

    Args:
        nops: Shared-counter value at which the run is considered complete.
        n: Matrix side length forwarded to ``worker_loop``.

    Returns:
        Elapsed seconds (float) from just before the first worker is started
        until the parent observes ``counter.value >= nops``. Process start-up
        cost and up to one polling interval are included.

    Raises:
        RuntimeError: If every worker has exited while the counter is still
            below ``nops`` (the run could otherwise never finish).
    """
    # Shared integer counter, incremented by the workers under its own lock.
    counter = mp.Value('i', 0)

    # os.cpu_count() may return None, and halving it yields 0 on a single-core
    # machine; with zero workers the polling loop below would never terminate.
    num_workers = max(1, (os.cpu_count() or 1) // 2)

    workers = []
    # perf_counter() is monotonic, so the interval cannot be distorted by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    try:
        for _ in range(num_workers):
            p = mp.Process(target=worker_loop, args=(counter, nops, n))
            p.daemon = True
            p.start()
            workers.append(p)

        while True:
            # Sample liveness BEFORE reading the counter: a worker only exits
            # normally once the counter has reached nops, so "nobody alive"
            # followed by "counter still short" means the workers died.
            any_alive = any(p.is_alive() for p in workers)
            with counter.get_lock():
                done = counter.value >= nops
            if done:
                break
            if not any_alive:
                raise RuntimeError(
                    "all worker processes exited before the counter "
                    "reached nops"
                )
            time.sleep(0.01)  # Small sleep to avoid busy waiting

        elapsed_time = time.perf_counter() - start_time
    finally:
        # Always stop the workers, including on error or KeyboardInterrupt,
        # and join them so terminated children are reaped.
        for p in workers:
            p.terminate()
        for p in workers:
            p.join()

    return elapsed_time


96