In [9]:
import json
import matplotlib.pyplot as plt
import numpy as np
import warnings
import datetime
import argparse
import threading
import multiprocessing
import os

from tqdm import tqdm

from GBP_Simulations.GBP.data import DataGenerator
from GBP_Simulations.GBP.gbp import run_GaBP_SYNC_ACCELERATED, run_GaBP_HARDWARE_ACCELERATED, run_GaBP_HARDWARE_ACCELERATED_EXCLUSION, run_GaBP_HARDWARE_ACCELERATED_RESIDUAL
from GBP_Simulations.GBP.utilities import HiddenPrints
from GBP_Simulations.GBP.visulisation import set_plot_options, get_plot_colors, NetworkxGraph, AnalyzeResult

# Option 1: Suppress all warnings
warnings.filterwarnings("ignore")

set_plot_options()
colors = get_plot_colors()

data_gen = DataGenerator()
result_analyzer = AnalyzeResult()

In [10]:
# file_path = 'GBP_Simulations/GBP/Raw_Datasets/data/input_MITb_g2o.g2o'
# data_gen.generate_SLAM_dataset(file_path=file_path)

In [11]:




dataset = 'input_MITb_g2o'
filepath_n = 'GBP_Simulations/GBP/Raw_Datasets/gbp_data'
factor_data = os.path.join(filepath_n, f'{dataset}_factor_data.txt')
marginal_data = os.path.join(filepath_n, f'{dataset}_marginal_data.txt')



A, b = data_gen.get_1D_line_matrix(1000, scaling=True, normalized=False)


filename = "slam_1d_1000"
with open(f"Hardware_Model/designs/{filename}.json") as f:
    data = json.load(f)

# Extract designs
graph = data.get('inp_graph_topology', [])
designs = data.get('all_designs', [])





In [12]:
designs[0]['resources']['resources_%']

{'lut': 26.42105263157895,
 'ff': 16.62312030075188,
 'bram': 4.285714285714286,
 'dsp': 13.181818181818182}

In [13]:
limit = 100
design_with_max_nodes_on_chip = None
for design in designs:
    resources_percent = design['resources']['resources_%']
    if resources_percent['lut'] < limit and resources_percent['ff'] < limit and resources_percent['bram'] < limit and resources_percent['dsp'] < limit:
        if design_with_max_nodes_on_chip is None:
            design_with_max_nodes_on_chip = design
        else:
            node_update_per_second_previous = design_with_max_nodes_on_chip['design']['nodes_updt_per_stream']/design_with_max_nodes_on_chip['latency']['latency_total'] 
            node_update_per_second_new = design['design']['nodes_updt_per_stream']/design['latency']['latency_total']
            if node_update_per_second_new > node_update_per_second_previous:
                design_with_max_nodes_on_chip = design
print(design_with_max_nodes_on_chip)
print(design_with_max_nodes_on_chip['design']['nodes_updt_per_stream']/design_with_max_nodes_on_chip['latency']['latency_total'] )

{'design': {'number_pes': 4, 'nodes_updt_per_pe': 128, 'nodes_updt_per_stream': 512, 'compute_unroll_factors': {'inv': 3, 'mul': 3, 'add': 1, 'sub': 1}, 'binary_searcher': {'buffer_size': 9, 'resource_scaling': 1}, 'double_buffering': True, 'capping': None, 'policy': 'random', 'cache': False}, 'resources': {'resources_total': {'lut': 51578, 'ff': 53778, 'bram': 57, 'dsp': 212}, 'resources_pe': {'lut': 11386, 'ff': 11271, 'bram': 13, 'dsp': 53}, 'resources_%': {'lut': 96.95112781954887, 'ff': 50.54323308270677, 'bram': 40.714285714285715, 'dsp': 96.36363636363636}}, 'latency': {'latency_compute': 0.00018732696000000001, 'latency_stream1': 0.00012015, 'latency_stream2': 0.00013001, 'latency_total': 0.00031733696000000005}}
1613426.9389862432


In [14]:
limits = [30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100]
ALL_DESIGNS = {}

for limit in limits:
    design_with_max_nodes_on_chip = None
    for design in designs:
        resources_percent = design['resources']['resources_%']
        if resources_percent['lut'] < limit and resources_percent['ff'] < limit and resources_percent['bram'] < limit and resources_percent['dsp'] < limit:
            if design_with_max_nodes_on_chip is None:
                design_with_max_nodes_on_chip = design
            else:
                node_update_per_second_previous = design_with_max_nodes_on_chip['design']['nodes_updt_per_stream']/design_with_max_nodes_on_chip['latency']['latency_total'] 
                node_update_per_second_new = design['design']['nodes_updt_per_stream']/design['latency']['latency_total']
                if node_update_per_second_new > node_update_per_second_previous:
                    design_with_max_nodes_on_chip = design
    
    n = int(design_with_max_nodes_on_chip['design']['nodes_updt_per_pe'])
    p = int(design_with_max_nodes_on_chip['design']['number_pes'])
    ALL_DESIGNS[limit] = {'n':n, 'p':p, 'latency_total': design_with_max_nodes_on_chip['latency']['latency_total'], 'Rcf': n*p/design_with_max_nodes_on_chip['latency']['latency_total']}

In [15]:
ALL_DESIGNS

{30: {'n': 64,
  'p': 1,
  'latency_total': 0.00026491944000000005,
  'Rcf': 241582.87515631164},
 35: {'n': 128,
  'p': 1,
  'latency_total': 0.00031673696000000004,
  'Rcf': 404120.82000155584},
 40: {'n': 146,
  'p': 1,
  'latency_total': 0.00034296,
  'Rcf': 425705.62164683925},
 45: {'n': 146,
  'p': 1,
  'latency_total': 0.00034296,
  'Rcf': 425705.62164683925},
 50: {'n': 64, 'p': 2, 'latency_total': 0.00024248, 'Rcf': 527878.5879247773},
 55: {'n': 128,
  'p': 2,
  'latency_total': 0.00031733696000000005,
  'Rcf': 806713.4694931216},
 60: {'n': 128,
  'p': 2,
  'latency_total': 0.00031673696000000004,
  'Rcf': 808241.6400031117},
 65: {'n': 146,
  'p': 2,
  'latency_total': 0.00034305122,
  'Rcf': 851184.8463911598},
 70: {'n': 128,
  'p': 3,
  'latency_total': 0.00039594888000000003,
  'Rcf': 969822.164921896},
 75: {'n': 128,
  'p': 3,
  'latency_total': 0.00039534888,
  'Rcf': 971294.0125187656},
 80: {'n': 128,
  'p': 3,
  'latency_total': 0.00031673696000000004,
  'Rcf': 1

# What is the best configuration?

### Sync

In [16]:
sync_convergence_threshold = 1*10**-8 # convergence threshold
convergence_type = 'all' #all
simulation_convergence_threshold = 1*10**-8

P_i, mu_i, N_i, P_ii, mu_ii, P_ij, mu_ij, iter_dist, stand_divs, means, iteration = run_GaBP_SYNC_ACCELERATED(A, 
                                                                                                      b, 
                                                                                                      max_iter=200, 
                                                                                                      mae=True if convergence_type == 'mae' else False, 
                                                                                                      convergence_threshold=simulation_convergence_threshold,
                                                                                                      show=True)
final_mean = list(mu_i)

iteration: 1
14.094303009408895
-----
iteration: 2
14.138171258294365
-----
iteration: 3
1.0209167922892162
-----
iteration: 4
1.0390191207928474
-----
iteration: 5
0.21598890204830318
-----
iteration: 6
0.4217135632665819
-----
iteration: 7
0.2674018043920488
-----
iteration: 8
0.3131029801381572
-----
iteration: 9
0.1450567895600941
-----
iteration: 10
0.11228881383315692
-----
iteration: 11
0.16775772323941307
-----
iteration: 12
0.10567363150161124
-----
iteration: 13
0.026324563740691094
-----
iteration: 14
0.017157421114556974
-----
iteration: 15
0.006677212738127073
-----
iteration: 16
0.006714443855862459
-----
iteration: 17
0.007704000785911667
-----
iteration: 18
0.0052330850015485375
-----
iteration: 19
0.001615450532543014
-----
iteration: 20
0.001373672579479538
-----
iteration: 21
0.0010871056213938614
-----
iteration: 22
0.0005679378256361178
-----
iteration: 23
0.0008210589156461239
-----
iteration: 24
0.0005641384968866668
-----
iteration: 25
0.00075133195864792
-----


### Async

In [17]:
print(ALL_DESIGNS)

{30: {'n': 64, 'p': 1, 'latency_total': 0.00026491944000000005, 'Rcf': 241582.87515631164}, 35: {'n': 128, 'p': 1, 'latency_total': 0.00031673696000000004, 'Rcf': 404120.82000155584}, 40: {'n': 146, 'p': 1, 'latency_total': 0.00034296, 'Rcf': 425705.62164683925}, 45: {'n': 146, 'p': 1, 'latency_total': 0.00034296, 'Rcf': 425705.62164683925}, 50: {'n': 64, 'p': 2, 'latency_total': 0.00024248, 'Rcf': 527878.5879247773}, 55: {'n': 128, 'p': 2, 'latency_total': 0.00031733696000000005, 'Rcf': 806713.4694931216}, 60: {'n': 128, 'p': 2, 'latency_total': 0.00031673696000000004, 'Rcf': 808241.6400031117}, 65: {'n': 146, 'p': 2, 'latency_total': 0.00034305122, 'Rcf': 851184.8463911598}, 70: {'n': 128, 'p': 3, 'latency_total': 0.00039594888000000003, 'Rcf': 969822.164921896}, 75: {'n': 128, 'p': 3, 'latency_total': 0.00039534888, 'Rcf': 971294.0125187656}, 80: {'n': 128, 'p': 3, 'latency_total': 0.00031673696000000004, 'Rcf': 1212362.4600046675}, 85: {'n': 128, 'p': 3, 'latency_total': 0.00031673

In [18]:
async_convergence_threshold = 1*10**-2
ASYNC_ITER = 10
ALL_DESIGNS_LATENCY = ALL_DESIGNS

In [19]:
total_iterations = len(['random', 'random-exclusion', 'fixed', 'residual']) * len([False, True]) * ASYNC_ITER * len(ALL_DESIGNS)

In [20]:
pbar = tqdm(total=total_iterations, desc="Running GBP Simulations", unit="designs")

for limit, design in ALL_DESIGNS.items():
    NODE_UPDT_PE = design['n']
    PEs = design['p']

    RESULTS = {}

    for policy in  ['random', 'random-exclusion', 'fixed', 'residual']:
        for cache in [False, True]:

            str = f'{policy}_{cache}'

            # variable
            sum_of_iterations = 0
            
            # print(f"policy: {policy}, cache: {cache}")

            # implement multiple iterations
            it = 0
            while (it < ASYNC_ITER):
                if policy == 'fixed':
                    num_nodes = A.shape[0]
                    node_update_schedule = np.arange(num_nodes, dtype=np.int64)
                    P_i, mu_i, iteration = run_GaBP_HARDWARE_ACCELERATED(
                        A, 
                        b, 
                        caching=True, 
                        mode='fixed', 
                        node_update_schedule_enter=node_update_schedule, 
                        node_updates_per_pe=NODE_UPDT_PE, 
                        number_pes=PEs, 
                        TRUE_MEAN=final_mean, 
                        max_iter=10000, 
                        mae=False, 
                        convergence_threshold=async_convergence_threshold, 
                        show=False)

                    if iteration < 5000:
                        sum_of_iterations += iteration
                        it += 1
                    else:
                        iteration = float("inf")
                        print("=========== FIXED: NOT CONVERGING ===========")
                elif policy == 'random':
                    P_i, mu_i, iteration = run_GaBP_HARDWARE_ACCELERATED(
                        A, 
                        b, 
                        node_updates_per_pe=NODE_UPDT_PE, 
                        number_pes=PEs, 
                        TRUE_MEAN=final_mean, 
                        max_iter=5000, 
                        mae=False, 
                        convergence_threshold=async_convergence_threshold,
                        show=False,
                        mode=policy,
                        caching=cache
                    )
                    if iteration < 5000:
                        sum_of_iterations += iteration
                        it += 1
                    else:
                        iteration = float("inf")
                        print("=========== RANDOM-EXCLUSION: NOT CONVERGING ===========")
                elif policy == 'random-exclusion':
                    P_i, mu_i, iteration, _ = run_GaBP_HARDWARE_ACCELERATED_EXCLUSION(
                        A, 
                        b, 
                        caching=cache,
                        node_updates_per_pe=NODE_UPDT_PE, 
                        number_pes=PEs, 
                        TRUE_MEAN=final_mean, 
                        max_iter=5000, 
                        mae=False, 
                        convergence_threshold=async_convergence_threshold, 
                        show=False
                    )
                    if iteration < 5000:
                        sum_of_iterations += iteration
                        it += 1
                    else:
                        iteration = float("inf")
                        print("=========== RANDOM: NOT CONVERGING ===========")
                elif policy == 'residual':
                    P_i, mu_i, iteration = run_GaBP_HARDWARE_ACCELERATED_RESIDUAL(
                        A, 
                        b, 
                        caching=cache,
                        node_updates_per_pe=NODE_UPDT_PE, 
                        number_pes=PEs, 
                        TRUE_MEAN=final_mean, 
                        max_iter=5000, 
                        mae=False, 
                        convergence_threshold=async_convergence_threshold, 
                        show=False
                    )
                    if iteration < 5000:
                        sum_of_iterations += iteration
                        it += 1
                    else:
                        iteration = float("inf")
                        print("=========== RESIDUAL: NOT CONVERGING ===========")
                else:
                    # print(policy)
                    # print("----------------")
                    raise Exception("Error: No Matching Policy")              

                pbar.update(1)  

                # print(f"-------------- ITERATION = {it} => Streams = {iteration} --------------")
            
            # average
            ave = sum_of_iterations/ASYNC_ITER

            RESULTS[str] = ave

            ALL_DESIGNS_LATENCY[limit]['stream_results'] = RESULTS

            # print("-----------")
pbar.close()

Running GBP Simulations:   0%|          | 0/1200 [00:00<?, ?designs/s]

Running GBP Simulations: 100%|██████████| 1200/1200 [1:08:07<00:00,  3.41s/designs]


In [21]:
ALL_DESIGNS

{30: {'n': 64,
  'p': 1,
  'latency_total': 0.00026491944000000005,
  'Rcf': 241582.87515631164,
  'stream_results': {'random_False': 523.5,
   'random_True': 491.3,
   'random-exclusion_False': 485.7,
   'random-exclusion_True': 478.5,
   'fixed_False': 421.0,
   'fixed_True': 421.0,
   'residual_False': 145.1,
   'residual_True': 125.7}},
 35: {'n': 128,
  'p': 1,
  'latency_total': 0.00031673696000000004,
  'Rcf': 404120.82000155584,
  'stream_results': {'random_False': 262.7,
   'random_True': 235.9,
   'random-exclusion_False': 240.1,
   'random-exclusion_True': 223.5,
   'fixed_False': 210.0,
   'fixed_True': 210.0,
   'residual_False': 88.5,
   'residual_True': 68.3}},
 40: {'n': 146,
  'p': 1,
  'latency_total': 0.00034296,
  'Rcf': 425705.62164683925,
  'stream_results': {'random_False': 234.6,
   'random_True': 223.8,
   'random-exclusion_False': 210.3,
   'random-exclusion_True': 204.1,
   'fixed_False': 184.0,
   'fixed_True': 184.0,
   'residual_False': 79.2,
   'residual_

In [22]:
filename_to_save_design = f"Results/SLAM_1d_1000.json"

# Write the list of dictionaries to the file
with open(filename_to_save_design, 'w') as file:
    json.dump(ALL_DESIGNS, file, indent=4)