# Benchmark the rlap and scipy solvers

Prerequisite: Bazel is needed to build the rlap library and expose the python wrappers.
```
$ bazel build //rlap:all
```

## Imports

In [None]:
!pip install matplotlib
!pip install scipy
!pip install pandas

import os
os.environ["DATAPATH"] = "bazel-bin"
import time
import numpy as np
import pandas as pd
from rlap import ApproximateCholesky
from scipy.sparse import linalg

### Download the datasets

The datasets can be downloaded from https://graphchallenge.mit.edu/data-sets along with their metadata. However, we also provide a "generator.py" script for generating 3D grid and complete connected graphs of N nodes. The "grid50.tsv" and "connected1000.tsv" files were generated with it. The rest can be downloaded and placed in the `data/` folder.

In [None]:
# Benchmark datasets, one (file name, number of nodes, number of edges) row each.

_dataset_rows = [
    ("connected1000.tsv", 1000, 1000000),
    ("grid50.tsv", 125000, 735000),

    # Uncomment the following rows once the datasets have been downloaded from
    # https://graphchallenge.mit.edu/data-sets and placed in the `data/` folder.

    # ("ca-GrQc_adj.tsv", 5242, 28968),
    # ("ca-HepPh_adj.tsv", 12008, 236978),
    # ("email-Enron_adj.tsv", 36692, 367662),
    # ("soc-Epinions1_adj.tsv", 75879, 811480),
    # ("amazon0302_adj.tsv", 262111, 1799584),
    # ("amazon0312_adj.tsv", 400727, 4699738),
    # ("roadNet-CA_adj.tsv", 1965206, 5533214),
    # ("cit-Patents_adj.tsv", 3774768, 33037894),
]

# Expand each row into the dict form consumed by the benchmark loop.
items = [
    {"dataset": name, "num_nodes": nodes, "num_edges": edges}
    for name, nodes, edges in _dataset_rows
]

# SciPy iterative solvers to benchmark, keyed by the label used in the results.

scipy_solvers = {
    name: getattr(linalg, name)
    for name in ("bicg", "cgs", "lgmres")
}


## Helper methods for running the solvers

In [None]:

def run_rlap(dataset, N):
    """Benchmark the rlap ``ApproximateCholesky`` solver on one dataset.

    For each of the pre-processing modes "order", "coarsen" and "degree" an
    ``ApproximateCholesky`` object is built from ``data/<dataset>``; only the
    ``solve`` call is timed, the construction is not.

    NOTE: the right-hand side ``b`` is not a parameter. It is read from the
    enclosing (notebook) scope and must be defined before this is called.

    Args:
        dataset: File name of the dataset inside the ``data/`` folder.
        N: Passed as both ``nrows`` and ``ncols`` to ``ApproximateCholesky``.

    Returns:
        A list with one dict per mode, with the keys "solver", "dataset",
        "duration" (seconds), "num_iters" and "sparsity_ratio".
    """
    # Loop-invariant values: the input path and the display label.
    filename = "data/" + dataset
    label = dataset.replace("_adj.tsv", "").replace(".tsv", "")

    stats = []
    for pre in ["order", "coarsen", "degree"]:
        print("RLAP: {}".format(pre))
        fact = ApproximateCholesky(filename=filename, nrows=N, ncols=N, pre=pre)

        # perf_counter is monotonic and high resolution, which wall-clock
        # time.time() does not guarantee; that matters for short solves.
        start = time.perf_counter()
        fact.solve(b)
        duration = time.perf_counter() - start

        # Query the solver once so the printed and stored values agree.
        num_iters = fact.get_num_iters()
        sparsity_ratio = fact.get_sparsity_ratio()
        print("STATS", duration, num_iters, sparsity_ratio)

        stats.append({
            "solver": pre,
            "dataset": label,
            "duration": duration,
            "num_iters": num_iters,
            "sparsity_ratio": sparsity_ratio,
        })
    return stats

def run_scipy(dataset):
    """Benchmark every solver in ``scipy_solvers`` on the current system.

    Each solver is called with a relative tolerance of 1e-12 and at most 5000
    iterations. Iterations are counted through the solver's ``callback`` hook.

    NOTE: the matrix ``L`` and the right-hand side ``b`` are not parameters.
    They are read from the enclosing (notebook) scope and must be defined
    before this is called.

    Args:
        dataset: File name of the dataset; only used to label the results.

    Returns:
        A list with one dict per solver, with the keys "solver", "dataset",
        "duration" (seconds), "num_iters" and "sparsity_ratio" (always -1,
        as no sparsity ratio is measured for the SciPy solvers).
    """
    import inspect

    label = dataset.replace("_adj.tsv", "").replace(".tsv", "")

    stats = []
    for solver, solve in scipy_solvers.items():
        print("Scipy: {}".format(solver))
        num_iters = 0

        def scipy_callback(x):
            # Called by the solver during iteration; only the count is kept.
            nonlocal num_iters
            num_iters += 1

        # SciPy 1.12 introduced ``rtol`` as the replacement for ``tol`` and
        # SciPy 1.14 removed ``tol``; use whichever this installation accepts.
        accepted = inspect.signature(solve).parameters
        tol_name = "rtol" if "rtol" in accepted else "tol"
        solver_kwargs = {
            tol_name: 1e-12,
            "maxiter": 5000,
            "callback": scipy_callback,
        }

        # perf_counter is monotonic and high resolution, which wall-clock
        # time.time() does not guarantee.
        start = time.perf_counter()
        _, info = solve(L, b, **solver_kwargs)
        duration = time.perf_counter() - start

        print("STATS", duration, num_iters)
        if info != 0:
            # A non-zero code means the solver did not report success, so the
            # timing above is not that of a converged solve.
            print("WARNING: {} did not converge (info={})".format(solver, info))

        stats.append({
            "solver": solver,
            "dataset": label,
            "duration": duration,
            "num_iters": num_iters,
            "sparsity_ratio": -1,
        })
    return stats
        

In [None]:
# Accumulators for the per-solver statistics gathered over all datasets.
rlap_stats, scipy_stats = [], []
print(len(items))
for item in items:
    dataset, N = item["dataset"], item["num_nodes"]
    print("===================DATASET: {}, N: {}=================".format(dataset, N))
    filename = "data/" + dataset
    # This instance is only used to obtain the Laplacian of the dataset.
    fact = ApproximateCholesky(filename=filename, nrows=N, ncols=N, pre="order")
    L = fact.get_laplacian()
    # Derive the right-hand side b from a random ground-truth solution x_gt.
    # run_rlap and run_scipy read L and b from this scope, so both names
    # must be assigned before the calls below.
    x_gt = np.random.rand(N)
    b = L*x_gt
    # Run both solver families and append their statistics.
    rlap_stats += run_rlap(dataset=dataset, N=N)
    scipy_stats += run_scipy(dataset=dataset)

## Tabulate the collected statistics

In [None]:
# Merge the rlap and scipy results and keep only the tabulated columns.
tabulated_columns = ["duration", "num_iters", "solver", "dataset"]
stats_df = pd.DataFrame(rlap_stats + scipy_stats)[tabulated_columns]

In [None]:
# Reshape to one row per dataset, with a column per (statistic, solver) pair.
stats_df = stats_df.pivot(
    index="dataset",
    columns="solver",
    values=["duration", "num_iters"],
)

In [None]:
# Render floats with three decimal places, then display the table.
pd.options.display.precision = 3
stats_df

### Optional export to latex format

In [None]:
# Print the table as LaTeX source so it can be pasted into a document.
latex_table = stats_df.to_latex()
print(latex_table)