<a href="https://colab.research.google.com/github/linyuehzzz/hedetniemi_distance/blob/master/all_pair_distance_cuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##**CUDA for all-pair distance algorithms**
CUDA parallelism for all-pair distance algorithms.  
Yue Lin (lin.3326 at osu.edu)  
Created: 6/12/2020

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


#### **Install packages** 

In [2]:
!pip install timeout-decorator

Collecting timeout-decorator
  Downloading https://files.pythonhosted.org/packages/07/1c/0d9adcb848f1690f3253dcb1c1557b6cf229a93e724977cb83f266cbd0ae/timeout-decorator-0.4.1.tar.gz
Building wheels for collected packages: timeout-decorator
  Building wheel for timeout-decorator (setup.py) ... [?25l[?25hdone
  Created wheel for timeout-decorator: filename=timeout_decorator-0.4.1-cp36-none-any.whl size=5021 sha256=2cc300f4d7dd46b3db553a69dfdfe5648b9bc89c2e9e17c2d1d597b261ad4272
  Stored in directory: /root/.cache/pip/wheels/f1/e6/ea/7387e3629cb46ba65140141f972745b823f4486c6fe884ccb8
Successfully built timeout-decorator
Installing collected packages: timeout-decorator
Successfully installed timeout-decorator-0.4.1


#### **CUDA device query** 

In [None]:
!nvcc --version
from numba import cuda
print(cuda.gpus)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Sun_Jul_28_19:07:16_PDT_2019
Cuda compilation tools, release 10.1, V10.1.243
<Managed Device 0>


In [None]:
%cd /usr/local/cuda-10.1/samples/1_Utilities/deviceQuery
!ls

/usr/local/cuda-10.1/samples/1_Utilities/deviceQuery
deviceQuery.cpp  Makefile  NsightEclipse.xml  readme.txt


In [None]:
!make
!./deviceQuery

make: Nothing to be done for 'all'.
./deviceQuery Starting...

 CUDA Device Query (Runtime API) version (CUDART static linking)

Detected 1 CUDA Capable device(s)

Device 0: "Tesla P100-PCIE-16GB"
  CUDA Driver Version / Runtime Version          10.1 / 10.1
  CUDA Capability Major/Minor version number:    6.0
  Total amount of global memory:                 16281 MBytes (17071734784 bytes)
  (56) Multiprocessors, ( 64) CUDA Cores/MP:     3584 CUDA Cores
  GPU Max Clock rate:                            1329 MHz (1.33 GHz)
  Memory Clock rate:                             715 Mhz
  Memory Bus Width:                              4096-bit
  L2 Cache Size:                                 4194304 bytes
  Maximum Texture Dimension Size (x,y,z)         1D=(131072), 2D=(131072, 65536), 3D=(16384, 16384, 16384)
  Maximum Layered 1D Texture Size, (num) layers  1D=(32768), 2048 layers
  Maximum Layered 2D Texture Size, (num) layers  2D=(32768, 32768), 2048 layers
  Total amount of constant memory: 

#### **Read graph data** 

##### Data from the original article

In [3]:
## [node i, node j, distance between node i and j]
## using data from example 1: San Francisco Bay Area Graph of Time-Distances (in minutes)
data = [[1, 2, 30], [1, 4, 30], [1, 9, 40],
        [2, 3, 25], [2, 4, 40], [3, 4, 50],
        [4, 5, 30], [4, 6, 20], [5, 7, 25],
        [6, 7, 20], [6, 9, 20], [7, 8, 25],
        [8, 9, 20]]
nodes = 9

##### Read random graph

In [None]:
%cd '/content/gdrive/My Drive/Colab Notebooks/hedetniemi_matrix_sum'

## Number of nodes (100/1,000/10,000/100,000/1,000,000)
nodes = 100
print('Nodes: ', nodes)
## Total degree
degree = 3
print('Degree: ', degree)

data = []
with open('graph_n' + str(nodes) + '_d' + str(degree) + '.txt', 'r') as f:
  lines = f.read().splitlines()
  for line in lines:
    l = line.split()
    item = [int(l[0]), int(l[1]), float(l[2])]
    data.append(item)

print(data[0])

/content/gdrive/My Drive/Colab Notebooks/hedetniemi_matrix_sum
Nodes:  100
Degree:  3
[77, 86, 89.39726376738572]


#### **Configure CUDA** 

In [29]:
# number of threads: 32、128、256
NUM_THREADS = 32 

def get_cuda_execution_config(m, n):
    dimBlock = (NUM_THREADS, NUM_THREADS)
    dimGrid = ((n // dimBlock[0]) + 1, (m // dimBlock[1]) + 1)
    return dimGrid, dimBlock


dimGrid, dimBlock = get_cuda_execution_config(nodes, nodes)
print('dimGrid: ', dimGrid)
print('dimBlock: ', dimBlock)

dimGrid:  (1, 1)
dimBlock:  (32, 32)


#### **Hedetniemi distance** 

##### Construct distance matrix

In [41]:
from timeit import default_timer
from numba import cuda, njit
import numpy as np


@cuda.jit
def graph2dist(graph, dist_mtx, n):
  ## initialize distance matrix
  a, b = cuda.grid(2)
  dist_mtx[a,b] = np.inf

  ## calculate distance matrix
  x = cuda.grid(1)
  if x < graph.shape[0]:
    i = int(graph[x,0]) - 1
    j = int(graph[x,1]) - 1
    d = graph[x,2]
    dist_mtx[i,j] = d
    dist_mtx[i,j] = d    
  
  ## set diagonal to 0
  y = cuda.grid(1)
  if y < n:
    dist_mtx[y,y] = 0.0


def distance_matrix(graph, n):
  ## copy data to device
  graph_device = cuda.to_device(graph)
  dist_mtx_device = cuda.device_array(shape=(n,n))

  ## calculate distance matrix
  graph2dist[dimGrid, dimBlock](graph_device, dist_mtx_device, n)
  
  ## copy data to host
  dist_mtx_host = dist_mtx_device.copy_to_host()
 
  return dist_mtx_host


## print time costs
try:
  start = default_timer()
  dist_mtx = distance_matrix(np.array(data), nodes) 
  stop = default_timer()
  print('Time: ', stop - start)
except:
  print('Time: inf')
  raise

Time:  0.17713169599846879


##### Calculate Hedetniemi Matrix Sum

In [None]:
from timeit import default_timer
from numba import cuda, njit
from operator import *
import numpy as np

@njit
def init_mtx(n):
  return np.full((n,n), np.inf), matrix.copy()

@cuda.jit
def cal_mtx(mtx_a_t_1, matrix, mtx_a_t, n):
  x, y = cuda.grid(2)
  if x < n and y < n:
    a = mtx_a_t_1[x]
    b = matrix[:,y]
    summ = np.inf
    for k in range(n):
      summ = min(summ, a[k] + b[k])
    mtx_a_t[x,y] = summ


@njit
def cmp_mtx(mtx_a_t, mtx_a_t_1, p=True):
  if np.array_equal(mtx_a_t, mtx_a_t_1):
    p =  False
  else:
    mtx_a_t_1 = mtx_a_t.copy()

  return p


def hede_distance(matrix, n):
  mtx_a_t, mtx_a_t_1 = init_mtx(n)

  p = False
  while p:
    cal_mtx[dimGrid, dimBlock](mtx_a_t_1, matrix, mtx_a_t, n)
    cuda.synchronize()    
    p = cmp_mtx(mtx_a_t, mtx_a_t_1, p)
  
  return mtx_a_t


## print time costs
try:
  start = default_timer()
  mtx_a_t = hede_distance(dist_mtx, n)
  stop = default_timer()
  print('Time: ', stop - start)
except:
  print('Time: inf')
  raise

## print shortest path matrix
with open('hedet_mtx_nb_cuda.txt', 'w') as fw:
  fw.write('\n'.join(['\t'.join([str(cell) for cell in row]) for row in mtx_a_t.tolist()]))

Time: inf


CudaAPIError: ignored

#### **Floyd–Warshall distance** 

##### Construct distance matrix

In [None]:
from timeit import default_timer
from numba import cuda, njit
import numpy as np


@cuda.jit
def graph2dist(graph, dist_mtx, n):
  ## initialize distance matrix
  a, b = cuda.grid(2)
  dist_mtx[a,b] = np.inf

  ## calculate distance matrix
  x = cuda.grid(1)
  if x < graph.shape[0]:
    i = int(graph[x,0]) - 1
    j = int(graph[x,1]) - 1
    d = graph[x,2]
    dist_mtx[i,j] = d
    dist_mtx[i,j] = d    
  
  ## set diagonal to 0
  y = cuda.grid(1)
  if y < n:
    dist_mtx[y,y] = 0.0


def distance_matrix(graph, n):
  ## copy data to device
  graph_device = cuda.to_device(graph)
  dist_mtx_device = cuda.device_array(shape=(n,n))

  ## calculate distance matrix
  graph2dist[dimGrid, dimBlock](graph_device, dist_mtx_device, n)
  
  ## copy data to host
  dist_mtx_host = dist_mtx_device.copy_to_host()
 
  return dist_mtx_host


## print time costs
try:
  start = default_timer()
  dist_mtx = distance_matrix(np.array(data), nodes) 
  stop = default_timer()
  print('Time: ', stop - start)
except:
  print('Time: inf')
  raise

##### Calculate Floyd–Warshall distance

#### **Compare results** 

In [None]:
!diff 'hedet_mtx_list.txt' 'hedet_mtx_nb_cuda.txt'