<a href="https://colab.research.google.com/github/trefftzc/partition/blob/main/PartitionWithNumbaCUDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Code to solve the partition problem using an NVIDIA GPU
The code below solves the partition problem. It is written in Python and uses Numba to generate code that executes on an NVIDIA GPU. Before running it, make sure that a GPU is selected as the hardware accelerator in the notebook settings.

In [None]:
#
# Program that solves the partition problem in python
# Parallel version with numba
#
import sys
import numpy as np
#import numba
from numba import cuda
from numba.cuda.cudadrv.devicearray import DeviceNDArray
import time

# This is a max reduction
# The documentation is available here:
# https://numba.readthedocs.io/en/stable/cuda/reduction.html
@cuda.reduce
def max_reduce(a, b):
  """Binary operator for the reduction: return the larger of a and b (b on ties)."""
  return a if a > b else b


#
# This is the kernel: the function that every GPU thread executes,
# each thread evaluating one candidate partition
#
@cuda.jit
def evaluatePartition(  array:DeviceNDArray,result:DeviceNDArray,n:np.dtype=np.int64):
  """Kernel: test whether one candidate partition splits the values evenly.

  The thread's global 1-D index is used as a bit mask over the first n
  entries of array: entry i is added to one sum when bit i of the index is
  set and to the other sum when it is clear.

  Parameters:
    array  -- the values to partition; entries 0..n-1 are read
    result -- output array, one slot per candidate partition; slot k receives
              k when candidate k yields two equal sums and 0 otherwise
    n      -- number of entries of array to consider
  """
  value = cuda.grid(1)
  # A launch may start more threads than there are result slots (the thread
  # count is rounded up to whole blocks), so surplus threads must not write.
  if value >= result.shape[0]:
    return
  sum0s = 0
  sum1s = 0
  mask = 1
  for i in range(n):
    if (mask & value) != 0:
      sum1s += array[i]
    else:
      sum0s += array[i]
    # Move on to the bit that selects the next entry
    mask *= 2
  if sum0s == sum1s:
    result[value] = value
  else:
    result[value] = 0

def printResults(value, n, array):
  """Print the two partitions encoded by the bit mask value.

  Entry i of array (for i in 0..n-1) is listed in the first partition when
  bit i of value is set and in the second partition otherwise. Each
  partition is followed by the sum of its entries.
  """
  print("Solution:\n")
  sections = (("First partition: ", True), ("Second partition: ", False))
  for heading, wantBitSet in sections:
    print(heading)
    total = 0
    for position in range(n):
      bitIsSet = (value & (1 << position)) != 0
      if bitIsSet == wantBitSet:
        print(array[position],end=" ")
        total = total + array[position]
    print(" sum: ",total)
  print()

def parallelFor(n,array,nPartitions):
  """Evaluate all candidate partitions on the GPU using the kernel's forall launcher.

  One kernel task is launched per candidate (nPartitions in total). The
  per-candidate results are then combined with max_reduce on the device
  array; a non-zero maximum is treated as the bit mask of a solution and
  printed, otherwise a "no solution" message is printed.
  """
  hostResult = np.zeros(nPartitions,dtype=np.int64)
  deviceValues = cuda.to_device(array)
  deviceResult = cuda.to_device(hostResult)

  # forall picks the launch configuration for nPartitions tasks
  launcher = evaluatePartition.forall(nPartitions)
  launcher(deviceValues, deviceResult, n)

  # Reduce the device array directly instead of copying it back to the host
  best = max_reduce(deviceResult)

  if not best:
    print("No solution was found.")
    return
  printResults(best, n, array)

def main():
  """Solve one hard-coded partition instance on the GPU in two ways and time both.

  First the kernel is launched with an explicit grid/block configuration and
  the result array is copied back and reduced on the host with np.max. Then
  the same instance is solved through parallelFor. The elapsed wall-clock
  time of each approach is printed.
  """
  # array contains the multiset of integer values
  array=[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,23]
  # n is the size of the array; derived from it so the two cannot disagree
  n = len(array)

  print("The value of n is \n",n)

  print("The values in the multiset are: \n")
  for item in array:
    print(item,end=" ")

  print("")

  # Only half of all possible partitions need be examined
  # The second half is symmetrical to the first half
  nPartitions = 1 << max(n - 1, 0)

  print("The number of possible partitions is: ",nPartitions)
  result = np.zeros(nPartitions,dtype=np.int64)

  # nPartitions is a power of two, so capping the block size at nPartitions
  # always yields at least one block and exactly nPartitions threads.
  # (The previous nPartitions // 32 gave zero blocks for fewer than 32 partitions.)
  threadsPerBlock = min(512, nPartitions)
  blocksPerGrid = nPartitions // threadsPerBlock

  # Hand the device transfer a NumPy array rather than a Python list
  hostValues = np.asarray(array, dtype=np.int64)

  start = time.time()
  arrayGPU = cuda.to_device(hostValues)
  resultGPU = cuda.to_device(result)
  evaluatePartition[blocksPerGrid,threadsPerBlock]( arrayGPU,resultGPU, n)
  # Copy the result array back to the CPU
  resultGPU.copy_to_host(result)
  end = time.time()
  print("Execution time with Grid Block: ",end-start)

  # Every non-solution slot holds 0, so a non-zero maximum is a solution mask
  solution = np.max(result)

  if solution:
    printResults(solution, n, array)
  else:
    print("No solution was found.")

  # An alternative way, using the forall function
  start = time.time()
  parallelFor(n,array,nPartitions)
  end = time.time()
  print("Execution time with parallelFor: ",end-start)


if __name__ == "__main__":
  # Call twice so that the compilation time is not counted
  # in the timings printed by the second run
  for _ in range(2):
    main()





The value of n is 
 24
The values in the multiset are: 

1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 23 
The number of possible partitions is:  8388608
Execution time with Grid Block:  0.23495078086853027
Solution:

First partition: 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1  sum:  23
Second partition: 
23  sum:  23





Solution:

First partition: 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1  sum:  23
Second partition: 
23  sum:  23

Execution time with parallelFor:  0.6948294639587402
The value of n is 
 24
The values in the multiset are: 

1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 23 
The number of possible partitions is:  8388608
Execution time with Grid Block:  0.10473251342773438
Solution:

First partition: 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1  sum:  23
Second partition: 
23  sum:  23

Solution:

First partition: 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1  sum:  23
Second partition: 
23  sum:  23

Execution time with parallelFor:  0.041609764099121094


