<a href="https://colab.research.google.com/github/trefftzc/partition_COLAB_notebooks/blob/main/partition_numba_cuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Partition with NUMBA/CUDA
This python program is based on NUMBA/CUDA.
It solves the partition problem.

Make sure that COLAB has been set up to use a GPU.
In the main menu select the option:
 Runtime
In the pull-down menu select:
 Change runtime type
Select:
 T4 GPU

GPU cards are dedicated computers with their own memory and their own processors.

When programming a GPU,

In [2]:
%%writefile partition_numba_cuda.py
#
# Program that solves the partition problem in python
# Parallel version with numba
#
import sys
import numpy as np
#import numba
from numba import cuda
from numba.cuda.cudadrv.devicearray import DeviceNDArray
import time

#
# This is the kernel, the code that is executed in each processor
# in the GPU
#
@cuda.jit
def evaluatePartition(array:DeviceNDArray,result:DeviceNDArray,n:np.dtype=np.int64):
   value = cuda.grid(1)
   sum0s = 0
   sum1s = 0
   mask = 1
   for i in range(0,n):
    if ((mask & value) != 0):
      sum1s = sum1s + array[i]
    else:
      sum0s = sum0s + array[i]
    mask = mask * 2
   if (sum0s == sum1s):
     # print("Evaluate partition ",value," returns ",value)
     result[value] = value
   else:
    # print("Evaluate partition ",value," returns ",0)
    result[value] = 0

def printResults(value, n, array):
  print("Solution:\n")
  print("First partition: ")
  mask = 1
  sum = 0
  for i in range(0,n):

    if ((mask & value) != 0):
      print(array[i]," ")
      sum = sum + array[i]
    mask = mask * 2
  print(" sum: ",sum)
  print("Second partition: ")
  mask = 1
  sum = 0
  for i in range(0,n):
    if ((mask & value) == 0):
      print(array[i]," ")
      sum = sum + array[i]

    mask = mask * 2

  print(" sum: \n",sum)

def parallelFor(n,array,nPartitions):
  solutionFound = 0
  solution = -1
  result = np.zeros(nPartitions,dtype=np.int64)
  arrayGPU = cuda.to_device(array)
  resultGPU = cuda.to_device(result)
  evaluatePartition.forall(nPartitions)( arrayGPU,resultGPU, n)
  # Copy the result array back to the CPU
  resultGPU.copy_to_host(result)

  # print("At the end array contains: ",result)
  solutionFound = np.max(result)
  solution = solutionFound


  if (solutionFound):
    printResults(solution, n, array)
  else:
    print("No solution was found.")

if __name__ == "__main__":
  start = time.time()
  # Read the problem
  n = int(input())
  valuesString = input()
  values = valuesString.split()
  for i in range(len(values)):
    values[i] = int(values[i])
# Print the instance of the problem
  print("Problem size: ",n)
  print("Problem instance: ",values)
  nPartitions = 2 ** n
  np_array = np.array(values)
  parallelFor(n,np_array,nPartitions)
  end = time.time()
  elapsed = end - start
  print("The program took: ",elapsed," seconds.")
  parallelFor(n,np_array,nPartitions)

Writing partition_numba_cuda.py


In [3]:
%%writefile instanceNoSolution24.Text
24
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 1000000

Writing instanceNoSolution24.Text


In [4]:
!python partition_numba_cuda.py < instanceNoSolution24.Text

Problem size:  24
Problem instance:  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1000000]
No solution was found.
The program took:  1.2094764709472656  seconds.
No solution was found.


In [5]:
!grep 1 instanceNoSolution24.Text

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 1000000


In [6]:
!awk ' {print $0}' instanceNoSolution24.Text

24
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 1000000
