<a href="https://colab.research.google.com/github/trefftzc/partition_COLAB_notebooks/blob/main/partition_thrust.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Thrust

Thrust is a C++ library that NVIDIA has created to simplify the programming of GPUs.

The web site for Thrust is:
 https://developer.nvidia.com/thrust

 Thrust uses similar design principles to the STL library in C++.

 One of the most used classes in STL is the class vector.

 Thrust extends the class vector with two specialized classes:

 host_vector

 and
  
 device_vector

 host_vector instances are allocated in the memory of the host while device_vector instances are allocated in the memory of the GPU.

 Copying back and forth can be done with a simple assignment.


 The kernels that one writes in CUDA are replaced in Thrust with functors, to which one adds the keywords `__host__` and/or `__device__` to indicate where those functors can be executed.
 Functors are C++ objects that overload the function-call operator, so they can be called like functions and applied to all the entries in a vector.

 One uses Thrust's transform primitive (the counterpart of std::transform in the C++ standard library) to apply a functor to all the elements of a vector and store the results in another vector.

 Thrust also offers a number of powerful primitives, for instance reduction operations.

In [37]:
%%writefile partition_thrust.cu
//
// The Thrust documentation is available at
// https://docs.nvidia.com/cuda/thrust/index.html
//
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/sequence.h>
#include <thrust/transform.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/execution_policy.h>

#include <iostream>



using namespace std;

// In Thrust, kernels are written as functors in C++
// With the additional use of the keywords
// __host__
// __device__
// The keyword __host__ states that this functor can be used on the host
// The keyword __device__ states that this functor can be used on the device


// This functor evaluates a subset encoded by an integer value
// as a possible solution to the partition problem.
// If this subset is indeed a solution, then the result is the
// value of the integer that encodes the subset.
// Otherwise the result is 0.

// Thus the vector with the results will contain non-zero values
// in the entries that encode subsets that are solutions to this
// particular instance of the problem.
// If the instance of the problem does not have any solutions,
// the entire vector with results will contain 0s.

// Functor that checks whether the subset encoded by an integer splits the
// input array into two partitions with equal sums.
//
// Bit i of the encoded value decides which partition array[i] belongs to:
// a set bit adds array[i] to the "1s" partition, a clear bit adds it to the
// "0s" partition.
//
// Members:
//   n     - number of elements to examine in array
//   array - pointer to the n elements; it is dereferenced inside operator(),
//           so it must be readable from wherever the functor is executed
//           (main passes a raw pointer to device memory)
struct evaluateFunctor
{

  const int n;
  const int *array;

  evaluateFunctor(int _n,int *_array) : n(_n),array(_array) {}

  // Returns value when both partitions have the same sum, otherwise 0.
  // Note that the encoding 0 can therefore never be told apart from
  // "not a solution".
  //
  // The operator is const so that the functor can also be invoked through a
  // const object or reference; it does not modify any member.
  __host__ __device__
  int operator()( const int value) const {
    // 64-bit accumulators: adding up to n int values cannot overflow them,
    // whereas int accumulators could overflow and report a false match.
    long long sum0s = 0;
    long long sum1s = 0;
    unsigned int mask = 1;
    for(int i = 0;i < n;i++) {
      if ((mask & value) != 0) {
        sum1s += array[i];
      }
      else {
        sum0s += array[i];
      }
      // Move on to the bit that corresponds to the next element.
      mask <<= 1;
    }
    return (sum0s == sum1s) ? value : 0;
  }

};

// Prints the two partitions described by an encoded subset.
//
// value - integer whose bit i tells which partition array[i] belongs to
// n     - number of elements of array to consider
// array - the elements of the problem instance (host copy)
//
// The first partition lists the elements whose bit is set, the second one
// the elements whose bit is clear; each list is followed by its sum.
void printResults(int value,int n,thrust::host_vector < int > array)
{
  const unsigned int encoded = value;

  std::cout << "Solution:\n" << std::endl;

  // Pass 0 prints the elements with a set bit, pass 1 the remaining ones.
  for (int pass = 0; pass < 2; ++pass) {
    const bool wantBitSet = (pass == 0);
    std::cout << (wantBitSet ? "First partition: " : "Second partition: ")
              << std::endl;

    int total = 0;
    unsigned int bit = 1;
    for (int idx = 0; idx < n; ++idx, bit <<= 1) {
      const bool bitIsSet = (bit & encoded) != 0;
      if (bitIsSet == wantBitSet) {
        std::cout << array[idx] << " ";
        total += array[idx];
      }
    }
    std::cout << " sum: " << total << std::endl;
  }
}

// Reads an instance of the partition problem from standard input (first the
// number of elements n, then the n integer values), evaluates the candidate
// subsets on the GPU with Thrust and prints one solution if any was found.
//
// Returns 0 after a normal run and 1 when the input is missing, malformed
// or out of the supported range.
int main(void) {
    // Subsets are encoded in an int and 2^(n-1) of them are enumerated,
    // so n - 1 must stay below the number of value bits of an int.
    const int maxElements = 31;

    // Read first the size of the problem
    int n;
    if (!(cin >> n)) {
      cerr << "Error: could not read the number of elements." << endl;
      return 1;
    }
    if (n < 1 || n > maxElements) {
      cerr << "Error: the number of elements must be between 1 and "
           << maxElements << "." << endl;
      return 1;
    }

    // Allocate an array in the host and read the n integer values
    thrust::host_vector < int > host_array(n);
    for(int i = 0;i < n;i++) {
      if (!(cin >> host_array[i])) {
        cerr << "Error: could not read element " << i
             << " of " << n << "." << endl;
        return 1;
      }
    }

    // Copy the host_vector to the GPU memory
    thrust::device_vector < int > device_array = host_array;

    // Only 2^(n-1) encodings are evaluated: in all of them the bit of the
    // last element is clear, and the complementary encodings would describe
    // the same two partitions with their roles swapped.
    const int powerOf2 = 1 << (n - 1);
    std::cout << "Number of subsets to evaluate is : " << powerOf2 << std::endl;

    // Generate the encodings 0, 1, ..., powerOf2 - 1 directly in the
    // device_vector; no host copy of the sequence is needed.
    thrust::device_vector<int> encodingOfSubset(powerOf2);
    thrust::sequence(encodingOfSubset.begin(),encodingOfSubset.end());
    thrust::device_vector<int> is_solution(powerOf2);

    // Instantiate the functor with a raw pointer to the device copy
    // of the input values
    evaluateFunctor ef(n,thrust::raw_pointer_cast(device_array.data()));

    // Execute the kernel. In this case a C++ functor:
    // transform applies the functor to every encoding
    // and leaves the results in is_solution
    thrust::transform(encodingOfSubset.begin(), encodingOfSubset.end(),
                      is_solution.begin(), ef);

    // Non-solutions were mapped to 0, so the maximum is non-zero
    // exactly when at least one encoding was reported as a solution.
    int result = thrust::reduce(
                            is_solution.begin(),is_solution.end(),
                            0,
                            thrust::maximum<int>());

    if (result == 0)
      cout << "This instance does not have a solution. " << endl;
    else
      printResults(result,n,host_array);

    return 0;
}


Overwriting partition_thrust.cu


In [38]:
!nvcc partition_thrust.cu -o partition_thrust


In [29]:
%%writefile test_with_solution_24.txt
24
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 23

Writing test_with_solution_24.txt


In [39]:
!time ./partition_thrust < test_with_solution_24.txt

Number of subsets to evaluate is : 8388608
Solution:

First partition: 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1  sum: 23
Second partition: 
23  sum: 23

real	0m2.220s
user	0m2.072s
sys	0m0.132s


In [40]:
%%writefile test_with_no_solution_24.txt
24
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 32

Overwriting test_with_no_solution_24.txt


In [41]:
!time ./partition_thrust < test_with_no_solution_24.txt

Number of subsets to evaluate is : 8388608
This instance does not have a solution. 

real	0m2.238s
user	0m2.097s
sys	0m0.132s
