In [2]:
!pip install pyopencl

Collecting pyopencl
[?25l  Downloading https://files.pythonhosted.org/packages/7a/12/7d4171ecfaf61bafdc4a628263d086b8e75ff89f4ada5458ff1fd16d953c/pyopencl-2020.3.1-cp36-cp36m-manylinux1_x86_64.whl (738kB)
[K     |████████████████████████████████| 747kB 9.3MB/s 
[?25hCollecting pytools>=2017.6
[?25l  Downloading https://files.pythonhosted.org/packages/b7/30/c9362a282ef89106768cba9d884f4b2e4f5dc6881d0c19b478d2a710b82b/pytools-2020.4.3.tar.gz (62kB)
[K     |████████████████████████████████| 71kB 9.0MB/s 
[?25hCollecting appdirs>=1.4.0
  Downloading https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl
Building wheels for collected packages: pytools
  Building wheel for pytools (setup.py) ... [?25l[?25hdone
  Created wheel for pytools: filename=pytools-2020.4.3-py2.py3-none-any.whl size=61374 sha256=93cb2c3538a6580827cc5656e4bbf6940f3fd7bb76f117296f568ab5c577afd9
  Stored in directory: /root/.ca

In [1]:
import time as tm
import numpy as np
import pyopencl as cl
import math

In [2]:
K = 10000
localSize = 80
globalSize = math.ceil(K/localSize) * localSize
groups = math.ceil(K/localSize)


L = 2
np.random.seed(12)
host_vectors_x = np.random.uniform(low=-L, high=L, size=globalSize,).astype(np.float32)
np.random.seed(11)
host_vectors_y = np.random.uniform(low=-L, high=L, size=globalSize).astype(np.float32)
host_labels = np.zeros(globalSize).astype(np.float32)
host_group_sums = np.zeros(groups).astype(np.float32)

plataforms = cl.get_platforms()
print("Plataformas: ", plataforms)

gpu_devices = plataforms[0].get_devices(cl.device_type.GPU)
print("Devices: ", gpu_devices)

context = cl.Context(gpu_devices)
queue = cl.CommandQueue(context)

device_vectors_x = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf= host_vectors_x)
device_vectors_y = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf= host_vectors_y)


device_group_sums = cl.Buffer(context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf= host_group_sums)
device_labels = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, host_labels.nbytes)

print(f"Iniciando algoritmo de Monte Carlo con K={K}")
print(f"Redondeando K a {globalSize}")
print(f"Se generarán {groups} grupos con {localSize} hilos cada uno\n")

program = cl.Program(context, 
"""
__kernel void mandelbrot(
    __global float *vectors_x,
     __global float *vectors_y,
     __global float *device_labels,
      __global float *group_sums)
{
  int global_id = get_global_id(0);
  int local_id = get_local_id(0);
  int group_id = get_group_id(0); 
  int local_size = get_local_size(0);

  float c_x = vectors_x[global_id];
  float c_y = vectors_y[global_id];

  float z_nx = 0;
  float z_ny = 0;
  int i = 0;
  float norm_zn = 0;
  float u, v;
  
  while (i < 10000)
	{
		// Primero obtenemos el z_(n+1)
    u = (z_nx * z_nx) - (z_ny * z_ny);
    v =  2 * z_nx * z_ny;
    z_nx = u + c_x;
    z_ny = v + c_y;

    norm_zn = (z_nx * z_nx) + (z_ny * z_ny);
    if (norm_zn >= 4)
	  {
		  i = 1000000;
      device_labels[global_id] = 1;
	  }
    i += 1;
	}

  // Ahora hacemos la reducción
  
  if(local_id == 0){
    for (int j = 0; j < local_size; j++) {
      group_sums[group_id] += device_labels[global_id + j];
    }
  }

}
"""
).build()


t0_GPU = tm.time()
program.mandelbrot(queue, (globalSize,), (localSize,) , device_vectors_x, device_vectors_y, device_labels, device_group_sums)
cl.enqueue_copy(queue, host_group_sums, device_group_sums)
mandelbrot_complement = float(np.sum(host_group_sums))
ratio = np.float32((globalSize - mandelbrot_complement)/ globalSize)
area = 4*L*L*ratio 
t1_GPU = tm.time()
print(f"\nAproximación superficie: {area}")
print(f"Error: {np.abs(area-1.50659177) }")
print(f"Tiempo de ejecución: {t1_GPU-t0_GPU}")

Plataformas:  [<pyopencl.Platform 'Intel Gen OCL Driver' at 0x7f329943acc0>, <pyopencl.Platform 'Intel(R) CPU Runtime for OpenCL(TM) Applications' at 0x195a1f0>]
Devices:  [<pyopencl.Device 'Intel(R) HD Graphics Kabylake Desktop GT1.5' on 'Intel Gen OCL Driver' at 0x7f329943dfa0>]
Iniciando algoritmo de Monte Carlo con K=10000
Redondeando K a 10000
Se generarán 125 grupos con 80 hilos cada uno


Aproximación superficie: 6.8256001472473145
Error: 5.3190083772473145
Tiempo de ejecución: 0.024228811264038086
