In [10]:
!pip install pyopencl
import sys
sys.path.append('/usr/local/lib/python3.6/site-packages')
import pyopencl as cl
import pyopencl.array as cl_array
import numpy as np

Defaulting to user installation because normal site-packages is not writeable


In [11]:
!lscpu
!nvidia-smi

Arquitectura:                            x86_64
  modo(s) de operación de las CPUs:      32-bit, 64-bit
  Address sizes:                         36 bits physical, 48 bits virtual
  Orden de los bytes:                    Little Endian
CPU(s):                                  2
  Lista de la(s) CPU(s) en línea:        0,1
ID de fabricante:                        GenuineIntel
  Nombre del modelo:                     Intel(R) Core(TM)2 Duo CPU     P8700  @
                                          2.53GHz
    Familia de CPU:                      6
    Modelo:                              23
    Hilo(s) de procesamiento por núcleo: 1
    Núcleo(s) por «socket»:              2
    «Socket(s)»                          1
    Revisión:                            10
    CPU MHz máx.:                        2527,0000
    CPU MHz mín.:                        798,0000
    BogoMIPS:                            5041.87
    Indicadores:                         fpu vme de pse tsc msr pae mce cx8 apic
  

In [12]:
KernelSource = """
__kernel void simpleMultiply(
__global float* c, int Wa, int Wb,
__global float* a, __global float* b) {
//Get global position in Y direction
//Get global position in X direction
//Calculate result of one element
}
"""

In [13]:
def multiply_matrices(a, b, N):
    c = np.zeros((N, N), dtype=np.float32)
    for i in range(N):
        for j in range(N):
            for k in range(N):
                c[i, j] += a[i, k] * b[k, j]
    return c

In [14]:
N = 2500

# Obtener la plataforma
plataform_list = cl.get_platforms()

# Obtener los dispositivos
devices = plataform_list[0].get_devices(device_type = cl.device_type.GPU)

# Crear el contexto
context = cl.Context(devices=devices)

# Crear el Command Queue
queue = cl.CommandQueue(context)

a = np.random.rand(N, N)
b = np.random.rand(N, N)
c_serial = multiply_matrices(a, b, N)


# Create OpenCL buffers for matrices
a_g = cl.Buffer(context, cl.mem_flags.READ_WRITE, a.nbytes)
b_g = cl.Buffer(context, cl.mem_flags.READ_WRITE, b.nbytes)
c_g = cl.Buffer(context, cl.mem_flags.READ_WRITE, c.nbytes)

# Copy matrices to OpenCL buffers
cl.enqueue_copy(context.get_default_queue(), a_g, a)
cl.enqueue_copy(context.get_default_queue(), b_g, b)

# Create OpenCL kernel
kernel = cl.Kernel(context.get_program(None), "simpleMultiply")

# Set kernel arguments
kernel.set_arg(0, c_g)
kernel.set_arg(1, a_g)
kernel.set_arg(2, b_g)

# Execute kernel
cl.enqueue_nd_range_kernel(context.get_default_queue(), kernel, (N, N), None)

# Copy result from OpenCL buffer
cl.enqueue_copy(context.get_default_queue(), c, c_g)

# Check results
assert np.allclose(c_serial, c)

LogicError: Context failed: INVALID_VALUE

In [15]:
##Lo que sigue aca abajo es una alternativa mas modular
###############################################################


def create_opencl_context_and_queue(platform, device):
    """Create an OpenCL context and queue."""
    context = cl.Context(platform=[platform], devices=[device])
    queue = cl.CommandQueue(context)
    return context, queue


def compile_opencl_program(context, kernel_source):
    """Compile an OpenCL program."""
    program = cl.Program(context, kernel_source).build()
    return program


def set_kernel_arguments(kernel, c_g, a_g, b_g):
    """Set the arguments of an OpenCL kernel."""
    kernel.set_arg(0, c_g)
    kernel.set_arg(1, a_g)
    kernel.set_arg(2, b_g)


def copy_results_from_opencl_device(queue, c, c_g):
    """Copy the results of an OpenCL kernel from the device to the host."""
    queue.enqueue_copy(c, c_g)

In [17]:
def multiply_matrices_opencl(a, b, N):
    """Multiply two matrices in parallel on an OpenCL device."""
    # Get the platform and device.
    platform = cl.get_platforms()[0]
    device = platform.get_devices(device_type=cl.device_type.GPU)[0]

    # Create the context and queue.
    context, queue = create_opencl_context_and_queue(platform, device)

    # Create OpenCL buffers for the matrices.
    a_g = cl.Buffer(context, cl.mem_flags.READ_WRITE, a.nbytes)
    b_g = cl.Buffer(context, cl.mem_flags.READ_WRITE, b.nbytes)
    c_g = cl.Buffer(context, cl.mem_flags.READ_WRITE, c.nbytes)

    # Copy the matrices to the OpenCL device.
    queue.enqueue_copy(a_g, a)
    queue.enqueue_copy(b_g, b)

    # Compile the OpenCL program.
    program = compile_opencl_program(context, kernel_source)

    # Create the OpenCL kernel.
    kernel = cl.Kernel(program, "simpleMultiply")

    # Set the kernel arguments.
    set_kernel_arguments(kernel, c_g, a_g, b_g)

    # Execute the kernel.
    queue.enqueue_nd_range_kernel(kernel, (N, N), None)

    # Copy the results from the OpenCL device to the host.
    copy_results_from_opencl_device(queue, c, c_g)

    # Wait for the queue to finish.
    queue.finish()

    return c

#Variables
a = np.random.rand(N, N)
b = np.random.rand(N, N)



# Multiply the matrices using OpenCL.
c = multiply_matrices_opencl(a, b, N)

# Check the results.
assert np.allclose(c_serial, c)


TypeError: __init__(): incompatible constructor arguments. The following argument types are supported:
    1. pyopencl._cl.Context(devices: object = None, properties: object = None, dev_type: object = None)

Invoked with: kwargs: platform=[<pyopencl.Platform 'Portable Computing Language' at 0x7f262de9bd00>], devices=[<pyopencl.Device 'pthread-Intel(R) Core(TM)2 Duo CPU     P8700  @ 2.53GHz' on 'Portable Computing Language' at 0x558263529120>]