# Ejemplo 1 Python y cuda


## Paso 0: Instalar modulo pycuda
Este paso se debe realizar una vez por notebook.

In [3]:
!pip install pycuda



In [1]:
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule

import numpy
m = 5
n = 5

# Defino la memoria CPU.
a_cpu = numpy.random.randn( m, n )
a_cpu = a_cpu.astype( numpy.float32() )

b_cpu = numpy.random.randn( m, n )
b_cpu = b_cpu.astype( numpy.float32() )

r_cpu = numpy.empty_like( a_cpu )

# Inicializo la memoria GPU.
a_gpu = cuda.mem_alloc( a_cpu.nbytes )
b_gpu = cuda.mem_alloc( b_cpu.nbytes )

# Copio la memoria GPU.
cuda.memcpy_htod( a_gpu, a_cpu )
cuda.memcpy_htod( b_gpu, a_cpu )

# Defino la lógica del kernel (en lenguaje similar a c (de proposito general))
module = SourceModule("""
__global__ void add( float *a, float *b )
{
  int idx = threadIdx.x + threadIdx.y*blockDim.x;
  a[idx]  = a[idx] + b[idx];
}
""") 

kernel = module.get_function("add")

kernel( a_gpu, b_gpu, block=(m,n,1) )

cuda.memcpy_dtoh( r_cpu, a_gpu )

print( "------------------------------------")
print( a_cpu )
print( "------------------------------------")
print( b_cpu )
print( "------------------------------------")
print( r_cpu )



------------------------------------
[[-1.0223628  -0.9463267  -0.25135282 -0.42176765 -0.49842495]
 [ 0.19651891 -0.6378178  -0.7643275  -0.58470917 -0.43497312]
 [-1.4196063   0.21524853 -0.44553477 -1.2642446   0.89078   ]
 [ 1.1027408  -1.245437   -0.41024438 -0.04699108  0.6916818 ]
 [-1.6535466  -0.09667925 -0.42934862 -0.01499268  0.5022499 ]]
------------------------------------
[[ 1.1914625   1.105957   -0.2088947  -0.7434191  -0.4830116 ]
 [ 0.30171323  0.6471187  -0.55696553 -0.3248315   1.4652334 ]
 [-1.1970657  -0.3954426  -0.578356    0.28670156  0.6803275 ]
 [-0.88739693  0.10012447 -2.2625914   0.21938543  1.955516  ]
 [-0.84720427 -0.2232366   1.467681   -1.4138446  -0.41007283]]
------------------------------------
[[-2.0447257  -1.8926533  -0.50270563 -0.8435353  -0.9968499 ]
 [ 0.39303783 -1.2756356  -1.528655   -1.1694183  -0.86994624]
 [-2.8392127   0.43049705 -0.89106953 -2.528489    1.78156   ]
 [ 2.2054815  -2.490874   -0.82048875 -0.09398216  1.3833636 ]
 [-3.

kernel.cu

  module = SourceModule("""
