### GPU Computing for Data Scientists
#### Using CUDA, Jupyter, PyCUDA, ArrayFire and Thrust


https://github.com/QuantScientist/Data-Science-ArrayFire-GPU

In [1]:
import pycuda
from pycuda import compiler
import pycuda.driver as drv

In [2]:
drv.init()
print("%d device(s) found." % drv.Device.count())
           
for ordinal in range(drv.Device.count()):
    dev = drv.Device(ordinal)
    print "Device #%d: %s" % (ordinal, dev.name())
    print "  Compute Capability: %d.%d" % dev.compute_capability()
    print "  Total Memory: %s KB" % (dev.total_memory()//(1024))
    atts = [(str(att), value) 
            for att, value in dev.get_attributes().iteritems()]
    atts.sort()
  
    for att, value in atts:
        print "  %s: %s" % (att, value)

1 device(s) found.
Device #0: GeForce GTX 1080
  Compute Capability: 6.1
  Total Memory: 8308736 KB
  ASYNC_ENGINE_COUNT: 2
  CAN_MAP_HOST_MEMORY: 1
  CLOCK_RATE: 1733500
  COMPUTE_CAPABILITY_MAJOR: 6
  COMPUTE_CAPABILITY_MINOR: 1
  COMPUTE_MODE: DEFAULT
  CONCURRENT_KERNELS: 1
  ECC_ENABLED: 0
  GLOBAL_L1_CACHE_SUPPORTED: 1
  GLOBAL_MEMORY_BUS_WIDTH: 256
  GPU_OVERLAP: 1
  INTEGRATED: 0
  KERNEL_EXEC_TIMEOUT: 0
  L2_CACHE_SIZE: 2097152
  LOCAL_L1_CACHE_SUPPORTED: 1
  MANAGED_MEMORY: 1
  MAXIMUM_SURFACE1D_LAYERED_LAYERS: 2048
  MAXIMUM_SURFACE1D_LAYERED_WIDTH: 32768
  MAXIMUM_SURFACE1D_WIDTH: 32768
  MAXIMUM_SURFACE2D_HEIGHT: 65536
  MAXIMUM_SURFACE2D_LAYERED_HEIGHT: 32768
  MAXIMUM_SURFACE2D_LAYERED_LAYERS: 2048
  MAXIMUM_SURFACE2D_LAYERED_WIDTH: 32768
  MAXIMUM_SURFACE2D_WIDTH: 131072
  MAXIMUM_SURFACE3D_DEPTH: 16384
  MAXIMUM_SURFACE3D_HEIGHT: 16384
  MAXIMUM_SURFACE3D_WIDTH: 16384
  MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS: 2046
  MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH: 32768
  MAXIMUM_

In [3]:
import pycuda.autoinit
import pycuda.driver as cuda

(free,total)=cuda.mem_get_info()
print("Global memory occupancy:%f%% free"%(free*100/total))

for devicenum in range(cuda.Device.count()):
    device=cuda.Device(devicenum)
    attrs=device.get_attributes()

    #Beyond this point is just pretty printing
    print("\n===Attributes for device %d"%devicenum)
    for (key,value) in attrs.iteritems():
        print("%s:%s"%(str(key),str(value)))

Global memory occupancy:98.000000% free

===Attributes for device 0
MAX_THREADS_PER_BLOCK:1024
MAX_BLOCK_DIM_X:1024
MAX_BLOCK_DIM_Y:1024
MAX_BLOCK_DIM_Z:64
MAX_GRID_DIM_X:2147483647
MAX_GRID_DIM_Y:65535
MAX_GRID_DIM_Z:65535
MAX_SHARED_MEMORY_PER_BLOCK:49152
TOTAL_CONSTANT_MEMORY:65536
WARP_SIZE:32
MAX_PITCH:2147483647
MAX_REGISTERS_PER_BLOCK:65536
CLOCK_RATE:1733500
TEXTURE_ALIGNMENT:512
GPU_OVERLAP:1
MULTIPROCESSOR_COUNT:20
KERNEL_EXEC_TIMEOUT:0
INTEGRATED:0
CAN_MAP_HOST_MEMORY:1
COMPUTE_MODE:DEFAULT
MAXIMUM_TEXTURE1D_WIDTH:131072
MAXIMUM_TEXTURE2D_WIDTH:131072
MAXIMUM_TEXTURE2D_HEIGHT:65536
MAXIMUM_TEXTURE3D_WIDTH:16384
MAXIMUM_TEXTURE3D_HEIGHT:16384
MAXIMUM_TEXTURE3D_DEPTH:16384
MAXIMUM_TEXTURE2D_ARRAY_WIDTH:32768
MAXIMUM_TEXTURE2D_ARRAY_HEIGHT:32768
MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES:2048
SURFACE_ALIGNMENT:512
CONCURRENT_KERNELS:1
ECC_ENABLED:0
PCI_BUS_ID:1
PCI_DEVICE_ID:0
TCC_DRIVER:0
MEMORY_CLOCK_RATE:5005000
GLOBAL_MEMORY_BUS_WIDTH:256
L2_CACHE_SIZE:2097152
MAX_THREADS_PER_MULTIP