In [1]:
# Emit two fixed strings. The single-argument call form of print behaves the
# same on Python 2 and Python 3, whereas the print statement is Python-2-only.
print('123')
print('hello world')

%load_ext autoreload
%autoreload 2

123
hello world


In [12]:


"""
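Multiplies two square matrices together using a *single* block of threads and
global memory only. Each thread computes one element of the product and then
normalizes it on the device as the first component of a 3-vector, so the
stored result is *not* the plain matrix product.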
"""

import numpy as np
from pycuda import driver, compiler, gpuarray, tools

# -- initialize the device
import pycuda.autoinit

kernel_code_template = """
#include "math.h"
#include <float.h>

// Normalizes the 3-vector (*a, *b, *c) in place.
// A zero-length vector is left untouched so that we never divide by zero.
__device__ void normalize_cc(float *a, float *b, float *c)
{
    float ta = *a;
    float tb = *b;
    float tc = *c;
    float len = ta*ta + tb*tb + tc*tc;

    if (len == 0.0f)
        return;
    len = sqrtf(len);
    *a = ta/len;
    *b = tb/len;
    *c = tc/len;
}

__global__ void MatrixMulKernel(float *a, float *b, float *c)
{
    // 2D Thread ID (assuming that only *one* block will be executed)
    int tx = threadIdx.x;
    int ty = threadIdx.y;

    // Pvalue is used to store the element of the matrix
    // that is computed by the thread
    float Pvalue = 0;

    // Each thread loads one row of M and one column of N,
    //   to produce one element of P.
    for (int k = 0; k < %(MATRIX_SIZE)s; ++k) {
        float Aelement = a[ty * %(MATRIX_SIZE)s + k];
        float Belement = b[k * %(MATRIX_SIZE)s + tx];
        Pvalue += Aelement * Belement;
    }

    // Treat (Pvalue, aa, bb) as a 3-vector and normalize it in place;
    // only the first component is kept below.
    float aa = %(NORM_PAD_Y)s;
    float bb = %(NORM_PAD_Z)s;

    normalize_cc(&Pvalue, &aa, &bb);

    // Write the matrix to device memory;
    // each thread writes one element
    c[ty * %(MATRIX_SIZE)s + tx] = Pvalue;
}


"""

# define the (square) matrix size
#  note that we'll only use *one* block of threads here
#  as a consequence this number (squared) can't exceed max_threads,
#  see http://documen.tician.de/pycuda/util.html#pycuda.tools.DeviceData
#  for more information on how to get this number for your device
MATRIX_SIZE = 4

# The two extra components of the 3-vector that the kernel normalizes together
# with every product element. They are defined once here and substituted into
# the kernel source so the CPU reference below cannot drift from the GPU code.
NORM_PAD_Y = 12.3
NORM_PAD_Z = 10.0

# Fail early with a readable message instead of an opaque launch failure when
# the single block would need more threads than the device allows.
max_threads = tools.DeviceData().max_threads
if MATRIX_SIZE * MATRIX_SIZE > max_threads:
    raise ValueError(
        "MATRIX_SIZE**2 = %d exceeds the device limit of %d threads per block"
        % (MATRIX_SIZE * MATRIX_SIZE, max_threads))

# create two random square matrices
a_cpu = np.random.randn(MATRIX_SIZE, MATRIX_SIZE).astype(np.float32)
b_cpu = np.random.randn(MATRIX_SIZE, MATRIX_SIZE).astype(np.float32)

# compute reference on the CPU to verify GPU computation.
# The kernel stores P / sqrt(P^2 + y^2 + z^2) rather than the raw product P,
# so the reference has to apply the same normalization (in float32, summing in
# the same order as the device code) for the comparison to be meaningful.
product_cpu = np.dot(a_cpu, b_cpu)
pad_y = np.float32(NORM_PAD_Y)
pad_z = np.float32(NORM_PAD_Z)
length_cpu = np.sqrt(product_cpu ** 2 + pad_y ** 2 + pad_z ** 2)
# A zero length implies a zero product; dividing by 1 leaves it untouched,
# mirroring the early return in normalize_cc.
safe_length_cpu = np.where(length_cpu == 0, np.float32(1), length_cpu)
c_cpu = product_cpu / safe_length_cpu

# transfer host (CPU) memory to device (GPU) memory
a_gpu = gpuarray.to_gpu(a_cpu)
b_gpu = gpuarray.to_gpu(b_cpu)

# create empty gpu array for the result (normalized elements of A * B)
c_gpu = gpuarray.empty((MATRIX_SIZE, MATRIX_SIZE), np.float32)

# get the kernel code from the template
# by specifying the constant MATRIX_SIZE and the normalization padding;
# repr(float(...)) + 'f' always yields a valid C float literal (e.g. 10.0f)
kernel_code = kernel_code_template % {
    'MATRIX_SIZE': MATRIX_SIZE,
    'NORM_PAD_Y': repr(float(NORM_PAD_Y)) + 'f',
    'NORM_PAD_Z': repr(float(NORM_PAD_Z)) + 'f',
    }

# compile the kernel code
mod = compiler.SourceModule(kernel_code)

# get the kernel function from the compiled module
matrixmul = mod.get_function("MatrixMulKernel")

# call the kernel on the card
matrixmul(
    # inputs
    a_gpu, b_gpu,
    # output
    c_gpu,
    # (only one) block of MATRIX_SIZE x MATRIX_SIZE threads
    block = (MATRIX_SIZE, MATRIX_SIZE, 1),
    )

# copy the result back to the host once and reuse it below
c_gpu_host = c_gpu.get()

# print the results
print("-" * 80)
print("Matrix A (GPU):")
print(a_gpu.get())

print("-" * 80)
print("Matrix B (GPU):")
print(b_gpu.get())

print("-" * 80)
print("Matrix C (GPU):")
print(c_gpu_host)

print("-" * 80)
print("CPU-GPU difference:")
print(c_cpu - c_gpu_host)

# float32 arithmetic: allow a small absolute error for near-zero elements
print(np.allclose(c_cpu, c_gpu_host, rtol=1e-5, atol=1e-6))


--------------------------------------------------------------------------------
Matrix A (GPU):
[[ 0.5741099   0.61968637  0.66915023 -0.4434276 ]
 [-2.50126    -0.21544841  1.5955222  -2.2915165 ]
 [ 1.263323    1.9567116   0.5307086   0.679667  ]
 [-0.24789377  0.8190734  -1.2508807  -0.40601698]]
--------------------------------------------------------------------------------
Matrix B (GPU):
[[ 1.2625614   0.8190915  -0.06611554 -1.0133846 ]
 [-1.1254909  -0.0239238   1.041059    0.24825367]
 [-0.43982163  0.05808067 -1.8432946  -0.46903804]
 [-0.99039817 -0.20767169  0.02409087  0.3452121 ]]
--------------------------------------------------------------------------------
Matrix C (GPU):
[[ 0.01086609  0.03696504 -0.0401484  -0.0563625 ]
 [-0.08471391 -0.09265063 -0.18924509  0.05930904]
 [-0.09506237  0.0552796   0.06243412 -0.05095302]
 [-0.01782177 -0.01330787  0.19579656  0.05675238]]
--------------------------------------------------------------------------------
CPU-GPU diffe

In [3]:
# raytracer.py - basic Python raytracer
# Micha Hanselmann, 2017
# ---
# based on http://www.scratchapixel.com/lessons/3d-basic-rendering/introduction-to-ray-tracing

from camera import Camera
from material import Material
from sphere import Sphere
from renderer import Renderer
from renderobject import RenderObject
from vector3 import Vector3

# --- render settings ---
width, height = 640, 480
super_sampling = 1


def _as_render_object(material, shape):
    """Pair `shape` with `material` in a RenderObject and return all three."""
    return material, shape, RenderObject(shape, material)


# --- demo scene: emissive spheres acting as lights ---
light_mat, light_sphere, light_obj = _as_render_object(
    Material(emission_color=Vector3(4, 4, 4)),
    Sphere(Vector3(5, 50, 20), 5),
)

light2_mat, light2_sphere, light2_obj = _as_render_object(
    Material(emission_color=Vector3(2, 2, 2)),
    Sphere(Vector3(0, 10, -5), 5),
)

blue_light_mat, blue_light_sphere, blue_light_obj = _as_render_object(
    Material(emission_color=Vector3(0, 0, 1)),
    Sphere(Vector3(-40, 5, 10), 3),
)

# --- demo scene: a very large sphere standing in for the ground ---
ground_mat, ground_sphere, ground_obj = _as_render_object(
    Material(surface_color=Vector3(0.2, 0.2, 0.2)),
    Sphere(Vector3(0, -10010, 20), 10000),
)

# --- demo scene: four spheres with different surface materials ---
mat1, sph1, obj1 = _as_render_object(
    Material(surface_color=Vector3(0.8, 0.2, 0.2)),
    Sphere(Vector3(10, 1, 50), 5),
)

mat2, sph2, obj2 = _as_render_object(
    Material(surface_color=Vector3(1, 1, 1), transparency=1, ior=1.1),
    Sphere(Vector3(1, -1, 10), 1),
)

mat3, sph3, obj3 = _as_render_object(
    Material(surface_color=Vector3(0.8, 0.8, 1), reflectivity=0.5, transparency=0.8),
    Sphere(Vector3(-4, -1, 20), 2),
)

mat4, sph4, obj4 = _as_render_object(
    Material(surface_color=Vector3(1, 1, 1), reflectivity=1.0),
    Sphere(Vector3(0, 0, 60), 8),
)

# Everything handed to the tracer, in the same order as before.
scene = [
    ground_obj,
    light_obj, light2_obj, blue_light_obj,
    obj1, obj2, obj3, obj4,
]

In [4]:
# Show the assembled scene list. The call form of print works on both
# Python 2 and Python 3, unlike the Python-2-only print statement.
print(scene)

[<renderobject.RenderObject instance at 0x00000000073E9F48>, <renderobject.RenderObject instance at 0x00000000073E9B48>, <renderobject.RenderObject instance at 0x00000000073E9CC8>, <renderobject.RenderObject instance at 0x00000000073E9E08>, <renderobject.RenderObject instance at 0x00000000073F00C8>, <renderobject.RenderObject instance at 0x00000000073F0208>, <renderobject.RenderObject instance at 0x00000000073F0388>, <renderobject.RenderObject instance at 0x00000000073F04C8>]


In [5]:
# Create the camera from a default-constructed Vector3 and the value 30.
# NOTE(review): the Vector3 is presumably the camera position and 30 the field
# of view - confirm against the Camera constructor.
camera = Camera(Vector3(), 30)

In [None]:
# Instantiate the renderer with a tile size of 64.
# NOTE(review): presumably pixels per tile edge - confirm against Renderer.
renderer = Renderer(tilesize=64)


In [5]:
import numpy as np

# Booleans are cast to the requested dtype: True becomes 1.0, False becomes 0.0.
aa = np.array([True, False, False], dtype=np.float32)
# Call form of print: valid on both Python 2 and Python 3.
print(aa[0])

1.0


In [10]:
# Build a 2x3 float32 matrix, then display the element at row 0, column 1.
bb = np.array(
    [
        [1, 2, 4],
        [1, 2, 3],
    ],
    dtype=np.float32,
)
bb[0, 1]

2.0

In [15]:
from tracer_gpu import Tracer_gpu

# Edge length of the square pixel region (x, y in 0..TEST_REGION_SIZE-1) that
# is traced in this test run.
TEST_REGION_SIZE = 5

tracer = Tracer_gpu()

# Collect one camera ray per pixel, together with the pixel it belongs to, so
# the whole batch can be handed to the tracer in a single call.
# TODO: `super_sampling` is not applied here - exactly one ray per pixel.
ray_array = []
ray_from_array = []
for y in range(TEST_REGION_SIZE):
    for x in range(TEST_REGION_SIZE):
        ray = camera.calcRay(x, y, width, height)
        ray_array.append([ray.origin, ray.direction, ray.current_ior])
        ray_from_array.append([x, y])

# ray_array[i] and ray_from_array[i] describe the same ray.
rendered_tile = tracer.trace(ray_array, ray_from_array, scene)

CompileError: nvcc compilation of c:\users\xsy88\appdata\local\temp\tmpgjmmpq\kernel.cu failed
[command: nvcc --cubin -arch sm_61 -m64 -Id:\anaconda\envs\hetero\lib\site-packages\pycuda\cuda kernel.cu]
[stdout:
kernel.cu
]
[stderr:
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt\math_functions.h: warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt\math_functions.h(830): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt\math_functions.h(1768): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt\math_functions.h(2623): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt\math_functions.h(3461): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt\math_functions.h(4410): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt\math_functions.h(5308): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt\math_functions.h(6216): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt\math_functions.h(7096): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt\math_functions.h(7900): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt/device_functions.h: warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt/device_functions.h(775): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt/device_functions.h(1635): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\crt\device_double_functions.h: warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\sm_20_intrinsics.h: warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
c:\program files\nvidia gpu computing toolkit\cuda\v9.2\include\sm_20_intrinsics.h(924): warning C4819: ���ļ����������ڵ�ǰ����ҳ(936)�б�ʾ���ַ����뽫���ļ�����Ϊ Unicode ��ʽ�Է�ֹ���ݶ�ʧ
kernel.cu(56): error: expected a ";"

kernel.cu(77): warning: parsing restarts here after previous syntax error

kernel.cu(102): warning: variable "t" is used before its value is set

kernel.cu(105): warning: variable "hit_point1" is used before its value is set

kernel.cu(32): warning: variable "ray_array_index" was declared but never referenced

kernel.cu(33): warning: variable "output_index" was declared but never referenced

kernel.cu(37): warning: variable "current_obj_i" was set but never used

kernel.cu(39): warning: variable "l1" was declared but never referenced

kernel.cu(39): warning: variable "l2" was declared but never referenced

kernel.cu(39): warning: variable "l3" was declared but never referenced

kernel.cu(40): warning: variable "t_ca" was declared but never referenced

kernel.cu(41): warning: variable "d_squared" was declared but never referenced

kernel.cu(42): warning: variable "radius_squared" was declared but never referenced

kernel.cu(43): warning: variable "t_hc" was declared but never referenced

kernel.cu(114): error: this declaration has no storage class or type specifier

kernel.cu(114): error: identifier "current_obj_i" is undefined

kernel.cu(122): error: expected a declaration

4 errors detected in the compilation of "C:/Users/xsy88/AppData/Local/Temp/tmpxft_00012f64_00000000-10_kernel.cpp1.ii".
]