-
Notifications
You must be signed in to change notification settings - Fork 18
/
cuda_pathtracer.cu
793 lines (632 loc) · 31.8 KB
/
cuda_pathtracer.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
/*
* CUDA based triangle mesh path tracer using BVH acceleration by Sam lapere, 2016
* BVH implementation based on real-time CUDA ray tracer by Thanassis Tsiodras,
* http://users.softlab.ntua.gr/~ttsiod/cudarenderer-BVH.html
* Interactive camera with depth of field based on CUDA path tracer code
* by Peter Kutz and Yining Karl Li, https://github.com/peterkutz/GPUPathTracer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <cuda.h>
#include <math_functions.h>
#include <vector_types.h>
#include <vector_functions.h>
#include "device_launch_parameters.h"
#include "cutil_math.h"
#include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\extras\CUPTI\include\GL\glew.h"
#include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5\extras\CUPTI\include\GL\glut.h"
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#include <curand.h>
#include <curand_kernel.h>
#include "cuda_pathtracer.h"
#define M_PI 3.1415926535897932384626422832795028841971f
#define TWO_PI 6.2831853071795864769252867665590057683943f
#define NUDGE_FACTOR 1e-3f // epsilon
#define samps 1 // samples
#define BVH_STACK_SIZE 32
#define SCREEN_DIST (height*2)
int texturewidth = 0;
int textureheight = 0;
int total_number_of_triangles;
__device__ int depth = 0;
// Textures for vertices, triangles and BVH data
// (see CudaRender() below, as well as main() to see the data setup process)
texture<uint1, 1, cudaReadModeElementType> g_triIdxListTexture;
texture<float2, 1, cudaReadModeElementType> g_pCFBVHlimitsTexture;
texture<uint4, 1, cudaReadModeElementType> g_pCFBVHindexesOrTrilistsTexture;
texture<float4, 1, cudaReadModeElementType> g_trianglesTexture;
Vertex* cudaVertices;
float* cudaTriangleIntersectionData;
int* cudaTriIdxList = NULL;
float* cudaBVHlimits = NULL;
int* cudaBVHindexesOrTrilists = NULL;
Triangle* cudaTriangles = NULL;
Camera* cudaRendercam = NULL;
struct Ray {
float3 orig; // ray origin
float3 dir; // ray direction
__device__ Ray(float3 o_, float3 d_) : orig(o_), dir(d_) {}
};
enum Refl_t { DIFF, METAL, SPEC, REFR, COAT }; // material types
struct Sphere {
float rad; // radius
float3 pos, emi, col; // position, emission, color
Refl_t refl; // reflection type (DIFFuse, SPECular, REFRactive)
__device__ float intersect(const Ray &r) const { // returns distance, 0 if nohit
// Ray/sphere intersection
// Quadratic formula required to solve ax^2 + bx + c = 0
// Solution x = (-b +- sqrt(b*b - 4ac)) / 2a
// Solve t^2*d.d + 2*t*(o-p).d + (o-p).(o-p)-R^2 = 0
float3 op = pos - r.orig; //
float t, epsilon = 0.01f;
float b = dot(op, r.dir);
float disc = b*b - dot(op, op) + rad*rad; // discriminant
if (disc<0) return 0; else disc = sqrtf(disc);
return (t = b - disc)>epsilon ? t : ((t = b + disc)>epsilon ? t : 0);
}
};
__device__ Sphere spheres[] = {
// sun
{ 1.6, { 0.0f, 2.8, 0 }, { 6, 4, 2 }, { 0.f, 0.f, 0.f }, DIFF }, // 37, 34, 30 X: links rechts Y: op neer
//{ 1600, { 3000.0f, 10, 6000 }, { 17, 14, 10 }, { 0.f, 0.f, 0.f }, DIFF },
// horizon sun2
// { 1560, { 3500.0f, 0, 7000 }, { 50, 25, 2.5 }, { 0.f, 0.f, 0.f }, DIFF }, // 150, 75, 7.5
// sky
//{ 10000, { 50.0f, 40.8f, -1060 }, { 0.1, 0.3, 0.55 }, { 0.175f, 0.175f, 0.25f }, DIFF }, // 0.0003, 0.01, 0.15, or brighter: 0.2, 0.3, 0.6
{ 10000, { 50.0f, 40.8f, -1060 }, { 0.51, 0.51, 0.51 }, { 0.175f, 0.175f, 0.25f }, DIFF },
// ground
{ 100000, { 0.0f, -100001.1, 0 }, { 0, 0, 0 }, { 0.5f, 0.0f, 0.0f }, COAT },
{ 100000, { 0.0f, -100001.2, 0 }, { 0, 0, 0 }, { 0.3f, 0.3f, 0.3f }, DIFF }, // double shell to prevent light leaking
// horizon brightener
{ 110000, { 50.0f, -110048.5, 0 }, { 3.6, 2.0, 0.2 }, { 0.f, 0.f, 0.f }, DIFF },
// mountains
//{ 4e4, { 50.0f, -4e4 - 30, -3000 }, { 0, 0, 0 }, { 0.2f, 0.2f, 0.2f }, DIFF },
// white Mirr
{ 1.1, { 1.6, 0, 1.0 }, { 0, 0.0, 0 }, { 0.9f, .9f, 0.9f }, SPEC }
// Glass
//{ 0.3, { 0.0f, -0.4, 4 }, { .0, 0., .0 }, { 0.9f, 0.9f, 0.9f }, DIFF },
// Glass2
//{ 22, { 87.0f, 22, 24 }, { 0, 0, 0 }, { 0.9f, 0.9f, 0.9f }, SPEC },
};
// Create OpenGL BGR value for assignment in OpenGL VBO buffer
__device__ int getColor(Vector3Df& p) // converts Vector3Df colour to int
{
return (((unsigned)p.z) << 16) | (((unsigned)p.y) << 8) | (((unsigned)p.x));
}
// Helper function, that checks whether a ray intersects a bounding box (BVH node)
__device__ bool RayIntersectsBox(const Vector3Df& originInWorldSpace, const Vector3Df& rayInWorldSpace, int boxIdx)
{
// set Tnear = - infinity, Tfar = infinity
//
// For each pair of planes P associated with X, Y, and Z do:
// (example using X planes)
// if direction Xd = 0 then the ray is parallel to the X planes, so
// if origin Xo is not between the slabs ( Xo < Xl or Xo > Xh) then
// return false
// else, if the ray is not parallel to the plane then
// begin
// compute the intersection distance of the planes
// T1 = (Xl - Xo) / Xd
// T2 = (Xh - Xo) / Xd
// If T1 > T2 swap (T1, T2) /* since T1 intersection with near plane */
// If T1 > Tnear set Tnear =T1 /* want largest Tnear */
// If T2 < Tfar set Tfar="T2" /* want smallest Tfar */
// If Tnear > Tfar box is missed so
// return false
// If Tfar < 0 box is behind ray
// return false
// end
// end of for loop
float Tnear, Tfar;
Tnear = -FLT_MAX;
Tfar = FLT_MAX;
float2 limits;
// box intersection routine
#define CHECK_NEAR_AND_FAR_INTERSECTION(c) \
if (rayInWorldSpace.##c == 0.f) { \
if (originInWorldSpace.##c < limits.x) return false; \
if (originInWorldSpace.##c > limits.y) return false; \
} else { \
float T1 = (limits.x - originInWorldSpace.##c)/rayInWorldSpace.##c; \
float T2 = (limits.y - originInWorldSpace.##c)/rayInWorldSpace.##c; \
if (T1>T2) { float tmp=T1; T1=T2; T2=tmp; } \
if (T1 > Tnear) Tnear = T1; \
if (T2 < Tfar) Tfar = T2; \
if (Tnear > Tfar) return false; \
if (Tfar < 0.f) return false; \
}
limits = tex1Dfetch(g_pCFBVHlimitsTexture, 3 * boxIdx); // box.bottom._x/top._x placed in limits.x/limits.y
//limits = make_float2(cudaBVHlimits[6 * boxIdx + 0], cudaBVHlimits[6 * boxIdx + 1]);
CHECK_NEAR_AND_FAR_INTERSECTION(x)
limits = tex1Dfetch(g_pCFBVHlimitsTexture, 3 * boxIdx + 1); // box.bottom._y/top._y placed in limits.x/limits.y
//limits = make_float2(cudaBVHlimits[6 * boxIdx + 2], cudaBVHlimits[6 * boxIdx + 3]);
CHECK_NEAR_AND_FAR_INTERSECTION(y)
limits = tex1Dfetch(g_pCFBVHlimitsTexture, 3 * boxIdx + 2); // box.bottom._z/top._z placed in limits.x/limits.y
//limits = make_float2(cudaBVHlimits[6 * boxIdx + 4], cudaBVHlimits[6 * boxIdx + 5]);
CHECK_NEAR_AND_FAR_INTERSECTION(z)
// If Box survived all above tests, return true with intersection point Tnear and exit point Tfar.
return true;
}
//////////////////////////////////////////
// BVH intersection routine //
// using CUDA texture memory //
//////////////////////////////////////////
// there are 3 forms of the BVH: a "pure" BVH, a cache-friendly BVH (taking up less memory space than the pure BVH)
// and a "textured" BVH which stores its data in CUDA texture memory (which is cached). The last one is gives the
// best performance and is used here.
__device__ bool BVH_IntersectTriangles(
int* cudaBVHindexesOrTrilists, const Vector3Df& origin, const Vector3Df& ray, unsigned avoidSelf,
int& pBestTriIdx, Vector3Df& pointHitInWorldSpace, float& kAB, float& kBC, float& kCA, float& hitdist,
float* cudaBVHlimits, float* cudaTriangleIntersectionData, int* cudaTriIdxList, Vector3Df& boxnormal)
{
// in the loop below, maintain the closest triangle and the point where we hit it:
pBestTriIdx = -1;
float bestTriDist;
// start from infinity
bestTriDist = FLT_MAX;
// create a stack for each ray
// the stack is just a fixed size array of indices to BVH nodes
int stack[BVH_STACK_SIZE];
int stackIdx = 0;
stack[stackIdx++] = 0;
Vector3Df hitpoint;
// while the stack is not empty
while (stackIdx) {
// pop a BVH node (or AABB, Axis Aligned Bounding Box) from the stack
int boxIdx = stack[stackIdx - 1];
//uint* pCurrent = &cudaBVHindexesOrTrilists[boxIdx];
// decrement the stackindex
stackIdx--;
// fetch the data (indices to childnodes or index in triangle list + trianglecount) associated with this node
uint4 data = tex1Dfetch(g_pCFBVHindexesOrTrilistsTexture, boxIdx);
// original, "pure" BVH form...
//if (!pCurrent->IsLeaf()) {
// cache-friendly BVH form...
//if (!(cudaBVHindexesOrTrilists[4 * boxIdx + 0] & 0x80000000)) { // INNER NODE
// texture memory BVH form...
// determine if BVH node is an inner node or a leaf node by checking the highest bit (bitwise AND operation)
// inner node if highest bit is 1, leaf node if 0
if (!(data.x & 0x80000000)) { // INNER NODE
// if ray intersects inner node, push indices of left and right child nodes on the stack
if (RayIntersectsBox(origin, ray, boxIdx)) {
//stack[stackIdx++] = pCurrent->u.inner._idxRight;
//stack[stackIdx++] = cudaBVHindexesOrTrilists[4 * boxIdx + 1];
stack[stackIdx++] = data.y; // right child node index
//stack[stackIdx++] = pCurrent->u.inner._idxLeft;
//stack[stackIdx++] = cudaBVHindexesOrTrilists[4 * boxIdx + 2];
stack[stackIdx++] = data.z; // left child node index
// return if stack size is exceeded
if (stackIdx>BVH_STACK_SIZE)
{
return false;
}
}
}
else { // LEAF NODE
// original, "pure" BVH form...
// BVHLeaf *p = dynamic_cast<BVHLeaf*>(pCurrent);
// for(std::list<const Triangle*>::iterator it=p->_triangles.begin();
// it != p->_triangles.end();
// it++)
// cache-friendly BVH form...
// for(unsigned i=pCurrent->u.leaf._startIndexInTriIndexList;
// i<pCurrent->u.leaf._startIndexInTriIndexList + (pCurrent->u.leaf._count & 0x7fffffff);
// texture memory BVH form...
// for (unsigned i = cudaBVHindexesOrTrilists[4 * boxIdx + 3]; i< cudaBVHindexesOrTrilists[4 * boxIdx + 3] + (cudaBVHindexesOrTrilists[4 * boxIdx + 0] & 0x7fffffff); i++) { // data.w = number of triangles in leaf
// loop over every triangle in the leaf node
// data.w is start index in triangle list
// data.x stores number of triangles in leafnode (the bitwise AND operation extracts the triangle number)
for (unsigned i = data.w; i < data.w + (data.x & 0x7fffffff); i++) {
// original, "pure" BVH form...
//const Triangle& triangle = *(*it);
// cache-friendly BVH form...
//const Triangle& triangle = pTriangles[cudaTriIdxList[i]];
// texture memory BVH form...
// fetch the index of the current triangle
int idx = tex1Dfetch(g_triIdxListTexture, i).x;
//int idx = cudaTriIdxList[i];
// check if triangle is the same as the one intersected by previous ray
// to avoid self-reflections/refractions
if (avoidSelf == idx)
continue;
// fetch triangle center and normal from texture memory
float4 center = tex1Dfetch(g_trianglesTexture, 5 * idx);
float4 normal = tex1Dfetch(g_trianglesTexture, 5 * idx + 1);
// use the pre-computed triangle intersection data: normal, d, e1/d1, e2/d2, e3/d3
float k = dot(normal, ray);
if (k == 0.0f)
continue; // this triangle is parallel to the ray, ignore it.
float s = (normal.w - dot(normal, origin)) / k;
if (s <= 0.0f) // this triangle is "behind" the origin.
continue;
if (s <= NUDGE_FACTOR) // epsilon
continue;
Vector3Df hit = ray * s;
hit += origin;
// ray triangle intersection
// Is the intersection of the ray with the triangle's plane INSIDE the triangle?
float4 ee1 = tex1Dfetch(g_trianglesTexture, 5 * idx + 2);
//float4 ee1 = make_float4(cudaTriangleIntersectionData[20 * idx + 8], cudaTriangleIntersectionData[20 * idx + 9], cudaTriangleIntersectionData[20 * idx + 10], cudaTriangleIntersectionData[20 * idx + 11]);
float kt1 = dot(ee1, hit) - ee1.w;
if (kt1<0.0f) continue;
float4 ee2 = tex1Dfetch(g_trianglesTexture, 5 * idx + 3);
//float4 ee2 = make_float4(cudaTriangleIntersectionData[20 * idx + 12], cudaTriangleIntersectionData[20 * idx + 13], cudaTriangleIntersectionData[20 * idx + 14], cudaTriangleIntersectionData[20 * idx + 15]);
float kt2 = dot(ee2, hit) - ee2.w;
if (kt2<0.0f) continue;
float4 ee3 = tex1Dfetch(g_trianglesTexture, 5 * idx + 4);
//float4 ee3 = make_float4(cudaTriangleIntersectionData[20 * idx + 16], cudaTriangleIntersectionData[20 * idx + 17], cudaTriangleIntersectionData[20 * idx + 18], cudaTriangleIntersectionData[20 * idx + 19]);
float kt3 = dot(ee3, hit) - ee3.w;
if (kt3<0.0f) continue;
// ray intersects triangle, "hit" is the world space coordinate of the intersection.
{
// is this intersection closer than all the others?
float hitZ = distancesq(origin, hit);
if (hitZ < bestTriDist) {
// maintain the closest hit
bestTriDist = hitZ;
hitdist = sqrtf(bestTriDist);
pBestTriIdx = idx;
pointHitInWorldSpace = hit;
// store barycentric coordinates (for texturing, not used for now)
kAB = kt1;
kBC = kt2;
kCA = kt3;
}
}
}
}
}
return pBestTriIdx != -1;
}
//////////////////////
// PATH TRACING
//////////////////////
__device__ Vector3Df path_trace(curandState *randstate, Vector3Df originInWorldSpace, Vector3Df rayInWorldSpace, int avoidSelf,
Triangle *pTriangles, int* cudaBVHindexesOrTrilists, float* cudaBVHlimits, float* cudaTriangleIntersectionData, int* cudaTriIdxList)
{
// colour mask
Vector3Df mask = Vector3Df(1.0f, 1.0f, 1.0f);
// accumulated colour
Vector3Df accucolor = Vector3Df(0.0f, 0.0f, 0.0f);
for (int bounces = 0; bounces < 5; bounces++){ // iteration up to 4 bounces (instead of recursion in CPU code)
int sphere_id = -1;
int triangle_id = -1;
int pBestTriIdx = -1;
int geomtype = -1;
const Triangle *pBestTri = NULL;
Vector3Df pointHitInWorldSpace;
float kAB = 0.f, kBC = 0.f, kCA = 0.f; // distances from the 3 edges of the triangle (from where we hit it), to be used for texturing
float tmin = 1e20;
float tmax = -1e20;
float d = 1e20;
float scene_t = 1e20;
float inf = 1e20;
float hitdistance = 1e20;
Vector3Df f = Vector3Df(0, 0, 0);
Vector3Df emit = Vector3Df(0, 0, 0);
Vector3Df x; // intersection point
Vector3Df n; // normal
Vector3Df nl; // oriented normal
Vector3Df boxnormal = Vector3Df(0, 0, 0);
Vector3Df dw; // ray direction of next path segment
Refl_t refltype;
float3 rayorig = make_float3(originInWorldSpace.x, originInWorldSpace.y, originInWorldSpace.z);
float3 raydir = make_float3(rayInWorldSpace.x, rayInWorldSpace.y, rayInWorldSpace.z);
// intersect all triangles in the scene stored in BVH
BVH_IntersectTriangles(
cudaBVHindexesOrTrilists, originInWorldSpace, rayInWorldSpace, avoidSelf,
pBestTriIdx, pointHitInWorldSpace, kAB, kBC, kCA, hitdistance, cudaBVHlimits,
cudaTriangleIntersectionData, cudaTriIdxList, boxnormal);
// intersect all spheres in the scene
float numspheres = sizeof(spheres) / sizeof(Sphere);
for (int i = int(numspheres); i--;){ // for all spheres in scene
// keep track of distance from origin to closest intersection point
if ((d = spheres[i].intersect(Ray(rayorig, raydir))) && d < scene_t){ scene_t = d; sphere_id = i; geomtype = 1; }
}
// set avoidSelf to current triangle index to avoid intersection between this triangle and the next ray,
// so that we don't get self-shadow or self-reflection from this triangle...
avoidSelf = pBestTriIdx;
if (hitdistance < scene_t && hitdistance > 0.002) // EPSILON
{
scene_t = hitdistance;
triangle_id = pBestTriIdx;
geomtype = 2;
}
if (scene_t > 1e20) return Vector3Df(0.0f, 0.0f, 0.0f);
// SPHERES:
if (geomtype == 1){
Sphere &sphere = spheres[sphere_id]; // hit object with closest intersection
x = originInWorldSpace + rayInWorldSpace * scene_t; // intersection point on object
n = Vector3Df(x.x - sphere.pos.x, x.y - sphere.pos.y, x.z - sphere.pos.z); // normal
n.normalize();
nl = dot(n, rayInWorldSpace) < 0 ? n : n * -1; // correctly oriented normal
f = Vector3Df(sphere.col.x, sphere.col.y, sphere.col.z); // object colour
refltype = sphere.refl;
emit = Vector3Df(sphere.emi.x, sphere.emi.y, sphere.emi.z); // object emission
accucolor += (mask * emit);
}
// TRIANGLES:5
if (geomtype == 2){
pBestTri = &pTriangles[triangle_id];
x = pointHitInWorldSpace; // intersection point
n = pBestTri->_normal; // normal
//n = Vector3Df(0,0,1);
n.normalize();
nl = dot(n, rayInWorldSpace) < 0 ? n : n * -1; // correctly oriented normal
Vector3Df colour = Vector3Df(0.9f, 0.3f, 0.0f); // hardcoded triangle colour
//Vector3Df colour = pBestTri->_colorf;
refltype = COAT;
f = colour;
emit = Vector3Df(0, 0, 0); // object emission
accucolor += (mask * emit);
}
// basic material system, all parameters are hard-coded (such as phong exponent, index of refraction)
// diffuse material, based on smallpt by Kevin Beason
if (refltype == DIFF){
// pick two random numbers
float phi = 2 * M_PI * curand_uniform(randstate);
float r2 = curand_uniform(randstate);
float r2s = sqrtf(r2);
// compute orthonormal coordinate frame uvw with hitpoint as origin
Vector3Df w = nl; w.normalize();
Vector3Df u = cross((fabs(w.x) > .1 ? Vector3Df(0, 1, 0) : Vector3Df(1, 0, 0)), w); u.normalize();
Vector3Df v = cross(w, u);
// compute cosine weighted random ray direction on hemisphere
dw = u*cosf(phi)*r2s + v*sinf(phi)*r2s + w*sqrtf(1 - r2);
dw.normalize();
// offset origin next path segment to prevent self intersection
pointHitInWorldSpace = x + w * 0.01; // scene size dependent
// multiply mask with colour of object
mask *= f;
}
// Phong metal material from "Realistic Ray Tracing", P. Shirley
if (refltype == METAL){
// compute random perturbation of ideal reflection vector
// the higher the phong exponent, the closer the perturbed vector is to the ideal reflection direction
float phi = 2 * M_PI * curand_uniform(randstate);
float r2 = curand_uniform(randstate);
float phongexponent = 20;
float cosTheta = powf(1 - r2, 1.0f / (phongexponent + 1));
float sinTheta = sqrtf(1 - cosTheta * cosTheta);
// create orthonormal basis uvw around reflection vector with hitpoint as origin
// w is ray direction for ideal reflection
Vector3Df w = rayInWorldSpace - n * 2.0f * dot(n, rayInWorldSpace); w.normalize();
Vector3Df u = cross((fabs(w.x) > .1 ? Vector3Df(0, 1, 0) : Vector3Df(1, 0, 0)), w); u.normalize();
Vector3Df v = cross(w, u); // v is normalised by default
// compute cosine weighted random ray direction on hemisphere
dw = u * cosf(phi) * sinTheta + v * sinf(phi) * sinTheta + w * cosTheta;
dw.normalize();
// offset origin next path segment to prevent self intersection
pointHitInWorldSpace = x + w * 0.01; // scene size dependent
// multiply mask with colour of object
mask *= f;
}
// specular material (perfect mirror)
if (refltype == SPEC){
// compute reflected ray direction according to Snell's law
dw = rayInWorldSpace - n * 2.0f * dot(n, rayInWorldSpace);
// offset origin next path segment to prevent self intersection
pointHitInWorldSpace = x + nl * 0.01; // scene size dependent
// multiply mask with colour of object
mask *= f;
}
// COAT material based on https://github.com/peterkutz/GPUPathTracer
// randomly select diffuse or specular reflection
// looks okay-ish but inaccurate (no Fresnel calculation yet)
if (refltype == COAT){
float rouletteRandomFloat = curand_uniform(randstate);
float threshold = 0.05f;
Vector3Df specularColor = Vector3Df(1,1,1); // hard-coded
bool reflectFromSurface = (rouletteRandomFloat < threshold); //computeFresnel(make_Vector3Df(n.x, n.y, n.z), incident, incidentIOR, transmittedIOR, reflectionDirection, transmissionDirection).reflectionCoefficient);
if (reflectFromSurface) { // calculate perfectly specular reflection
// Ray reflected from the surface. Trace a ray in the reflection direction.
// TODO: Use Russian roulette instead of simple multipliers! (Selecting between diffuse sample and no sample (absorption) in this case.)
mask *= specularColor;
dw = rayInWorldSpace - n * 2.0f * dot(n, rayInWorldSpace);
// offset origin next path segment to prevent self intersection
pointHitInWorldSpace = x + nl * 0.01; // scene size dependent
}
else { // calculate perfectly diffuse reflection
float r1 = 2 * M_PI * curand_uniform(randstate);
float r2 = curand_uniform(randstate);
float r2s = sqrtf(r2);
// compute orthonormal coordinate frame uvw with hitpoint as origin
Vector3Df w = nl; w.normalize();
Vector3Df u = cross((fabs(w.x) > .1 ? Vector3Df(0, 1, 0) : Vector3Df(1, 0, 0)), w); u.normalize();
Vector3Df v = cross(w, u);
// compute cosine weighted random ray direction on hemisphere
dw = u*cosf(r1)*r2s + v*sinf(r1)*r2s + w*sqrtf(1 - r2);
dw.normalize();
// offset origin next path segment to prevent self intersection
pointHitInWorldSpace = x + nl * 0.01; // // scene size dependent
// multiply mask with colour of object
mask *= f;
//mask *= make_Vector3Df(0.15f, 0.15f, 0.15f); // gold metal
}
} // end COAT
// perfectly refractive material (glass, water)
if (refltype == REFR){
bool into = dot(n, nl) > 0; // is ray entering or leaving refractive material?
float nc = 1.0f; // Index of Refraction air
float nt = 1.5f; // Index of Refraction glass/water
float nnt = into ? nc / nt : nt / nc; // IOR ratio of refractive materials
float ddn = dot(rayInWorldSpace, nl);
float cos2t = 1.0f - nnt*nnt * (1.f - ddn*ddn);
if (cos2t < 0.0f) // total internal reflection
{
dw = rayInWorldSpace;
dw -= n * 2.0f * dot(n, rayInWorldSpace);
// offset origin next path segment to prevent self intersection
pointHitInWorldSpace = x + nl * 0.01; // scene size dependent
}
else // cos2t > 0
{
// compute direction of transmission ray
Vector3Df tdir = rayInWorldSpace * nnt;
tdir -= n * ((into ? 1 : -1) * (ddn*nnt + sqrtf(cos2t)));
tdir.normalize();
float R0 = (nt - nc)*(nt - nc) / (nt + nc)*(nt + nc);
float c = 1.f - (into ? -ddn : dot(tdir, n));
float Re = R0 + (1.f - R0) * c * c * c * c * c;
float Tr = 1 - Re; // Transmission
float P = .25f + .5f * Re;
float RP = Re / P;
float TP = Tr / (1.f - P);
// randomly choose reflection or transmission ray
if (curand_uniform(randstate) < 0.25) // reflection ray
{
mask *= RP;
dw = rayInWorldSpace;
dw -= n * 2.0f * dot(n, rayInWorldSpace);
pointHitInWorldSpace = x + nl * 0.01; // scene size dependent
}
else // transmission ray
{
mask *= TP;
dw = tdir; //r = Ray(x, tdir);
pointHitInWorldSpace = x + nl * 0.001f; // epsilon must be small to avoid artefacts
}
}
}
// set up origin and direction of next path segment
originInWorldSpace = pointHitInWorldSpace;
rayInWorldSpace = dw;
}
return Vector3Df(accucolor.x, accucolor.y, accucolor.z);
}
union Colour // 4 bytes = 4 chars = 1 float
{
float c;
uchar4 components;
};
// the core path tracing kernel,
// running in parallel for all pixels
__global__ void CoreLoopPathTracingKernel(Vector3Df* output, Vector3Df* accumbuffer, Triangle* pTriangles, Camera* cudaRendercam,
int* cudaBVHindexesOrTrilists, float* cudaBVHlimits, float* cudaTriangleIntersectionData,
int* cudaTriIdxList, unsigned int framenumber, unsigned int hashedframenumber)
{
// assign a CUDA thread to every pixel by using the threadIndex
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
// global threadId, see richiesams blogspot
int threadId = (blockIdx.x + blockIdx.y * gridDim.x) * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
// create random number generator and initialise with hashed frame number, see RichieSams blogspot
curandState randState; // state of the random number generator, to prevent repetition
curand_init(hashedframenumber + threadId, 0, 0, &randState);
Vector3Df finalcol; // final pixel colour
Vector3Df rendercampos = Vector3Df(cudaRendercam->position.x, cudaRendercam->position.y, cudaRendercam->position.z);
int i = (height - y - 1)*width + x; // pixel index in buffer
int pixelx = x; // pixel x-coordinate on screen
int pixely = height - y - 1; // pixel y-coordintate on screen
finalcol = Vector3Df(0.0f, 0.0f, 0.0f); // reset colour to zero for every pixel
for (int s = 0; s < samps; s++){
// compute primary ray direction
// use camera view of current frame (transformed on CPU side) to create local orthonormal basis
Vector3Df rendercamview = Vector3Df(cudaRendercam->view.x, cudaRendercam->view.y, cudaRendercam->view.z); rendercamview.normalize(); // view is already supposed to be normalized, but normalize it explicitly just in case.
Vector3Df rendercamup = Vector3Df(cudaRendercam->up.x, cudaRendercam->up.y, cudaRendercam->up.z); rendercamup.normalize();
Vector3Df horizontalAxis = cross(rendercamview, rendercamup); horizontalAxis.normalize(); // Important to normalize!
Vector3Df verticalAxis = cross(horizontalAxis, rendercamview); verticalAxis.normalize(); // verticalAxis is normalized by default, but normalize it explicitly just for good measure.
Vector3Df middle = rendercampos + rendercamview;
Vector3Df horizontal = horizontalAxis * tanf(cudaRendercam->fov.x * 0.5 * (M_PI / 180)); // Now treating FOV as the full FOV, not half, so I multiplied it by 0.5. I also normzlized A and B, so there's no need to divide by the length of A or B anymore. Also normalized view and removed lengthOfView. Also removed the cast to float.
Vector3Df vertical = verticalAxis * tanf(-cudaRendercam->fov.y * 0.5 * (M_PI / 180)); // Now treating FOV as the full FOV, not half, so I multiplied it by 0.5. I also normzlized A and B, so there's no need to divide by the length of A or B anymore. Also normalized view and removed lengthOfView. Also removed the cast to float.
// anti-aliasing
// calculate center of current pixel and add random number in X and Y dimension
// based on https://github.com/peterkutz/GPUPathTracer
float jitterValueX = curand_uniform(&randState) - 0.5;
float jitterValueY = curand_uniform(&randState) - 0.5;
float sx = (jitterValueX + pixelx) / (cudaRendercam->resolution.x - 1);
float sy = (jitterValueY + pixely) / (cudaRendercam->resolution.y - 1);
// compute pixel on screen
Vector3Df pointOnPlaneOneUnitAwayFromEye = middle + ( horizontal * ((2 * sx) - 1)) + ( vertical * ((2 * sy) - 1));
Vector3Df pointOnImagePlane = rendercampos + ((pointOnPlaneOneUnitAwayFromEye - rendercampos) * cudaRendercam->focalDistance); // Important for depth of field!
// calculation of depth of field / camera aperture
// based on https://github.com/peterkutz/GPUPathTracer
Vector3Df aperturePoint;
if (cudaRendercam->apertureRadius > 0.00001) { // the small number is an epsilon value.
// generate random numbers for sampling a point on the aperture
float random1 = curand_uniform(&randState);
float random2 = curand_uniform(&randState);
// randomly pick a point on the circular aperture
float angle = TWO_PI * random1;
float distance = cudaRendercam->apertureRadius * sqrtf(random2);
float apertureX = cos(angle) * distance;
float apertureY = sin(angle) * distance;
aperturePoint = rendercampos + (horizontalAxis * apertureX) + (verticalAxis * apertureY);
}
else { // zero aperture
aperturePoint = rendercampos;
}
// calculate ray direction of next ray in path
Vector3Df apertureToImagePlane = pointOnImagePlane - aperturePoint;
apertureToImagePlane.normalize(); // ray direction, needs to be normalised
Vector3Df rayInWorldSpace = apertureToImagePlane;
// in theory, this should not be required
rayInWorldSpace.normalize();
// origin of next ray in path
Vector3Df originInWorldSpace = aperturePoint;
finalcol += path_trace(&randState, originInWorldSpace, rayInWorldSpace, -1, pTriangles,
cudaBVHindexesOrTrilists, cudaBVHlimits, cudaTriangleIntersectionData, cudaTriIdxList) * (1.0f/samps);
}
// add pixel colour to accumulation buffer (accumulates all samples)
accumbuffer[i] += finalcol;
// averaged colour: divide colour by the number of calculated frames so far
Vector3Df tempcol = accumbuffer[i] / framenumber;
Colour fcolour;
Vector3Df colour = Vector3Df(clamp(tempcol.x, 0.0f, 1.0f), clamp(tempcol.y, 0.0f, 1.0f), clamp(tempcol.z, 0.0f, 1.0f));
// convert from 96-bit to 24-bit colour + perform gamma correction
fcolour.components = make_uchar4((unsigned char)(powf(colour.x, 1 / 2.2f) * 255), (unsigned char)(powf(colour.y, 1 / 2.2f) * 255), (unsigned char)(powf(colour.z, 1 / 2.2f) * 255), 1);
// store pixel coordinates and pixelcolour in OpenGL readable outputbuffer
output[i] = Vector3Df(x, y, fcolour.c);
}
bool g_bFirstTime = true;
// the gateway to CUDA, called from C++ (in void disp() in main.cpp)
void cudarender(Vector3Df* dptr, Vector3Df* accumulatebuffer, Triangle* cudaTriangles, int* cudaBVHindexesOrTrilists,
float* cudaBVHlimits, float* cudaTriangleIntersectionData, int* cudaTriIdxList,
unsigned framenumber, unsigned hashedframes, Camera* cudaRendercam){
if (g_bFirstTime) {
// if this is the first time cudarender() is called,
// bind the scene data to CUDA textures!
g_bFirstTime = false;
printf("g_triIndexListNo: %d\n", g_triIndexListNo);
printf("g_pCFBVH_No: %d\n", g_pCFBVH_No);
printf("g_verticesNo: %d\n", g_verticesNo);
printf("g_trianglesNo: %d\n", g_trianglesNo);
cudaChannelFormatDesc channel1desc = cudaCreateChannelDesc<uint1>();
cudaBindTexture(NULL, &g_triIdxListTexture, cudaTriIdxList, &channel1desc, g_triIndexListNo * sizeof(uint1));
cudaChannelFormatDesc channel2desc = cudaCreateChannelDesc<float2>();
cudaBindTexture(NULL, &g_pCFBVHlimitsTexture, cudaBVHlimits, &channel2desc, g_pCFBVH_No * 6 * sizeof(float));
cudaChannelFormatDesc channel3desc = cudaCreateChannelDesc<uint4>();
cudaBindTexture(NULL, &g_pCFBVHindexesOrTrilistsTexture, cudaBVHindexesOrTrilists, &channel3desc,
g_pCFBVH_No * sizeof(uint4));
//cudaChannelFormatDesc channel4desc = cudaCreateChannelDesc<float4>();
//cudaBindTexture(NULL, &g_verticesTexture, cudaPtrVertices, &channel4desc, g_verticesNo * 8 * sizeof(float));
cudaChannelFormatDesc channel5desc = cudaCreateChannelDesc<float4>();
cudaBindTexture(NULL, &g_trianglesTexture, cudaTriangleIntersectionData, &channel5desc, g_trianglesNo * 20 * sizeof(float));
}
dim3 block(16, 16, 1); // dim3 CUDA specific syntax, block and grid are required to schedule CUDA threads over streaming multiprocessors
dim3 grid(width / block.x, height / block.y, 1);
// Configure grid and block sizes:
int threadsPerBlock = 256;
// Compute the number of blocks required, performing a ceiling operation to make sure there are enough:
int fullBlocksPerGrid = ((width * height) + threadsPerBlock - 1) / threadsPerBlock;
// <<<fullBlocksPerGrid, threadsPerBlock>>>
CoreLoopPathTracingKernel << <grid, block >> >(dptr, accumulatebuffer, cudaTriangles, cudaRendercam, cudaBVHindexesOrTrilists,
cudaBVHlimits, cudaTriangleIntersectionData, cudaTriIdxList, framenumber, hashedframes);
}