Skip to content

Commit ada3e77

Browse files
committed
- Tested on AMD64 Linux and fixed for older GPU issue with memory location (no longer shared for RTX2080)
- Fixed clangd for Linux ARM64 (ignore CUDA_ARCHITECTURE)
- More diagnostics for debugging
- Formatting with clangd for missing files
1 parent efb897c commit ada3e77

19 files changed

+698
-615
lines changed

.clangd

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,25 +15,28 @@ CompileFlags:
1515
- -rdynamic
1616
- -lineinfo
1717
- -maxrregcount*
18+
- -arch=*
19+
- --gpu-architecture=*
20+
- -gencode=*
21+
- --use_fast_math
1822
Add:
1923
- --no-cuda-version-check
2024
# The include section is automatically generated by cmake at build, do NOT modify.
21-
- -I/home/pmudry/git/302_raytracer/src
22-
- -I/home/pmudry/git/302_raytracer/src/external
23-
- -I/home/pmudry/git/302_raytracer/src/rayon
24-
- -I/home/pmudry/git/302_raytracer/src/rayon/camera
25-
- -I/home/pmudry/git/302_raytracer/src/rayon/camera/sdl
26-
- -I/home/pmudry/git/302_raytracer/src/rayon/cpu_renderers
27-
- -I/home/pmudry/git/302_raytracer/src/rayon/cpu_renderers/cpu_shapes
28-
- -I/home/pmudry/git/302_raytracer/src/rayon/cpu_renderers/utils
29-
- -I/home/pmudry/git/302_raytracer/src/rayon/data_structures
30-
- -I/home/pmudry/git/302_raytracer/src/rayon/gpu_renderers
31-
- -I/home/pmudry/git/302_raytracer/src/rayon/gpu_renderers/materials
32-
- -I/home/pmudry/git/302_raytracer/src/rayon/gpu_renderers/materials/advanced
33-
- -I/home/pmudry/git/302_raytracer/src/rayon/gpu_renderers/materials/legacy
34-
- -I/home/pmudry/git/302_raytracer/src/rayon/gpu_renderers/shaders
35-
- -I/home/pmudry/git/302_raytracer/src/rayon/render
36-
- -I/home/pmudry/git/302_raytracer/src/rayon/scenes
25+
- -I/home/pyrrhus/git/302_raytracer/src
26+
- -I/home/pyrrhus/git/302_raytracer/src/external
27+
- -I/home/pyrrhus/git/302_raytracer/src/rayon
28+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/camera
29+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/camera/sdl
30+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/cpu_renderers
31+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/cpu_renderers/cpu_shapes
32+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/cpu_renderers/utils
33+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/data_structures
34+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/gpu_renderers
35+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/gpu_renderers/materials
36+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/gpu_renderers/materials/legacy
37+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/gpu_renderers/shaders
38+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/render
39+
- -I/home/pyrrhus/git/302_raytracer/src/rayon/scenes
3740
Diagnostics:
3841
UnusedIncludes: Strict
3942
Suppress:

CMakeLists.txt

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ endif()
1919

2020
project(RAYON_RAYTRACER LANGUAGES CXX CUDA)
2121

22+
# Option to enable diagnostic output
23+
option(ENABLE_DIAGS "Enable diagnostic output in CUDA and CPU renderers" OFF)
2224

2325
# Ensure compile_commands.json is generated in the source directory
2426
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -55,7 +57,7 @@ if(CMAKE_CUDA_COMPILER)
5557
set(CMAKE_CUDA_STANDARD 17)
5658
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
5759
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
58-
set(CMAKE_CUDA_ARCHITECTURES 90) # Set CUDA architectures (adjust based on your GPU - this covers common GPUs)
60+
set(CMAKE_CUDA_ARCHITECTURES "native") # Set CUDA architecture to the machine where it's built
5961
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -t 0") # Enable parallel CUDA compilation for faster builds
6062
set(CUDA_FOUND TRUE)
6163
else()
@@ -133,6 +135,14 @@ target_compile_definitions(rayon PRIVATE
133135
RT_BUILD_TYPE_STRING="$<IF:$<BOOL:${CMAKE_BUILD_TYPE}>,${CMAKE_BUILD_TYPE},$<CONFIG>>"
134136
)
135137

138+
# Add DIAGS definition if enabled
139+
if(ENABLE_DIAGS)
140+
target_compile_definitions(rayon PRIVATE DIAGS)
141+
message(STATUS "Diagnostics enabled (ENABLE_DIAGS=ON)")
142+
else()
143+
message(STATUS "Diagnostics disabled (set ENABLE_DIAGS=ON to enable)")
144+
endif()
145+
136146
# Generate .clangd with -I flags
137147
# This updates the .clangd file's Add section with the current include directories
138148
# The rest of .clangd remains untouched

src/rayon/constants.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ namespace constants
66
{
77
const std::string ver_major = "1";
88
const std::string ver_minor = "2";
9-
const std::string ver_patch = "1";
9+
const std::string ver_patch = "3";
1010
const std::string version = ver_major + "." + ver_minor + "." + ver_patch;
1111

1212
// Image-specific settings

src/rayon/data_structures/material.hpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ class Constant : public Material
2222
Constant(const Color &a) : color(a) {}
2323

2424
virtual bool scatter(const Ray &r_in, const Hit_record &rec, Color &attenuation, Ray &scattered) const override
25-
{
25+
{
2626
attenuation = color;
27-
scattered = Ray(rec.p, Vec3(0,0,0)); // No scattering, the ray is absorbed
27+
scattered = Ray(rec.p, Vec3(0, 0, 0)); // No scattering, the ray is absorbed
2828
return true;
2929
}
3030

@@ -38,7 +38,7 @@ class ShowNormals : public Material
3838
ShowNormals(const Color &a) : albedo(a) {}
3939

4040
virtual bool scatter(const Ray &r_in, const Hit_record &rec, Color &attenuation, Ray &scattered) const override
41-
{
41+
{
4242
attenuation = 0.5 * (rec.normal + Vec3_ONES);
4343
scattered = Ray(rec.p, Vec3_ZEROES); // No scattering
4444
return true;
@@ -62,12 +62,11 @@ class Lambertian : public Material
6262
scatter_direction = rec.normal;
6363

6464
scattered = Ray(rec.p, scatter_direction);
65-
65+
6666
attenuation = albedo;
6767
return true;
6868
}
6969

7070
public:
7171
Color albedo; // The amount of reflected light, 0 for no reflection, 1 for full reflection
7272
};
73-

src/rayon/gpu_renderers/cuda_float3.cuh

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
*/
88
#pragma once
99

10-
#include <cuda_runtime.h>
1110
#include <cmath>
11+
#include <cuda_runtime.h>
1212

1313
//==============================================================================
1414
// VECTOR MATH AND UTILITY STRUCTURES
@@ -24,26 +24,17 @@ struct f2
2424
__host__ __device__ f2() : x(0), y(0) {}
2525
__host__ __device__ f2(float x_, float y_) : x(x_), y(y_) {}
2626

27-
__host__ __device__ f2 operator+(const f2 &other) const
28-
{
29-
return f2(x + other.x, y + other.y);
30-
}
27+
__host__ __device__ f2 operator+(const f2 &other) const { return f2(x + other.x, y + other.y); }
3128

32-
__host__ __device__ f2 operator-(const f2 &other) const
33-
{
34-
return f2(x - other.x, y - other.y);
35-
}
29+
__host__ __device__ f2 operator-(const f2 &other) const { return f2(x - other.x, y - other.y); }
3630

3731
__host__ __device__ f2 operator*(float t) const { return f2(x * t, y * t); }
3832

3933
__host__ __device__ f2 operator/(float t) const { return f2(x / t, y / t); }
4034
};
4135

4236
/** @brief Scalar multiplication from left */
43-
__device__ __forceinline__ f2 operator*(float t, const f2 &v)
44-
{
45-
return f2(t * v.x, t * v.y);
46-
}
37+
__device__ __forceinline__ f2 operator*(float t, const f2 &v) { return f2(t * v.x, t * v.y); }
4738

4839
/**
4940
* @brief Simple 3D vector structure optimized for CUDA
@@ -54,18 +45,12 @@ struct f3
5445
float x, y, z;
5546

5647
__host__ __device__ f3() : x(0), y(0), z(0) {}
57-
48+
5849
__host__ __device__ f3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {}
5950

60-
__host__ __device__ f3 operator+(const f3 &other) const
61-
{
62-
return f3(x + other.x, y + other.y, z + other.z);
63-
}
51+
__host__ __device__ f3 operator+(const f3 &other) const { return f3(x + other.x, y + other.y, z + other.z); }
6452

65-
__host__ __device__ f3 operator-(const f3 &other) const
66-
{
67-
return f3(x - other.x, y - other.y, z - other.z);
68-
}
53+
__host__ __device__ f3 operator-(const f3 &other) const { return f3(x - other.x, y - other.y, z - other.z); }
6954

7055
__host__ __device__ f3 operator*(float t) const { return f3(x * t, y * t, z * t); }
7156

@@ -84,16 +69,10 @@ const f3 f3_ZEROES(0.0f, 0.0f, 0.0f);
8469
const f3 f3_ONES(1.0f, 1.0f, 1.0f);
8570

8671
/** @brief Scalar multiplication from left */
87-
__device__ __forceinline__ f3 operator*(float t, const f3 &v)
88-
{
89-
return v * t;
90-
}
72+
__device__ __forceinline__ f3 operator*(float t, const f3 &v) { return v * t; }
9173

9274
/** @brief Compute dot product of two vectors */
93-
__device__ __forceinline__ float dot(const f3 &a, const f3 &b)
94-
{
95-
return a.x * b.x + a.y * b.y + a.z * b.z;
96-
}
75+
__device__ __forceinline__ float dot(const f3 &a, const f3 &b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
9776

9877
/** @brief Compute cross product of two vectors */
9978
__device__ __forceinline__ f3 cross(const f3 &a, const f3 &b)
@@ -102,10 +81,7 @@ __device__ __forceinline__ f3 cross(const f3 &a, const f3 &b)
10281
}
10382

10483
/** @brief Normalize a vector to unit length */
105-
__device__ __forceinline__ f3 normalize(const f3 &v)
106-
{
107-
return v / v.length();
108-
}
84+
__device__ __forceinline__ f3 normalize(const f3 &v) { return v / v.length(); }
10985

11086
/** @brief Convert a normal to a debug RGB color */
11187
__device__ __forceinline__ f3 normal_to_color(const f3 &n)

src/rayon/gpu_renderers/cuda_raytracer.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,7 @@ __device__ inline f3 ray_color(const ray_simple &r, const CudaScene::Scene &scen
578578
// Sky/background
579579
f3 unit_direction = normalize(current_ray.dir);
580580
float t = 0.5f * (unit_direction.y + 1.0f);
581-
f3 sky_color = (1.0f - t) * f3(1.0f, 1.0f, 1.0f) + t * f3(0.5f, 0.7f, 1.0f);
581+
f3 sky_color = (1.0f - t) * f3(1.0f, 1.0f, 1.0f) + t * f3(0.5f, 0.7f, 1.0f);
582582
accumulated_color = accumulated_color + accumulated_attenuation * sky_color * g_background_intensity;
583583
return accumulated_color;
584584
}

src/rayon/gpu_renderers/cuda_utils.cu

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,26 @@
33
// Implement kernel in a single translation unit to avoid nvlink multiple definition errors
44
__global__ void init_random_states(curandState *rand_states, int num_states, unsigned long long seed, int width)
55
{
6-
// Support both 1D and 2D grid launches
7-
int idx;
8-
if (gridDim.y == 1)
9-
{
10-
// 1D launch
11-
idx = blockIdx.x * blockDim.x + threadIdx.x;
12-
}
13-
else
14-
{
15-
// 2D launch - compute proper 1D index
16-
int x = blockIdx.x * blockDim.x + threadIdx.x;
17-
int y = blockIdx.y * blockDim.y + threadIdx.y;
18-
idx = y * width + x;
19-
}
6+
// Support both 1D and 2D grid launches
7+
int idx;
8+
if (gridDim.y == 1)
9+
{
10+
// 1D launch
11+
idx = blockIdx.x * blockDim.x + threadIdx.x;
12+
}
13+
else
14+
{
15+
// 2D launch - compute proper 1D index
16+
int x = blockIdx.x * blockDim.x + threadIdx.x;
17+
int y = blockIdx.y * blockDim.y + threadIdx.y;
18+
idx = y * width + x;
19+
}
2020

21-
if (idx < num_states)
22-
{
23-
// Initialize fast RNG state - we repurpose curandState storage
24-
// Simple but effective: combine seed with index for per-pixel unique sequences
25-
unsigned int *fast_state = (unsigned int*)&rand_states[idx];
26-
*fast_state = (unsigned int)(seed + idx * 747796405u);
27-
}
21+
if (idx < num_states)
22+
{
23+
// Initialize fast RNG state - we repurpose curandState storage
24+
// Simple but effective: combine seed with index for per-pixel unique sequences
25+
unsigned int *fast_state = (unsigned int *)&rand_states[idx];
26+
*fast_state = (unsigned int)(seed + idx * 747796405u);
27+
}
2828
}

src/rayon/gpu_renderers/cuda_utils.cuh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,10 @@ static __device__ inline f3 randPosInSphere(curandState *state, f3 center, float
9999
static __device__ inline void build_orthonormal_basis(const f3 &n, f3 &u, f3 &v)
100100
{
101101
// from "Building an Orthonormal Basis, Pixar" / Shirley
102-
if (fabs(n.x) > fabs(n.z))
103-
u = normalize(f3(-n.y, n.x, 0.0f));
104-
else
105-
u = normalize(f3(0.0f, -n.z, n.y));
102+
if (fabs(n.x) > fabs(n.z))
103+
u = normalize(f3(-n.y, n.x, 0.0f));
104+
else
105+
u = normalize(f3(0.0f, -n.z, n.y));
106106
v = cross(n, u);
107107
}
108108

src/rayon/gpu_renderers/materials/legacy/show_normals.cuh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,10 @@ struct ShowNormals : public MaterialBase<ShowNormals>
5252
* @brief Get emitted color (displays the surface normal as color)
5353
* @return Normal vector mapped to RGB color space [0,1]³
5454
*/
55-
__device__ __forceinline__ f3 emission() const {
55+
__device__ __forceinline__ f3 emission() const
56+
{
5657
// Map normal from [-1,1] to [0,1] for color display
57-
return 0.5f * (params.normal + f3(1.0f, 1.0f, 1.0f));
58+
return 0.5f * (params.normal + f3(1.0f, 1.0f, 1.0f));
5859
}
5960
};
6061

0 commit comments

Comments
 (0)