Improved memory usage, thread safety, world generator speed

kosshi-net · Jul 12, 2019 · 08245e1 · 08245e1
1 parent f2ddfaa
commit 08245e1
Show file tree

Hide file tree

Showing 19 changed files with 817 additions and 332 deletions.
diff --git a/README.md b/README.md
@@ -4,8 +4,9 @@ A hybrid voxel rendering engine, written with C and OpenGL. The goal of this
 project was to make an engine capable of rendering a large number of
 voxels on mid-tier graphics cards.
 
-8192x8192 area with over 100 000 trees, rendered at 170fps on Radeon RX 480 4GB
-![Pretty picture](https://i.imgur.com/tnNTgnH.jpg)
+8192x512x8192 map with over 120 000 trees, rendered at 130 fps on a Radeon RX 480. 128^3 chunks, 4.5 GB peak RAM usage; FPS is limited by draw calls.
+![Pretty picture](https://kosshi.net/u/jzn6y.png)
+![Pretty picture](https://kosshi.net/u/jzldm.png)
 
 ## Rendering method
 
@@ -14,18 +15,21 @@ testing in a fragment shader. This reduces the vertex count to just 1 per voxel,
 but with a somewhat slow fragment shader. This is a real problem with nearby
 voxels, so hybrid rendering is used: meshes for nearby, splatting farther away.
 
+The engine implements SVO-like level of detail, with the first level starting 1024
+voxels away.
+
 ### Relevant
 This [Nvidia paper](http://www.jcgt.org/published/0007/03/04/) has a different, 
 more refined version of the splatting part.
 
 ### Work in progress!
-This my first large C project, there is a lot of code and a lot of it is ugly.
-Development is mostly halted for now, but not abandoned. 
+This is also my first large C project, so there is a lot of code, and a lot of it
+is ugly and temporary, especially the mesher. Don't look at the mesher.
 
 ### Launch options
-List of some launch options and their default values
+List of some launch options and their default values:
 
-``--heap 1G``
+``--heap 512M``
 
 How much memory to allocate. 
 You must adjust this for larger worlds.
@@ -48,7 +52,7 @@ Use OpenGL compatability mode. Might help if you have trouble running the engine
 - Standard meshed rendering for nearby voxels for additional effects and speed
 - Fast culler and mesher (a lot of it is still very WIP)
 	- 64^3 in 1-4 milliseconds singlethreaded on a Ryzen 1600 @ 3.8GHz
-- Barely noticeable level of detail (starts at 1024 voxels away)
+- Subtle level of detail (starts at 1024 voxels away)
 - Block placement and removal
 - Completely fake shadows and ambient occlusion
 - Custom world generator for pretty screenshots

diff --git a/resources/mesh.frag.glsl b/resources/mesh.frag.glsl
@@ -26,7 +26,8 @@ void main(void) {
 
 	gl_FragDepth = vDepth;
 
-	float fog = pow(gl_FragDepth,0.7);
+	float fog_start = 256.0/u_far;
+	float fog = pow( max(gl_FragDepth-fog_start,0.0001), 0.5);
 
 	out_Color = vec4( mix( 
 		color, 

diff --git a/resources/splat.frag.glsl b/resources/splat.frag.glsl
@@ -138,7 +138,8 @@ void main(void) {
 
 		gl_FragDepth = result.x/u_far;
 
-		float fog = pow(gl_FragDepth,0.7);
+		float fog_start = 256.0/u_far;
+		float fog = pow( max(gl_FragDepth-fog_start,0.0001), 0.5);
 
 		out_Color = vec4( mix( 
 			color, 

diff --git a/src/cfg.c b/src/cfg.c
@@ -58,7 +58,7 @@ void cfg_init( int argc, char **argv ){
 	config.opengl_debug = 0;
 
 
-	config.heap = parse_num( "1G" );
+	config.heap = parse_num( "512M" );
 
 	uint32_t default_chunk_size = 64;
 	uint32_t default_world_size[] = { 2048, 256, 2048 };

diff --git a/src/chunkset.c b/src/chunkset.c
@@ -24,7 +24,7 @@
 #include <omp.h>
 
 
-//#define DISABLE_SHADOWS 1
+//#define DISABLE_SHADOWS
 
 
 struct ChunkSet * 
@@ -92,6 +92,7 @@ void chunkset_clear( struct ChunkSet *set )
 	c->last_access = ctx_time();
 	c->count = num_voxels;
 	c->voxels = mem_calloc( num_voxels * sizeof(Voxel) );
+	c->rle = rle_compress( c->voxels, c->count);
 
 	for( vec[2] = 0; vec[2] < set->max[2]; vec[2]++  )
 	for( vec[1] = 0; vec[1] < set->max[1]; vec[1]++  )
@@ -105,14 +106,18 @@ void chunkset_clear( struct ChunkSet *set )
 		c->lod = -1;
 		c->last_access = ctx_time();
 		c->count = num_voxels;
-		c->voxels = mem_calloc( num_voxels * sizeof(Voxel) );
+
+
 		memcpy( c->offset, vec, 3*sizeof(uint16_t) );
 
 		c->dirty = 1;
 
 		c->gl_vbo =  0;
 
-		chunk_compress(c);
+
+		//c->voxels = mem_calloc( num_voxels * sizeof(Voxel) );
+		//chunk_compress(c);
+		c->rle = set->null_chunk->rle;
 
 		i++;
 	}
@@ -271,7 +276,7 @@ void chunk_touch_ro( struct ChunkMD *c  )
 	logf_warn("RO Touch is depricated. Segfault!");
 }
 
-
+/*
 void chunk_decompress( struct ChunkMD *c ){
 
 	if( c->voxels != NULL ) return;
@@ -285,15 +290,15 @@ void chunk_decompress( struct ChunkMD *c ){
 		// Uncompress chunk!
 		
 		c->voxels = rle_decompress(c->rle);
-		mem_free(c->rle);
-		c->rle = NULL;
+		//mem_free(c->rle);
+		//c->rle = NULL;
 		
 		return;
 	}
 }
+*/
 
-
-void chunk_open_ro( struct ChunkMD *c )
+void chunk_open_ro( struct ChunkSet *set, struct ChunkMD *c )
 {
 	pthread_mutex_lock( &c->mutex_read ); 
 	c->last_access = (uint32_t)ctx_time();
@@ -305,52 +310,88 @@ void chunk_open_ro( struct ChunkMD *c )
 		pthread_mutex_lock( &c->mutex_write );
 	}
 
-	chunk_decompress( c );
+	if(!c->voxels) {
 
+		// Null chunk is a shared fake chunk with only air
+		if( c->rle == set->null_chunk->rle ){
+			c->voxels = set->null_chunk->voxels;
+		} else {
+			c->voxels = rle_decompress( c->rle );
+			//chunk_decompress( c );
+		}
+	}
 	pthread_mutex_unlock( &c->mutex_read );
 }
 
-void chunk_close_ro( struct ChunkMD *c )
+void chunk_close_ro( struct ChunkSet *set, struct ChunkMD *c )
 {
 	pthread_mutex_lock( &c->mutex_read ); 
 	c->readers--;
 	if(	c->readers == 0 ) {
 		pthread_mutex_unlock( &c->mutex_write ); 
-		//chunk_compress( c );
 	}
 	pthread_mutex_unlock( &c->mutex_read ); 
 
-//	c->last_access = (uint32_t)ctx_time();
+	c->last_access = (uint32_t)ctx_time();
 }
 
 
-void chunk_open_rw( struct ChunkMD *c )
+void chunk_open_rw( struct ChunkSet *set, struct ChunkMD *c )
 {
 	pthread_mutex_lock( &c->mutex_write ); 
-	chunk_decompress( c );
+
+	// Null chunk
+	if( c->voxels == set->null_chunk->voxels
+	||  c->rle    == set->null_chunk->rle ) {
+		c->rle = NULL;
+		c->voxels = mem_alloc( c->count*sizeof(Voxel) );
+		memcpy(c->voxels, set->null_chunk->voxels, c->count*sizeof(Voxel) );
+		return;
+	}
+
+	if( c->rle && c->voxels ) {
+		c->rle = mem_free(c->rle);
+		return;
+	}
+
+	if( c->rle && !c->voxels ){
+		c->voxels = rle_decompress( c->rle );
+		c->rle = mem_free( c->rle );
+		return;
+	}
+
+	if(c->voxels) return;
+
+	logf_error("Uninitalized chunk?");
+	logf_info("vxl %p rle %p", c->voxels, c->rle);
+	logf_info("nullvxl %p rle %p", set->null_chunk->voxels, set->null_chunk->rle);
+	panic();
 }
 
-void chunk_close_rw( struct ChunkMD *c )
+void chunk_close_rw( struct ChunkSet *set, struct ChunkMD *c )
 {
 	pthread_mutex_unlock( &c->mutex_write );
 }
 
 
 
-void chunkset_force_compress( struct ChunkSet* set ){
+void chunkset_force_compress( struct ChunkSet *set ){
 
 	for (int i = 0; i < set->count; ++i)
 	{
 		struct ChunkMD *c = set->chunks+i;
 
-		chunk_open_rw(c);
-		chunk_compress(c);
-		chunk_close_rw(c);
+		//chunk_open_rw(set, c);
+		chunk_compress(set, c);
+		//chunk_close_rw(set, c);
 
 	}
 
 }
 
+/*
+ * THE FOLLOWING CHECKING FUNCTIONS ARE DEPRECATED
+ */
 
 // Checks chunk safety
 // Is the voxel inside of a chunk, or on its edge? 
@@ -471,14 +512,17 @@ int voxel_visible(
 }
 
 
-void chunk_compress(struct ChunkMD *c){
-	void *vxl = c->voxels;
-	c->voxels = NULL;
-
-	c->rle = rle_compress( vxl, c->count  );
-	mem_free( vxl );
+void chunk_compress(struct ChunkSet *set, struct ChunkMD *c){
+		if(!c->voxels) return;
 
-	chunk_close_rw( c );
+		if( c->voxels == set->null_chunk->voxels ) {
+			c->voxels = NULL;
+		} else if( c->rle ) {
+			c->voxels = mem_free(c->voxels);
+		} else {
+			c->rle = rle_compress(c->voxels, c->count);
+			c->voxels = mem_free(c->voxels);
+		}
 }
 
 
@@ -539,20 +583,19 @@ void chunkset_manage(
 		if( c->dirty == 0 && 
 			!c->gl_vbo_local_lod == !c->lod
 		) {
-			if( c->rle == NULL 
+			if( c->voxels
 			&&	c->last_access+1.0 < ctx_time() ){
-				chunk_open_rw(c);
-				chunk_compress(c);
-				chunk_close_rw(c);
-				meshed_count++;
+
+				chunk_compress(set, c);
+
 			}
 			continue;
 		}
 
 		if( c->lod == -1 ) continue;
 		if( c->gl_vbo_local ) continue;
 
-		chunk_open_ro(c);
+		chunk_open_ro(set, c);
 
 		// Mark now, if theres a write while we do stuff, let it happen
 		c->dirty = 0;
@@ -605,9 +648,65 @@ void chunkset_manage(
 				&geom_items2
 			);
 			c->gl_vbo_local_segments[2] = geom_items2;
+			geom_items+=geom_items2;
+
+
+
 
+			memset( mesher[omp_id].mask[buf_id], 0, mesher[omp_id].mask_size );
+			chunk_mask_downsample( set, 1, 
+				mesher[omp_id].work[buf_id],
+				mesher[omp_id].mask[buf_id]
+			);
+
+			geom_items2 = 0;
+			chunk_make_splatlist(
+				set, c, 2,
+				mesher[omp_id].mask[buf_id],
+				&mesher[omp_id].geom[buf_id][geom_items*sizeof(uint16_t)], 
+				&geom_items2
+			);
+			c->gl_vbo_local_segments[3] = geom_items2;
+			geom_items+=geom_items2;
+
+
+
+
+			memset( mesher[omp_id].work[buf_id], 0, mesher[omp_id].work_size );
+			chunk_mask_downsample( set, 1, 
+				mesher[omp_id].mask[buf_id],
+				mesher[omp_id].work[buf_id]
+			);
+
+			geom_items2 = 0;
+			chunk_make_splatlist(
+				set, c, 3,
+				mesher[omp_id].work[buf_id],
+				&mesher[omp_id].geom[buf_id][geom_items*sizeof(uint16_t)], 
+				&geom_items2
+			);
+			c->gl_vbo_local_segments[4] = geom_items2;
 			geom_items+=geom_items2;
 
+
+			memset( mesher[omp_id].mask[buf_id], 0, mesher[omp_id].mask_size );
+			chunk_mask_downsample( set, 1, 
+				mesher[omp_id].work[buf_id],
+				mesher[omp_id].mask[buf_id]
+			);
+
+			geom_items2 = 0;
+			chunk_make_splatlist(
+				set, c, 4,
+				mesher[omp_id].mask[buf_id],
+				&mesher[omp_id].geom[buf_id][geom_items*sizeof(uint16_t)], 
+				&geom_items2
+			);
+			c->gl_vbo_local_segments[5] = geom_items2;
+			geom_items+=geom_items2;
+
+
+
 			//logf_info( "%i %i", c->gl_vbo_local_segments[1], c->gl_vbo_local_segments[2] );
 
 		}
@@ -636,7 +735,7 @@ void chunkset_manage(
 		// POTENTIAL BUG: IF CHUNK CLEARED, MAKE SURE YOU CLEAR THE GEOMETRY!
 		// Making this into a zero can be used to signal about this state!
 
-		chunk_close_ro(c);
+		chunk_close_ro(set, c);
 		meshed_count++;
 	}
 }