-
Notifications
You must be signed in to change notification settings - Fork 200
/
VulkanGaussianBlur.cpp
691 lines (558 loc) · 34.5 KB
/
VulkanGaussianBlur.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
/*!
\brief Shows how to perform a separated Gaussian Blur using a Compute shader and Fragment shader for carrying
out the horizontal and vertical passes respectively.
\file VulkanGaussianBlur.cpp
\author PowerVR by Imagination, Developer Technology Team
\copyright Copyright (c) Imagination Technologies Limited.
*/
#include "PVRShell/PVRShell.h"
#include "PVRVk/ApiObjectsVk.h"
#include "PVRUtils/PVRUtilsVk.h"
// Pre-compiled SPIR-V shader binaries, looked up by name through the asset stream
constexpr char FragShaderSrcFile[] = "FragShader.fsh.spv";
constexpr char VertShaderSrcFile[] = "VertShader.vsh.spv";
constexpr char CompShaderSrcFile[] = "CompShader.csh.spv";

// PVR texture used as the input image of the blur
constexpr char StatueTexFile[] = "Lenna.pvr";

// Kernel size passed to pvr::math::generateGaussianKernelWeightsAndOffsets()
constexpr uint32_t GaussianKernelSize = 19;
/// <summary>Prints the Gaussian weights and offsets provided in the vectors, one value per log line.</summary>
/// <param name="gaussianOffsets">The list of Gaussian offsets to print.</param>
/// <param name="gaussianWeights">The list of Gaussian weights to print.</param>
void printGaussianWeightsAndOffsets(std::vector<double>& gaussianOffsets, std::vector<double>& gaussianWeights)
{
    // size() yields a size_t, which does not match the "%u" conversion on 64-bit targets.
    // Convert explicitly so the variadic argument has the type the format string expects.
    Log(LogLevel::Information, "Number of Gaussian Weights and Offsets = %u;", static_cast<uint32_t>(gaussianWeights.size()));

    Log(LogLevel::Information, "Weights =");
    Log(LogLevel::Information, "{");
    for (const double weight : gaussianWeights) { Log(LogLevel::Information, "%.15f,", weight); }
    Log(LogLevel::Information, "};");

    Log(LogLevel::Information, "Offsets =");
    Log(LogLevel::Information, "{");
    for (const double offset : gaussianOffsets) { Log(LogLevel::Information, "%.15f,", offset); }
    Log(LogLevel::Information, "};");
}
struct DeviceResources
{
pvrvk::Instance instance;
pvr::utils::DebugUtilsCallbacks debugUtilsCallbacks;
pvrvk::Device device;
pvrvk::Queue queues[2];
pvr::utils::vma::Allocator vmaAllocator;
pvrvk::Swapchain swapchain;
pvrvk::DescriptorPool descriptorPool;
pvrvk::CommandPool commandPool;
pvrvk::Buffer graphicsGaussianConfigBuffer;
std::vector<pvrvk::Semaphore> imageAcquiredSemaphores;
std::vector<pvrvk::Semaphore> presentationSemaphores;
std::vector<pvrvk::Fence> perFrameResourcesFences;
std::vector<pvrvk::Framebuffer> onScreenFramebuffer;
std::vector<pvrvk::CommandBuffer> mainCommandBuffers;
std::vector<pvrvk::SecondaryCommandBuffer> uiRendererCommandBuffers;
std::vector<pvrvk::SecondaryCommandBuffer> graphicsCommandBuffers;
std::vector<pvrvk::SecondaryCommandBuffer> computeCommandBuffers;
std::vector<pvrvk::ImageView> horizontallyBlurredImageViews;
// Compute based Horizontal Gaussian Blur pass
std::vector<pvrvk::DescriptorSet> computeDescriptorSets;
// Compute based Horizontal Gaussian Blur pass
std::vector<pvrvk::DescriptorSet> graphicsDescriptorSets;
// Descriptor set layouts
pvrvk::DescriptorSetLayout computeDescriptorSetLayout;
pvrvk::DescriptorSetLayout graphicsDescriptorSetLayout;
pvrvk::ImageView inputImageView;
pvrvk::GraphicsPipeline graphicsPipeline;
pvrvk::ComputePipeline computePipeline;
pvrvk::PipelineLayout computePipelinelayout;
pvrvk::PipelineLayout graphicsPipelinelayout;
pvrvk::Sampler nearestSampler;
pvrvk::Sampler bilinearSampler;
// UIRenderer used to display text
pvr::ui::UIRenderer uiRenderer;
pvrvk::PipelineCache pipelineCache;
~DeviceResources()
{
if (device)
{
device->waitIdle();
uint32_t l = swapchain->getSwapchainLength();
for (uint32_t i = 0; i < l; ++i)
{
if (perFrameResourcesFences[i]) perFrameResourcesFences[i]->wait();
}
}
}
};
/// <summary>Implements the Shell callbacks for the Gaussian Blur demo.</summary>
class VulkanGaussianBlur : public pvr::Shell
{
private:
    // All Vulkan objects; created in initView() and destroyed in releaseView()
    std::unique_ptr<DeviceResources> _deviceResources;

    // Index used to select the per-frame semaphores in renderFrame()
    uint32_t _frameId = 0;

    // Reset to 0 in initApplication()
    uint32_t _queueIndex = 0;

    // Linear Optimised Gaussian offsets and weights
    std::vector<double> _linearGaussianOffsets;
    std::vector<double> _linearGaussianWeights;

    // Gaussian offsets and weights
    std::vector<double> _gaussianOffsets;
    std::vector<double> _gaussianWeights;

    // Size in bytes of the uniform buffer used by the graphics pass (set in createResources())
    uint32_t _graphicsSsboSize = 0;

    // True when two queues of the same family were retrieved in initView()
    bool _useMultiQueue = false;

    // Number of swapchain images (set in initView())
    uint32_t _swapchainLength = 0;

public:
    virtual pvr::Result initApplication();
    virtual pvr::Result initView();
    virtual pvr::Result releaseView();
    virtual pvr::Result quitApplication();
    virtual pvr::Result renderFrame();

    void initialiseGaussianWeightsAndOffsets();
    void loadTextures(pvrvk::CommandBuffer& cmdBuffers);
    void createResources();
    void createPipelines();
    void updateResources();
    void recordCommandBuffer();
};
/// <summary>Loads the textures used throughout the demo. The commands required for uploading image data into the
/// texture are recorded into the provided command buffer.</summary>
/// <param name="cmdBuffers">The commands required for uploading image data into the texture are recorded into this command buffer.</param>
void VulkanGaussianBlur::loadTextures(pvrvk::CommandBuffer& cmdBuffers)
{
    // Load the Texture PVR file from the disk
    pvr::Texture texture = pvr::textureLoad(*getAssetStream(StatueTexFile), pvr::TextureFileFormat::PVR);

    // Upload the input image. It is used both as a sampled image (graphics pass) and as a storage image (compute pass).
    _deviceResources->inputImageView =
        pvr::utils::uploadImageAndView(_deviceResources->device, texture, true, cmdBuffers, pvrvk::ImageUsageFlags::e_SAMPLED_BIT | pvrvk::ImageUsageFlags::e_STORAGE_BIT,
            pvrvk::ImageLayout::e_SHADER_READ_ONLY_OPTIMAL, _deviceResources->vmaAllocator, _deviceResources->vmaAllocator);

    // Every intermediate image shares the format and dimensions of the input texture, so describe it once.
    const pvrvk::ImageCreateInfo intermediateImageInfo(pvrvk::ImageType::e_2D,
        pvr::utils::convertToPVRVkPixelFormat(texture.getPixelFormat(), texture.getColorSpace(), texture.getChannelType()),
        pvrvk::Extent3D(texture.getWidth(), texture.getHeight(), 1u), pvrvk::ImageUsageFlags::e_STORAGE_BIT | pvrvk::ImageUsageFlags::e_SAMPLED_BIT);

    // Create 1 intermediate image per frame.
    for (uint32_t i = 0; i < _swapchainLength; i++)
    {
        pvrvk::Image intermediateTexture = pvr::utils::createImage(_deviceResources->device, intermediateImageInfo, pvrvk::MemoryPropertyFlags::e_DEVICE_LOCAL_BIT,
            pvrvk::MemoryPropertyFlags::e_DEVICE_LOCAL_BIT, _deviceResources->vmaAllocator, pvr::utils::vma::AllocationCreateFlags::e_DEDICATED_MEMORY_BIT);

        // transfer the layout from UNDEFINED to SHADER_READ_ONLY_OPTIMAL
        pvr::utils::setImageLayout(intermediateTexture, pvrvk::ImageLayout::e_UNDEFINED, pvrvk::ImageLayout::e_SHADER_READ_ONLY_OPTIMAL, cmdBuffers);

        _deviceResources->horizontallyBlurredImageViews[i] = _deviceResources->device->createImageView(pvrvk::ImageViewCreateInfo(intermediateTexture));
    }
}
/// <summary>Code in createResources() loads the compute, fragment and vertex shaders and associated buffers used by them.</ summary>
void VulkanGaussianBlur::createResources()
{
// Create the compute descriptor set layout
{
pvrvk::DescriptorSetLayoutCreateInfo descriptorSetLayoutParams;
descriptorSetLayoutParams.setBinding(0, pvrvk::DescriptorType::e_STORAGE_IMAGE, 1, pvrvk::ShaderStageFlags::e_COMPUTE_BIT);
descriptorSetLayoutParams.setBinding(1, pvrvk::DescriptorType::e_STORAGE_IMAGE, 1, pvrvk::ShaderStageFlags::e_COMPUTE_BIT);
_deviceResources->computeDescriptorSetLayout = _deviceResources->device->createDescriptorSetLayout(descriptorSetLayoutParams);
}
// Create the Compute Pipeline layout
{
pvrvk::PipelineLayoutCreateInfo createInfo;
createInfo.addDescSetLayout(_deviceResources->computeDescriptorSetLayout);
_deviceResources->computePipelinelayout = _deviceResources->device->createPipelineLayout(createInfo);
}
// Create the graphics descriptor set layout
{
pvrvk::DescriptorSetLayoutCreateInfo descriptorSetLayoutParams;
descriptorSetLayoutParams.setBinding(0, pvrvk::DescriptorType::e_UNIFORM_BUFFER, 1, pvrvk::ShaderStageFlags::e_VERTEX_BIT | pvrvk::ShaderStageFlags::e_FRAGMENT_BIT);
descriptorSetLayoutParams.setBinding(1, pvrvk::DescriptorType::e_COMBINED_IMAGE_SAMPLER, 1, pvrvk::ShaderStageFlags::e_FRAGMENT_BIT);
descriptorSetLayoutParams.setBinding(2, pvrvk::DescriptorType::e_COMBINED_IMAGE_SAMPLER, 1, pvrvk::ShaderStageFlags::e_FRAGMENT_BIT);
_deviceResources->graphicsDescriptorSetLayout = _deviceResources->device->createDescriptorSetLayout(descriptorSetLayoutParams);
}
// Create the Graphics Pipeline layout
{
pvrvk::PipelineLayoutCreateInfo createInfo;
createInfo.addDescSetLayout(_deviceResources->graphicsDescriptorSetLayout);
_deviceResources->graphicsPipelinelayout = _deviceResources->device->createPipelineLayout(createInfo);
}
// Create the samplers
{
pvrvk::SamplerCreateInfo samplerInfo;
samplerInfo.wrapModeU = samplerInfo.wrapModeV = samplerInfo.wrapModeW = pvrvk::SamplerAddressMode::e_CLAMP_TO_EDGE;
samplerInfo.magFilter = pvrvk::Filter::e_NEAREST;
samplerInfo.minFilter = pvrvk::Filter::e_NEAREST;
samplerInfo.mipMapMode = pvrvk::SamplerMipmapMode::e_NEAREST;
_deviceResources->nearestSampler = _deviceResources->device->createSampler(samplerInfo);
samplerInfo.magFilter = pvrvk::Filter::e_LINEAR;
samplerInfo.minFilter = pvrvk::Filter::e_LINEAR;
_deviceResources->bilinearSampler = _deviceResources->device->createSampler(samplerInfo);
}
// Create the buffer used in the vertical fragment pass
{
_graphicsSsboSize = static_cast<uint32_t>(pvr::getSize(pvr::GpuDatatypes::vec2));
_deviceResources->graphicsGaussianConfigBuffer =
pvr::utils::createBuffer(_deviceResources->device, pvrvk::BufferCreateInfo(_graphicsSsboSize, pvrvk::BufferUsageFlags::e_UNIFORM_BUFFER_BIT),
pvrvk::MemoryPropertyFlags::e_HOST_VISIBLE_BIT, pvrvk::MemoryPropertyFlags::e_DEVICE_LOCAL_BIT | pvrvk::MemoryPropertyFlags::e_HOST_COHERENT_BIT,
_deviceResources->vmaAllocator, pvr::utils::vma::AllocationCreateFlags::e_MAPPED_BIT);
}
{
// Update the descriptor sets
std::vector<pvrvk::WriteDescriptorSet> writeDescSets;
for (uint32_t i = 0; i < _swapchainLength; ++i)
{
// Compute descriptor sets
{
_deviceResources->computeDescriptorSets[i] = _deviceResources->descriptorPool->allocateDescriptorSet(_deviceResources->computeDescriptorSetLayout);
writeDescSets.push_back(pvrvk::WriteDescriptorSet(pvrvk::DescriptorType::e_STORAGE_IMAGE, _deviceResources->computeDescriptorSets[i], 0)
.setImageInfo(0, pvrvk::DescriptorImageInfo(_deviceResources->inputImageView, pvrvk::ImageLayout::e_GENERAL)));
writeDescSets.push_back(pvrvk::WriteDescriptorSet(pvrvk::DescriptorType::e_STORAGE_IMAGE, _deviceResources->computeDescriptorSets[i], 1)
.setImageInfo(0, pvrvk::DescriptorImageInfo(_deviceResources->horizontallyBlurredImageViews[i], pvrvk::ImageLayout::e_GENERAL)));
}
// Graphics descriptor sets
{
_deviceResources->graphicsDescriptorSets[i] = _deviceResources->descriptorPool->allocateDescriptorSet(_deviceResources->graphicsDescriptorSetLayout);
writeDescSets.push_back(pvrvk::WriteDescriptorSet(pvrvk::DescriptorType::e_UNIFORM_BUFFER, _deviceResources->graphicsDescriptorSets[i], 0)
.setBufferInfo(0, pvrvk::DescriptorBufferInfo(_deviceResources->graphicsGaussianConfigBuffer, 0, _graphicsSsboSize)));
writeDescSets.push_back(
pvrvk::WriteDescriptorSet(pvrvk::DescriptorType::e_COMBINED_IMAGE_SAMPLER, _deviceResources->graphicsDescriptorSets[i], 1)
.setImageInfo(0, pvrvk::DescriptorImageInfo(_deviceResources->inputImageView, _deviceResources->nearestSampler, pvrvk::ImageLayout::e_SHADER_READ_ONLY_OPTIMAL)));
writeDescSets.push_back(pvrvk::WriteDescriptorSet(pvrvk::DescriptorType::e_COMBINED_IMAGE_SAMPLER, _deviceResources->graphicsDescriptorSets[i], 2)
.setImageInfo(0,
pvrvk::DescriptorImageInfo(_deviceResources->horizontallyBlurredImageViews[i], _deviceResources->bilinearSampler,
pvrvk::ImageLayout::e_SHADER_READ_ONLY_OPTIMAL)));
}
}
_deviceResources->device->updateDescriptorSets(writeDescSets.data(), static_cast<uint32_t>(writeDescSets.size()), nullptr, 0);
}
}
/// <summary>Updates the buffers used by the compute and graphics passes for controlling the Gaussian Blurs.</summary>
void VulkanGaussianBlur::updateResources()
{
// Update the Gaussian configuration buffer used for the graphics based vertical pass
{
uint32_t windowWidth = this->getWidth();
float inverseImageHeight = 1.0f / _deviceResources->inputImageView->getCreateInfo().getImage()->getHeight();
glm::vec2 config = glm::vec2(windowWidth, inverseImageHeight);
memcpy(static_cast<char*>(_deviceResources->graphicsGaussianConfigBuffer->getDeviceMemory()->getMappedData()), &config,
static_cast<size_t>(pvr::getSize(pvr::GpuDatatypes::vec2)));
// if the memory property flags used by the buffers' device memory do not contain e_HOST_COHERENT_BIT then we must flush the memory
if (static_cast<uint32_t>(_deviceResources->graphicsGaussianConfigBuffer->getDeviceMemory()->getMemoryFlags() & pvrvk::MemoryPropertyFlags::e_HOST_COHERENT_BIT) == 0)
{
_deviceResources->graphicsGaussianConfigBuffer->getDeviceMemory()->flushRange(0, _graphicsSsboSize);
}
}
}
/// <summary>Loads and compiles the shaders and create the pipelines.</summary>
void VulkanGaussianBlur::createPipelines()
{
// Load the shaders from their files
pvrvk::ShaderModule computeShader = _deviceResources->device->createShaderModule(pvrvk::ShaderModuleCreateInfo(getAssetStream(CompShaderSrcFile)->readToEnd<uint32_t>()));
pvrvk::ShaderModule vertexShader = _deviceResources->device->createShaderModule(pvrvk::ShaderModuleCreateInfo(getAssetStream(VertShaderSrcFile)->readToEnd<uint32_t>()));
pvrvk::ShaderModule fragShader = _deviceResources->device->createShaderModule(pvrvk::ShaderModuleCreateInfo(getAssetStream(FragShaderSrcFile)->readToEnd<uint32_t>()));
// Create the compute pipeline
{
pvrvk::ComputePipelineCreateInfo createInfo;
createInfo.computeShader.setShader(computeShader);
createInfo.pipelineLayout = _deviceResources->computePipelinelayout;
_deviceResources->computePipeline = _deviceResources->device->createComputePipeline(createInfo, _deviceResources->pipelineCache);
}
// Create the graphics pipeline
{
pvrvk::GraphicsPipelineCreateInfo createInfo;
const pvrvk::Rect2D rect(0, 0, _deviceResources->swapchain->getDimension().getWidth(), _deviceResources->swapchain->getDimension().getHeight());
createInfo.viewport.setViewportAndScissor(0,
pvrvk::Viewport(static_cast<float>(rect.getOffset().getX()), static_cast<float>(rect.getOffset().getY()), static_cast<float>(rect.getExtent().getWidth()),
static_cast<float>(rect.getExtent().getHeight())),
rect);
pvrvk::PipelineColorBlendAttachmentState colorAttachmentState;
colorAttachmentState.setBlendEnable(false);
createInfo.vertexShader.setShader(vertexShader);
createInfo.fragmentShader.setShader(fragShader);
// enable back face culling
createInfo.rasterizer.setCullMode(pvrvk::CullModeFlags::e_FRONT_BIT);
// set counter clockwise winding order for front faces
createInfo.rasterizer.setFrontFaceWinding(pvrvk::FrontFace::e_COUNTER_CLOCKWISE);
// setup vertex inputs
createInfo.vertexInput.clear();
createInfo.inputAssembler.setPrimitiveTopology(pvrvk::PrimitiveTopology::e_TRIANGLE_STRIP);
createInfo.colorBlend.setAttachmentState(0, colorAttachmentState);
createInfo.pipelineLayout = _deviceResources->graphicsPipelinelayout;
createInfo.renderPass = _deviceResources->onScreenFramebuffer[0]->getRenderPass();
createInfo.subpass = 0;
_deviceResources->graphicsPipeline = _deviceResources->device->createGraphicsPipeline(createInfo, _deviceResources->pipelineCache);
}
}
/// <summary>Pre record the commands.</summary>
void VulkanGaussianBlur::recordCommandBuffer()
{
const pvrvk::ClearValue clearValue[] = { pvrvk::ClearValue(0.0f, 0.0f, 0.0f, 1.0f) };
for (uint32_t i = 0; i < _swapchainLength; ++i)
{
// UI Renderer
_deviceResources->uiRendererCommandBuffers[i]->begin(_deviceResources->onScreenFramebuffer[i], 0, pvrvk::CommandBufferUsageFlags::e_RENDER_PASS_CONTINUE_BIT);
_deviceResources->uiRenderer.beginRendering(_deviceResources->uiRendererCommandBuffers[i]);
_deviceResources->uiRenderer.getSdkLogo()->render();
_deviceResources->uiRenderer.getDefaultTitle()->render();
_deviceResources->uiRenderer.getDefaultDescription()->render();
_deviceResources->uiRenderer.endRendering();
_deviceResources->uiRendererCommandBuffers[i]->end();
// Compute Command Buffer
{
_deviceResources->computeCommandBuffers[i]->begin();
pvr::utils::beginCommandBufferDebugLabel(_deviceResources->computeCommandBuffers[i], pvrvk::DebugUtilsLabel("Compute Blur Horizontal"));
{
pvrvk::MemoryBarrierSet barrierSet;
// Set up a barrier to transition the image layouts from e_SHADER_READ_ONLY_OPTIMAL to e_GENERAL
barrierSet.addBarrier(pvrvk::ImageMemoryBarrier(pvrvk::AccessFlags::e_SHADER_READ_BIT, pvrvk::AccessFlags::e_SHADER_WRITE_BIT,
_deviceResources->horizontallyBlurredImageViews[i]->getImage(), pvrvk::ImageSubresourceRange(pvrvk::ImageAspectFlags::e_COLOR_BIT),
pvrvk::ImageLayout::e_SHADER_READ_ONLY_OPTIMAL, pvrvk::ImageLayout::e_GENERAL, _deviceResources->queues[0]->getFamilyIndex(),
_deviceResources->queues[0]->getFamilyIndex()));
barrierSet.addBarrier(pvrvk::ImageMemoryBarrier(pvrvk::AccessFlags::e_SHADER_READ_BIT, pvrvk::AccessFlags::e_SHADER_WRITE_BIT,
_deviceResources->inputImageView->getImage(), pvrvk::ImageSubresourceRange(pvrvk::ImageAspectFlags::e_COLOR_BIT), pvrvk::ImageLayout::e_SHADER_READ_ONLY_OPTIMAL,
pvrvk::ImageLayout::e_GENERAL, _deviceResources->queues[0]->getFamilyIndex(), _deviceResources->queues[0]->getFamilyIndex()));
_deviceResources->computeCommandBuffers[i]->pipelineBarrier(pvrvk::PipelineStageFlags::e_FRAGMENT_SHADER_BIT, pvrvk::PipelineStageFlags::e_COMPUTE_SHADER_BIT, barrierSet);
}
// Bind the compute pipeline & the descriptor set.
_deviceResources->computeCommandBuffers[i]->bindPipeline(_deviceResources->computePipeline);
_deviceResources->computeCommandBuffers[i]->bindDescriptorSet(
pvrvk::PipelineBindPoint::e_COMPUTE, _deviceResources->computePipelinelayout, 0, _deviceResources->computeDescriptorSets[i]);
// dispatch x = image.height / 32
// dispatch y = 1
// dispatch z = 1
_deviceResources->computeCommandBuffers[i]->dispatch(getHeight() / 32, 1, 1);
{
pvrvk::MemoryBarrierSet barrierSet;
// Set up a barrier to pass the image from our compute shader to fragment shader.
barrierSet.addBarrier(pvrvk::ImageMemoryBarrier(pvrvk::AccessFlags::e_SHADER_WRITE_BIT, pvrvk::AccessFlags::e_SHADER_READ_BIT,
_deviceResources->horizontallyBlurredImageViews[i]->getImage(), pvrvk::ImageSubresourceRange(pvrvk::ImageAspectFlags::e_COLOR_BIT), pvrvk::ImageLayout::e_GENERAL,
pvrvk::ImageLayout::e_SHADER_READ_ONLY_OPTIMAL, _deviceResources->queues[0]->getFamilyIndex(), _deviceResources->queues[0]->getFamilyIndex()));
barrierSet.addBarrier(pvrvk::ImageMemoryBarrier(pvrvk::AccessFlags::e_SHADER_READ_BIT, pvrvk::AccessFlags::e_SHADER_READ_BIT,
_deviceResources->inputImageView->getImage(), pvrvk::ImageSubresourceRange(pvrvk::ImageAspectFlags::e_COLOR_BIT), pvrvk::ImageLayout::e_GENERAL,
pvrvk::ImageLayout::e_SHADER_READ_ONLY_OPTIMAL, _deviceResources->queues[0]->getFamilyIndex(), _deviceResources->queues[0]->getFamilyIndex()));
_deviceResources->computeCommandBuffers[i]->pipelineBarrier(pvrvk::PipelineStageFlags::e_COMPUTE_SHADER_BIT, pvrvk::PipelineStageFlags::e_FRAGMENT_SHADER_BIT, barrierSet);
}
pvr::utils::endCommandBufferDebugLabel(_deviceResources->computeCommandBuffers[i]);
_deviceResources->computeCommandBuffers[i]->end();
}
// Graphics Command Buffer
{
_deviceResources->graphicsCommandBuffers[i]->begin(_deviceResources->onScreenFramebuffer[i], 0, pvrvk::CommandBufferUsageFlags::e_RENDER_PASS_CONTINUE_BIT);
pvr::utils::beginCommandBufferDebugLabel(_deviceResources->graphicsCommandBuffers[i], pvrvk::DebugUtilsLabel("Linear Gaussian Blur (vertical)"));
_deviceResources->graphicsCommandBuffers[i]->bindPipeline(_deviceResources->graphicsPipeline);
_deviceResources->graphicsCommandBuffers[i]->bindDescriptorSet(
pvrvk::PipelineBindPoint::e_GRAPHICS, _deviceResources->graphicsPipelinelayout, 0, _deviceResources->graphicsDescriptorSets[i]);
_deviceResources->graphicsCommandBuffers[i]->draw(0, 3);
pvr::utils::endCommandBufferDebugLabel(_deviceResources->graphicsCommandBuffers[i]);
_deviceResources->graphicsCommandBuffers[i]->end();
}
// Begin recording to the command buffer
_deviceResources->mainCommandBuffers[i]->begin();
_deviceResources->mainCommandBuffers[i]->executeCommands(_deviceResources->computeCommandBuffers[i]);
_deviceResources->mainCommandBuffers[i]->beginRenderPass(
_deviceResources->onScreenFramebuffer[i], pvrvk::Rect2D(0, 0, getWidth(), getHeight()), false, clearValue, ARRAY_SIZE(clearValue));
_deviceResources->mainCommandBuffers[i]->executeCommands(_deviceResources->graphicsCommandBuffers[i]);
// enqueue the command buffer containing ui renderer commands
_deviceResources->mainCommandBuffers[i]->executeCommands(_deviceResources->uiRendererCommandBuffers[i]);
// End RenderPass and recording.
_deviceResources->mainCommandBuffers[i]->endRenderPass();
_deviceResources->mainCommandBuffers[i]->end();
}
}
/// <summary>Generates and logs the two sets of Gaussian weights and offsets kept by the demo: the full set and the set
/// optimised for linear sampling.</summary>
void VulkanGaussianBlur::initialiseGaussianWeightsAndOffsets()
{
    // Full set of Gaussian weights and offsets, to be used in the compute shader
    pvr::math::generateGaussianKernelWeightsAndOffsets(GaussianKernelSize, false, false, _gaussianWeights, _gaussianOffsets);
    Log(LogLevel::Information, "Gaussian Weights and Offsets:");
    printGaussianWeightsAndOffsets(_gaussianOffsets, _gaussianWeights);

    // Same kernel size, this time generating the set optimised for linear sampling
    pvr::math::generateGaussianKernelWeightsAndOffsets(GaussianKernelSize, false, true, _linearGaussianWeights, _linearGaussianOffsets);
    Log(LogLevel::Information, "Linear Sampling Optimized Gaussian Weights and Offsets:");
    printGaussianWeightsAndOffsets(_linearGaussianOffsets, _linearGaussianWeights);
}
/// <summary>Called by the Shell once per run, before the rendering context is created. Resets the frame and queue
/// indices, requests a surface without depth or stencil bits and generates the Gaussian weights and offsets. It is not
/// called again if the rendering context is lost.</summary>
/// <returns>Result::Success if no error occurred.</returns>
pvr::Result VulkanGaussianBlur::initApplication()
{
    // Start from the first frame and the first queue
    _queueIndex = 0;
    _frameId = 0;

    // Neither a depth nor a stencil buffer is requested
    setDepthBitsPerPixel(0);
    setStencilBitsPerPixel(0);

    // The weights and offsets do not depend on the rendering context, so they are generated once here
    initialiseGaussianWeightsAndOffsets();

    return pvr::Result::Success;
}
/// <summary>Code in initView() will be called by Shell upon initialization or after a change in the rendering context.
/// Used to initialize variables that are dependent on the rendering context (e.g. textures, vertex buffers, etc.)</summary>
/// <returns>Result::Success if no error occurred, Result::UnknownError if no Vulkan physical device is available.</returns>
pvr::Result VulkanGaussianBlur::initView()
{
    _deviceResources = std::make_unique<DeviceResources>();

    // Create a Vulkan 1.0 instance and retrieve compatible physical devices
    pvr::utils::VulkanVersion VulkanVersion(1, 0, 0);
    _deviceResources->instance = pvr::utils::createInstance(this->getApplicationName(), VulkanVersion, pvr::utils::InstanceExtensions(VulkanVersion));

    if (_deviceResources->instance->getNumPhysicalDevices() == 0)
    {
        setExitMessage("Unable to find a compatible Vulkan physical device.");
        return pvr::Result::UnknownError;
    }

    // Create the surface
    pvrvk::Surface surface =
        pvr::utils::createSurface(_deviceResources->instance, _deviceResources->instance->getPhysicalDevice(0), this->getWindow(), this->getDisplay(), this->getConnection());

    // Create a default set of debug utils messengers or debug callbacks using either VK_EXT_debug_utils or VK_EXT_debug_report respectively
    _deviceResources->debugUtilsCallbacks = pvr::utils::createDebugUtilsCallbacks(_deviceResources->instance);

    // Request two queues which both support graphics, compute and presentation to the surface
    pvr::utils::QueuePopulateInfo queueCreateInfos[] = {
        { pvrvk::QueueFlags::e_GRAPHICS_BIT | pvrvk::QueueFlags::e_COMPUTE_BIT, surface }, // Queue 0
        { pvrvk::QueueFlags::e_GRAPHICS_BIT | pvrvk::QueueFlags::e_COMPUTE_BIT, surface } // Queue 1
    };
    pvr::utils::QueueAccessInfo queueAccessInfos[2];
    _deviceResources->device = pvr::utils::createDeviceAndQueues(_deviceResources->instance->getPhysicalDevice(0), queueCreateInfos, 2, queueAccessInfos);

    _deviceResources->queues[0] = _deviceResources->device->getQueue(queueAccessInfos[0].familyId, queueAccessInfos[0].queueId);

    // In the future we may want to improve our flexibility with regards to making use of multiple queues but for now to support multi queue the queue must support
    // Graphics + Compute + WSI support.
    // Other multi queue approaches may be possible i.e. making use of additional queues which do not support graphics/WSI
    _useMultiQueue = false;

    // A family or queue id of -1 marks a request that could not be satisfied. The comparison is spelled out as an
    // unsigned value so that it does not rely on an implicit signed to unsigned conversion.
    const uint32_t invalidQueueId = static_cast<uint32_t>(-1);
    if (queueAccessInfos[1].familyId != invalidQueueId && queueAccessInfos[1].queueId != invalidQueueId)
    {
        _deviceResources->queues[1] = _deviceResources->device->getQueue(queueAccessInfos[1].familyId, queueAccessInfos[1].queueId);

        if (_deviceResources->queues[0]->getFamilyIndex() == _deviceResources->queues[1]->getFamilyIndex())
        {
            _useMultiQueue = true;
            Log(LogLevel::Information, "Multiple queues support e_GRAPHICS_BIT + e_COMPUTE_BIT + WSI. These queues will be used to ping-pong work each frame");
        }
        else
        {
            Log(LogLevel::Information, "Queues are from a different Family. We cannot ping-pong work each frame");
        }
    }
    else
    {
        Log(LogLevel::Information, "Only a single queue supports e_GRAPHICS_BIT + e_COMPUTE_BIT + WSI. We cannot ping-pong work each frame");
    }

    _deviceResources->vmaAllocator = pvr::utils::vma::createAllocator(pvr::utils::vma::AllocatorCreateInfo(_deviceResources->device));

    pvrvk::SurfaceCapabilitiesKHR surfaceCapabilities = _deviceResources->instance->getPhysicalDevice(0)->getSurfaceCapabilities(surface);

    // validate the supported swapchain image usage
    pvrvk::ImageUsageFlags swapchainImageUsage = pvrvk::ImageUsageFlags::e_COLOR_ATTACHMENT_BIT;
    if (pvr::utils::isImageUsageSupportedBySurface(surfaceCapabilities, pvrvk::ImageUsageFlags::e_TRANSFER_SRC_BIT))
    {
        swapchainImageUsage |= pvrvk::ImageUsageFlags::e_TRANSFER_SRC_BIT;
    }

    // Create the swapchain together with its render pass and framebuffers (no depth buffer)
    auto swapChainCreateOutput = pvr::utils::createSwapchainRenderpassFramebuffers(_deviceResources->device, surface, getDisplayAttributes(),
        pvr::utils::CreateSwapchainParameters().setAllocator(_deviceResources->vmaAllocator).setColorImageUsageFlags(swapchainImageUsage).enableDepthBuffer(false));

    _deviceResources->swapchain = swapChainCreateOutput.swapchain;
    _deviceResources->onScreenFramebuffer = swapChainCreateOutput.framebuffer;

    // Size every per swapchain image container
    _swapchainLength = _deviceResources->swapchain->getSwapchainLength();

    _deviceResources->imageAcquiredSemaphores.resize(_swapchainLength);
    _deviceResources->presentationSemaphores.resize(_swapchainLength);
    _deviceResources->perFrameResourcesFences.resize(_swapchainLength);
    _deviceResources->mainCommandBuffers.resize(_swapchainLength);
    _deviceResources->uiRendererCommandBuffers.resize(_swapchainLength);
    _deviceResources->graphicsCommandBuffers.resize(_swapchainLength);
    _deviceResources->computeCommandBuffers.resize(_swapchainLength);
    _deviceResources->horizontallyBlurredImageViews.resize(_swapchainLength);
    _deviceResources->computeDescriptorSets.resize(_swapchainLength);
    _deviceResources->graphicsDescriptorSets.resize(_swapchainLength);

    _deviceResources->commandPool = _deviceResources->device->createCommandPool(pvrvk::CommandPoolCreateInfo(_deviceResources->queues[0]->getFamilyIndex()));

    _deviceResources->descriptorPool = _deviceResources->device->createDescriptorPool(
        pvrvk::DescriptorPoolCreateInfo(static_cast<uint16_t>(8 * _swapchainLength)).addDescriptorInfo(pvrvk::DescriptorType::e_STORAGE_IMAGE, static_cast<uint16_t>(8 * _swapchainLength)));

    // Create per frame resource
    for (uint32_t i = 0; i < _swapchainLength; ++i)
    {
        _deviceResources->mainCommandBuffers[i] = _deviceResources->commandPool->allocateCommandBuffer();
        _deviceResources->uiRendererCommandBuffers[i] = _deviceResources->commandPool->allocateSecondaryCommandBuffer();
        _deviceResources->graphicsCommandBuffers[i] = _deviceResources->commandPool->allocateSecondaryCommandBuffer();
        _deviceResources->computeCommandBuffers[i] = _deviceResources->commandPool->allocateSecondaryCommandBuffer();

        _deviceResources->presentationSemaphores[i] = _deviceResources->device->createSemaphore();
        _deviceResources->imageAcquiredSemaphores[i] = _deviceResources->device->createSemaphore();
        // Fences start signalled so that the first wait on them in renderFrame() returns immediately
        _deviceResources->perFrameResourcesFences[i] = _deviceResources->device->createFence(pvrvk::FenceCreateFlags::e_SIGNALED_BIT);
    }

    // Allocate a single use command buffer to upload the texture to the GPU
    pvrvk::CommandBuffer uploadBuffer = _deviceResources->commandPool->allocateCommandBuffer();
    uploadBuffer->setObjectName("InitView : Resource Upload Command Buffer");
    uploadBuffer->begin(pvrvk::CommandBufferUsageFlags::e_ONE_TIME_SUBMIT_BIT);
    loadTextures(uploadBuffer);
    uploadBuffer->end();

    // Submit the image upload command buffer and wait for it to complete before the images are used
    pvrvk::SubmitInfo submit;
    submit.commandBuffers = &uploadBuffer;
    submit.numCommandBuffers = 1;
    _deviceResources->queues[0]->submit(&submit, 1);
    _deviceResources->queues[0]->waitIdle();

    // Create the pipeline cache
    _deviceResources->pipelineCache = _deviceResources->device->createPipelineCache();

    createResources();
    createPipelines();
    updateResources();

    // Initialise the UI renderer for subpass 0 of the on screen render pass
    _deviceResources->uiRenderer.init(getWidth(), getHeight(), isFullScreen(), _deviceResources->onScreenFramebuffer[0]->getRenderPass(), 0,
        getBackBufferColorspace() == pvr::ColorSpace::sRGB, _deviceResources->commandPool, _deviceResources->queues[0]);

    _deviceResources->uiRenderer.getDefaultTitle()->setText("GaussianBlur");
    _deviceResources->uiRenderer.getDefaultTitle()->commitUpdates();
    _deviceResources->uiRenderer.getDefaultDescription()->setText("Left: Original Texture\n"
                                                                  "Right: Gaussian Blurred Texture");
    _deviceResources->uiRenderer.getDefaultDescription()->commitUpdates();

    // Everything the command buffers reference now exists, so they can be pre-recorded
    recordCommandBuffer();

    return pvr::Result::Success;
}
/// <summary>Code in releaseView() will be called by Shell when the application quits or before a change in the rendering context.</summary>
/// <returns>Result::Success if no error occurred.</returns>
pvr::Result VulkanGaussianBlur::releaseView()
{
	// Let go of everything held through _deviceResources in one step; this function
	// performs no other teardown of its own.
	_deviceResources.reset();

	// This function has no failure path: it always reports success to the Shell.
	return pvr::Result::Success;
}
/// <summary>Code in quitApplication() will be called by Shell once per run, just before exiting the program.
/// quitApplication() will not be called every time the rendering context is lost, only before application exit.</summary>
/// <returns>Result::Success if no error occurred.</returns>
pvr::Result VulkanGaussianBlur::quitApplication()
{
	// No work is done at application exit in this demo; the function only reports success.
	return pvr::Result::Success;
}
/// <summary>Main rendering loop function of the program. The shell will call this function every frame</summary>
/// <returns>Result::Success if no error occurred.</returns>
pvr::Result VulkanGaussianBlur::renderFrame()
{
	// Acquire the next swapchain image with the maximum possible timeout value. The semaphore passed here
	// (indexed by _frameId) is the one the queue submission below waits on.
	const uint64_t acquireTimeout = static_cast<uint64_t>(-1);
	_deviceResources->swapchain->acquireNextImage(acquireTimeout, _deviceResources->imageAcquiredSemaphores[_frameId]);

	const uint32_t swapchainIndex = _deviceResources->swapchain->getSwapchainIndex();

	// Wait on, then reset, the fence associated with this swapchain image. The same fence is handed to the
	// submission further down.
	auto& frameFence = _deviceResources->perFrameResourcesFences[swapchainIndex];
	frameFence->wait();
	frameFence->reset();

	// Describe the submission: one pre-recorded main command buffer, one semaphore to wait on, one to signal.
	pvrvk::PipelineStageFlags waitStages = pvrvk::PipelineStageFlags::e_FRAGMENT_SHADER_BIT | pvrvk::PipelineStageFlags::e_COMPUTE_SHADER_BIT;

	pvrvk::SubmitInfo frameSubmit;
	frameSubmit.numCommandBuffers = 1;
	frameSubmit.commandBuffers = &_deviceResources->mainCommandBuffers[swapchainIndex];
	frameSubmit.numWaitSemaphores = 1;
	frameSubmit.waitSemaphores = &_deviceResources->imageAcquiredSemaphores[_frameId];
	frameSubmit.waitDstStageMask = &waitStages;
	frameSubmit.numSignalSemaphores = 1;
	frameSubmit.signalSemaphores = &_deviceResources->presentationSemaphores[_frameId];

	// Ping-ponging between multiple VkQueues
	// --------------------------------------
	// In Vulkan a pipeline barrier is only observed within the VkQueue it is submitted to.
	// This demo uses a Compute -> Fragment chain which, left unattended, can introduce compute/graphics
	// pipeline bubbles, so it is easy to end up with per-frame workloads being serialised like this:
	//   Compute Workload          |1----|        |2----|
	//   Fragment Workload  |1----|        |2---|        |3---|
	// The Compute -> Fragment barrier placed after the compute stage has a further, less obvious consequence:
	// on a single VkQueue it orders *all* compute work before the barrier against *all* fragment work after it.
	// So even where compute bubbles could in principle be filled with fragment work, the barrier forbids
	// that interleaving.
	// This is where Vulkan really shines over OpenGL ES: work submission is under explicit application control.
	// Two VkQueue objects are submitted to in a ping-ponged fashion. Each queue only observes the barriers
	// in the command buffers submitted to it, so nothing is enforced between the two sets of commands other
	// than the presentation synchronisation logic. That simple change permits the following scheduling:
	//   Compute Workload          |1----||2----||3----|
	//   Fragment Workload  |1----||2----||3----||4----|
	auto& activeQueue = _deviceResources->queues[_queueIndex];
	activeQueue->submit(&frameSubmit, 1, frameFence);

	// Optionally capture the image that was just rendered, using the queue it was submitted to
	if (this->shouldTakeScreenshot())
	{
		pvr::utils::takeScreenshot(activeQueue, _deviceResources->commandPool, _deviceResources->swapchain, swapchainIndex, this->getScreenshotFileName(),
			_deviceResources->vmaAllocator, _deviceResources->vmaAllocator);
	}

	// Present the image once the submission has signalled its presentation semaphore
	pvrvk::PresentInfo framePresent;
	framePresent.numSwapchains = 1;
	framePresent.swapchains = &_deviceResources->swapchain;
	framePresent.imageIndices = &swapchainIndex;
	framePresent.numWaitSemaphores = 1;
	framePresent.waitSemaphores = &_deviceResources->presentationSemaphores[_frameId];

	// Presentation must go through the same VkQueue the frame was submitted to
	activeQueue->present(framePresent);

	// Step to the next set of per-frame semaphores, wrapping at the swapchain length
	_frameId = (_frameId + 1) % _swapchainLength;

	// When multi-queue mode is enabled, alternate between queue 0 and queue 1 for the next frame
	if (_useMultiQueue)
	{
		_queueIndex = (_queueIndex + 1) % 2;
	}

	return pvr::Result::Success;
}
/// <summary>This function must be implemented by the user of the shell. The user should return its pvr::Shell object defining the behaviour of the application.</summary>
/// <returns>Return a unique ptr to the demo supplied by the user.</returns>
std::unique_ptr<pvr::Shell> pvr::newDemo()
{
	// Build the VulkanGaussianBlur demo and hand it back to the caller as a pvr::Shell unique_ptr.
	return std::make_unique<VulkanGaussianBlur>();
}