Skip to content

Commit

Permalink
GPUクロックがアイドル状態まで低下し、エンコード速度が著しく低下するのを防止するオプションを追加。(--avoid-idle-clock)
Browse files Browse the repository at this point in the history
  • Loading branch information
rigaya committed Feb 20, 2024
1 parent 5e2a985 commit 031a959
Show file tree
Hide file tree
Showing 26 changed files with 499 additions and 12 deletions.
2 changes: 2 additions & 0 deletions QSVEnc/QSVEnc.en.lng
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ AuofcgCBVppResize=Resize
AuofcgLBVppResize=x
AuofcgCBPsnr=psnr
AuofcgCBSsim=ssim
AuofcgCBAvoidIdleClock=Avoid Idle GPU Clock
AuotabPageExOpt=Others
AuofcgCBAuoTcfileout=Output timecode
AuofcgCBD3DMemAlloc=Video Memory Mode
Expand Down Expand Up @@ -779,6 +780,7 @@ AuofrmTTfcgNUInputBufSize=Input buffer size. (async depth)
AuofrmTTfcgCBD3DMemAlloc=Use D3D GPU memory to increase encode speed.
AuofrmTTfcgCBSsim=Calculate ssim of the encoded video.
AuofrmTTfcgCBPsnr=Calculate psnr of the encoded video.
AuofrmTTfcgCBAvoidIdleClock=Prevent GPU clock from going to idle clock during encoding, to avoid significant drop in encoding speed.
AuofrmTTfcgCBOutputAud=Insert Access Unit Delimiter NAL.
AuofrmTTfcgCBOutputPicStruct=Insert picture timing SEI.
AuofrmTTfcgCBDeblock=Enable H.264 deblock filter.
Expand Down
2 changes: 2 additions & 0 deletions QSVEnc/QSVEnc.ja.lng
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,7 @@ AuofcgCBVppResize=リサイズ
AuofcgLBVppResize=x
AuofcgCBPsnr=psnr
AuofcgCBSsim=ssim
AuofcgCBAvoidIdleClock=エンコード中、GPUがアイドルクロックになるのを防止
AuotabPageExOpt=その他
AuofcgCBAuoTcfileout=タイムコード出力
AuofcgCBD3DMemAlloc=ビデオメモリモード (QSV使用時推奨)
Expand Down Expand Up @@ -777,6 +778,7 @@ AuofrmTTfcgNUInputBufSize=入力バッファサイズを指定します。
AuofrmTTfcgCBD3DMemAlloc=GPUメモリを使用して高速化を図ります。
AuofrmTTfcgCBSsim=エンコード結果のSSIMを計算します。
AuofrmTTfcgCBPsnr=エンコード結果のPSNRを計算します。
AuofrmTTfcgCBAvoidIdleClock=エンコード中GPUクロックがアイドルクロックになるのを防止し、\nエンコード速度が大きく低下するのを防ぎます。
AuofrmTTfcgCBOutputAud=Access Unit Delimiter NALを挿入します。
AuofrmTTfcgCBOutputPicStruct=picture timing SEIを挿入します。
AuofrmTTfcgCBDeblock=H.264のデブロックフィルタを有効にします。
Expand Down
2 changes: 2 additions & 0 deletions QSVEnc/QSVEnc.zh.lng
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,7 @@ AuofcgCBVppResize=尺寸调整
AuofcgLBVppResize=x
AuofcgCBPsnr=峰值信噪比
AuofcgCBSsim=结构相似性
AuofcgCBAvoidIdleClock=避免闲置GPU时钟
AuotabPageExOpt=其他
AuofcgCBAuoTcfileout=导出时间码
AuofcgCBD3DMemAlloc=显存模式 (推荐使用QSV时启用)
Expand Down Expand Up @@ -777,6 +778,7 @@ AuofrmTTfcgNUInputBufSize=指定导入缓冲大小。
AuofrmTTfcgCBD3DMemAlloc=使用GPU显存提高速度。
AuofrmTTfcgCBSsim=计算编码结果的结构相似性。
AuofrmTTfcgCBPsnr=计算编码结果的峰值信噪比。
AuofrmTTfcgCBAvoidIdleClock=防止 GPU 时钟在编码期间进入空闲时钟,以避免编码速度大幅下降。
AuofrmTTfcgCBOutputAud=插入访问单元分割符NAL单元。
AuofrmTTfcgCBOutputPicStruct=插入图像时序SEI信息。
AuofrmTTfcgCBDeblock=启用H.264去块过滤器。
Expand Down
4 changes: 4 additions & 0 deletions QSVEnc/QSVEnc_readme.txt
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,10 @@ API v1.1 … Intel Media SDK v2.0


【どうでもいいメモ】
2024.02.20 (7.61)
- Resizable BARが無効の状態でArc GPUでエンコードした際に、GPUクロックが低下し
エンコードが著しく遅くなる問題を回避。(--avoid-idle-clock)

2024.02.16 (7.60)
- 7.59で、getWorkSurfに失敗することがある問題を修正。
- 7.56以降で--disable-d3dが正常に動作しなかった問題を修正。
Expand Down
2 changes: 2 additions & 0 deletions QSVEnc/frm/auo_mes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ static const char * AUO_MES_ID_NAME_STR[] = {
"AuofcgLBVppResize",
"AuofcgCBPsnr",
"AuofcgCBSsim",
"AuofcgCBAvoidIdleClock",
"AuotabPageExOpt",
"AuofcgCBAuoTcfileout",
"AuofcgLBInputBufSize",
Expand Down Expand Up @@ -826,6 +827,7 @@ static const char * AUO_MES_ID_NAME_STR[] = {
"AuofrmTTfcgCBD3DMemAlloc",
"AuofrmTTfcgCBSsim",
"AuofrmTTfcgCBPsnr",
"AuofrmTTfcgCBAvoidIdleClock",
"AuofrmTTfcgCBOutputAud",
"AuofrmTTfcgCBOutputPicStruct",
"AuofrmTTfcgCBDeblock",
Expand Down
2 changes: 2 additions & 0 deletions QSVEnc/frm/auo_mes.h
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,7 @@ enum AuoMes {
AuofcgLBVppResize,
AuofcgCBPsnr,
AuofcgCBSsim,
AuofcgCBAvoidIdleClock,
AuotabPageExOpt,
AuofcgCBAuoTcfileout,
AuofcgLBInputBufSize,
Expand Down Expand Up @@ -918,6 +919,7 @@ enum AuoMes {
AuofrmTTfcgCBD3DMemAlloc,
AuofrmTTfcgCBSsim,
AuofrmTTfcgCBPsnr,
AuofrmTTfcgCBAvoidIdleClock,
AuofrmTTfcgCBOutputAud,
AuofrmTTfcgCBOutputPicStruct,
AuofrmTTfcgCBDeblock,
Expand Down
4 changes: 4 additions & 0 deletions QSVEnc/frm/frmConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1646,6 +1646,7 @@ System::Void frmConfig::LoadLangText() {
LOAD_CLI_TEXT(fcgLBVppResize);
LOAD_CLI_TEXT(fcgCBPsnr);
LOAD_CLI_TEXT(fcgCBSsim);
LOAD_CLI_TEXT(fcgCBAvoidIdleClock);
LOAD_CLI_TEXT(tabPageExOpt);
LOAD_CLI_TEXT(fcgCBAuoTcfileout);
LOAD_CLI_TEXT(fcgLBInputBufSize);
Expand Down Expand Up @@ -1919,6 +1920,7 @@ System::Void frmConfig::ConfToFrm(CONF_GUIEX *cnf) {

fcgCBSsim->Checked = prm_qsv.common.metric.ssim;
fcgCBPsnr->Checked = prm_qsv.common.metric.psnr;
fcgCBAvoidIdleClock->Checked = prm_qsv.ctrl.avoidIdleClock.mode != RGYParamAvoidIdleClockMode::Disabled;

//SetCXIndex(fcgCXX264Priority, cnf->vid.priority);
const bool enable_tc2mp4_muxer = (0 != str_has_char(sys_dat->exstg->s_mux[MUXER_TC2MP4].base_cmd));
Expand Down Expand Up @@ -2182,6 +2184,7 @@ System::String^ frmConfig::FrmToConf(CONF_GUIEX *cnf) {

prm_qsv.common.metric.ssim = fcgCBSsim->Checked;
prm_qsv.common.metric.psnr = fcgCBPsnr->Checked;
prm_qsv.ctrl.avoidIdleClock.mode = fcgCBAvoidIdleClock->Checked ? RGYParamAvoidIdleClockMode::Auto : RGYParamAvoidIdleClockMode::Disabled;

//拡張部
const bool enable_tc2mp4_muxer = (0 != str_has_char(sys_dat->exstg->s_mux[MUXER_TC2MP4].base_cmd));
Expand Down Expand Up @@ -2457,6 +2460,7 @@ System::Void frmConfig::SetHelpToolTips() {
SET_TOOL_TIP_EX(fcgCBD3DMemAlloc);
SET_TOOL_TIP_EX(fcgCBSsim);
SET_TOOL_TIP_EX(fcgCBPsnr);
SET_TOOL_TIP_EX(fcgCBAvoidIdleClock);
SET_TOOL_TIP_EX(fcgCBOutputAud);
SET_TOOL_TIP_EX(fcgCBOutputPicStruct);
SET_TOOL_TIP_EX(fcgCBDeblock);
Expand Down
14 changes: 14 additions & 0 deletions QSVEnc/frm/frmConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -1233,6 +1233,7 @@ private: System::Windows::Forms::Label^ fcgLBVppDenoiseDctBlockSize;
private: System::Windows::Forms::Label^ fcgLBVppDenoiseDctSigma;
private: System::Windows::Forms::ComboBox^ fcgCXVppDenoiseDctStep;
private: System::Windows::Forms::Label^ fcgLBVppDenoiseDctStep;
private: System::Windows::Forms::CheckBox^ fcgCBAvoidIdleClock;



Expand Down Expand Up @@ -1813,6 +1814,7 @@ private: System::Windows::Forms::Label^ fcgLBVppDenoiseDctStep;
this->fcgLBAudioPriority = (gcnew System::Windows::Forms::Label());
this->fcgTXCmd = (gcnew System::Windows::Forms::TextBox());
this->fcgPNHideToolStripBorder = (gcnew System::Windows::Forms::Panel());
this->fcgCBAvoidIdleClock = (gcnew System::Windows::Forms::CheckBox());
this->fcgtoolStripSettings->SuspendLayout();
this->fcgtabControlMux->SuspendLayout();
this->fcgtabPageMP4->SuspendLayout();
Expand Down Expand Up @@ -5642,6 +5644,7 @@ private: System::Windows::Forms::Label^ fcgLBVppDenoiseDctStep;
//
// tabPageExOpt
//
this->tabPageExOpt->Controls->Add(this->fcgCBAvoidIdleClock);
this->tabPageExOpt->Controls->Add(this->fcgCBPsnr);
this->tabPageExOpt->Controls->Add(this->fcgCBSsim);
this->tabPageExOpt->Controls->Add(this->fcgCBOutputPicStruct);
Expand Down Expand Up @@ -6708,6 +6711,17 @@ private: System::Windows::Forms::Label^ fcgLBVppDenoiseDctStep;
this->fcgPNHideToolStripBorder->TabIndex = 90;
this->fcgPNHideToolStripBorder->Visible = false;
//
// fcgCBAvoidIdleClock
//
this->fcgCBAvoidIdleClock->AutoSize = true;
this->fcgCBAvoidIdleClock->Location = System::Drawing::Point(18, 217);
this->fcgCBAvoidIdleClock->Name = L"fcgCBAvoidIdleClock";
this->fcgCBAvoidIdleClock->Size = System::Drawing::Size(245, 18);
this->fcgCBAvoidIdleClock->TabIndex = 108;
this->fcgCBAvoidIdleClock->Tag = L"reCmd";
this->fcgCBAvoidIdleClock->Text = L"エンコード中、GPUがアイドルクロックになるのを防止";
this->fcgCBAvoidIdleClock->UseVisualStyleBackColor = true;
//
// frmConfig
//
this->AutoScaleDimensions = System::Drawing::SizeF(96, 96);
Expand Down
2 changes: 2 additions & 0 deletions QSVEncC/QSVEncC_version.rc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ RGY_FILTER_CLRNG_MRG31K3P_PRIVATE_CH EXE_DATA DISCARDABLE "..\\clRNG\\src\\inclu

RGY_FILTER_SSIM_CL EXE_DATA DISCARDABLE "..\\QSVPipeline\\rgy_filter_ssim.cl"

RGY_DUMMY_LOAD_CL EXE_DATA DISCARDABLE "..\\QSVPipeline\\rgy_dummy_load.cl"

PERF_MONITOR_PYW PERF_MONITOR_SRC DISCARDABLE "..\\PerfMonitor\\perf_monitor.pyw"

APP_OSCODEPAGE_MANIFEST EXE_DATA DISCARDABLE "QSVEncC.oscodepage.manifest"
Expand Down
23 changes: 23 additions & 0 deletions QSVEncC_Options.en.md
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@
- [--thread-throttling \[\<string1\>=\]\<string2\>\[#\<int\>\[:\<int\>\]\[\]...\]](#--thread-throttling-string1string2intint)
- [--option-file \<string\>](#--option-file-string)
- [--max-procfps \<int\>](#--max-procfps-int)
- [--avoid-idle-clock \<string\>\[=\<float\>\]](#--avoid-idle-clock-stringfloat)
- [--lowlatency](#--lowlatency)
- [--avsdll \<string\>](#--avsdll-string)
- [--process-codepage \<string\> \[Windows OS only\]](#--process-codepage-string-windows-os-only)
Expand Down Expand Up @@ -2749,6 +2750,28 @@ This could be used when you want to encode multiple stream and you do not want o
--max-procfps 90
```

### --avoid-idle-clock &lt;string&gt;[=&lt;float&gt;]
Adds a slight dummy load to keep the GPU clock from dropping to its idle clock during encoding, thus preventing a significant drop in encoding speed. The option value is the target dummy load, as a percentage. Requires OpenCL.

- **mode** (&lt;string&gt;)
- off
- auto [default]
- on

"auto" enables this feature only when conditions that make it appropriate are met, such as "dGPU used" and "OpenCL filter unused".

- **value** (&lt;float&gt;)
Optional. Target dummy load to add, in percent. When omitted, a load of about 0.01% utilization is applied.

- examples
```
Example: disable this feature
--avoid-idle-clock off
Example: always use the feature, and change target load to 0.02%
--avoid-idle-clock on=0.02
```

### --lowlatency
Tune for lower transcoding latency, but will hurt transcoding throughput. Not recommended in most cases.

Expand Down
23 changes: 23 additions & 0 deletions QSVEncC_Options.ja.md
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@
- [--benchmark \<string\>](#--benchmark-string)
- [--bench-quality "all" or \<int\>\[,\<int\>\]...](#--bench-quality-all-or-intint)
- [--max-procfps \<int\>](#--max-procfps-int)
- [--avoid-idle-clock \<string\>\[=\<float\>\]](#--avoid-idle-clock-stringfloat)
- [--lowlatency](#--lowlatency)
- [--avsdll \<string\>](#--avsdll-string)
- [--process-codepage \<string\>](#--process-codepage-string)
Expand Down Expand Up @@ -2812,6 +2813,28 @@ avsw/avhw読み込み時のデバッグ情報出力。
--max-procfps 90
```

### --avoid-idle-clock &lt;string&gt;[=&lt;float&gt;]
わずかな軽い負荷をかけ、エンコード中GPUクロックがアイドルクロックになるのを防止し、エンコード速度が大きく低下するのを防ぐ。OpenCLが必要。

- **mode** (&lt;string&gt;)
- off
- auto [default]
- on

"auto"は、「dGPU使用」かつ「OpenCLフィルタを使用しない」など、本機能が有効と思われるいくつかの条件を満たすときのみ本機能を動作させる。

- **value** (&lt;float&gt;)
省略可。目標とするダミー負荷のパーセント。値を省略した時は、0.01%程度のわずかな負荷をかける。

- 使用例
```
Example: この機能を無効に
--avoid-idle-clock off
Example: この機能を常に使用し、目標負荷を0.02%に変更する例
--avoid-idle-clock on=0.02
```

### --lowlatency
エンコード遅延を低減するモード。最大エンコード速度(スループット)は低下するので、通常は不要。

Expand Down
8 changes: 8 additions & 0 deletions QSVPipeline/QSVPipeline.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,12 @@ if exist rgy_rev.h.%PID%.tmp del rgy_rev.h.%PID%.tmp &gt; nul 2&gt;&amp;1</Comma
<ClCompile Include="rgy_cmd.cpp" />
<ClCompile Include="rgy_codepage.cpp" />
<ClCompile Include="rgy_def.cpp" />
<ClCompile Include="rgy_dummy_load.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="rgy_env.cpp" />
<ClCompile Include="rgy_event.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
Expand Down Expand Up @@ -999,6 +1005,7 @@ if exist rgy_rev.h.%PID%.tmp del rgy_rev.h.%PID%.tmp &gt; nul 2&gt;&amp;1</Comma
<ClInclude Include="rgy_cmd.h" />
<ClInclude Include="rgy_codepage.h" />
<ClInclude Include="rgy_def.h" />
<ClInclude Include="rgy_dummy_load.h" />
<ClInclude Include="rgy_env.h" />
<ClInclude Include="rgy_event.h" />
<ClInclude Include="rgy_faw.h" />
Expand Down Expand Up @@ -1066,6 +1073,7 @@ if exist rgy_rev.h.%PID%.tmp del rgy_rev.h.%PID%.tmp &gt; nul 2&gt;&amp;1</Comma
<ClInclude Include="rgy_wav_parser.h" />
</ItemGroup>
<ItemGroup>
<None Include="rgy_dummy_load.cl" />
<None Include="rgy_filter.cl" />
<None Include="rgy_filter_afs_analyze.cl" />
<None Include="rgy_filter_afs_filter.cl" />
Expand Down
9 changes: 9 additions & 0 deletions QSVPipeline/QSVPipeline.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,9 @@
<ClCompile Include="rgy_frame_info.cpp">
<Filter>ソース ファイル</Filter>
</ClCompile>
<ClCompile Include="rgy_dummy_load.cpp">
<Filter>ソース ファイル</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="qsv_prm.h">
Expand Down Expand Up @@ -659,6 +662,9 @@
<ClInclude Include="rgy_filter_cl.h">
<Filter>ヘッダー ファイル</Filter>
</ClInclude>
<ClInclude Include="rgy_dummy_load.h">
<Filter>ヘッダー ファイル</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="rgy_filter.cl">
Expand Down Expand Up @@ -751,5 +757,8 @@
<None Include="rgy_filter_denoise_dct.cl">
<Filter>ソース ファイル</Filter>
</None>
<None Include="rgy_dummy_load.cl">
<Filter>ソース ファイル</Filter>
</None>
</ItemGroup>
</Project>
6 changes: 6 additions & 0 deletions QSVPipeline/qsv_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ QSV_CPU_GEN QSVDevice::CPUGen() {
return getCPUGen(&m_session);
}

int QSVDevice::adapterType() {
mfxPlatform platform = { 0 };
m_session.QueryPlatform(&platform);
return platform.MediaAdapterType;
}

LUID QSVDevice::luid() {
return (m_hwdev) ? m_hwdev->GetLUID() : LUID();
}
Expand Down
1 change: 1 addition & 0 deletions QSVPipeline/qsv_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class QSVDevice {
tstring name() const;
LUID luid();
QSV_CPU_GEN CPUGen();
int adapterType();

QSVDeviceNum deviceNum() const { return m_devNum; };
MemType memType() const { return m_memType; };
Expand Down
Loading

0 comments on commit 031a959

Please sign in to comment.