diff --git a/auoCommon/auoCommon.vcxproj b/auoCommon/auoCommon.vcxproj new file mode 100644 index 0000000..dfc6d9d --- /dev/null +++ b/auoCommon/auoCommon.vcxproj @@ -0,0 +1,205 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + AdvancedVectorExtensions2 + AdvancedVectorExtensions2 + + + + AdvancedVectorExtensions2 + AdvancedVectorExtensions2 + + + + + + + + + + + + + + + + + + + + + 16.0 + Win32Proj + {c7c2269e-87f0-45d4-a5f3-2c6c158d65c5} + auoCommon + 10.0 + + + + StaticLibrary + true + $(DefaultPlatformToolset) + MultiByte + + + StaticLibrary + false + $(DefaultPlatformToolset) + true + MultiByte + + + StaticLibrary + true + $(DefaultPlatformToolset) + MultiByte + + + StaticLibrary + false + $(DefaultPlatformToolset) + true + MultiByte + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Configuration)\$(ProjectName)\temp\ + + + $(SolutionDir)$(Configuration)\$(ProjectName)\temp\ + + + $(SolutionDir)$(Configuration)\ + $(SolutionDir)$(Configuration)\$(ProjectName)\temp\ + + + $(SolutionDir)$(Configuration)\ + $(SolutionDir)$(Configuration)\$(ProjectName)\temp\ + + + + Level4 + false + WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) + true + + + stdcpp17 + true + .\; + 4505;4564;4091; + + + + + true + + + + + Level4 + true + true + false + WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) + true + + + stdcpp17 + true + .\; + 4505;4564;4091; + + + + + true + true + true + + + + + Level4 + false + _DEBUG;_LIB;%(PreprocessorDefinitions) + true + Use + pch.h + stdcpp17 + true + .\; + 4505;4564;4091; + + + + + true + + + + + Level4 + true + true + false + NDEBUG;_LIB;%(PreprocessorDefinitions) + true + + + stdcpp17 + true + .\; + 4505;4564;4091; + + + + + true + true + true + + + + + + \ No newline at end of file diff --git a/auoCommon/auoCommon.vcxproj.filters b/auoCommon/auoCommon.vcxproj.filters new file mode 100644 index 0000000..cadac01 --- /dev/null +++ b/auoCommon/auoCommon.vcxproj.filters @@ -0,0 +1,84 @@ + + + 
+ + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + ソース ファイル + + + ソース ファイル + + + ソース ファイル + + + ソース ファイル + + + ソース ファイル + + + ソース ファイル + + + ソース ファイル + + + ソース ファイル + + + ソース ファイル + + + ソース ファイル + + + + + ヘッダー ファイル + + + ヘッダー ファイル + + + ヘッダー ファイル + + + ヘッダー ファイル + + + ヘッダー ファイル + + + ヘッダー ファイル + + + ヘッダー ファイル + + + ヘッダー ファイル + + + ヘッダー ファイル + + + ヘッダー ファイル + + + ヘッダー ファイル + + + \ No newline at end of file diff --git a/auoCommon/cpu_info.cpp b/auoCommon/cpu_info.cpp new file mode 100644 index 0000000..f71cdad --- /dev/null +++ b/auoCommon/cpu_info.cpp @@ -0,0 +1,1033 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc/VCEEnc/rkmppenc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2011-2020 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// -------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "rgy_tchar.h" +#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64) +#ifdef _MSC_VER +#include +#else +#include +#endif +#include +#endif //#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64) +#include "rgy_osdep.h" +#include "rgy_arch.h" +#include "rgy_util.h" +#include "cpu_info.h" +#if ENCODER_QSV +#include "qsv_query.h" +#endif + + +#pragma warning(push) +#pragma warning(disable: 4127) //warning C4127: 条件式が定数です。 +static inline int CountSetBits(size_t bits_) { + if (sizeof(size_t) > 4) { + uint64_t bits = (uint64_t)bits_; + bits = (bits & 0x5555555555555555) + (bits >> 1 & 0x5555555555555555); + bits = (bits & 0x3333333333333333) + (bits >> 2 & 0x3333333333333333); + bits = (bits & 0x0f0f0f0f0f0f0f0f) + (bits >> 4 & 0x0f0f0f0f0f0f0f0f); + bits = (bits & 0x00ff00ff00ff00ff) + (bits >> 8 & 0x00ff00ff00ff00ff); + bits = (bits & 0x0000ffff0000ffff) + (bits >> 16 & 0x0000ffff0000ffff); + bits = (bits & 0x00000000ffffffff) + (bits >> 32 & 0x00000000ffffffff); + return (int)bits; + } else { + uint32_t bits = (uint32_t)bits_; + bits = (bits & 0x55555555) + (bits >> 1 & 0x55555555); + bits = (bits & 0x33333333) + (bits >> 2 & 0x33333333); + bits = (bits & 0x0f0f0f0f) + (bits >> 4 & 0x0f0f0f0f); + bits = (bits & 0x00ff00ff) + (bits >> 8 & 0x00ff00ff); + bits = (bits & 0x0000ffff) + (bits >>16 & 0x0000ffff); + return (int)bits; + } +} +#pragma warning(pop) + +#if (defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) || defined(__ARM_ARCH)) + +std::map getCPUPartARM() { + 
std::map cpu_architecture; + std::map cpu_variant; + std::map cpu_part; + + std::ifstream inputFile("/proc/cpuinfo"); + std::istreambuf_iterator data_begin(inputFile); + std::istreambuf_iterator data_end; + std::string script_data = std::string(data_begin, data_end); + inputFile.close(); + + int processorID = -1; + + for (auto line : split(script_data, "\n")) { + auto pos = line.find("processor"); + if (pos != std::string::npos) { + int i = 0; + if (1 == sscanf(line.substr(line.find(":") + 1).c_str(), " %d", &i)) { + processorID = i; + } + continue; + } + pos = line.find("CPU architecture"); + if (pos != std::string::npos) { + int i = 0; + if (1 == sscanf(line.substr(line.find(":") + 1).c_str(), " %d", &i)) { + if (cpu_architecture.count(i) == 0) { + cpu_architecture[i] = 0; + } + cpu_architecture[i] |= (1llu << processorID); + } + continue; + } + pos = line.find("CPU variant"); + if (pos != std::string::npos) { + uint32_t i = 0; + if (1 == sscanf(line.substr(line.find(":") + 1).c_str(), " 0x%x", &i)) { + if (cpu_variant.count(i) == 0) { + cpu_variant[i] = 0; + } + cpu_variant[i] |= (1llu << processorID); + } + continue; + } + pos = line.find("CPU part"); + if (pos != std::string::npos) { + uint32_t i = 0; + if (1 == sscanf(line.substr(line.find(":") + 1).c_str(), " 0x%x", &i)) { + if (cpu_part.count(i) == 0) { + cpu_part[i] = 0; + } + cpu_part[i] |= (1llu << processorID); + } + continue; + } + } + return cpu_part; +} + +std::string getCPUNameARM() { + std::unordered_map cpu_architecture; + std::unordered_map cpu_variant; + std::unordered_map cpu_part; + + std::ifstream inputFile("/proc/cpuinfo"); + std::istreambuf_iterator data_begin(inputFile); + std::istreambuf_iterator data_end; + std::string script_data = std::string(data_begin, data_end); + inputFile.close(); + + for (auto line : split(script_data, "\n")) { + auto pos = line.find("CPU architecture"); + if (pos != std::string::npos) { + int i = 0; + if (1 == sscanf(line.substr(line.find(":") + 1).c_str(), " 
%d", &i)) { + if (cpu_architecture.count(i) == 0) { + cpu_architecture[i] = 1; + } else { + cpu_architecture[i]++; + } + } + continue; + } + pos = line.find("CPU variant"); + if (pos != std::string::npos) { + uint32_t i = 0; + if (1 == sscanf(line.substr(line.find(":") + 1).c_str(), " 0x%x", &i)) { + if (cpu_variant.count(i) == 0) { + cpu_variant[i] = 1; + } else { + cpu_variant[i]++; + } + } + continue; + } + pos = line.find("CPU part"); + if (pos != std::string::npos) { + uint32_t i = 0; + if (1 == sscanf(line.substr(line.find(":") + 1).c_str(), " 0x%x", &i)) { + if (cpu_part.count(i) == 0) { + cpu_part[i] = 1; + } else { + cpu_part[i]++; + } + } + continue; + } + } + + std::string name; + if (cpu_part.size() > 0) { + for (auto& [part, count] : cpu_part) { + //https://en.wikipedia.org/wiki/Comparison_of_ARM_processors#ARMv8-A + const char *part_name = nullptr; + switch (part) { + case 0xD01: part_name = "Cortex-A32"; break; + case 0xD02: part_name = "Cortex-A34"; break; + case 0xD03: part_name = "Cortex-A53"; break; + case 0xD04: part_name = "Cortex-A35"; break; + case 0xD05: part_name = "Cortex-A55"; break; + case 0xD06: part_name = "Cortex-A65"; break; + case 0xD07: part_name = "Cortex-A57"; break; + case 0xD08: part_name = "Cortex-A72"; break; + case 0xD09: part_name = "Cortex-A73"; break; + case 0xD0A: part_name = "Cortex-A75"; break; + case 0xD0B: part_name = "Cortex-A76"; break; + case 0xD0D: part_name = "Cortex-A77"; break; + case 0xD0E: part_name = "Cortex-A76AE"; break; + case 0xD41: part_name = "Cortex-A78"; break; + case 0xD43: part_name = "Cortex-A65AE"; break; + case 0xD44: part_name = "Cortex-X1"; break; + } + if (part_name) { + if (name.length() > 0) { + name += " + "; + } + name += strsprintf("%sx%d", part_name, count); + } + } + } + return name; +} +#endif + +int getCPUName(char *buffer, size_t nSize) { +#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64) + int CPUInfo[4] = {-1}; + __cpuid(CPUInfo, 0x80000000); + unsigned int nExIds = 
CPUInfo[0]; + if (nSize < 0x40) + return 1; + + memset(buffer, 0, 0x40); + for (unsigned int i = 0x80000000; i <= nExIds; i++) { + __cpuid(CPUInfo, i); + int offset = 0; + switch (i) { + case 0x80000002: offset = 0; break; + case 0x80000003: offset = 16; break; + case 0x80000004: offset = 32; break; + default: + continue; + } + memcpy(buffer + offset, CPUInfo, sizeof(CPUInfo)); + } + auto remove_string =[](char *target_str, const char *remove_str) { + char *ptr = strstr(target_str, remove_str); + if (nullptr != ptr) { + memmove(ptr, ptr + strlen(remove_str), (strlen(ptr) - strlen(remove_str) + 1) * sizeof(target_str[0])); + } + }; + remove_string(buffer, "(R)"); + remove_string(buffer, "(TM)"); + remove_string(buffer, "CPU"); + //crop space beforce string + for (int i = 0; buffer[i]; i++) { + if (buffer[i] != ' ') { + if (i) + memmove(buffer, buffer + i, strlen(buffer + i) + 1); + break; + } + } + //remove space which continues. + for (int i = 0; buffer[i]; i++) { + if (buffer[i] == ' ') { + int space_idx = i; + while (buffer[i+1] == ' ') + i++; + if (i != space_idx) + memmove(buffer + space_idx + 1, buffer + i + 1, strlen(buffer + i + 1) + 1); + } + } + //delete last blank + if (0 < strlen(buffer)) { + char *last_ptr = buffer + strlen(buffer) - 1; + if (' ' == *last_ptr) + *last_ptr = '\0'; + } + return 0; +#else + std::string arch; + std::string name; + memset(buffer, 0, 0x40); + FILE *fp = NULL; + const char *cmdline = "lscpu"; + if ((fp = popen(cmdline, "r")) == NULL) { + return 1; + } + char buf[1024]; + while (!feof(fp)) { + if (fgets(buf, sizeof(buf), fp) == nullptr) { + break; + } + if (strstr(buf, "Architecture:") != nullptr) { + //改行の削除 + char *ptr = buf + strlen(buf) - 1; + if (*ptr == '\n') *ptr = '\0'; + //Architectureの部分の取得 + ptr = buf + strlen("Architecture:"); + while (*ptr == ' ') + ptr++; + arch = ptr; + } + if (strstr(buf, "Model name:") != nullptr) { + //改行の削除 + char *ptr = buf + strlen(buf) - 1; + if (*ptr == '\n') *ptr = '\0'; + //Model 
nameの部分の取得 + ptr = buf + strlen("Model name:"); + while (*ptr == ' ') + ptr++; + name = ptr; + } + } +#if (defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) || defined(__ARM_ARCH)) + if (name.length() == 0) { + name = getCPUNameARM(); + } +#endif + sprintf(buffer, "%s %s", name.c_str(), arch.c_str()); + return 0; +#endif +} + +bool getCPUHybridMasks(cpu_info_t *info) { + info->maskSystem = 0; + info->maskCoreP = 0; + info->maskCoreE = 0; +#if _MSC_VER + DWORD_PTR maskProcess = 0; + DWORD_PTR maskSysAff = 0; + if (GetProcessAffinityMask(GetCurrentProcess(), &maskProcess, &maskSysAff) == 0) { + info->maskSystem = 0; + info->maskCoreP = 0; + info->maskCoreE = 0; + for (uint64_t i = 0; i < info->logical_cores; i++) { + info->maskSystem |= (1llu << i); + } + return false; + } + info->maskSystem = maskSysAff; + const auto threadCount = CountSetBits(info->maskSystem); +#else + const auto threadCount = info->physical_cores; +#endif +#if defined(__x86__) || defined(__x86_64__) || defined(_M_X86) || defined(_M_IX86) || defined(_M_X64) + const auto hThread = GetCurrentThread(); + size_t maskOriginal = 0; + for (int ith = 0; ith < threadCount; ith++) { + const auto maskTarget = (size_t)1u << ith; + auto maskPrev = SetThreadAffinityMask(hThread, maskTarget); + if (maskOriginal == 0) { + maskOriginal = maskPrev; + } + std::this_thread::sleep_for(std::chrono::microseconds(0)); + int CPUInfo[4] = { 0 }; + __cpuid(CPUInfo, 0x1A); + const auto hybridInfo = CPUInfo[0 /*EAX*/] >> 24; + if (hybridInfo == 0x20) { + info->maskCoreE |= maskTarget; + } else if (hybridInfo == 0x40) { + info->maskCoreP |= maskTarget; + } + } + SetThreadAffinityMask(hThread, maskOriginal); // 元に戻す + + info->physical_cores_e = 0; + info->physical_cores_p = 0; + for (int i = 0; i < info->physical_cores; i++) { + const auto maskTarget = info->proc_list[i].mask; + if (info->maskCoreP & maskTarget) { + info->physical_cores_p++; + } else if (info->maskCoreE & maskTarget) { + info->physical_cores_e++; 
+ } + } +#endif //#if defined(__x86__) || defined(__x86_64__) || defined(_M_X86) || defined(_M_X64) +#if (defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) || defined(__ARM_ARCH)) + const auto cpu_part = getCPUPartARM(); + if (cpu_part.size() > 1) { + for (auto it = cpu_part.begin(); it != cpu_part.end(); it++) { + if (it == cpu_part.begin()) { //partが一番小さいのがEcore + info->maskCoreE |= it->second; + } else { + info->maskCoreP |= it->second; + } + } + } + + info->physical_cores_e = 0; + info->physical_cores_p = 0; + for (int i = 0; i < info->physical_cores; i++) { + const auto maskTarget = info->proc_list[i].mask; + if (info->maskCoreP & maskTarget) { + info->physical_cores_p++; + } else if (info->maskCoreE & maskTarget) { + info->physical_cores_e++; + } + } +#endif + return true; +} + +#if _MSC_VER +static int getCPUName(wchar_t *buffer, size_t nSize) { + int ret = 0; + char *buf = (char *)calloc(nSize, sizeof(char)); + if (NULL == buf) { + buffer[0] = L'\0'; + ret = 1; + } else { + if (0 == (ret = getCPUName(buf, nSize))) { + if (MultiByteToWideChar(CP_ACP, 0, buf, -1, buffer, (DWORD)nSize) == 0) { + buffer[0] = L'\0'; + ret = 1; + } + } + free(buf); + } + return ret; +} +#endif //#if _MSC_VER + +double getCPUDefaultClockFromCPUName() { + double defaultClock = 0.0; + TCHAR buffer[1024] = { 0 }; + getCPUName(buffer, _countof(buffer)); + TCHAR *ptr_mhz = _tcsstr(buffer, _T("MHz")); + TCHAR *ptr_ghz = _tcsstr(buffer, _T("GHz")); + TCHAR *ptr = _tcschr(buffer, _T('@')); + bool clockInfoAvailable = (NULL != ptr_mhz || ptr_ghz != NULL) && NULL != ptr; + if (clockInfoAvailable && 1 == _stscanf_s(ptr+1, _T("%lf"), &defaultClock)) { + return defaultClock * ((NULL == ptr_ghz) ? 
1000.0 : 1.0); + } + return 0.0; +} + +#if defined(_WIN32) || defined(_WIN64) + +typedef BOOL (WINAPI *LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); + +bool get_cpu_info(cpu_info_t *cpu_info) { + if (cpu_info == nullptr) + return false; + + static cpu_info_t s_cpu_info = { 0 }; + if (s_cpu_info.physical_cores > 0) { + *cpu_info = s_cpu_info; + return true; + } + + s_cpu_info = cpu_info_t({ 0 }); + + LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(_T("kernel32")), "GetLogicalProcessorInformation"); + if (nullptr == glpi) + return false; + + DWORD returnLength = 0; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr; + while (FALSE == glpi(buffer, &returnLength)) { + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { + if (buffer) + free(buffer); + if (NULL == (buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength))) + return FALSE; + } + } + + DWORD processorPackageCount = 0; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer; + for (DWORD byteOffset = 0; byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength; + byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)) { + switch (ptr->Relationship) { + case RelationNumaNode: + // Non-NUMA systems report a single record of this type. + s_cpu_info.nodes[s_cpu_info.node_count++].mask = ptr->ProcessorMask; + break; + case RelationProcessorCore: { + auto& proc = s_cpu_info.proc_list[s_cpu_info.physical_cores]; + proc.core_id = s_cpu_info.physical_cores; + proc.processor_id = s_cpu_info.physical_cores; + proc.logical_cores = CountSetBits(ptr->ProcessorMask); + proc.mask = ptr->ProcessorMask; + // A hyperthreaded core supplies more than one logical processor. + s_cpu_info.logical_cores += proc.logical_cores; + s_cpu_info.physical_cores++; + } break; + case RelationCache: + { + // Cache data is in ptr->Cache, one CACHE_DESCRIPTOR structure for each cache. 
+ PCACHE_DESCRIPTOR Cache = &ptr->Cache; + if (1 <= Cache->Level && Cache->Level <= _countof(s_cpu_info.cache_count)) { + const int cacheIdx = s_cpu_info.cache_count[Cache->Level - 1]++; + cache_info_t *cache = &s_cpu_info.caches[Cache->Level-1][cacheIdx]; + cache->type = (RGYCacheType)Cache->Type; + cache->level = (RGYCacheLevel)Cache->Level; + cache->linesize = Cache->LineSize; + cache->size = Cache->Size; + cache->associativity = Cache->Associativity; + cache->mask = ptr->ProcessorMask; + s_cpu_info.max_cache_level = (std::max)(s_cpu_info.max_cache_level, (int)cache->level); + } + break; + } + case RelationProcessorPackage: + // Logical processors share a physical package. + processorPackageCount++; + break; + + default: + //Unsupported LOGICAL_PROCESSOR_RELATIONSHIP value. + break; + } + ptr++; + } + if (buffer) + free(buffer); + + getCPUHybridMasks(&s_cpu_info); + *cpu_info = s_cpu_info; + return true; +} + +#else //#if defined(_WIN32) || defined(_WIN64) +#include +#include + +bool get_cpu_info(cpu_info_t *cpu_info) { + memset(cpu_info, 0, sizeof(cpu_info[0])); + std::ifstream inputFile("/proc/cpuinfo"); + std::istreambuf_iterator data_begin(inputFile); + std::istreambuf_iterator data_end; + std::string script_data = std::string(data_begin, data_end); + inputFile.close(); + + std::vector processor_list; + processor_info_t info = { 0 }; + info.processor_id = info.core_id = info.socket_id = -1; + + for (auto line : split(script_data, "\n")) { + auto pos = line.find("processor"); + if (pos != std::string::npos) { + int i = 0; + if (1 == sscanf(line.substr(line.find(":") + 1).c_str(), "%d", &i)) { + if (info.processor_id >= 0) { + if (info.socket_id < 0) info.socket_id = 0; // physical id がない場合 + if (info.core_id < 0) info.core_id = info.processor_id; // core id がない場合 + processor_list.push_back(info); + info.processor_id = info.core_id = info.socket_id = -1; // 次に備えて初期化 + } + info.processor_id = i; + } + continue; + } + pos = line.find("core id"); + if (pos != 
std::string::npos) { + int i = 0; + if (1 == sscanf(line.substr(line.find(":") + 1).c_str(), "%d", &i)) { + info.core_id = i; + } + continue; + } + pos = line.find("physical id"); + if (pos != std::string::npos) { + int i = 0; + if (1 == sscanf(line.substr(line.find(":") + 1).c_str(), "%d", &i)) { + info.socket_id = i; + } + continue; + } + } + if (info.processor_id >= 0) { + if (info.socket_id < 0) info.socket_id = 0; // physical id がない場合 + if (info.core_id < 0) info.core_id = info.processor_id; // core id がない場合 + processor_list.push_back(info); + } + + //ここまでで論理コアの情報を作った + //cpu_infoに登録するのは物理コアの情報なので、整理しなおす + //いったんsocket→core→processorの順でソート + std::sort(processor_list.begin(), processor_list.end(), [](const processor_info_t& a, const processor_info_t& b) { + if (a.socket_id != b.socket_id) return a.socket_id < b.socket_id; + if (a.core_id != b.core_id) return a.core_id < b.core_id; + return a.processor_id < b.processor_id; + }); + + cpu_info->max_cache_level = 0; + cpu_info->physical_cores = 0; + cpu_info->logical_cores = processor_list.size(); + + processor_info_t *prevCore = nullptr; + for (size_t ip = 0; ip < processor_list.size(); ip++) { + if (prevCore != nullptr + && prevCore->socket_id == processor_list[ip].socket_id + && prevCore->core_id == processor_list[ip].core_id) { + // 同じソケットの同じコアならそれは論理コア + prevCore->logical_cores++; + prevCore->mask |= 1llu << processor_list[ip].processor_id; + } else { + auto targetCore = &cpu_info->proc_list[cpu_info->physical_cores]; + *targetCore = processor_list[ip]; + targetCore->logical_cores = 1; + targetCore->mask = 1llu << processor_list[ip].processor_id; + cpu_info->physical_cores++; + prevCore = targetCore; + } + } + + //キャッシュの情報を作る + std::vector caches; + for (int ip = 0; ip < cpu_info->physical_cores; ip++) { + const auto& targetCore = &cpu_info->proc_list[ip]; + uint64_t mask = 0; + for (int index = 0; ; index++) { + cache_info_t cacheinfo; + + char buffer[256]; + sprintf_s(buffer, 
"/sys/devices/system/cpu/cpu%d/cache/index%d", targetCore->processor_id, index); + struct stat st; + if (stat(buffer, &st) != 0) break; + + sprintf_s(buffer, "/sys/devices/system/cpu/cpu%d/cache/index%d/shared_cpu_list", targetCore->processor_id, index); + FILE *fp = fopen(buffer, "r"); + if (fp) { + while (fgets(buffer, _countof(buffer), fp) != NULL) { + for (auto numstr : split(buffer, ",")) { + int value0 = 0, value1 = 0; + if (sscanf_s(numstr.c_str(), "%d-%d", &value0, &value1) == 2) { + for (int iv = value0; iv <= value1; iv++) { + mask |= 1llu << iv; + } + } else if (sscanf_s(numstr.c_str(), "%d", &value0) == 1) { + mask |= 1llu << value0; + } + } + } + fclose(fp); + } + cacheinfo.mask = mask; + + sprintf_s(buffer, "/sys/devices/system/cpu/cpu%d/cache/index%d/level", targetCore->processor_id, index); + fp = fopen(buffer, "r"); + if (fp) { + while (fgets(buffer, _countof(buffer), fp) != NULL) { + int value = 0; + if (sscanf_s(buffer, "%d", &value) == 1) { + cacheinfo.level = (RGYCacheLevel)value; + } + } + fclose(fp); + } + + sprintf_s(buffer, "/sys/devices/system/cpu/cpu%d/cache/index%d/size", targetCore->processor_id, index); + fp = fopen(buffer, "r"); + if (fp) { + while (fgets(buffer, _countof(buffer), fp) != NULL) { + int value = 0; + if (sscanf_s(buffer, "%dK", &value) == 1) { + cacheinfo.size = value * 1024; + } else if (sscanf_s(buffer, "%dM", &value) == 1) { + cacheinfo.size = value * 1024 * 1024; + } else if (sscanf_s(buffer, "%dG", &value) == 1) { + cacheinfo.size = value * 1024 * 1024 * 1024; + } else if (sscanf_s(buffer, "%d", &value) == 1) { + cacheinfo.size = value; + } + } + fclose(fp); + } + + sprintf_s(buffer, "/sys/devices/system/cpu/cpu%d/cache/index%d/ways_of_associativity", targetCore->processor_id, index); + fp = fopen(buffer, "r"); + if (fp) { + while (fgets(buffer, _countof(buffer), fp) != NULL) { + int value = 0; + if (sscanf_s(buffer, "%d", &value) == 1) { + cacheinfo.associativity = value; + } + } + fclose(fp); + } + + 
sprintf_s(buffer, "/sys/devices/system/cpu/cpu%d/cache/index%d/type", targetCore->processor_id, index); + fp = fopen(buffer, "r"); + if (fp) { + while (fgets(buffer, _countof(buffer), fp) != NULL) { + if (strncasecmp(buffer, "Instruction", strlen("Instruction")) == 0) { + cacheinfo.type = RGYCacheType::Instruction; + break; + } else if (strncasecmp(buffer, "Data", strlen("Data")) == 0) { + cacheinfo.type = RGYCacheType::Data; + break; + } else if (strncasecmp(buffer, "Unified", strlen("Unified")) == 0) { + cacheinfo.type = RGYCacheType::Unified; + break; + } + } + fclose(fp); + } + + auto sameCache = std::find_if(caches.begin(), caches.end(), [&cacheinfo](const cache_info_t& c){ + return cacheinfo.type == c.type + && cacheinfo.level == c.level + && ((cacheinfo.mask & c.mask) != 0); + }); + if (sameCache != caches.end()) { + sameCache->mask |= cacheinfo.mask; + } else { + caches.push_back(cacheinfo); + } + } + } + + for (int ilevel = 0; ilevel < MAX_CACHE_LEVEL; ilevel++) { + cpu_info->cache_count[ilevel] = 0; + } + for (const auto& c : caches) { + const int ilevel = (int)c.level - 1; + const int icacheidx = cpu_info->cache_count[ilevel]++; + cpu_info->caches[ilevel][icacheidx] = c; + } + for (int ilevel = 0; ilevel < MAX_CACHE_LEVEL; ilevel++) { + if (cpu_info->cache_count[ilevel] > 0) { + cpu_info->max_cache_level = ilevel+1; + } + } + + //ノードの情報を作る + cpu_info->node_count = processor_list.back().socket_id + 1; + //初期化 + for (int in = 0; in < cpu_info->node_count; in++) { + cpu_info->nodes[in].mask = 0; + } + for (int ip = 0; ip < cpu_info->physical_cores; ip++) { + auto& targetCore = cpu_info->proc_list[ip]; + cpu_info->nodes[targetCore.socket_id].mask |= targetCore.mask; + } + + getCPUHybridMasks(cpu_info); + return true; +} +#endif //#if defined(_WIN32) || defined(_WIN64) + + +const processor_info_t *get_core_info(const cpu_info_t *cpu_info, RGYCoreType type, int id) { + switch (type) { + case RGYCoreType::Physical: return (id < cpu_info->physical_cores) ? 
&cpu_info->proc_list[id] : nullptr; + case RGYCoreType::Logical: { + const uint64_t mask = 1llu << id; + for (int i = 0; i < cpu_info->physical_cores; i++) { + if (mask & cpu_info->proc_list[i].mask) { + return &cpu_info->proc_list[i]; + } + } + return nullptr; + } + default: return nullptr; + } +} +uint64_t get_core_mask(const cpu_info_t *cpu_info, RGYCoreType type, int id) { + auto ptr = get_core_info(cpu_info, type, id); + return (ptr) ? ptr->mask : 0; +} +const cache_info_t *get_cache_info(const cpu_info_t *cpu_info, RGYCacheLevel level, int id) { + if (RGYCacheLevel::L1 <= level && level <= RGYCacheLevel::L4) { + return (id < cpu_info->cache_count[(int)level - 1]) ? &cpu_info->caches[(int)level - 1][id] : nullptr; + } + return nullptr; +} +uint64_t get_cache_mask(const cpu_info_t *cpu_info, RGYCacheLevel level, int id) { + auto ptr = get_cache_info(cpu_info, level, id); + return (ptr) ? ptr->mask : 0; +} +uint64_t get_mask(const cpu_info_t *cpu_info, RGYUnitType unit_type, int level, int id) { + switch (unit_type) { + case RGYUnitType::Core: return get_core_mask(cpu_info, (RGYCoreType)level, id); + case RGYUnitType::Cache: return get_cache_mask(cpu_info, (RGYCacheLevel)level, id); + case RGYUnitType::Node: return cpu_info->nodes[id].mask; + default: return 0; + } +} + +cpu_info_t get_cpu_info() { + cpu_info_t cpu; + get_cpu_info(&cpu); + return cpu; +} + +#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64) +const int TEST_COUNT = 5000; +RGY_NOINLINE +int64_t runl_por(int loop_count, int& dummy_dep) { + unsigned int dummy; + const auto ts = __rdtscp(&dummy); + int i = loop_count; +#define ADD_XOR { i += loop_count; i ^= loop_count; } +#define ADD_XOR4 {ADD_XOR;ADD_XOR;ADD_XOR;ADD_XOR;} +#define ADD_XOR16 {ADD_XOR4;ADD_XOR4;ADD_XOR4;ADD_XOR4;} + do { + ADD_XOR16; + ADD_XOR16; + ADD_XOR16; + ADD_XOR16; + loop_count--; + } while (loop_count > 0); + const auto te = __rdtscp(&dummy); + dummy_dep = i; + return te - ts; +} + +//rdtscpを使うと0xc0000096例外 
(一般ソフトウェア例外)を発する場合があるらしい +//そこでそれを検出する +bool check_rdtscp_available() { +#if defined(_WIN32) || defined(_WIN64) + __try { + UINT dummy; + __rdtscp(&dummy); + } __except (EXCEPTION_EXECUTE_HANDLER) { + return false; + } +#endif //defined(_WIN32) || defined(_WIN64) + return true; +} + +static double get_tick_per_clock() { + const int outer_loop_count = 100; + const int inner_loop_count = TEST_COUNT; + int dummy = 0; + auto tick_min = runl_por(inner_loop_count, dummy); + for (int i = 0; i < outer_loop_count; i++) { + auto ret = runl_por(inner_loop_count, dummy); + tick_min = std::min(tick_min, ret); + } + return tick_min / (128.0 * inner_loop_count); +} + +static double get_tick_per_sec() { + const int outer_loop_count = TEST_COUNT; + int dummy = 0; + runl_por(outer_loop_count, dummy); + auto start = std::chrono::high_resolution_clock::now(); + auto tick = runl_por(outer_loop_count, dummy); + auto fin = std::chrono::high_resolution_clock::now(); + double second = std::chrono::duration_cast(fin - start).count() * 1e-6; + return tick / second; +} +#endif //#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64) + +//__rdtscが定格クロックに基づいた値を返すのを利用して、実際の動作周波数を得る +//やや時間がかかるので注意 +double getCPUMaxTurboClock() { +#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64) + static double turboClock = 0.0; + if (turboClock > 0.0) { + return turboClock; + } + //http://instlatx64.atw.hu/ + //によれば、Sandy/Ivy/Haswell/Silvermont + //いずれでもサポートされているのでノーチェックでも良い気がするが... 
+ //固定クロックのタイマーを持つかチェック (Fn:8000_0007:EDX8) + int CPUInfo[4] = { -1 }; + __cpuid(CPUInfo, 0x80000007); + if (0 == (CPUInfo[3] & (1 << 8))) { + return 0.0; + } + //rdtscp命令のチェック (Fn:8000_0001:EDX27) + __cpuid(CPUInfo, 0x80000001); + if (0 == (CPUInfo[3] & (1 << 27))) { + return 0.0; + } +#if defined(_WIN32) || defined(_WIN64) + //例外が発生するなら処理を中断する + if (!check_rdtscp_available()) { + return 0.0; + } +#endif //#if defined(_WIN32) || defined(_WIN64) + + const double tick_per_clock = get_tick_per_clock(); + const double tick_per_sec = get_tick_per_sec(); + turboClock = (tick_per_sec / tick_per_clock) * 1e-9; + return turboClock; +#else + return 0.0; +#endif //#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64) +} + +double getCPUDefaultClock() { + return getCPUDefaultClockFromCPUName(); +} + +int getCPUInfo(TCHAR *buffer, size_t nSize +#if ENCODER_QSV + , MFXVideoSession *pSession +#endif +) { + int ret = 0; + buffer[0] = _T('\0'); + cpu_info_t cpu_info; + if (getCPUName(buffer, nSize) || !get_cpu_info(&cpu_info)) { + buffer[0] = _T('\0'); + ret = 1; + } else { +#if defined(_WIN32) || defined(_WIN64) //Linuxでは環境によっては、正常に動作しない場合がある + const double defaultClock = getCPUDefaultClockFromCPUName(); + bool noDefaultClockInCPUName = (0.0 >= defaultClock); + const double maxFrequency = getCPUMaxTurboClock(); + if (defaultClock > 0.0) { + if (noDefaultClockInCPUName) { + _stprintf_s(buffer + _tcslen(buffer), nSize - _tcslen(buffer), _T(" @ %.2fGHz"), defaultClock); + } + //大きな違いがなければ、TurboBoostはないものとして表示しない + if (maxFrequency / defaultClock > 1.01) { + _stprintf_s(buffer + _tcslen(buffer), nSize - _tcslen(buffer), _T(" [TB: %.2fGHz]"), maxFrequency); + } + } else if (maxFrequency > 0.0) { + _stprintf_s(buffer + _tcslen(buffer), nSize - _tcslen(buffer), _T(" [%.2fGHz]"), maxFrequency); + } +#endif //#if defined(_WIN32) || defined(_WIN64) + _tcscpy_s(buffer + _tcslen(buffer), nSize - _tcslen(buffer), _T(" (")); + if (cpu_info.maskCoreP != 0 && cpu_info.maskCoreE != 0 && 
cpu_info.physical_cores <= 64) { + _stprintf_s(buffer + _tcslen(buffer), nSize - _tcslen(buffer), _T("%dP+%dE,"), cpu_info.physical_cores_p, cpu_info.physical_cores_e); + } + _stprintf_s(buffer + _tcslen(buffer), nSize - _tcslen(buffer), _T("%dC/%dT)"), cpu_info.physical_cores, cpu_info.logical_cores); +#if ENCODER_QSV && !FOR_AUO + if (pSession != nullptr) { + int cpuGen = getCPUGen(pSession); + if (cpuGen != CPU_GEN_UNKNOWN) { + _stprintf_s(buffer + _tcslen(buffer), nSize - _tcslen(buffer), _T(" <%s>"), CPU_GEN_STR[cpuGen]); + } + } +#endif + } + return ret; +} + +BOOL GetProcessTime(HANDLE hProcess, PROCESS_TIME *time) { +#if defined(_WIN32) || defined(_WIN64) + SYSTEMTIME systime; + GetSystemTime(&systime); + return (NULL != hProcess + && GetProcessTimes(hProcess, (FILETIME *)&time->creation, (FILETIME *)&time->exit, (FILETIME *)&time->kernel, (FILETIME *)&time->user) + && (WAIT_OBJECT_0 == WaitForSingleObject(hProcess, 0) || SystemTimeToFileTime(&systime, (FILETIME *)&time->exit))); +#else //#if defined(_WIN32) || defined(_WIN64) + struct tms tm; + times(&tm); + time->exit = time->creation; + time->creation = clock(); + time->kernel = tm.tms_stime; + time->user = tm.tms_utime; + return 0; +#endif //#if defined(_WIN32) || defined(_WIN64) +} + +BOOL GetProcessTime(PROCESS_TIME *time) { +#if defined(_WIN32) || defined(_WIN64) + return GetProcessTime(GetCurrentProcess(), time); +#else + return GetProcessTime(NULL, time); +#endif +} + +double GetProcessAvgCPUUsage(HANDLE hProcess, PROCESS_TIME *start) { + PROCESS_TIME current = { 0 }; + cpu_info_t cpu_info; + double result = 0; + if (NULL != hProcess + && get_cpu_info(&cpu_info) + && GetProcessTime(hProcess, ¤t)) { + uint64_t current_total_time = current.kernel + current.user; + uint64_t start_total_time = (nullptr == start) ? 0 : start->kernel + start->user; + result = (current_total_time - start_total_time) * 100.0 / (double)(cpu_info.logical_cores * (current.exit - ((nullptr == start) ? 
current.creation : start->exit))); + } + return result; +} + +double GetProcessAvgCPUUsage(PROCESS_TIME *start) { +#if defined(_WIN32) || defined(_WIN64) + return GetProcessAvgCPUUsage(GetCurrentProcess(), start); +#else + return GetProcessAvgCPUUsage(NULL, start); +#endif +} + +const TCHAR *RGYCacheTypeToStr(RGYCacheType type) { + switch (type) { + case RGYCacheType::Unified: return _T(" "); + case RGYCacheType::Instruction: return _T("I"); + case RGYCacheType::Data: return _T("D"); + default: return _T("-"); + } +} + +tstring print_cpu_info(const cpu_info_t *cpu_info) { + TCHAR buffer[256]; + getCPUInfo(buffer, _countof(buffer)); + + tstring str = buffer; + str += _T("\n"); + str += _T("CPU cores\n"); + for (int ip = 0; ip < cpu_info->physical_cores; ip++) { + auto& targetCore = cpu_info->proc_list[ip]; + str += strsprintf(_T(" core %2d "), ip); + if ((cpu_info->maskCoreP & targetCore.mask) == targetCore.mask) { + str += _T("P"); + } else if ((cpu_info->maskCoreE & targetCore.mask) == targetCore.mask) { + str += _T("E"); + } else { + str += _T(" "); + } + str += _T(" : "); + for (int il = 0; il < cpu_info->logical_cores; il++) { + const auto mask = 1llu << il; + str += (mask & targetCore.mask) ? _T("*") : _T("-"); + } + str += _T("\n"); + } + + if (cpu_info->cache_count[0] > 0) { + str += _T("CPU caches\n"); + for (int icache_level = 0; icache_level < MAX_CACHE_LEVEL; icache_level++) { + for (int ic = 0; ic < cpu_info->cache_count[icache_level]; ic++) { + auto& targetCache = cpu_info->caches[icache_level][ic]; + str += strsprintf(_T(" cache L%d%s : "), icache_level + 1, RGYCacheTypeToStr(targetCache.type)); + for (int il = 0; il < cpu_info->logical_cores; il++) { + const auto mask = 1llu << il; + str += (mask & targetCache.mask) ? 
_T("*") : _T("-"); + } + str += strsprintf(_T(" : %2dway %6dKB\n"), targetCache.associativity, targetCache.size / 1024); + } + } + } + return str; +} diff --git a/auoCommon/cpu_info.h b/auoCommon/cpu_info.h new file mode 100644 index 0000000..17b55fd --- /dev/null +++ b/auoCommon/cpu_info.h @@ -0,0 +1,134 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc/VCEEnc/rkmppenc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2011-2020 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +// -------------------------------------------------------------------------------------------- + +#ifndef _CPU_INFO_H_ +#define _CPU_INFO_H_ + +#include +#include "rgy_tchar.h" +#include "rgy_osdep.h" + +static const int MAX_CACHE_LEVEL = 4; +static const int MAX_CORE_COUNT = 512; +static const int MAX_NODE_COUNT = 8; + +enum class RGYCacheLevel { + L0, + L1, + L2, + L3, + L4 +}; + +enum class RGYCacheType { + Unified, + Instruction, + Data, + Trace +}; + +enum class RGYUnitType { + Core, + Cache, + Node +}; + +enum class RGYCoreType { + Physical, + Logical +}; + +typedef struct node_info_t { + size_t mask; +} node_info_t; + +typedef struct cache_info_t { + RGYCacheType type; + RGYCacheLevel level; + int associativity; + int linesize; + int size; + size_t mask; +} cache_info_t; + +typedef struct { + int processor_id; // プロセッサID + int core_id; // コアID + int socket_id; // ソケットID + int logical_cores; // 論理コア数 + size_t mask; // 対応する物理コアのマスク +} processor_info_t; // 物理コアの情報 + +typedef struct { + int node_count; // ノード数 + node_info_t nodes[MAX_NODE_COUNT]; + int physical_cores; // 物理コア数 + int physical_cores_p; // 物理コア数 + int physical_cores_e; // 物理コア数 + int logical_cores; // 論理コア数 + int max_cache_level; // キャッシュの最大レベル + int cache_count[MAX_CACHE_LEVEL]; // 各階層のキャッシュの数 + cache_info_t caches[MAX_CACHE_LEVEL][MAX_CORE_COUNT]; // 各階層のキャッシュの情報 + processor_info_t proc_list[MAX_CORE_COUNT]; // 物理コアの情報 + size_t maskCoreP; // Performanceコアのマスク + size_t maskCoreE; // Efficiencyコアのマスク + size_t maskSystem; // システム全体のマスク +} cpu_info_t; + + +int getCPUName(char *buffer, size_t nSize); +bool get_cpu_info(cpu_info_t *cpu_info); +cpu_info_t get_cpu_info(); +uint64_t get_mask(const cpu_info_t *cpu_info, RGYUnitType unit_type, int level, int id); + +tstring print_cpu_info(const cpu_info_t *cpu_info); + +#if ENCODER_QSV +class MFXVideoSession; +int getCPUInfo(TCHAR *buffer, size_t nSize, MFXVideoSession *pSession = nullptr); +#else +int getCPUInfo(TCHAR *buffer, size_t nSize); +#endif 
+ +template +int inline getCPUInfo(TCHAR(&buffer)[size]) { + return getCPUInfo(buffer, size); +} + +double getCPUDefaultClock(); +double getCPUMaxTurboClock(); + +typedef struct PROCESS_TIME { + uint64_t creation, exit, kernel, user; +} PROCESS_TIME; + +BOOL GetProcessTime(PROCESS_TIME *time); +BOOL GetProcessTime(HANDLE hProcess, PROCESS_TIME *time); +double GetProcessAvgCPUUsage(HANDLE hProcess, PROCESS_TIME *start = nullptr); +double GetProcessAvgCPUUsage(PROCESS_TIME *start = nullptr); + +#endif //_CPU_INFO_H_ diff --git a/auoCommon/rgy_arch.h b/auoCommon/rgy_arch.h new file mode 100644 index 0000000..b1ccbab --- /dev/null +++ b/auoCommon/rgy_arch.h @@ -0,0 +1,82 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2011-2016 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// -------------------------------------------------------------------------------------------- + +#pragma once +#ifndef __RGY_ARCH_H__ +#define __RGY_ARCH_H__ + +#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64) +#include +static inline void rgy_yield() { + _mm_pause(); +} +#if !(defined(_WIN32) || defined(_WIN64)) +static inline void __cpuid(int cpuInfo[4], int param) { + int eax = 0, ebx = 0, ecx = 0, edx = 0; + __asm("xor %%ecx, %%ecx\n\t" + "cpuid" : "=a"(eax), "=b" (ebx), "=c"(ecx), "=d"(edx) + : "0"(param)); + cpuInfo[0] = eax; + cpuInfo[1] = ebx; + cpuInfo[2] = ecx; + cpuInfo[3] = edx; +} + +#ifndef _MSC_VER +static inline unsigned long long rgy_xgetbv(unsigned int index) { + unsigned int eax, edx; + __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); + return ((unsigned long long)edx << 32) | eax; +} +#endif + +#if NO_RDTSCP_INTRIN +static inline uint64_t __rdtscp(uint32_t *Aux) { + uint32_t aux; + uint64_t rax,rdx; + asm volatile ( "rdtscp\n" : "=a" (rax), "=d" (rdx), "=c" (aux) : : ); + *Aux = aux; + return (rdx << 32) + rax; +} +#endif //#if NO_RDTSCP_INTRIN + +//uint64_t __rdtsc() { +// unsigned int eax, edx; +// __asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx)); +// return ((uint64_t)edx << 32) | eax; +//} +#endif //#if !(defined(_WIN32) || defined(_WIN64)) + +#elif (defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) || defined(__ARM_ARCH)) + +static inline void rgy_yield() { + __asm__ __volatile__("isb\n"); +} + +#endif + +#endif //__RGY_ARCH_H__ diff --git a/auoCommon/rgy_codepage.cpp b/auoCommon/rgy_codepage.cpp new file mode 100644 index 0000000..2830306 --- /dev/null +++ b/auoCommon/rgy_codepage.cpp @@ -0,0 +1,170 @@ +// 
----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2019 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +// -------------------------------------------------------------------------------------------- + +#include +#include +#include "rgy_codepage.h" + +//BOM文字リスト +static const int MAX_UTF8_CHAR_LENGTH = 6; +static const uint8_t UTF8_BOM[] = { 0xEF, 0xBB, 0xBF }; +static const uint8_t UTF16_LE_BOM[] = { 0xFF, 0xFE }; +static const uint8_t UTF16_BE_BOM[] = { 0xFE, 0xFF }; + +//ボム文字かどうか、コードページの判定 +static uint32_t check_bom(const void* chr) { + if (chr == nullptr) return CODE_PAGE_UNSET; + if (memcmp(chr, UTF16_LE_BOM, sizeof(UTF16_LE_BOM)) == 0) return CODE_PAGE_UTF16_LE; + if (memcmp(chr, UTF16_BE_BOM, sizeof(UTF16_BE_BOM)) == 0) return CODE_PAGE_UTF16_BE; + if (memcmp(chr, UTF8_BOM, sizeof(UTF8_BOM)) == 0) return CODE_PAGE_UTF8; + return CODE_PAGE_UNSET; +} + +static bool isJis(const void *str, uint32_t size_in_byte) { + static const uint8_t ESCAPE[][7] = { + //先頭に比較すべきバイト数 + { 3, 0x1B, 0x28, 0x42, 0x00, 0x00, 0x00 }, + { 3, 0x1B, 0x28, 0x4A, 0x00, 0x00, 0x00 }, + { 3, 0x1B, 0x28, 0x49, 0x00, 0x00, 0x00 }, + { 3, 0x1B, 0x24, 0x40, 0x00, 0x00, 0x00 }, + { 3, 0x1B, 0x24, 0x42, 0x00, 0x00, 0x00 }, + { 6, 0x1B, 0x26, 0x40, 0x1B, 0x24, 0x42 }, + { 4, 0x1B, 0x24, 0x28, 0x44, 0x00, 0x00 }, + { 0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } //終了 + }; + const uint8_t * const str_fin = (const uint8_t *)str + size_in_byte; + for (const uint8_t *chr = (const uint8_t *)str; chr < str_fin; chr++) { + if (*chr > 0x7F) + return false; + for (int i = 0; ESCAPE[i][0]; i++) { + if (str_fin - chr > ESCAPE[i][0] && + memcmp(chr, &ESCAPE[i][1], ESCAPE[i][0]) == 0) + return true; + } + } + return false; +} + +static uint32_t isUTF16(const void *str, uint32_t size_in_byte) { + const uint8_t * const str_fin = (const uint8_t *)str + size_in_byte; + for (const uint8_t *chr = (const uint8_t *)str; chr < str_fin; chr++) { + if (chr[0] == 0x00 && str_fin - chr > 1 && chr[1] <= 0x7F) + return ((chr - (const uint8_t *)str) % 2 == 1) ? 
CODE_PAGE_UTF16_LE : CODE_PAGE_UTF16_BE; + } + return CODE_PAGE_UNSET; +} + +static bool isASCII(const void *str, uint32_t size_in_byte) { + const uint8_t * const str_fin = (const uint8_t *)str + size_in_byte; + for (const uint8_t *chr = (const uint8_t *)str; chr < str_fin; chr++) { + if (*chr == 0x1B || *chr >= 0x80) + return false; + } + return true; +} + +static uint32_t jpn_check(const void *str, uint32_t size_in_byte) { + int score_sjis = 0; + int score_euc = 0; + int score_utf8 = 0; + const uint8_t * const str_fin = (const uint8_t *)str + size_in_byte; + for (const uint8_t *chr = (const uint8_t *)str; chr < str_fin - 1; chr++) { + if ((0x81 <= chr[0] && chr[0] <= 0x9F) || + (0xE0 <= chr[0] && chr[0] <= 0xFC) || + (0x40 <= chr[1] && chr[1] <= 0x7E) || + (0x80 <= chr[1] && chr[1] <= 0xFC)) { + score_sjis += 2; chr++; + } + } + for (const uint8_t *chr = (const uint8_t *)str; chr < str_fin - 1; chr++) { + if ((0xC0 <= chr[0] && chr[0] <= 0xDF) && + (0x80 <= chr[1] && chr[1] <= 0xBF)) { + score_utf8 += 2; chr++; + } else if ( + str_fin - chr > 2 && + (0xE0 <= chr[0] && chr[0] <= 0xEF) && + (0x80 <= chr[1] && chr[1] <= 0xBF) && + (0x80 <= chr[2] && chr[2] <= 0xBF)) { + score_utf8 += 3; chr++; + } + } + for (const uint8_t *chr = (const uint8_t *)str; chr < str_fin - 1; chr++) { + if (((0xA1 <= chr[0] && chr[0] <= 0xFE) && (0xA1 <= chr[1] && chr[1] <= 0xFE)) || + (chr[0] == 0x8E && (0xA1 <= chr[1] && chr[1] <= 0xDF))) { + score_euc += 2; chr++; + } else if ( + str_fin - chr > 2 && + chr[0] == 0x8F && + (0xA1 <= chr[1] && chr[1] <= 0xFE) && + (0xA1 <= chr[2] && chr[2] <= 0xFE)) { + score_euc += 3; chr += 2; + } + } + if (score_sjis > score_euc && score_sjis > score_utf8) + return CODE_PAGE_SJIS; + if (score_utf8 > score_euc && score_utf8 > score_sjis) + return CODE_PAGE_UTF8; + if (score_euc > score_sjis && score_euc > score_utf8) + return CODE_PAGE_EUC_JP; + return CODE_PAGE_UNSET; +} + +uint32_t get_code_page(const void *str, uint32_t size_in_byte) { + uint32_t ret 
= CODE_PAGE_UNSET; + if ((ret = check_bom(str)) != CODE_PAGE_UNSET) + return ret; + + if (isJis(str, size_in_byte)) + return CODE_PAGE_JIS; + + if ((ret = isUTF16(str, size_in_byte)) != CODE_PAGE_UNSET) + return ret; + + if (isASCII(str, size_in_byte)) + return CODE_PAGE_US_ASCII; + + return jpn_check(str, size_in_byte); +} + +const char *codepage_str(uint32_t codepage) { + switch (codepage) { + case CODE_PAGE_SJIS: + return "CP932"; + case CODE_PAGE_EUC_JP: + return "EUC-JP"; + case CODE_PAGE_UTF16_LE: + return "UTF16LE"; + case CODE_PAGE_UTF16_BE: + return "UTF16BE"; + case CODE_PAGE_JIS: + return "ISO2022JP"; + case CODE_PAGE_UTF8: + return "UTF-8"; + default: + return nullptr; + } +} diff --git a/auoCommon/rgy_codepage.h b/auoCommon/rgy_codepage.h new file mode 100644 index 0000000..ab98c70 --- /dev/null +++ b/auoCommon/rgy_codepage.h @@ -0,0 +1,48 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2019 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// ------------------------------------------------------------------------------------------- + +#pragma once +#ifndef __RGY_CODEPAGE_H__ +#define __RGY_CODEPAGE_H__ + +//日本語環境の一般的なコードページ一覧 +enum : uint32_t { + CODE_PAGE_SJIS = 932, //Shift-JIS + CODE_PAGE_JIS = 50220, + CODE_PAGE_EUC_JP = 51932, + CODE_PAGE_UTF8 = 65001, + CODE_PAGE_UTF16_LE = 1200, //WindowsのUnicode WCHAR のコードページ + CODE_PAGE_UTF16_BE = 1201, + CODE_PAGE_US_ASCII = 20127, + CODE_PAGE_WEST_EUROPE = 1252, //厄介な西ヨーロッパ言語 + CODE_PAGE_UNSET = 0xffffffff, +}; + +uint32_t get_code_page(const void *str, uint32_t size_in_byte); +const char *codepage_str(uint32_t codepage); + +#endif //__RGY_CODEPAGE_H__ diff --git a/auoCommon/rgy_faw.cpp b/auoCommon/rgy_faw.cpp new file mode 100644 index 0000000..04aa129 --- /dev/null +++ b/auoCommon/rgy_faw.cpp @@ -0,0 +1,635 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2023 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// -------------------------------------------------------------------------------------------- + +#include +#include +#include "rgy_faw.h" +#include "rgy_simd.h" + +size_t rgy_memmem_fawstart1_c(const void *data_, const size_t data_size) { + return rgy_memmem_c(data_, data_size, fawstart1.data(), fawstart1.size()); +} + +decltype(rgy_memmem_fawstart1_c)* get_memmem_fawstart1_func() { +#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64) + const auto simd = get_availableSIMD(); +#if defined(_M_X64) || defined(__x86_64) + if ((simd & RGY_SIMD::AVX512BW) == RGY_SIMD::AVX512BW) return rgy_memmem_fawstart1_avx512bw; +#endif + if ((simd & RGY_SIMD::AVX2) == RGY_SIMD::AVX2) return rgy_memmem_fawstart1_avx2; +#endif + return rgy_memmem_fawstart1_c; +} + +static const std::array AACSYNC_BYTES = { 0xff, 0xf0 }; + +static size_t rgy_find_aacsync_c(const void *data_, const size_t data_size) { + const uint16_t target = *(const uint16_t *)AACSYNC_BYTES.data(); + const size_t target_size = AACSYNC_BYTES.size(); + const uint8_t *data = (const uint8_t *)data_; + if (data_size < target_size) { + return RGY_MEMMEM_NOT_FOUND; + } + for (size_t i = 0; i <= data_size - target_size; i++) { + if ((*(const uint16_t *)(data + i) & target) == target) { + return i; + } + } + return RGY_MEMMEM_NOT_FOUND; +} + +//16bit音声 -> 8bit音声 +void rgy_convert_audio_16to8(uint8_t *dst, const short *src, const size_t n) { + uint8_t *byte = dst; + const uint8_t *fin = byte + n; + const short *sh = src; + while 
/// Splits 16-bit samples into two 8-bit streams.
///
/// For each input sample, the high byte goes to `dst0` and the low byte to
/// `dst1`, each with 128 added (modulo 256).
///
/// @param dst0 Receives `n` bytes derived from the high byte of each sample.
/// @param dst1 Receives `n` bytes derived from the low byte of each sample.
/// @param src  `n` input samples.
/// @param n    Number of samples to convert.
void rgy_split_audio_16to8x2(uint8_t *dst0, uint8_t *dst1, const short *src, const size_t n) {
    for (size_t idx = 0; idx < n; idx++) {
        const short sample = src[idx];
        dst0[idx] = (uint8_t)((sample >> 8) + 128);
        dst1[idx] = (uint8_t)((sample & 0xff) + 128);
    }
}
/// Computes the 32-bit FAW block checksum.
///
/// The payload is read as native-endian 16-bit words, with a trailing odd byte
/// treated as one more value. The low 16 bits of the result hold the sum of
/// those values and the high 16 bits hold their XOR.
///
/// @param buf Payload bytes.
/// @param len Payload length in bytes.
/// @return (sum & 0xffff) | ((xor & 0xffff) << 16).
static uint32_t faw_checksum_calc(const uint8_t *buf, const size_t len) {
    uint32_t sum = 0;
    uint32_t parity = 0;
    const size_t evenLen = len & ~static_cast<size_t>(1);
    for (size_t i = 0; i < evenLen; i += 2) {
        // memcpy rather than a casted pointer dereference: `buf` has no
        // 16-bit alignment guarantee, and the cast also dropped const.
        uint16_t word = 0;
        memcpy(&word, buf + i, sizeof(word));
        sum += word;
        parity ^= word;
    }
    if ((len & 1) != 0) {
        const uint32_t last = buf[len - 1];
        sum += last;
        parity ^= last;
    }
    return (sum & 0xffff) | ((parity & 0xffff) << 16);
}
no_raw_data_blocks_in_frame = buf6 & 0x03; +} + +RGYFAWBitstream::RGYFAWBitstream() : + buffer(), + bufferLength(0), + bufferOffset(0), + bytePerWholeSample(0), + inputLengthByte(0), + outSamples(0), + aacHeader() { + +} + +RGYFAWBitstream::~RGYFAWBitstream() {}; + +void RGYFAWBitstream::setBytePerSample(const int val) { + bytePerWholeSample = val; +} + +void RGYFAWBitstream::parseAACHeader(const uint8_t *buf) { + aacHeader.parse(buf); +} + +uint32_t RGYFAWBitstream::aacChannels() const { + return aacHeader.channel; +} +uint32_t RGYFAWBitstream::aacFrameSize() const { + return aacHeader.aac_frame_length; +} + +void RGYFAWBitstream::addOffset(size_t offset) { + if (bufferLength < offset) { + bufferLength = 0; + } else { + bufferLength -= offset; + } + if (bufferLength == 0) { + bufferOffset = 0; + } else { + bufferOffset += offset; + } +} + +void RGYFAWBitstream::addOutputSamples(size_t samples) { + outSamples += samples; +} + + +void RGYFAWBitstream::append(const uint8_t *input, const size_t inputLength) { + if (buffer.size() < bufferLength + inputLength) { + buffer.resize(std::max(bufferLength + inputLength, buffer.size() * 2)); + if (bufferLength == 0) { + bufferOffset = 0; + } + if (bufferOffset > 0) { + memmove(buffer.data(), buffer.data() + bufferOffset, bufferLength); + bufferOffset = 0; + } + } else if (buffer.size() < bufferOffset + bufferLength + inputLength) { + if (bufferLength == 0) { + bufferOffset = 0; + } + if (bufferOffset > 0) { + memmove(buffer.data(), buffer.data() + bufferOffset, bufferLength); + bufferOffset = 0; + } + } + if (input != nullptr) { + memcpy(buffer.data() + bufferOffset + bufferLength, input, inputLength); + } + bufferLength += inputLength; + inputLengthByte += inputLength; +} + +void RGYFAWBitstream::clear() { + bufferLength = 0; + bufferOffset = 0; + inputLengthByte = 0; + outSamples = 0; +} + +static const std::array aac_silent0 = { + 0xFF, 0xF9, 0x4C, 0x00, 0x02, 0x1F, 0xFC, 0x21, + 0x00, 0x49, 0x90, 0x02, 0x19, 0x00, 0x23, 
0x80 +}; +static const std::array aac_silent1 = { + 0xFF, 0xF9, 0x4C, 0x40, 0x01, 0xBF, 0xFC, 0x00, + 0xC8, 0x40, 0x80, 0x23, 0x80 +}; +static const std::array aac_silent2 = { + 0xFF, 0xF9, 0x4C, 0x80, 0x02, 0x1F, 0xFC, 0x21, + 0x00, 0x49, 0x90, 0x02, 0x19, 0x00, 0x23, 0x80 +}; +static const std::array aac_silent6 = { + 0xFF, 0xF9, 0x4D, 0x80, 0x04, 0x3F, 0xFC, 0x00, + 0xC8, 0x00, 0x80, 0x20, 0x84, 0x01, 0x26, 0x40, + 0x08, 0x64, 0x00, 0x82, 0x30, 0x04, 0x99, 0x00, + 0x21, 0x90, 0x02, 0x18, 0x32, 0x00, 0x20, 0x08, + 0xE0 +}; + +RGYFAWDecoder::RGYFAWDecoder() : + wavheader(), + fawmode(RGYFAWMode::Unknown), + bufferIn(), + bufferHalf0(), + bufferHalf1(), + funcMemMem(get_memmem_func()), + funcMemMemFAWStart1(get_memmem_fawstart1_func()), + funcAudio16to8(get_convert_audio_16to8_func()), + funcSplitAudio16to8x2(get_split_audio_16to8x2_func()) { +} +RGYFAWDecoder::~RGYFAWDecoder() { + +} + +void RGYFAWDecoder::setWavInfo() { + bufferIn.setBytePerSample(wavheader.number_of_channels * wavheader.bits_per_sample / 8); + if (wavheader.bits_per_sample > 8) { + bufferHalf0.setBytePerSample(wavheader.number_of_channels * wavheader.bits_per_sample / 16); + bufferHalf1.setBytePerSample(wavheader.number_of_channels * wavheader.bits_per_sample / 16); + } +} + +int RGYFAWDecoder::init(const uint8_t *data) { + int headerSize = wavheader.parseHeader(data); + setWavInfo(); + return headerSize; +} + +int RGYFAWDecoder::init(const RGYWAVHeader *data) { + wavheader = *data; + setWavInfo(); + return 0; +} + +void RGYFAWDecoder::appendFAWHalf(const uint8_t *data, const size_t dataLength) { + const auto prevSize = bufferHalf0.size(); + bufferHalf0.append(nullptr, dataLength / sizeof(short)); + funcAudio16to8(bufferHalf0.data() + prevSize, (const short *)data, dataLength / sizeof(short)); +} + +void RGYFAWDecoder::appendFAWMix(const uint8_t *data, const size_t dataLength) { + const auto prevSize0 = bufferHalf0.size(); + const auto prevSize1 = bufferHalf1.size(); + 
bufferHalf0.append(nullptr, dataLength / sizeof(short)); + bufferHalf1.append(nullptr, dataLength / sizeof(short)); + funcSplitAudio16to8x2(bufferHalf0.data() + prevSize0, bufferHalf1.data() + prevSize1, (const short *)data, dataLength / sizeof(short)); +} + +int RGYFAWDecoder::decode(RGYFAWDecoderOutput& output, const uint8_t *input, const size_t inputLength) { + for (auto& b : output) { + b.clear(); + } + + bool inputDataAppended = false; + + // FAWの種類を判別 + if (fawmode == RGYFAWMode::Unknown) { + bufferIn.append(input, inputLength); + inputDataAppended = true; + + int64_t ret0 = 0, ret1 = 0; + if ((ret0 = funcMemMemFAWStart1(bufferIn.data(), bufferIn.size())) != RGY_MEMMEM_NOT_FOUND) { + fawmode = RGYFAWMode::Full; + } else if ((ret0 = funcMemMem(bufferIn.data(), bufferIn.size(), fawstart2.data(), fawstart2.size())) != RGY_MEMMEM_NOT_FOUND) { + fawmode = RGYFAWMode::Half; + appendFAWHalf(bufferIn.data(), bufferIn.size()); + bufferIn.clear(); + } else { + appendFAWMix(bufferIn.data(), bufferIn.size()); + if ( (ret0 = funcMemMemFAWStart1(bufferHalf0.data(), bufferHalf0.size())) != RGY_MEMMEM_NOT_FOUND + && (ret1 = funcMemMemFAWStart1(bufferHalf1.data(), bufferHalf1.size())) != RGY_MEMMEM_NOT_FOUND) { + fawmode = RGYFAWMode::Mix; + bufferIn.clear(); + } else { + bufferHalf0.clear(); + bufferHalf1.clear(); + } + } + } + if (fawmode == RGYFAWMode::Unknown) { + return -1; + } + if (!inputDataAppended) { + if (fawmode == RGYFAWMode::Full) { + bufferIn.append(input, inputLength); + } else if (fawmode == RGYFAWMode::Half) { + appendFAWHalf(input, inputLength); + } else if (fawmode == RGYFAWMode::Mix) { + appendFAWMix(input, inputLength); + } + inputDataAppended = true; + } + + // デコード + if (fawmode == RGYFAWMode::Full) { + decode(output[0], bufferIn); + } else if (fawmode == RGYFAWMode::Half) { + decode(output[0], bufferHalf0); + } else if (fawmode == RGYFAWMode::Mix) { + decode(output[0], bufferHalf0); + decode(output[1], bufferHalf1); + } + return 0; +} + +int 
RGYFAWDecoder::decode(std::vector& output, RGYFAWBitstream& input) { + while (input.size() > 0) { + auto ret = decodeBlock(output, input); + if (ret == 0) { + break; + } + } + return 0; +} + +int RGYFAWDecoder::decodeBlock(std::vector& output, RGYFAWBitstream& input) { + auto posStart = funcMemMemFAWStart1(input.data(), input.size()); + if (posStart == RGY_MEMMEM_NOT_FOUND) { + return 0; + } + input.parseAACHeader(input.data() + posStart + fawstart1.size()); + + auto posFin = funcMemMem(input.data() + posStart + fawstart1.size(), input.size() - posStart - fawstart1.size(), fawfin1.data(), fawfin1.size()); + if (posFin == RGY_MEMMEM_NOT_FOUND) { + return 0; + } + posFin += posStart + fawstart1.size(); // データの先頭からの位置に変更 + + // pos_start から pos_fin までの間に、別のfawstart1がないか探索する + while (posStart + fawstart1.size() < posFin) { + auto ret = funcMemMemFAWStart1(input.data() + posStart + fawstart1.size(), posFin - posStart - fawstart1.size()); + if (ret == RGY_MEMMEM_NOT_FOUND) { + break; + } + posStart += ret + fawstart1.size(); + input.parseAACHeader(input.data() + posStart + fawstart1.size()); + } + + if (posStart + fawstart1.size() + 4 >= posFin) { + // 無効なブロックなので破棄 + input.addOffset(posFin + fawfin1.size()); + return 1; + } + const size_t blockSize = posFin - posStart - fawstart1.size() - 4 /*checksum*/; + const uint32_t checksumCalc = faw_checksum_calc(input.data() + posStart + fawstart1.size(), blockSize); + const uint32_t checksumRead = faw_checksum_read(input.data() + posFin - 4); + // checksumとフレーム長が一致しない場合、そのデータは破棄 + if (checksumCalc != checksumRead || blockSize != input.aacFrameSize()) { + input.addOffset(posFin + fawfin1.size()); + return 1; + } + + // pos_start -> sample start + const auto posStartSample = input.inputSampleStart() + posStart / input.bytePerSample(); + //fprintf(stderr, "Found block: %lld\n", posStartSample); + + // 出力が先行していたらdrop + if (posStartSample + (AAC_BLOCK_SAMPLES / 2) < input.outputSamples()) { + input.addOffset(posFin + fawfin1.size()); 
+ return 1; + } + + // 時刻ずれを無音データで補正 + while (input.outputSamples() + (AAC_BLOCK_SAMPLES/2) < posStartSample) { + //fprintf(stderr, "Insert silence: %lld: %lld -> %lld\n", posStartSample, input.outputSamples(), input.outputSamples() + AAC_BLOCK_SAMPLES); + addSilent(output, input); + } + + // ブロックを出力に追加 + const auto orig_size = output.size(); + output.resize(orig_size + blockSize); + memcpy(output.data() + orig_size, input.data() + posStart + fawstart1.size(), blockSize); + //fprintf(stderr, "Set block: %lld: %lld -> %lld\n", posStartSample, input.outputSamples(), input.outputSamples() + AAC_BLOCK_SAMPLES); + + input.addOutputSamples(AAC_BLOCK_SAMPLES); + input.addOffset(posFin + fawfin1.size()); + return 1; +} + +void RGYFAWDecoder::addSilent(std::vector& output, RGYFAWBitstream& input) { + auto ptrSilent = aac_silent0.data(); + auto dataSize = aac_silent0.size(); + switch (input.aacChannels()) { + case 0: + break; + case 1: + ptrSilent = aac_silent1.data(); + dataSize = aac_silent1.size(); + break; + case 6: + ptrSilent = aac_silent6.data(); + dataSize = aac_silent6.size(); + break; + case 2: + default: + ptrSilent = aac_silent2.data(); + dataSize = aac_silent2.size(); + break; + } + const auto orig_size = output.size(); + output.resize(orig_size + dataSize); + memcpy(output.data() + orig_size, ptrSilent, dataSize); + input.addOutputSamples(AAC_BLOCK_SAMPLES); +} + +void RGYFAWDecoder::fin(RGYFAWDecoderOutput& output) { + for (auto& b : output) { + b.clear(); + } + if (fawmode == RGYFAWMode::Full) { + fin(output[0], bufferIn); + } else if (fawmode == RGYFAWMode::Half) { + fin(output[0], bufferHalf0); + } else if (fawmode == RGYFAWMode::Mix) { + fin(output[0], bufferHalf0); + fin(output[1], bufferHalf1); + } +} + +void RGYFAWDecoder::fin(std::vector& output, RGYFAWBitstream& input) { + //fprintf(stderr, "Fin sample: %lld\n", input.inputSampleFin()); + while (input.outputSamples() + (AAC_BLOCK_SAMPLES / 2) < input.inputSampleFin()) { + //fprintf(stderr, "Insert 
silence: %lld -> %lld\n", input.outputSamples(), input.outputSamples() + AAC_BLOCK_SAMPLES); + addSilent(output, input); + } +} + +RGYFAWEncoder::RGYFAWEncoder() : + wavheader(), + fawmode(), + delaySamples(0), + inputAACPosByte(0), + outputFAWPosByte(0), + bufferIn(), + bufferTmp() { + +} + +RGYFAWEncoder::~RGYFAWEncoder() { + +} + +int RGYFAWEncoder::init(const RGYWAVHeader *data, const RGYFAWMode mode, const int delayMillisec) { + wavheader = *data; + fawmode = mode; + bufferTmp.setBytePerSample(wavheader.number_of_channels * wavheader.bits_per_sample / 8); + delaySamples = delayMillisec * (int)wavheader.sample_rate / 1000; + inputAACPosByte += delaySamples * bufferTmp.bytePerSample(); + return 0; +} + +int RGYFAWEncoder::encode(std::vector& output, const uint8_t *input, const size_t inputLength) { + output.clear(); + bufferTmp.clear(); + + if (fawmode == RGYFAWMode::Unknown) { + return -1; + } + + bufferIn.append(input, inputLength); + + const auto ret = rgy_find_aacsync_c(bufferIn.data(), bufferIn.size()); + if (ret == RGY_MEMMEM_NOT_FOUND) { + return 0; + } + bufferIn.addOffset(ret); + return encode(output); +} + +int RGYFAWEncoder::encode(std::vector& output) { + if (bufferIn.size() < AAC_HEADER_MIN_SIZE) { + return 0; + } + bufferIn.parseAACHeader(bufferIn.data()); + auto aacBlockSize = bufferIn.aacFrameSize(); + if (aacBlockSize > bufferIn.size()) { + return 0; + } + auto ret0 = rgy_find_aacsync_c(bufferIn.data() + aacBlockSize, bufferIn.size() - aacBlockSize); + while (ret0 != RGY_MEMMEM_NOT_FOUND) { + ret0 += aacBlockSize; + if (inputAACPosByte < outputFAWPosByte) { + ; // このブロックを破棄 + } else { + if (outputFAWPosByte < inputAACPosByte) { + const auto offsetBytes = inputAACPosByte - outputFAWPosByte; + const auto origSize = bufferTmp.size(); + bufferTmp.append(nullptr, (size_t)offsetBytes); + memset(bufferTmp.data() + origSize, 0, (size_t)offsetBytes); + outputFAWPosByte = inputAACPosByte; + } + // outputWavPosSample == inputAACPosSample + 
encodeBlock(bufferIn.data(), aacBlockSize); + } + inputAACPosByte += AAC_BLOCK_SAMPLES * bufferTmp.bytePerSample(); + + bufferIn.addOffset(ret0); + if (bufferIn.size() < AAC_HEADER_MIN_SIZE) { + break; + } + bufferIn.parseAACHeader(bufferIn.data()); + aacBlockSize = bufferIn.aacFrameSize(); + if (aacBlockSize > bufferIn.size()) { + break; + } + ret0 = rgy_find_aacsync_c(bufferIn.data() + aacBlockSize, bufferIn.size() - aacBlockSize); + } + + output.resize(bufferTmp.size()); + memcpy(output.data(), bufferTmp.data(), bufferTmp.size()); + bufferTmp.clear(); + return 0; +} + +void RGYFAWEncoder::encodeBlock(const uint8_t *data, const size_t dataLength) { + const uint32_t checksumCalc = faw_checksum_calc(data, dataLength); + + bufferTmp.append(fawstart1.data(), fawstart1.size()); + outputFAWPosByte += fawstart1.size(); + + bufferTmp.append(data, dataLength); + outputFAWPosByte += dataLength; + + bufferTmp.append((const uint8_t *)&checksumCalc, sizeof(checksumCalc)); + outputFAWPosByte += sizeof(checksumCalc); + + bufferTmp.append(fawfin1.data(), fawfin1.size()); + outputFAWPosByte += fawfin1.size(); +} + +int RGYFAWEncoder::fin(std::vector& output) { + output.clear(); + bufferIn.append(AACSYNC_BYTES.data(), AACSYNC_BYTES.size()); + auto ret = encode(output); + if (outputFAWPosByte < inputAACPosByte) { + // 残りのbyteを0で調整 + const auto offsetBytes = inputAACPosByte - outputFAWPosByte; + output.resize(output.size() + (size_t)offsetBytes, 0); + } + if (delaySamples < 0) { + // 負のdelayの場合、wavの長さを合わせるために0で埋める + const auto offsetBytes = -1 * delaySamples * bufferTmp.bytePerSample(); + output.resize(output.size() + offsetBytes, 0); + } + //最終出力は4byte少ない (先頭に4byte入れたためと思われる) + if (output.size() > 4) { + output.resize(output.size() - 4); + } + return ret; +} diff --git a/auoCommon/rgy_faw.h b/auoCommon/rgy_faw.h new file mode 100644 index 0000000..7836e1b --- /dev/null +++ b/auoCommon/rgy_faw.h @@ -0,0 +1,189 @@ +// 
-----------------------------------------------------------------------------------------
+// QSVEnc/NVEnc by rigaya
+// -----------------------------------------------------------------------------------------
+// The MIT License
+//
+// Copyright (c) 2023 rigaya
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// --------------------------------------------------------------------------------------------
+
+#ifndef __RGY_FAW_H__
+#define __RGY_FAW_H__
+
+#include
+#include
+#include
+#include "rgy_wav_parser.h"
+#include "rgy_memmem.h"
+
+// Byte marker that opens a FAW block: the decoder searches for it and the
+// encoder writes it immediately before each AAC frame.
+static const std::array fawstart1 = {
+    0x72, 0xF8, 0x1F, 0x4E, 0x07, 0x01, 0x00, 0x00
+};
+// The fawstart1 bytes with 0x80 XORed in, each preceded by a 0x00 byte.
+// NOTE(review): presumably the marker as it looks in 16-bit samples before the
+// "upper byte + 128" conversion; it is not referenced by the code visible here.
+static const std::array fawstart2 = {
+    0x00, 0xF2, 0x00, 0x78, 0x00, 0x9F, 0x00, 0xCE,
+    0x00, 0x87, 0x00, 0x81, 0x00, 0x80, 0x00, 0x80
+};
+// Byte marker that closes a FAW block (eight zero bytes followed by "END\0");
+// the encoder writes it after the 4-byte checksum.
+static const std::array fawfin1 = {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x45, 0x4E, 0x44, 0x00
+};
+// The fawfin1 bytes with 0x80 XORed in, each preceded by a 0x00 byte
+// (same relationship as fawstart2 has to fawstart1).
+static const std::array fawfin2 = {
+    0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+    0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+    0x00, 0xC5, 0x00, 0xCE, 0x00, 0xC4, 0x00, 0x80
+};
+
+// Search data_ (data_size bytes) for fawstart1. The AVX2 variant forwards to
+// the generic AVX2 memmem; the other variants are defined outside this header.
+size_t rgy_memmem_fawstart1_c(const void *data_, const size_t data_size);
+size_t rgy_memmem_fawstart1_avx2(const void *data_, const size_t data_size);
+size_t rgy_memmem_fawstart1_avx512bw(const void *data_, const size_t data_size);
+
+// Convert n 16-bit samples to 8-bit values (upper byte + 128).
+void rgy_convert_audio_16to8(uint8_t *dst, const short *src, const size_t n);
+void rgy_convert_audio_16to8_avx2(uint8_t *dst, const short *src, const size_t n);
+
+// Split n 16-bit samples into two 8-bit streams: dst0 receives the upper byte
+// + 128, dst1 receives the lower byte + 128.
+void rgy_split_audio_16to8x2(uint8_t *dst0, uint8_t *dst1, const short *src, const size_t n);
+void rgy_split_audio_16to8x2_avx2(uint8_t *dst0, uint8_t *dst1, const short *src, const size_t n);
+
+// Decoder output: two byte buffers. RGYFAWDecoder::fin fills only [0] in Full
+// and Half mode and both [0] and [1] in Mix mode.
+using RGYFAWDecoderOutput = std::array, 2>;
+
+// FAW layout. The encoder rejects Unknown (encode() returns -1).
+enum class RGYFAWMode {
+    Unknown,
+    Full,
+    Half,
+    Mix
+};
+
+// Minimum number of buffered bytes the encoder requires before it parses an AAC header.
+static const int AAC_HEADER_MIN_SIZE = 7;
+// Number of samples accounted for each AAC frame (and each inserted silent frame).
+static const uint32_t AAC_BLOCK_SAMPLES = 1024;
+
+// Fields of an AAC frame header; filled in by parse(), whose implementation is
+// not part of this header.
+struct RGYAACHeader {
+    bool id;
+    bool protection;
+    int profile; // 00 ... main, 01 ... lc, 10 ... ssr
+    int samplerate;
+    bool private_bit;
+    uint32_t channel;
+    bool original;
+    bool home;
+    bool copyright;
+    bool copyright_start;
+    uint32_t aac_frame_length; // includes the AAC header
+    int adts_buffer_fullness;
+    int no_raw_data_blocks_in_frame;
+
+    void parse(const uint8_t *buffer);
+    int sampleRateIdxToRate(const uint32_t idx);
+};
+
+// Byte buffer with a read offset plus input/output position counters, used by
+// both the FAW decoder and the FAW encoder.
+class RGYFAWBitstream {
+private:
+    std::vector buffer;
+    size_t bufferOffset;   // start of the unread data inside buffer
+    size_t bufferLength;   // number of unread bytes
+
+    int bytePerWholeSample; // bytes per sample; RGYFAWEncoder::init passes channels * bits per sample / 8
+    uint64_t inputLengthByte;
+    uint64_t outSamples;
+
+    RGYAACHeader aacHeader;
+public:
+    RGYFAWBitstream();
+    ~RGYFAWBitstream();
+
+    void setBytePerSample(const int val);
+
+    // Pointer to the first unread byte.
+    uint8_t *data() { return buffer.data() + bufferOffset; }
+    const uint8_t *data() const { return buffer.data() + bufferOffset; }
+    // Number of unread bytes.
+    size_t size() const { return bufferLength; }
+    uint64_t inputLength() const { return inputLengthByte; }
+    // Sample index of the first unread byte: (inputLengthByte - bufferLength) / bytePerWholeSample.
+    uint64_t inputSampleStart() const { return (inputLengthByte - bufferLength) / bytePerWholeSample; }
+    // Sample index corresponding to inputLengthByte.
+    uint64_t inputSampleFin() const { return inputLengthByte / bytePerWholeSample; }
+    uint64_t outputSamples() const { return outSamples; }
+    int bytePerSample() const { return bytePerWholeSample; }
+
+    void addOffset(size_t offset);
+    void addOutputSamples(size_t samples);
+
+    void append(const uint8_t *input, const size_t inputLength);
+
+    void clear();
+
+    void parseAACHeader(const uint8_t *buffer);
+    uint32_t aacChannels() const;
+    uint32_t aacFrameSize() const;
+};
+
+// Extracts AAC frames from FAW data, inserting silent frames where the output
+// position lags behind the input position.
+class RGYFAWDecoder {
+private:
+    RGYWAVHeader wavheader;
+    RGYFAWMode fawmode;
+
+    RGYFAWBitstream bufferIn;    // drained by fin() in Full mode
+
+    RGYFAWBitstream bufferHalf0; // drained by fin() in Half and Mix mode
+    RGYFAWBitstream bufferHalf1; // drained by fin() in Mix mode only
+
+    // Function pointers to the search/convert routines; they are assigned
+    // outside this header.
+    decltype(rgy_memmem_c)* funcMemMem;
+    decltype(rgy_memmem_fawstart1_c)* funcMemMemFAWStart1;
+    decltype(rgy_convert_audio_16to8)* funcAudio16to8;
+    decltype(rgy_split_audio_16to8x2)* funcSplitAudio16to8x2;
+public:
+    RGYFAWDecoder();
+    ~RGYFAWDecoder();
+
+    RGYFAWMode mode() const { return fawmode; }
+    int init(const uint8_t *data);
+    int init(const RGYWAVHeader *data);
+    int decode(RGYFAWDecoderOutput& output, const uint8_t *data, const size_t dataLength);
+    // Clears every output buffer, then pads each active stream with silent frames.
+    void fin(RGYFAWDecoderOutput& output);
+private:
+    void appendFAWHalf(const uint8_t *data, const size_t dataLength);
+    void appendFAWMix(const uint8_t *data, const size_t dataLength);
+
+    void setWavInfo();
+    int decode(std::vector& output, RGYFAWBitstream& input);
+    // Returns 0 when no complete block is available, 1 after consuming one block.
+    int decodeBlock(std::vector& output, RGYFAWBitstream& input);
+    void addSilent(std::vector& output, RGYFAWBitstream& input);
+    void fin(std::vector& output, RGYFAWBitstream& input);
+};
+
+// Wraps AAC frames into FAW blocks (start marker, frame, checksum, end marker),
+// zero-filling between blocks so that output and input byte positions stay aligned.
+class RGYFAWEncoder {
+private:
+    RGYWAVHeader wavheader;
+    RGYFAWMode fawmode;
+    int delaySamples;          // delayMillisec converted to samples in init()
+
+    int64_t inputAACPosByte;   // byte position the next AAC frame should occupy
+    int64_t outputFAWPosByte;  // byte position written so far
+    RGYFAWBitstream bufferIn;
+    RGYFAWBitstream bufferTmp;
+public:
+    RGYFAWEncoder();
+    ~RGYFAWEncoder();
+
+    int init(const RGYWAVHeader *data, const RGYFAWMode mode, const int delayMillisec);
+    int encode(std::vector& output, const uint8_t *data, const size_t dataLength);
+    int fin(std::vector& output);
+private:
+    int encode(std::vector& output);
+    void encodeBlock(const uint8_t *data, const size_t dataLength);
+};
+
+#endif //__RGY_FAW_H__
\ No newline at end of file
diff --git a/auoCommon/rgy_faw_avx2.cpp b/auoCommon/rgy_faw_avx2.cpp
new file mode 100644
index 0000000..8043b6e
--- /dev/null
+++ b/auoCommon/rgy_faw_avx2.cpp
@@ -0,0 +1,98 @@
+// -----------------------------------------------------------------------------------------
+// QSVEnc/NVEnc by rigaya
+// -----------------------------------------------------------------------------------------
+// The MIT License
+//
+// Copyright (c) 2023 rigaya
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies 
of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// --------------------------------------------------------------------------------------------
+
+#define RGY_MEMMEM_AVX2
+#include "rgy_faw.h"
+
+#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64)
+
+// Searches data_ (data_size bytes) for the FAW start marker fawstart1 with the
+// AVX2 memmem implementation. Returns the match offset, or whatever
+// rgy_memmem_avx2_imp returns when there is no match.
+size_t rgy_memmem_fawstart1_avx2(const void *data_, const size_t data_size) {
+    return rgy_memmem_avx2_imp(data_, data_size, fawstart1.data(), fawstart1.size());
+}
+
+// Converts n 16-bit samples from src into n 8-bit values in dst, each being
+// the sample's upper byte plus 128.
+// Layout: a scalar prologue up to the first 32-byte aligned dst address, a
+// 32-samples-per-iteration AVX2 loop using non-temporal stores, and a scalar tail.
+void rgy_convert_audio_16to8_avx2(uint8_t *dst, const short *src, const size_t n) {
+    uint8_t *byte = dst;
+    const short *sh = src;
+    uint8_t * const loop_start = (uint8_t *)(((size_t)dst + 31) & ~31);
+    uint8_t * const loop_fin = (uint8_t *)(((size_t)dst + n) & ~31);
+    uint8_t * const fin = dst + n;
+    __m256i ySA, ySB;
+    static const __m256i yConst = _mm256_set1_epi16(128);
+    // Alignment prologue. It must also stop at fin: when n is smaller than the
+    // distance to the next 32-byte boundary, loop_start lies beyond the buffer
+    // and running up to it would write past dst + n.
+    while (byte < loop_start && byte < fin) {
+        *byte = (*sh >> 8) + 128;
+        byte++;
+        sh++;
+    }
+    // Main loop: 32 samples per iteration. Each 256-bit register is assembled
+    // from two 128-bit loads so that the per-lane pack below emits the samples
+    // in their original order.
+    while (byte < loop_fin) {
+        ySA = _mm256_set_m128i(_mm_loadu_si128((const __m128i*)(sh + 16)), _mm_loadu_si128((const __m128i*)(sh + 0)));
+        ySB = _mm256_set_m128i(_mm_loadu_si128((const __m128i*)(sh + 24)), _mm_loadu_si128((const __m128i*)(sh + 8)));
+        ySA = _mm256_srai_epi16(ySA, 8);
+        ySB = _mm256_srai_epi16(ySB, 8);
+        ySA = _mm256_add_epi16(ySA, yConst);
+        ySB = _mm256_add_epi16(ySB, yConst);
+        ySA = _mm256_packus_epi16(ySA, ySB);
+        _mm256_stream_si256((__m256i *)byte, ySA);
+        sh += 32;
+        byte += 32;
+    }
+    // Remainder
+    while (byte < fin) {
+        *byte = (*sh >> 8) + 128;
+        byte++;
+        sh++;
+    }
+}
+
+// Splits n 16-bit samples from src into two 8-bit streams of n bytes each:
+// dst0 receives the upper byte + 128 and dst1 the lower byte + 128.
+void rgy_split_audio_16to8x2_avx2(uint8_t *dst0, uint8_t *dst1, const short *src, const size_t n) {
+    const short *sh = src;
+    // The vector loop consumes 32 samples per iteration, so its bound must be a
+    // multiple of 32. With a multiple-of-16 bound the last iteration could read
+    // 16 samples past src and write 16 bytes past dst0/dst1.
+    const short *sh_fin = src + (n & ~31);
+    __m256i y0, y1, y2, y3;
+    __m256i yMask = _mm256_srli_epi16(_mm256_cmpeq_epi8(_mm256_setzero_si256(), _mm256_setzero_si256()), 8);
+    __m256i yConst = _mm256_set1_epi8(-128);
+    for (; sh < sh_fin; sh += 32, dst0 += 32, dst1 += 32) {
+        y0 = _mm256_set_m128i(_mm_loadu_si128((const __m128i*)(sh + 16)), _mm_loadu_si128((const __m128i*)(sh + 0)));
+        y1 = _mm256_set_m128i(_mm_loadu_si128((const __m128i*)(sh + 24)), _mm_loadu_si128((const __m128i*)(sh + 8)));
+        y2 = _mm256_and_si256(y0, yMask); //Lower8bit
+        y3 = _mm256_and_si256(y1, yMask); //Lower8bit
+        y0 = _mm256_srli_epi16(y0, 8); //Upper8bit
+        y1 = _mm256_srli_epi16(y1, 8); //Upper8bit
+        y2 = _mm256_packus_epi16(y2, y3);
+        y0 = _mm256_packus_epi16(y0, y1);
+        y2 = _mm256_add_epi8(y2, yConst);
+        y0 = _mm256_add_epi8(y0, yConst);
+        _mm256_storeu_si256((__m256i*)dst0, y0);
+        _mm256_storeu_si256((__m256i*)dst1, y2);
+    }
+    // Scalar tail: the n % 32 samples the vector loop did not cover.
+    sh_fin = sh + (n & 31);
+    for (; sh < sh_fin; sh++, dst0++, dst1++) {
+        *dst0 = (*sh >> 8) + 128;
+        *dst1 = (*sh & 0xff) + 128;
+    }
+}
+#endif
diff --git a/auoCommon/rgy_memmem.cpp b/auoCommon/rgy_memmem.cpp
new file mode 100644
index 0000000..a422d79
--- /dev/null
+++ b/auoCommon/rgy_memmem.cpp
@@ -0,0 +1,55 @@
+// -----------------------------------------------------------------------------------------
+// QSVEnc/NVEnc by rigaya
+// -----------------------------------------------------------------------------------------
+// The MIT License
+//
+// Copyright (c) 2023 rigaya
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation 
files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// --------------------------------------------------------------------------------------------
+
+#include
+#include
+#include "rgy_simd.h"
+#include "rgy_memmem.h"
+
+// Portable byte-string search: returns the offset of the first occurrence of
+// target_ (target_size bytes) inside data_ (data_size bytes), or
+// RGY_MEMMEM_NOT_FOUND when the target is longer than the data or never occurs.
+size_t rgy_memmem_c(const void *data_, const size_t data_size, const void *target_, const size_t target_size) {
+    if (target_size > data_size) {
+        return RGY_MEMMEM_NOT_FOUND;
+    }
+    const uint8_t *const haystack = (const uint8_t *)data_;
+    // Last offset at which the whole target still fits inside the data.
+    const size_t last_start = data_size - target_size;
+    size_t pos = 0;
+    while (pos <= last_start) {
+        if (memcmp(haystack + pos, target_, target_size) == 0) {
+            return pos;
+        }
+        pos++;
+    }
+    return RGY_MEMMEM_NOT_FOUND;
+}
+
+// Picks the memmem implementation for the running CPU: AVX-512BW (x64 builds
+// only) takes precedence over AVX2, and the portable C version is the fallback.
+decltype(rgy_memmem_c)* get_memmem_func() {
+    decltype(rgy_memmem_c)* selected = rgy_memmem_c;
+#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64)
+    const auto available = get_availableSIMD();
+    if ((available & RGY_SIMD::AVX2) == RGY_SIMD::AVX2) {
+        selected = rgy_memmem_avx2;
+    }
+#if defined(_M_X64) || defined(__x86_64)
+    // Checked last so that it overrides the AVX2 choice when both are available.
+    if ((available & RGY_SIMD::AVX512BW) == RGY_SIMD::AVX512BW) {
+        selected = rgy_memmem_avx512bw;
+    }
+#endif
+#endif
+    return selected;
+}
diff --git a/auoCommon/rgy_memmem.h b/auoCommon/rgy_memmem.h
new file mode 100644 
index 0000000..e598d1e
--- /dev/null
+++ b/auoCommon/rgy_memmem.h
@@ -0,0 +1,223 @@
+// -----------------------------------------------------------------------------------------
+// QSVEnc/NVEnc by rigaya
+// -----------------------------------------------------------------------------------------
+// The MIT License
+//
+// Copyright (c) 2023 rigaya
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// --------------------------------------------------------------------------------------------
+
+#ifndef __RGY_MEMMEM_H__
+#define __RGY_MEMMEM_H__
+
+#include
+#include
+#include
+#include
+#include "rgy_osdep.h"
+
+// Byte-string search: offset of the first occurrence of target_ inside data_,
+// or RGY_MEMMEM_NOT_FOUND.
+size_t rgy_memmem_c(const void *data_, const size_t data_size, const void *target_, const size_t target_size);
+size_t rgy_memmem_avx2(const void *data_, const size_t data_size, const void *target_, const size_t target_size);
+size_t rgy_memmem_avx512bw(const void *data_, const size_t data_size, const void *target_, const size_t target_size);
+
+// Sentinel returned by the search functions when there is no match.
+static const auto RGY_MEMMEM_NOT_FOUND = std::numeric_limits::max();
+
+// Returns the search implementation selected for the running CPU.
+decltype(rgy_memmem_c)* get_memmem_func();
+
+#if defined(RGY_MEMMEM_AVX2)
+
+#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64)
+
+#include
+
+#if _MSC_VER >= 1800 && !defined(__AVX__) && !defined(_DEBUG)
+static_assert(false, "do not forget to set /arch:AVX or /arch:AVX2 for this file.");
+#endif
+
+// Clears the lowest set bit of x.
+#define CLEAR_LEFT_BIT(x) ((x) & ((x) - 1))
+
+// Count of trailing zero bits, i.e. the index of the lowest set bit.
+#if defined(_WIN32) || defined(_WIN64)
+#define CTZ32(x) _tzcnt_u32(x)
+#define CTZ64(x) _tzcnt_u64(x)
+#else
+#define CTZ32(x) __builtin_ctz(x)
+#define CTZ64(x) __builtin_ctzll(x)
+#endif
+
+// Shifts the whole 256-bit value v right by `shift` bytes, filling with zeros.
+// a0 is the per-128-bit-lane shift; a1 carries the bytes that cross from the
+// upper lane into the lower lane. Both shuffle masks are read from shufbtable
+// at an offset that depends on shift (0x80 entries produce zero bytes).
+static RGY_FORCEINLINE __m256i _mm256_srlv256_epi8(const __m256i& v, const int shift) {
+    alignas(64) static const uint8_t shufbtable[] = {
+        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+        0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+    };
+    const __m256i mask = _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)(shufbtable + shift + 16)));
+    const __m256i a0 = _mm256_shuffle_epi8(v, mask);
+    const __m256i a1 = _mm256_shuffle_epi8(_mm256_permute2x128_si256(v, v, 0x80 + 0x01), _mm256_loadu_si256((const __m256i*)(shufbtable + shift)));
+    return _mm256_or_si256(a0, a1);
+}
+
+// Loads 32 bytes starting at data. When data sits in the last 32 bytes of a
+// 4096-byte page and the valid range [data, data_fin) ends inside that page,
+// the load is done from the preceding 32-byte aligned address and shifted into
+// place, so that the following page is never touched.
+static RGY_FORCEINLINE __m256i _mm256_loadu_si256_no_page_overread(const uint8_t *const data, const uint8_t *const data_fin) {
+    const size_t page_size = 4096;
+    const size_t load_size = 32; // 256bit
+    const auto size = data_fin - data;
+    const size_t datapageaddress = ((size_t)data & (page_size - 1));
+    if (datapageaddress > (page_size - load_size) && (datapageaddress + size) <= page_size) { // could the load cross a page boundary?
+        const auto loadaddress = (const uint8_t *const)((size_t)data & (~(load_size - 1)));
+        const int shift = (int)(data - loadaddress); // how far the load was pulled back
+        __m256i y0 = _mm256_loadu_si256((const __m256i*)loadaddress);
+        return _mm256_srlv256_epi8(y0, shift);
+    } else {
+        return _mm256_loadu_si256((const __m256i*)data);
+    }
+}
+
+// AVX2 byte-string search. Returns the offset of the first occurrence of
+// target_ (target_size bytes) inside data_ (data_size bytes), or
+// RGY_MEMMEM_NOT_FOUND. An empty target matches at offset 0, as in rgy_memmem_c.
+//
+// Candidates are positions where the first and the last target byte both
+// match (32 positions tested per iteration); the bytes in between are then
+// confirmed with memcmp.
+static RGY_FORCEINLINE size_t rgy_memmem_avx2_imp(const void *data_, const size_t data_size, const void *target_, const size_t target_size) {
+    if (data_size < target_size) {
+        return RGY_MEMMEM_NOT_FOUND;
+    }
+    if (target_size == 0) {
+        // Nothing to compare; also avoids reading target[-1] below.
+        return 0;
+    }
+    uint8_t *data = (uint8_t *)data_;
+    const uint8_t *target = (const uint8_t *)target_;
+    // Targets of 1 or 2 bytes are fully verified by the first/last byte test.
+    // Skipping memcmp for them avoids the size_t underflow of target_size - 2.
+    const bool check_middle = target_size > 2;
+    const __m256i target_first = _mm256_set1_epi8(target[0]);
+    const __m256i target_last = _mm256_set1_epi8(target[target_size - 1]);
+    const int64_t fin64 = (int64_t)data_size - (int64_t)(target_size + 32 - 1); // limit up to which the 32-byte load of r1 is safe
+    size_t i = 0;
+    if (fin64 > 0) {
+        const size_t fin = (size_t)fin64;
+        // First, loop as far as plain loads can be used.
+        for (; i < fin; i += 32) {
+            const __m256i r0 = _mm256_loadu_si256((const __m256i*)(data + i));
+            const __m256i r1 = _mm256_loadu_si256((const __m256i*)(data + i + target_size - 1));
+            uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(_mm256_cmpeq_epi8(r0, target_first), _mm256_cmpeq_epi8(r1, target_last)));
+            while (mask != 0) {
+                const auto j = CTZ32(mask);
+                if (!check_middle || memcmp(data + i + j + 1, target + 1, target_size - 2) == 0) {
+                    const auto ret = i + j;
+                    return ret;
+                }
+                mask = CLEAR_LEFT_BIT(mask);
+            }
+        }
+    }
+    // Near the end of the data: load while taking the page boundary of the
+    // allocated region into account.
+    uint8_t *data_fin = data + data_size;
+    for (; i < data_size; i += 32) {
+        const __m256i r0 = _mm256_loadu_si256_no_page_overread(data + i, data_fin);
+        const __m256i r1 = _mm256_loadu_si256_no_page_overread(data + i + target_size - 1, data_fin);
+        uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(_mm256_cmpeq_epi8(r0, target_first), _mm256_cmpeq_epi8(r1, target_last)));
+        while (mask != 0) {
+            const auto j = CTZ32(mask);
+            // Only accept candidates whose whole target lies inside the data.
+            if ((i + j + target_size - 1 < data_size)
+                && (!check_middle || memcmp(data + i + j + 1, target + 1, target_size - 2) == 0)) {
+                const auto ret = i + j;
+                return ret < data_size ? ret : RGY_MEMMEM_NOT_FOUND;
+            }
+            mask = CLEAR_LEFT_BIT(mask);
+        }
+    }
+    return RGY_MEMMEM_NOT_FOUND;
+}
+#endif //#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64)
+
+#elif defined(RGY_MEMMEM_AVX512)
+
+#if defined(_M_X64) || defined(__x86_64)
+
+#include
+
+#if _MSC_VER >= 1800 && !defined(__AVX512BW__) && !defined(_DEBUG)
+static_assert(false, "do not forget to set /arch:AVX512 for this file.");
+#endif
+
+// Clears the lowest set bit of x.
+#define CLEAR_LEFT_BIT(x) ((x) & ((x) - 1))
+
+// Count of trailing zero bits, i.e. the index of the lowest set bit.
+#if defined(_WIN32) || defined(_WIN64)
+#define CTZ32(x) _tzcnt_u32(x)
+#define CTZ64(x) _tzcnt_u64(x)
+#else
+#define CTZ32(x) __builtin_ctz(x)
+#define CTZ64(x) __builtin_ctzll(x)
+#endif
+
+// Masked 64-byte load: only the bytes below data_fin (at most 64) are read;
+// the remaining byte positions of the result are zero.
+static RGY_FORCEINLINE __m512i _mm512_loadu_si512_exact(const uint8_t *const data, const uint8_t *const data_fin) {
+    alignas(64) static const uint8_t inctable[] = {
+        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+        32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+        48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63
+    };
+    const __m512i inc = _mm512_load_si512((const __m512i*)inctable);
+    const uint8_t remain_size = (uint8_t)std::min(data_fin - data, 64);
+    const auto mask = _mm512_cmplt_epi8_mask(inc, _mm512_set1_epi8(remain_size));
+    return _mm512_maskz_loadu_epi8(mask, (const __m512i*)data);
+}
+
+// AVX-512BW byte-string search. Returns the offset of the first occurrence of
+// target_ (target_size bytes) inside data_ (data_size bytes), or
+// RGY_MEMMEM_NOT_FOUND. An empty target matches at offset 0, as in rgy_memmem_c.
+//
+// Candidates are positions where the first and the last target byte both
+// match (64 positions tested per iteration); the bytes in between are then
+// confirmed with memcmp.
+static RGY_FORCEINLINE size_t rgy_memmem_avx512_imp(const void *data_, const size_t data_size, const void *target_, const size_t target_size) {
+    if (data_size < target_size) {
+        return RGY_MEMMEM_NOT_FOUND;
+    }
+    if (target_size == 0) {
+        // Nothing to compare; also avoids reading target[-1] below.
+        return 0;
+    }
+    uint8_t *data = (uint8_t *)data_;
+    const uint8_t *target = (const uint8_t *)target_;
+    // Targets of 1 or 2 bytes are fully verified by the first/last byte test.
+    // Skipping memcmp for them avoids the size_t underflow of target_size - 2.
+    const bool check_middle = target_size > 2;
+    const __m512i target_first = _mm512_set1_epi8(target[0]);
+    const __m512i target_last = _mm512_set1_epi8(target[target_size - 1]);
+    const int64_t fin64 = (int64_t)data_size - (int64_t)(target_size + 64 - 1); // limit up to which the 64-byte load of r1 is safe
+    size_t i = 0;
+    if (fin64 > 0) {
+        const size_t fin = (size_t)fin64;
+        // First, loop as far as plain loads can be used.
+        for (; i < fin; i += 64) {
+            const __m512i r0 = _mm512_loadu_si512((const __m512i*)(data + i));
+            const __m512i r1 = _mm512_loadu_si512((const __m512i*)(data + i + target_size - 1));
+            uint64_t mask = _mm512_mask_cmpeq_epi8_mask(_mm512_cmpeq_epi8_mask(r0, target_first), r1, target_last);
+            while (mask != 0) {
+                const auto j = CTZ64(mask);
+                if (!check_middle || memcmp(data + i + j + 1, target + 1, target_size - 2) == 0) {
+                    const auto ret = i + j;
+                    return ret;
+                }
+                mask = CLEAR_LEFT_BIT(mask);
+            }
+        }
+    }
+    // Near the end of the data: limit the load range with a mask.
+    uint8_t *data_fin = data + data_size;
+    for (; i < data_size; i += 64) {
+        const __m512i r0 = _mm512_loadu_si512_exact(data + i, data_fin);
+        const __m512i r1 = _mm512_loadu_si512_exact(data + i + target_size - 1, data_fin);
+        uint64_t mask = _mm512_mask_cmpeq_epi8_mask(_mm512_cmpeq_epi8_mask(r0, target_first), r1, target_last);
+        while (mask != 0) {
+            const auto j = CTZ64(mask);
+            // Only accept candidates whose whole target lies inside the data.
+            if ((i + j + target_size - 1 < data_size)
+                && (!check_middle || memcmp(data + i + j + 1, target + 1, target_size - 2) == 0)) {
+                const auto ret = i + j;
+                return ret;
+            }
+            mask = CLEAR_LEFT_BIT(mask);
+        }
+    }
+    return RGY_MEMMEM_NOT_FOUND;
+}
+
+#endif //#if defined(_M_X64) || defined(__x86_64)
+
+#endif //#if defined(RGY_MEMMEM_AVX2)
+
+#endif //__RGY_MEMMEM_H__
\ No newline at end of file
diff --git a/auoCommon/rgy_memmem_avx2.cpp b/auoCommon/rgy_memmem_avx2.cpp
new 
file mode 100644
index 0000000..69bd75e
--- /dev/null
+++ b/auoCommon/rgy_memmem_avx2.cpp
@@ -0,0 +1,35 @@
+// -----------------------------------------------------------------------------------------
+// QSVEnc/NVEnc by rigaya
+// -----------------------------------------------------------------------------------------
+// The MIT License
+//
+// Copyright (c) 2023 rigaya
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+// --------------------------------------------------------------------------------------------
+
+#define RGY_MEMMEM_AVX2
+#include "rgy_memmem.h"
+
+#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64)
+// Out-of-line entry point for the AVX2 search: forwards all four arguments to
+// the force-inlined implementation in rgy_memmem.h and returns its result.
+size_t rgy_memmem_avx2(const void *data_, const size_t data_size, const void *target_, const size_t target_size) {
+    const size_t found_at = rgy_memmem_avx2_imp(data_, data_size, target_, target_size);
+    return found_at;
+}
+#endif
diff --git a/auoCommon/rgy_osdep.h b/auoCommon/rgy_osdep.h
new file mode 100644
index 0000000..db746ca
--- /dev/null
+++ b/auoCommon/rgy_osdep.h
@@ -0,0 +1,336 @@
+// -----------------------------------------------------------------------------------------
+// QSVEnc/NVEnc by rigaya
+// -----------------------------------------------------------------------------------------
+// The MIT License
+//
+// Copyright (c) 2011-2016 rigaya
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE. 
+// +// -------------------------------------------------------------------------------------------- + +#pragma once +#ifndef __RGY_OSDEP_H__ +#define __RGY_OSDEP_H__ + +#if defined(_MSC_VER) +#ifndef RGY_FORCEINLINE +#define RGY_FORCEINLINE __forceinline +#endif +#ifndef RGY_NOINLINE +#define RGY_NOINLINE __declspec(noinline) +#endif +#else +#ifndef RGY_FORCEINLINE +#define RGY_FORCEINLINE inline +#endif +#ifndef RGY_NOINLINE +#define RGY_NOINLINE __attribute__ ((noinline)) +#endif +#endif + +#if defined(_WIN32) || defined(_WIN64) +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include +#include +#include +#include +#include +#pragma comment(lib, "winmm.lib") +#include +#define RGY_LOAD_LIBRARY(x) LoadLibrary(x) +#define RGY_GET_PROC_ADDRESS GetProcAddress +#define RGY_FREE_LIBRARY FreeLibrary + +static bool RGYThreadStillActive(HANDLE handle) { + DWORD exitCode = 0; + return GetExitCodeThread(handle, &exitCode) == STILL_ACTIVE; +} + +static int getStdInKey() { + static HANDLE hStdInHandle = NULL; + static bool stdin_from_console = false; + if (hStdInHandle == NULL) { + hStdInHandle = GetStdHandle(STD_INPUT_HANDLE); + DWORD mode = 0; + stdin_from_console = GetConsoleMode(hStdInHandle, &mode) != 0; + } + if (stdin_from_console) { + if (_kbhit()) { + return _getch(); + } + } + return 0; +} + +#else //#if defined(_WIN32) || defined(_WIN64) +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include + +static inline void *_aligned_malloc(size_t size, size_t alignment) { + void *p = nullptr; + int ret = posix_memalign(&p, alignment, size); + return (ret == 0) ? 
p : 0; +} +#define _aligned_free free + +typedef wchar_t WCHAR; +typedef int BOOL; +typedef void* HANDLE; +typedef void* HMODULE; +typedef void* HINSTANCE; +typedef int errno_t; + +#define RGY_LOAD_LIBRARY(x) dlopen((x), RTLD_LAZY) +#define RGY_GET_PROC_ADDRESS dlsym +#define RGY_FREE_LIBRARY dlclose + +static uint32_t CP_THREAD_ACP = 0; +static uint32_t CP_UTF8 = 0; + +#define __stdcall +#define __fastcall + +template +char (*__countof_helper(_CountofType (&_Array)[_SizeOfArray]))[_SizeOfArray]; +#define _countof(_Array) (int)sizeof(*__countof_helper(_Array)) + +#ifndef TRUE +#define TRUE (1) +#endif + +#ifndef FALSE +#define FALSE (0) +#endif + +struct LUID { + int LowPart; + int HighPart; +}; + +static inline char *strtok_s(char *strToken, const char *strDelimit, char **context) { + return strtok(strToken, strDelimit); +} +static inline char *strcpy_s(char *dst, size_t size, const char *src) { + return strcpy(dst, src); +} +static inline char *strcpy_s(char *dst, const char *src) { + return strcpy(dst, src); +} +static inline char *strncpy_s(char *dst, size_t numberOfElements, const char *src, size_t count) { + return strncpy(dst, src, count); +} +static inline char *strncpy_s(char *dst, const char *src, size_t count) { + return strncpy(dst, src, count); +} +static inline char *strcat_s(char *dst, size_t size, const char *src) { + return strcat(dst, src); +} +static inline int _vsprintf_s(char *buffer, size_t size, const char *format, va_list argptr) { + return vsprintf(buffer, format, argptr); +} +#define sscanf_s sscanf +#define swscanf_s swscanf +#define vsprintf_s(buf, size, fmt, va) vsprintf(buf, fmt, va) +#define vswprintf_s vswprintf +#define _strnicmp strncasecmp +#define stricmp strcasecmp +#define _stricmp stricmp +#define wcsicmp wcscasecmp +#define _wcsicmp wcsicmp + +static short _InterlockedIncrement16(volatile short *pVariable) { + return __sync_add_and_fetch((volatile short*)pVariable, 1); +} + +static short _InterlockedDecrement16(volatile short 
// Returns the number of chars the formatted output would need (terminator
// excluded). The caller's va_list is left usable because a copy is consumed.
static inline int _vscprintf(const char * format, va_list pargs) {
    int retval;
    va_list argcopy;
    va_copy(argcopy, pargs);
    retval = vsnprintf(NULL, 0, format, argcopy);
    va_end(argcopy);
    return retval;
}

// Wide-char counterpart of _vscprintf (WCHAR is wchar_t in this header).
// vswprintf() reports failure instead of the required length when the buffer
// is too small, so the length is found by formatting into a scratch buffer
// that is doubled until the output fits. Returns -1 if formatting fails or
// the output exceeds the largest scratch size tried.
static inline int _vscwprintf(const wchar_t * format, va_list pargs) {
    const size_t maxCapacity = (size_t)1 << 20;
    int retval = -1;
    for (size_t capacity = 128; retval < 0 && capacity <= maxCapacity; capacity *= 2) {
        wchar_t *scratch = (wchar_t *)malloc(capacity * sizeof(wchar_t));
        if (scratch == NULL) {
            break;
        }
        va_list argcopy;
        va_copy(argcopy, pargs);
        retval = vswprintf(scratch, capacity, format, argcopy);
        va_end(argcopy);
        free(scratch);
    }
    return retval;
}

// Unsized variant: the destination capacity is unknown, so this is vsprintf().
static inline int sprintf_s(char *dst, const char* format, ...) {
    va_list args;
    va_start(args, format);
    int ret = vsprintf(dst, format, args);
    va_end(args);
    return ret;
}

// Formats into dst without writing more than `size` chars (terminator
// included). Returns the number of chars written, or -1 when the output had
// to be truncated or formatting failed.
static inline int sprintf_s(char *dst, size_t size, const char* format, ...) {
    va_list args;
    va_start(args, format);
    int ret = vsnprintf(dst, size, format, args);
    va_end(args);
    if (ret >= 0 && (size_t)ret >= size) {
        ret = -1;
    }
    return ret;
}
// Packs the first sizeof(size_t) * 8 CPUs of a cpu_set_t into a bit mask
// (bit j set <=> CPU j is in the set).
static inline size_t rgy_cpuset_to_mask(const cpu_set_t *cpuset) {
    size_t mask = 0;
    for (uint32_t j = 0; j < sizeof(mask) * 8; j++) {
        if (CPU_ISSET(j, cpuset)) {
            mask |= ((size_t)1 << j);
        }
    }
    return mask;
}

// Expands a bit mask into a cpu_set_t (CPU j is added when bit j is set).
// The probe bit is built as size_t: an int-typed `1 << j` is undefined for
// j >= 31 and selected the wrong CPUs for the upper half of the mask.
static inline void rgy_mask_to_cpuset(size_t mask, cpu_set_t *cpuset) {
    CPU_ZERO(cpuset);
    for (uint32_t j = 0; j < sizeof(mask) * 8; j++) {
        if (mask & ((size_t)1 << j)) {
            CPU_SET(j, cpuset);
        }
    }
}

// Applies `mask` as the CPU affinity of `process` via sched_setaffinity().
// Returns the affinity mask that was in effect before the call (0 if it could
// not be read). The result of the set call itself is not reported.
static size_t SetProcessAffinityMask(pid_t process, size_t mask) {
    cpu_set_t cpuset_org;
    CPU_ZERO(&cpuset_org);
    sched_getaffinity(process, sizeof(cpu_set_t), &cpuset_org);
    const size_t mask_org = rgy_cpuset_to_mask(&cpuset_org);

    cpu_set_t cpuset;
    rgy_mask_to_cpuset(mask, &cpuset);
    sched_setaffinity(process, sizeof(cpu_set_t), &cpuset);
    return mask_org;
}

// Applies `mask` as the CPU affinity of `thread` via pthread_setaffinity_np().
// Returns the affinity mask that was in effect before the call (0 if it could
// not be read). The result of the set call itself is not reported.
static size_t SetThreadAffinityMask(pthread_t thread, size_t mask) {
    cpu_set_t cpuset_org;
    CPU_ZERO(&cpuset_org);
    pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset_org);
    const size_t mask_org = rgy_cpuset_to_mask(&cpuset_org);

    cpu_set_t cpuset;
    rgy_mask_to_cpuset(mask, &cpuset);
    pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
    return mask_org;
}
// Map the MSVC-specific stdio names onto their libc / POSIX counterparts.
#define _fread_nolock fread
#define _fwrite_nolock fwrite
#define _fgetc_nolock fgetc
// fseeko/ftello carry the offset as off_t rather than long, so 64-bit offsets
// are not truncated on targets where long is 32 bits (given large-file
// support, i.e. a 64-bit off_t).
#define _fseeki64 fseeko
#define _ftelli64 ftello
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// -------------------------------------------------------------------------------------------- + +#include +#include "rgy_osdep.h" +#include "rgy_simd.h" +#include "rgy_arch.h" +#if defined(_M_IX86) || defined(_M_X64) || defined(__x86_64) +#if _MSC_VER +#include +#else +#include +#endif //_MSC_VER + +RGY_SIMD get_availableSIMD() { + int CPUInfo[4]; + __cpuid(CPUInfo, 1); + RGY_SIMD simd = RGY_SIMD::NONE; + if (CPUInfo[3] & 0x04000000) simd |= RGY_SIMD::SSE2; + if (CPUInfo[2] & 0x00000001) simd |= RGY_SIMD::SSE3; + if (CPUInfo[2] & 0x00000200) simd |= RGY_SIMD::SSSE3; + if (CPUInfo[2] & 0x00080000) simd |= RGY_SIMD::SSE41; + if (CPUInfo[2] & 0x00100000) simd |= RGY_SIMD::SSE42; + if (CPUInfo[2] & 0x00800000) simd |= RGY_SIMD::POPCNT; + uint64_t xgetbv = 0; + if ((CPUInfo[2] & 0x18000000) == 0x18000000) { +#if _MSC_VER + xgetbv = _xgetbv(0); +#else + xgetbv = rgy_xgetbv(0); +#endif + if ((xgetbv & 0x06) == 0x06) + simd |= RGY_SIMD::AVX; + } + __cpuid(CPUInfo, 7); + if (!!(simd & RGY_SIMD::AVX) && (CPUInfo[1] & 0x00000020)) { + simd |= RGY_SIMD::AVX2; + } + if (!!(simd & RGY_SIMD::AVX) && ((xgetbv >> 5) & 7) == 7) { + if (CPUInfo[1] & (1u << 3)) simd |= RGY_SIMD::BMI1; + if (CPUInfo[1] & (1u << 8)) simd |= RGY_SIMD::BMI2; + if (CPUInfo[1] & (1u << 16)) simd |= RGY_SIMD::AVX512F; + if (!!(simd & RGY_SIMD::AVX512F)) { + if (CPUInfo[1] & (1u << 17)) simd |= RGY_SIMD::AVX512DQ; + if (CPUInfo[1] & (1u << 21)) simd |= RGY_SIMD::AVX512IFMA; + if (CPUInfo[1] & (1u << 26)) simd |= RGY_SIMD::AVX512PF; + if (CPUInfo[1] & (1u << 27)) simd |= RGY_SIMD::AVX512ER; + if (CPUInfo[1] & (1u << 28)) simd |= RGY_SIMD::AVX512CD; + if (CPUInfo[1] & (1u << 30)) simd |= RGY_SIMD::AVX512BW; + if (CPUInfo[1] & (1u << 
31)) simd |= RGY_SIMD::AVX512VL; + if (CPUInfo[2] & (1u << 1)) simd |= RGY_SIMD::AVX512VBMI; + if (CPUInfo[2] & (1u << 6)) simd |= RGY_SIMD::AVX512VBMI2; + if (CPUInfo[2] & (1u << 11)) simd |= RGY_SIMD::AVX512VNNI; + if (CPUInfo[2] & (1u << 12)) simd |= RGY_SIMD::AVX512BITALG; + if (CPUInfo[2] & (1u << 14)) simd |= RGY_SIMD::AVX512VPOPCNTDQ; + } + } + return simd; +} +#else +RGY_SIMD get_availableSIMD() { + return RGY_SIMD::NONE; +} +#endif diff --git a/auoCommon/rgy_simd.h b/auoCommon/rgy_simd.h new file mode 100644 index 0000000..3224f8e --- /dev/null +++ b/auoCommon/rgy_simd.h @@ -0,0 +1,96 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2011-2016 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
// Bit flags describing the SIMD / bit-manipulation instruction sets a CPU
// offers. Values are single bits so they can be combined with the operators
// defined below; SIMD_ALL has every bit set.
enum class RGY_SIMD : uint64_t {
    NONE            = 0x000000,
    SSE2            = 0x000001,
    SSE3            = 0x000002,
    SSSE3           = 0x000004,
    SSE41           = 0x000008,
    SSE42           = 0x000010,
    POPCNT          = 0x000020,
    AVX             = 0x000040,
    AVX2            = 0x000080,
    BMI1            = 0x000100,
    BMI2            = 0x000200,
    AVX512F         = 0x000400,
    AVX512DQ        = 0x000800,
    AVX512IFMA      = 0x001000,
    AVX512PF        = 0x002000,
    AVX512ER        = 0x004000,
    AVX512CD        = 0x008000,
    AVX512BW        = 0x010000,
    AVX512VL        = 0x020000,
    AVX512VBMI      = 0x040000,
    AVX512VBMI2     = 0x080000,
    AVX512VNNI      = 0x100000,
    AVX512BITALG    = 0x200000,
    AVX512VPOPCNTDQ = 0x400000,

    SIMD_ALL = std::numeric_limits<uint64_t>::max(),
};

// The operators are inline rather than static: a static function in a header
// gives every translation unit its own copy and triggers "unreferenced local
// function" warnings wherever one of them goes unused.

// True when no flag is set.
constexpr bool operator!(RGY_SIMD e) {
    return e == static_cast<RGY_SIMD>(0);
}

// Union of two flag sets.
constexpr RGY_SIMD operator|(RGY_SIMD a, RGY_SIMD b) {
    return static_cast<RGY_SIMD>(static_cast<uint64_t>(a) | static_cast<uint64_t>(b));
}

// Adds the flags in b to a; returns a reference to a so calls can be chained.
inline RGY_SIMD &operator|=(RGY_SIMD &a, RGY_SIMD b) {
    a = a | b;
    return a;
}

// Intersection of two flag sets.
constexpr RGY_SIMD operator&(RGY_SIMD a, RGY_SIMD b) {
    return static_cast<RGY_SIMD>(static_cast<uint64_t>(a) & static_cast<uint64_t>(b));
}

// Keeps only the flags of a that are also in b; returns a reference to a.
inline RGY_SIMD &operator&=(RGY_SIMD &a, RGY_SIMD b) {
    a = a & b;
    return a;
}
associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// -------------------------------------------------------------------------------------------- + +#pragma once +#ifndef __RGY_TCHAR_H__ +#define __RGY_TCHAR_H__ + +#if defined(_WIN32) || defined(_WIN64) +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include +#else +#include +#include +#include + +typedef char TCHAR; +#define _T(x) x +#define _tmain main +#define _tcslen strlen +#define _ftprintf fprintf +#define _stscanf_s sscanf +#define _stscanf sscanf +#define _tcscmp strcmp +#define _tcsnccmp strncmp +#define _tcsicmp strcasecmp +#define _tcschr strchr +#define _tcsrchr strrchr +#define _tcsstr strstr +#define _tcscat_s strcat_s +#define _tcstol strtol +#define _tcsdup strdup +#define _tfopen fopen +#define _tfopen_s fopen_s +#define _stprintf_s sprintf_s +#define _vsctprintf _vscprintf +#define _vstprintf_s _vsprintf_s +#define _tcstok_s strtok_s +#define _tcserror strerror +#define _fgetts fgets +#define _tcscpy strcpy +#define _tcsncpy strncpy +#define _tremove remove +#define _trename 
rename +#define _istalpha isalpha +#define _tcsftime strftime + +#define _SH_DENYRW 0x10 // deny read/write mode +#define _SH_DENYWR 0x20 // deny write mode +#define _SH_DENYRD 0x30 // deny read mode +#define _SH_DENYNO 0x40 // deny none mode +#define _SH_SECURE 0x80 // secure mode + +static inline FILE *_tfsopen(const TCHAR *filename, const TCHAR *mode, int shflag) { + return fopen(filename, mode); +} + +static inline char *_tcscpy_s(TCHAR *dst, const TCHAR *src) { + return strcpy(dst, src); +} + +static inline char *_tcscpy_s(TCHAR *dst, size_t size, const TCHAR *src) { + return strcpy(dst, src); +} +#endif //#if defined(_WIN32) || defined(_WIN64) + +#include + +typedef std::basic_string tstring; + +#endif // __RGY_TCHAR_H__ diff --git a/auoCommon/rgy_thread_affinity.cpp b/auoCommon/rgy_thread_affinity.cpp new file mode 100644 index 0000000..44ad6b7 --- /dev/null +++ b/auoCommon/rgy_thread_affinity.cpp @@ -0,0 +1,636 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2021 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// -------------------------------------------------------------------------------------------- + +#include +#include +#include "rgy_thread_affinity.h" +#include "rgy_osdep.h" +#if defined(_WIN32) || defined(_WIN64) +#include +#endif //#if defined(_WIN32) || defined(_WIN64) +#include "cpu_info.h" + +const TCHAR* rgy_thread_priority_mode_to_str(RGYThreadPriority mode) { + for (const auto& p : RGY_THREAD_PRIORITY_STR) { + if (p.first == mode) return p.second; + } + return nullptr; +} +RGYThreadPriority rgy_str_to_thread_priority_mode(const TCHAR* str) { + tstring target(str); + for (const auto& p : RGY_THREAD_PRIORITY_STR) { + if (target == p.second) return p.first; + } + return RGYThreadPriority::Unknwon; +} + +const TCHAR* rgy_thread_power_throttoling_mode_to_str(RGYThreadPowerThrottlingMode mode) { + for (const auto& p : RGY_THREAD_POWER_THROTTOLING_MODE_STR) { + if (p.first == mode) return p.second; + } + return nullptr; +} + +RGYThreadPowerThrottlingMode rgy_str_to_thread_power_throttoling_mode(const TCHAR* str) { + tstring target(str); + for (const auto& p : RGY_THREAD_POWER_THROTTOLING_MODE_STR) { + if (target == p.second) return p.first; + } + return RGYThreadPowerThrottlingMode::END; +} + +RGYThreadAffinity::RGYThreadAffinity() : mode(), custom(std::numeric_limits::max()) {}; + +RGYThreadAffinity::RGYThreadAffinity(RGYThreadAffinityMode affinityMode) : mode(affinityMode), custom(std::numeric_limits::max()) {}; + 
+RGYThreadAffinity::RGYThreadAffinity(RGYThreadAffinityMode m, uint64_t customAffinity) : mode(m), custom(customAffinity) {}; + +tstring RGYThreadAffinity::to_string() const { + if (mode == RGYThreadAffinityMode::CUSTOM) { + TCHAR buf[64]; + _stprintf_s(buf, _T("0x%llx"), custom); + return buf; + } + auto modeStr = rgy_thread_affnity_mode_to_str(mode); + if ( mode == RGYThreadAffinityMode::LOGICAL + || mode == RGYThreadAffinityMode::PHYSICAL + || mode == RGYThreadAffinityMode::CACHEL2 + || mode == RGYThreadAffinityMode::CACHEL3 + ) { + const auto cpu_info = get_cpu_info(); + int targetCount = 0; + if (mode == RGYThreadAffinityMode::LOGICAL) { + targetCount = cpu_info.logical_cores; + } else if (mode == RGYThreadAffinityMode::PHYSICAL) { + targetCount = cpu_info.physical_cores; + } else if (mode == RGYThreadAffinityMode::CACHEL2) { + targetCount = cpu_info.cache_count[1]; + } else if (mode == RGYThreadAffinityMode::CACHEL3) { + targetCount = cpu_info.cache_count[2]; + } + std::basic_stringstream tmp; + for (int id = 0; id < targetCount; id++) { + const auto target = 1llu << id; + if (target & custom) { + tmp << _T(":") << id; + } + } + if (!tmp.str().empty()) { + return modeStr + tstring(_T("#")) + tmp.str().substr(1); + } else { + return modeStr; + } + } + return modeStr; +} + +const TCHAR *rgy_thread_affnity_mode_to_str(RGYThreadAffinityMode mode) { + for (const auto& p : RGY_THREAD_AFFINITY_MODE_STR) { + if (p.second == mode) return p.first; + } + return nullptr; +} + +RGYThreadAffinityMode rgy_str_to_thread_affnity_mode(const TCHAR *str) { + tstring target(str); + for (const auto& p : RGY_THREAD_AFFINITY_MODE_STR) { + if (target == p.first) return p.second; + } + return RGYThreadAffinityMode::END; +} + +bool RGYThreadAffinity::operator==(const RGYThreadAffinity &x) const { + return mode == x.mode + && custom == x.custom; +} +bool RGYThreadAffinity::operator!=(const RGYThreadAffinity &x) const { + return !(*this == x); +} + +uint64_t 
RGYThreadAffinity::getMask(int idx) const { + return selectMaskFromLowerBit(getMask(), idx); +} + +uint64_t RGYThreadAffinity::getMask() const { + uint64_t mask = 0; + const auto cpu_info = get_cpu_info(); + switch (mode) { + case RGYThreadAffinityMode::PCORE: + case RGYThreadAffinityMode::ECORE: { + auto maskSelected = cpu_info.maskSystem; + if (mode == RGYThreadAffinityMode::PCORE && cpu_info.maskCoreP) maskSelected = cpu_info.maskCoreP; + if (mode == RGYThreadAffinityMode::ECORE && cpu_info.maskCoreE) maskSelected = cpu_info.maskCoreE; + int targetCore = 0; + for (int i = 0; i < cpu_info.physical_cores; i++) { + const auto target_i = get_mask(&cpu_info, RGYUnitType::Core, (int)RGYCoreType::Physical, i); + if (maskSelected & target_i) { // PCoreであるか? + const auto target_core_mask = 1llu << targetCore; + if (target_core_mask & custom) { // customで指定のコアであるか? + mask |= target_i; + } + targetCore++; + } + } + } break; + case RGYThreadAffinityMode::LOGICAL: + for (int i = 0; i < cpu_info.logical_cores; i++) { + const auto target = 1llu << i; + if (target & custom) { + mask |= get_mask(&cpu_info, RGYUnitType::Core, (int)RGYCoreType::Logical, i); + } + } + break; + case RGYThreadAffinityMode::PHYSICAL: + for (int i = 0; i < cpu_info.physical_cores; i++) { + const auto target = 1llu << i; + if (target & custom) { + mask |= get_mask(&cpu_info, RGYUnitType::Core, (int)RGYCoreType::Physical, i); + } + } + break; + case RGYThreadAffinityMode::CACHEL2: + for (int i = 0; i < cpu_info.cache_count[1]; i++) { + const auto target = 1llu << i; + if (target & custom) { + mask |= get_mask(&cpu_info, RGYUnitType::Cache, (int)RGYCacheLevel::L2, i); + } + } + break; + case RGYThreadAffinityMode::CACHEL3: + for (int i = 0; i < cpu_info.cache_count[2]; i++) { + const auto target = 1llu << i; + if (target & custom) { + mask |= get_mask(&cpu_info, RGYUnitType::Cache, (int)RGYCacheLevel::L3, i); + } + } + break; + case RGYThreadAffinityMode::CUSTOM: mask = (custom) ? 
custom & cpu_info.maskSystem : cpu_info.maskSystem; break; + case RGYThreadAffinityMode::ALL: + default: mask = cpu_info.maskSystem; break; + } + return (mask) ? mask : std::numeric_limits::max(); +} + +RGYParamThread::RGYParamThread() : + affinity(), + priority(RGYThreadPriority::Normal), + throttling(RGYThreadPowerThrottlingMode::Unset) { + +} + +uint32_t RGYParamThread::getPriorityCalss() { +#if defined(_WIN32) || defined(_WIN64) + static const std::array, RGY_THREAD_PRIORITY_STR.size()> RGY_THREAD_PRIORITY_CLASS = { + std::pair{ RGYThreadPriority::BackgroundBeign, PROCESS_MODE_BACKGROUND_BEGIN}, + std::pair{ RGYThreadPriority::Idle, IDLE_PRIORITY_CLASS}, + std::pair{ RGYThreadPriority::Lowest, IDLE_PRIORITY_CLASS}, + std::pair{ RGYThreadPriority::BelowNormal, BELOW_NORMAL_PRIORITY_CLASS}, + std::pair{ RGYThreadPriority::Normal, NORMAL_PRIORITY_CLASS}, + std::pair{ RGYThreadPriority::AboveNormal, ABOVE_NORMAL_PRIORITY_CLASS}, + std::pair{ RGYThreadPriority::Highest, HIGH_PRIORITY_CLASS} + }; + for (const auto& p : RGY_THREAD_PRIORITY_CLASS) { + if (p.first == priority) return p.second; + } +#endif //#if defined(_WIN32) || defined(_WIN64) + return 0u; +} + +tstring RGYParamThread::to_string(RGYParamThreadType type) const { + switch (type) { + case RGYParamThreadType::affinity: return affinity.to_string(); + case RGYParamThreadType::priority: return rgy_thread_priority_mode_to_str(priority); + case RGYParamThreadType::throttling: return rgy_thread_power_throttoling_mode_to_str(throttling); + case RGYParamThreadType::all: + default: { + tstring str = _T("affinity="); + str += affinity.to_string(); + str += _T(",priority="); + str += rgy_thread_priority_mode_to_str(priority); + str += _T(",throttling="); + str += rgy_thread_power_throttoling_mode_to_str(throttling); + return str; + } + } +} + +tstring RGYParamThread::desc() const { + tstring str; + str += affinity.to_string(); + str += _T(" (0x"); + TCHAR buf[64]; + _stprintf_s(buf, _T("0x%llx"), 
affinity.getMask()); + str += buf; + str += _T("), priority="); + str += rgy_thread_priority_mode_to_str(priority); + str += _T(", throttling="); + str += rgy_thread_power_throttoling_mode_to_str(throttling); + return str; +} + +bool RGYParamThread::operator==(const RGYParamThread& x) const { + return affinity == x.affinity + && priority == x.priority + && throttling == x.throttling; +} +bool RGYParamThread::operator!=(const RGYParamThread& x) const { + return !(*this == x); +} + +void RGYParamThread::set(RGYThreadAffinity affinity_, RGYThreadPriority priority_, RGYThreadPowerThrottlingMode throttling_) { + affinity = affinity_; + priority = priority_; + throttling = throttling_; +} + +bool RGYParamThread::apply(RGYThreadHandle threadHandle) const { + bool ret = true; + if (affinity.mode != RGYThreadAffinityMode::ALL) { + SetThreadAffinityMask(threadHandle, affinity.getMask()); + } +#if defined(_WIN32) || defined(_WIN64) + if (priority != RGYThreadPriority::Normal) { + ret &= !!SetThreadPriority(threadHandle, (int)priority); + } + if (throttling != RGYThreadPowerThrottlingMode::Auto) { + ret &= SetThreadPowerThrottolingMode(threadHandle, throttling); + } +#endif //#if defined(_WIN32) || defined(_WIN64) + return ret; +} + +RGYParamThreads::RGYParamThreads() : + process(), + main(), + dec(), + enc(), + csp(), + input(), + output(), + audio(), + perfmonitor(), + videoquality() { + perfmonitor.priority = RGYThreadPriority::BackgroundBeign; + perfmonitor.throttling = RGYThreadPowerThrottlingMode::Enabled; + // そのほかはAutoにする + for (int i = (int)RGYThreadType::ALL + 1; i < (int)RGYThreadType::END; i++) { + const auto targetType = (RGYThreadType)i; + //DEC,ENC,OUTPUT,VIDEO_QUALITYは実行時に決める + if ( targetType != RGYThreadType::DEC + && targetType != RGYThreadType::ENC + && targetType != RGYThreadType::OUTUT + && targetType != RGYThreadType::VIDEO_QUALITY + && targetType != RGYThreadType::PERF_MONITOR) { + get(targetType).throttling = RGYThreadPowerThrottlingMode::Auto; + } + } 
+} + +const TCHAR *rgy_thread_type_to_str(RGYThreadType type) { + for (const auto& p : RGY_THREAD_TYPE_STR) { + if (p.first == type) return p.second; + } + return nullptr; +} + +RGYParamThread& RGYParamThreads::get(RGYThreadType type) { + switch (type) { + case RGYThreadType::MAIN: return main; + case RGYThreadType::DEC: return dec; + case RGYThreadType::ENC: return enc; + case RGYThreadType::CSP: return csp; + case RGYThreadType::INPUT: return input; + case RGYThreadType::OUTUT: return output; + case RGYThreadType::AUDIO: return audio; + case RGYThreadType::PERF_MONITOR: return perfmonitor; + case RGYThreadType::VIDEO_QUALITY: return videoquality; + case RGYThreadType::PROCESS: return process; + case RGYThreadType::ALL: + default: return process; + } +} + +const RGYParamThread& RGYParamThreads::get(RGYThreadType type) const { + switch (type) { + case RGYThreadType::MAIN: return main; + case RGYThreadType::DEC: return dec; + case RGYThreadType::ENC: return enc; + case RGYThreadType::CSP: return csp; + case RGYThreadType::INPUT: return input; + case RGYThreadType::OUTUT: return output; + case RGYThreadType::AUDIO: return audio; + case RGYThreadType::PERF_MONITOR: return perfmonitor; + case RGYThreadType::VIDEO_QUALITY: return videoquality; + case RGYThreadType::PROCESS: return process; + case RGYThreadType::ALL: + default: return process; + } +} + +void RGYParamThreads::set(const RGYThreadAffinity affinity, RGYThreadType type) { + if (type == RGYThreadType::ALL) { + for (int i = (int)RGYThreadType::ALL + 1; i < (int)RGYThreadType::END; i++) { + get((RGYThreadType)i).affinity = affinity; + } + } else { + get(type).affinity = affinity; + } +} + +void RGYParamThreads::set(const RGYThreadPriority priority, RGYThreadType type) { + if (type == RGYThreadType::ALL) { + for (int i = (int)RGYThreadType::ALL + 1; i < (int)RGYThreadType::END; i++) { + get((RGYThreadType)i).priority = priority; + } + } else { + get(type).priority = priority; + } +} + +void 
RGYParamThreads::set(const RGYThreadPowerThrottlingMode mode, RGYThreadType type) { + if (type == RGYThreadType::ALL) { + for (int i = (int)RGYThreadType::ALL + 1; i < (int)RGYThreadType::END; i++) { + get((RGYThreadType)i).throttling = mode; + } + } else { + get(type).throttling = mode; + } +} + +tstring RGYParamThreads::to_string(RGYParamThreadType type) const { + std::basic_stringstream tmp; +#define RGY_THREAD_AFF_ADD_TYPE(TYPE, VAR) { tmp << _T(",") << rgy_thread_type_to_str(TYPE) << _T("=") << VAR.to_string(type); } + RGY_THREAD_AFF_ADD_TYPE(RGYThreadType::PROCESS, process); + RGY_THREAD_AFF_ADD_TYPE(RGYThreadType::MAIN, main); + RGY_THREAD_AFF_ADD_TYPE(RGYThreadType::DEC, dec); + RGY_THREAD_AFF_ADD_TYPE(RGYThreadType::ENC, enc); + RGY_THREAD_AFF_ADD_TYPE(RGYThreadType::INPUT, input); + RGY_THREAD_AFF_ADD_TYPE(RGYThreadType::OUTUT, output); + RGY_THREAD_AFF_ADD_TYPE(RGYThreadType::AUDIO, audio); + RGY_THREAD_AFF_ADD_TYPE(RGYThreadType::PERF_MONITOR, perfmonitor); + RGY_THREAD_AFF_ADD_TYPE(RGYThreadType::VIDEO_QUALITY, videoquality); +#undef LOG_LEVEL_ADD_TYPE + return tmp.str().substr(1); +} + +bool RGYParamThreads::operator==(const RGYParamThreads&x) const { + return process == x.process + && main == x.main + && dec == x.dec + && enc == x.enc + && csp == x.csp + && input == x.input + && output == x.output + && audio == x.audio + && perfmonitor == x.perfmonitor + && videoquality == x.videoquality; +} +bool RGYParamThreads::operator!=(const RGYParamThreads&x) const { + return !(*this == x); +} + +#pragma warning(push) +#pragma warning(disable: 4146) //warning C4146: 符号付きの値を代入する変数は、符号付き型にキャストしなければなりません。 +uint64_t selectMaskFromLowerBit(uint64_t mask, const int idx) { + int count = 0; + uint64_t ret = 0; + do { + mask &= (~ret); + ret = (uint64_t)(mask & (-mask)); // select lowest bit + count++; + } while (count <= idx); + return ret; +} +#pragma warning(pop) + +#if defined(_WIN32) || defined(_WIN64) +static inline bool check_ptr_range(void *value, void *min, 
void *max) { + return (min <= value && value <= max); +} + +static const int ThreadQuerySetWin32StartAddress = 9; +typedef int (WINAPI* typeNtQueryInformationThread)(HANDLE, int, PVOID, ULONG, PULONG); + +static void* GetThreadBeginAddress(const uint32_t TargetProcessId) { + HMODULE hNtDll = NULL; + typeNtQueryInformationThread NtQueryInformationThread = NULL; + HANDLE hThread = NULL; + ULONG length = 0; + void* BeginAddress = NULL; + + if ( NULL != (hNtDll = LoadLibrary(_T("ntdll.dll"))) + && NULL != (NtQueryInformationThread = (typeNtQueryInformationThread)GetProcAddress(hNtDll, "NtQueryInformationThread")) + && NULL != (hThread = OpenThread(THREAD_QUERY_INFORMATION, FALSE, TargetProcessId))) { + NtQueryInformationThread(hThread, ThreadQuerySetWin32StartAddress, &BeginAddress, sizeof(BeginAddress), &length); + } + if (hNtDll) + FreeLibrary(hNtDll); + if (hThread) + CloseHandle(hThread); + return BeginAddress; +} + +static inline std::vector GetThreadList(const uint32_t TargetProcessId) { + std::vector ThreadList; + HANDLE hSnapshot; + + if (INVALID_HANDLE_VALUE != (hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0x00))) { + THREADENTRY32 te32 = { 0 }; + te32.dwSize = sizeof(THREADENTRY32); + + if (Thread32First(hSnapshot, &te32)) { + do { + if (te32.th32OwnerProcessID == TargetProcessId) + ThreadList.push_back(te32.th32ThreadID); + } while (Thread32Next(hSnapshot, &te32)); + } + CloseHandle(hSnapshot); + } + return ThreadList; +} + +static inline std::vector GetModuleList(const uint32_t TargetProcessId) { + std::vector ModuleList; + HANDLE hSnapshot; + + if (INVALID_HANDLE_VALUE != (hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, TargetProcessId))) { + MODULEENTRY32 me32 = { 0 }; + me32.dwSize = sizeof(MODULEENTRY32); + + if (Module32First(hSnapshot, &me32)) { + do { + ModuleList.push_back(me32); + } while (Module32Next(hSnapshot, &me32)); + } + CloseHandle(hSnapshot); + } + return ModuleList; +} + +static bool SetThreadPriorityFromThreadId(const 
uint32_t TargetThreadId, const RGYThreadPriority ThreadPriority) { + HANDLE hThread = OpenThread(THREAD_SET_INFORMATION, FALSE, TargetThreadId); + if (hThread == NULL) + return FALSE; + BOOL ret = SetThreadPriority(hThread, (int)ThreadPriority); + CloseHandle(hThread); + return ret != 0; +} + +bool SetThreadPriorityForModule(const uint32_t TargetProcessId, const TCHAR *TargetModule, const RGYThreadPriority ThreadPriority) { + bool ret = true; + const auto thread_list = GetThreadList(TargetProcessId); + const auto module_list = GetModuleList(TargetProcessId); + for (const auto thread_id : thread_list) { + void* thread_address = GetThreadBeginAddress(thread_id); + if (!thread_address) { + ret = FALSE; + } else if (TargetModule == nullptr) { + ret &= !!SetThreadPriorityFromThreadId(thread_id, ThreadPriority); + } else { + for (const auto& i_module : module_list) { + if (check_ptr_range(thread_address, i_module.modBaseAddr, i_module.modBaseAddr + i_module.modBaseSize - 1) + && (TargetModule == nullptr || _tcsncicmp(TargetModule, i_module.szModule, _tcslen(TargetModule)) == 0)) { + ret &= !!SetThreadPriorityFromThreadId(thread_id, ThreadPriority); + break; + } + } + } + } + return ret; +} + +static bool SetThreadAffinityFromThreadId(const uint32_t TargetThreadId, const uint64_t ThreadAffinityMask) { + HANDLE hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, TargetThreadId); + if (hThread == NULL) + return FALSE; + auto ret = SetThreadAffinityMask(hThread, ThreadAffinityMask); + CloseHandle(hThread); + return (ret != 0); +} + +bool SetThreadAffinityForModule(const uint32_t TargetProcessId, const TCHAR *TargetModule, const uint64_t ThreadAffinityMask) { + bool ret = TRUE; + const auto thread_list = GetThreadList(TargetProcessId); + const auto module_list = GetModuleList(TargetProcessId); + for (const auto thread_id : thread_list) { + void* thread_address = GetThreadBeginAddress(thread_id); + if (!thread_address) { + ret = FALSE; + } else if (TargetModule == nullptr) { + ret 
&= !!SetThreadAffinityFromThreadId(thread_id, ThreadAffinityMask); + } else { + for (const auto& i_module : module_list) { + if (check_ptr_range(thread_address, i_module.modBaseAddr, i_module.modBaseAddr + i_module.modBaseSize - 1) + && (TargetModule == nullptr || _tcsncicmp(TargetModule, i_module.szModule, _tcslen(TargetModule)) == 0)) { + ret &= !!SetThreadAffinityFromThreadId(thread_id, ThreadAffinityMask); + break; + } + } + } + } + return ret; +} + +bool SetThreadPowerThrottolingMode(RGYThreadHandle threadHandle, const RGYThreadPowerThrottlingMode mode) { + THREAD_POWER_THROTTLING_STATE throttlingState; + RtlZeroMemory(&throttlingState, sizeof(throttlingState)); + throttlingState.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION; + + switch (mode) { + case RGYThreadPowerThrottlingMode::Enabled: + throttlingState.ControlMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED; + throttlingState.StateMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED; + break; + case RGYThreadPowerThrottlingMode::Disabled: + throttlingState.ControlMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED; + throttlingState.StateMask = 0; + break; + case RGYThreadPowerThrottlingMode::Unset: + case RGYThreadPowerThrottlingMode::Auto: + default: + throttlingState.ControlMask = 0; + throttlingState.StateMask = 0; + break; + } + HMODULE hDll = NULL; + decltype(SetThreadInformation)* ptrSetThreadInformation = nullptr; + + bool ret = false; + if ((hDll = LoadLibrary(_T("kernel32.dll"))) != NULL + && (ptrSetThreadInformation = (decltype(SetThreadInformation)*)GetProcAddress(hDll, "SetThreadInformation")) != NULL) { + ret = ptrSetThreadInformation(threadHandle, ThreadPowerThrottling, &throttlingState, sizeof(throttlingState)); + } + if (hDll) { + FreeLibrary(hDll); + } + return ret; +} + +bool SetThreadPowerThrottolingModeForModule(const uint32_t TargetProcessId, const TCHAR *TargetModule, const RGYThreadPowerThrottlingMode mode) { + bool ret = TRUE; + const auto thread_list = GetThreadList(TargetProcessId); + 
const auto module_list = GetModuleList(TargetProcessId); + for (const auto thread_id : thread_list) { + void* thread_address = GetThreadBeginAddress(thread_id); + if (!thread_address) { + ret = FALSE; + } else if (TargetModule == nullptr) { + HANDLE hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); + if (hThread) { + ret &= !!SetThreadPowerThrottolingMode(hThread, mode); + CloseHandle(hThread); + } + } else { + for (const auto& i_module : module_list) { + if (check_ptr_range(thread_address, i_module.modBaseAddr, i_module.modBaseAddr + i_module.modBaseSize - 1) + && (TargetModule == nullptr || _tcsncicmp(TargetModule, i_module.szModule, _tcslen(TargetModule)) == 0)) { + HANDLE hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); + if (hThread) { + ret &= !!SetThreadPowerThrottolingMode(hThread, mode); + CloseHandle(hThread); + } + break; + } + } + } + } + return ret; +} +#else +bool SetThreadPriorityForModule(const uint32_t TargetProcessId, const TCHAR* TargetModule, const RGYThreadPriority ThreadPriority) { + return false; +} +bool SetThreadAffinityForModule(const uint32_t TargetProcessId, const TCHAR* TargetModule, const uint64_t ThreadAffinityMask) { + return false; +} +bool SetThreadPowerThrottolingMode(RGYThreadHandle threadHandle, const RGYThreadPowerThrottlingMode mode) { + return false; +} +bool SetThreadPowerThrottolingModeForModule(const uint32_t TargetProcessId, const TCHAR* TargetModule, const RGYThreadPowerThrottlingMode mode) { + return false; +} +#endif // #if defined(_WIN32) || defined(_WIN64) diff --git a/auoCommon/rgy_thread_affinity.h b/auoCommon/rgy_thread_affinity.h new file mode 100644 index 0000000..aba5dd2 --- /dev/null +++ b/auoCommon/rgy_thread_affinity.h @@ -0,0 +1,216 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 
2021 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +// ------------------------------------------------------------------------------------------- + +#pragma once +#ifndef __RGY_THREAD_AFFINITY_H__ +#define __RGY_THREAD_AFFINITY_H__ + +#include +#include +#include +#include "rgy_tchar.h" + +#if defined(_WIN32) || defined(_WIN64) +typedef void* RGYThreadHandle; +#else +#include +typedef pthread_t RGYThreadHandle; +#endif + +enum class RGYThreadPriority : int { + BackgroundBeign = 0x00010000, + BackgroundEnd = 0x00020000, + Idle = -15, + Lowest = -2, + BelowNormal = -1, + Normal = 0, + AboveNormal = 1, + Highest = 2, + TimeCritical = 15, + + Unknwon = 0xffff, +}; + +static const std::array, 7> RGY_THREAD_PRIORITY_STR = { + std::pair{ RGYThreadPriority::BackgroundBeign, _T("background")}, + std::pair{ RGYThreadPriority::Idle, _T("idle")}, + std::pair{ RGYThreadPriority::Lowest, _T("lowest")}, + std::pair{ RGYThreadPriority::BelowNormal, _T("belownormal")}, + std::pair{ RGYThreadPriority::Normal, _T("normal")}, + std::pair{ RGYThreadPriority::AboveNormal, _T("abovenormal")}, + std::pair{ RGYThreadPriority::Highest, _T("highest")} +}; + +const TCHAR* rgy_thread_priority_mode_to_str(RGYThreadPriority mode); +RGYThreadPriority rgy_str_to_thread_priority_mode(const TCHAR* str); + +enum class RGYThreadPowerThrottlingMode { + Unset = -2, + Auto = -1, + Disabled = 0, + Enabled = 1, + + END +}; + +static const std::array, (int)RGYThreadPowerThrottlingMode::END - (int)RGYThreadPowerThrottlingMode::Unset> RGY_THREAD_POWER_THROTTOLING_MODE_STR = { + std::pair{ RGYThreadPowerThrottlingMode::Unset, _T("unset")}, + std::pair{ RGYThreadPowerThrottlingMode::Auto, _T("auto")}, + std::pair{ RGYThreadPowerThrottlingMode::Disabled, _T("off")}, + std::pair{ RGYThreadPowerThrottlingMode::Enabled, _T("on")} +}; + +const TCHAR* rgy_thread_power_throttoling_mode_to_str(RGYThreadPowerThrottlingMode mode); +RGYThreadPowerThrottlingMode rgy_str_to_thread_power_throttoling_mode(const TCHAR* str); + +enum class RGYThreadAffinityMode { + ALL, + 
PCORE, + ECORE, + LOGICAL, + PHYSICAL, + CACHEL2, + CACHEL3, + CUSTOM, + END +}; + +static const std::array, (int)RGYThreadAffinityMode::END - (int)RGYThreadAffinityMode::ALL> RGY_THREAD_AFFINITY_MODE_STR = { + std::pair{ _T("all"), RGYThreadAffinityMode::ALL }, + std::pair{ _T("pcore"), RGYThreadAffinityMode::PCORE }, + std::pair{ _T("ecore"), RGYThreadAffinityMode::ECORE }, + std::pair{ _T("logical"), RGYThreadAffinityMode::LOGICAL }, + std::pair{ _T("physical"), RGYThreadAffinityMode::PHYSICAL }, + std::pair{ _T("cachel2"), RGYThreadAffinityMode::CACHEL2 }, + std::pair{ _T("cachel3"), RGYThreadAffinityMode::CACHEL3 }, + std::pair{ _T("custom"), RGYThreadAffinityMode::CUSTOM } +}; + +const TCHAR *rgy_thread_affnity_mode_to_str(RGYThreadAffinityMode mode); +RGYThreadAffinityMode rgy_str_to_thread_affnity_mode(const TCHAR *str); + +struct RGYThreadAffinity { + RGYThreadAffinityMode mode; + uint64_t custom; + + RGYThreadAffinity(); + RGYThreadAffinity(RGYThreadAffinityMode m); + RGYThreadAffinity(RGYThreadAffinityMode m, uint64_t customAffinity); + uint64_t getMask() const; + uint64_t getMask(int idx) const; + tstring to_string() const; + bool operator==(const RGYThreadAffinity &x) const; + bool operator!=(const RGYThreadAffinity &x) const; +}; + +uint64_t selectMaskFromLowerBit(uint64_t mask, const int idx); + +enum class RGYThreadType { + ALL, + PROCESS, + MAIN, + DEC, + ENC, + CSP, + INPUT, + OUTUT, + AUDIO, + PERF_MONITOR, + VIDEO_QUALITY, + + END +}; + +static const std::array, (int)RGYThreadType::END - (int)RGYThreadType::ALL> RGY_THREAD_TYPE_STR = { + std::pair{ RGYThreadType::ALL, _T("all")}, + std::pair{ RGYThreadType::PROCESS, _T("process")}, + std::pair{ RGYThreadType::MAIN, _T("main")}, + std::pair{ RGYThreadType::DEC, _T("decoder")}, + std::pair{ RGYThreadType::ENC, _T("encoder")}, + std::pair{ RGYThreadType::CSP, _T("csp")}, + std::pair{ RGYThreadType::INPUT, _T("input")}, + std::pair{ RGYThreadType::OUTUT, _T("output")}, + std::pair{ 
RGYThreadType::AUDIO, _T("audio")}, + std::pair{ RGYThreadType::PERF_MONITOR, _T("perfmonitor")}, + std::pair{ RGYThreadType::VIDEO_QUALITY, _T("videoquality")} +}; + +const TCHAR *rgy_thread_type_to_str(RGYThreadType type); + +enum class RGYParamThreadType { + all, + affinity, + priority, + throttling, +}; + +struct RGYParamThread { + RGYThreadAffinity affinity; + RGYThreadPriority priority; + RGYThreadPowerThrottlingMode throttling; + + RGYParamThread(); + uint32_t getPriorityCalss(); + tstring to_string(RGYParamThreadType type) const; + tstring desc() const; + void set(RGYThreadAffinity affinity, RGYThreadPriority priority, RGYThreadPowerThrottlingMode throttling); + bool apply(RGYThreadHandle threadHandle) const; + bool operator==(const RGYParamThread& x) const; + bool operator!=(const RGYParamThread& x) const; +}; + +struct RGYParamThreads { + RGYParamThread process; + RGYParamThread main; + RGYParamThread dec; + RGYParamThread enc; + RGYParamThread csp; + RGYParamThread input; + RGYParamThread output; + RGYParamThread audio; + RGYParamThread perfmonitor; + RGYParamThread videoquality; + + RGYParamThreads(); + RGYParamThread& get(RGYThreadType type); + const RGYParamThread& get(RGYThreadType type) const; + void set(const RGYThreadAffinity affinity, RGYThreadType type); + void set(const RGYThreadPriority priority, RGYThreadType type); + void set(const RGYThreadPowerThrottlingMode mode, RGYThreadType type); + void apply_unset(); + tstring to_string(RGYParamThreadType type) const; + bool operator==(const RGYParamThreads&x) const; + bool operator!=(const RGYParamThreads&x) const; +}; + +bool SetThreadPriorityForModule(const uint32_t TargetProcessId, const TCHAR *TargetModule, const RGYThreadPriority ThreadPriority); +bool SetThreadAffinityForModule(const uint32_t TargetProcessId, const TCHAR *TargetModule, const uint64_t ThreadAffinityMask); + +bool SetThreadPowerThrottolingMode(RGYThreadHandle threadHandle, const RGYThreadPowerThrottlingMode mode); +bool 
SetThreadPowerThrottolingModeForModule(const uint32_t TargetProcessId, const TCHAR* TargetModule, const RGYThreadPowerThrottlingMode mode); + +#endif //__RGY_THREAD_AFFINITY_H__ diff --git a/auoCommon/rgy_util.cpp b/auoCommon/rgy_util.cpp new file mode 100644 index 0000000..5067f52 --- /dev/null +++ b/auoCommon/rgy_util.cpp @@ -0,0 +1,692 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2011-2016 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +// ------------------------------------------------------------------------------------------ + +#include "rgy_util.h" +#include "rgy_codepage.h" +#if !(defined(_WIN32) || defined(_WIN64)) +#include +#endif + +#pragma warning (push) +#pragma warning (disable: 4100) +#if defined(_WIN32) || defined(_WIN64) +unsigned int wstring_to_string(const wchar_t *wstr, std::string& str, uint32_t codepage) { + if (wstr == nullptr) { + str = ""; + return 0; + } + uint32_t flags = (codepage == CP_UTF8) ? 0 : WC_NO_BEST_FIT_CHARS; + int multibyte_length = WideCharToMultiByte(codepage, flags, wstr, -1, nullptr, 0, nullptr, nullptr); + std::vector tmp(multibyte_length, 0); + if (0 == WideCharToMultiByte(codepage, flags, wstr, -1, tmp.data(), (int)tmp.size(), nullptr, nullptr)) { + str.clear(); + return 0; + } + str = tmp.data(); + return multibyte_length; +} +#else +unsigned int wstring_to_string(const wchar_t *wstr, std::string& str, uint32_t codepage) { + if (wstr == nullptr) { + str = ""; + return 0; + } + auto codepage_str_to = codepage_str(codepage); + if (codepage_str_to == nullptr) codepage_str_to = "UTF-8"; + auto ic = iconv_open(codepage_str_to, "wchar_t"); //to, from + auto input_len = (wcslen(wstr)+1) * 4; + std::vector buf(input_len, 0); + memcpy(buf.data(), wstr, input_len); + auto output_len = input_len * 8; + std::vector bufout(output_len, 0); + char *outbuf = bufout.data(); + char *input = buf.data(); + iconv(ic, &input, &input_len, &outbuf, &output_len); + iconv_close(ic); + str = bufout.data(); + return output_len; +} +#endif //#if defined(_WIN32) || defined(_WIN64) + +std::string wstring_to_string(const wchar_t *wstr, uint32_t codepage) { + if (wstr == nullptr) { + return ""; + } + std::string str; + wstring_to_string(wstr, str, codepage); + return str; +} + +std::string wstring_to_string(const std::wstring& wstr, uint32_t codepage) { + std::string str; + wstring_to_string(wstr.c_str(), str, codepage); + return str; +} + +unsigned int tchar_to_string(const 
TCHAR *tstr, std::string& str, uint32_t codepage) { +#if UNICODE + return wstring_to_string(tstr, str, codepage); +#else + str = (tstr) ? std::string(tstr) : ""; + return (unsigned int)str.length(); +#endif +} + +std::string tchar_to_string(const TCHAR *tstr, uint32_t codepage) { + if (tstr == nullptr) { + return ""; + } + std::string str; + tchar_to_string(tstr, str, codepage); + return str; +} + +std::wstring tchar_to_wstring(const tstring& tstr, uint32_t codepage) { +#if UNICODE + return std::wstring(tstr); +#else + return char_to_wstring(tstr, codepage); +#endif +} + +std::wstring tchar_to_wstring(const TCHAR *tstr, uint32_t codepage) { + if (tstr == nullptr) { + return L""; + } + return tchar_to_wstring(tstring(tstr), codepage); +} + +std::string tchar_to_string(const tstring& tstr, uint32_t codepage) { + std::string str; + tchar_to_string(tstr.c_str(), str, codepage); + return str; +} + +unsigned int wstring_to_tstring(const WCHAR *wstr, tstring& tstr, uint32_t codepage) { + if (wstr == nullptr) { + tstr = _T(""); + return 0; + } +#if UNICODE + tstr = std::wstring(wstr); +#else + return wstring_to_string(wstr, tstr, codepage); +#endif + return (unsigned int)tstr.length(); +} + +tstring wstring_to_tstring(const WCHAR *wstr, uint32_t codepage) { + if (wstr == nullptr) { + return _T(""); + } + tstring tstr; + wstring_to_tstring(wstr, tstr, codepage); + return tstr; +} + +tstring wstring_to_tstring(const std::wstring& wstr, uint32_t codepage) { + tstring tstr; + wstring_to_tstring(wstr.c_str(), tstr, codepage); + return tstr; +} + +#if defined(_WIN32) || defined(_WIN64) +unsigned int char_to_wstring(std::wstring& wstr, const char *str, uint32_t codepage) { + if (str == nullptr) { + wstr = L""; + return 0; + } + int widechar_length = MultiByteToWideChar(codepage, 0, str, -1, nullptr, 0); + std::vector tmp(widechar_length, 0); + if (0 == MultiByteToWideChar(codepage, 0, str, -1, tmp.data(), (int)tmp.size())) { + wstr.clear(); + return 0; + } + wstr = tmp.data(); + 
return widechar_length; +} +unsigned int char_to_string(std::string& dst, uint32_t codepage_to, const char *src, uint32_t codepage_from) { + if (src == nullptr) { + dst = ""; + return 0; + } + if (codepage_to == codepage_from) { + dst = src; + return (unsigned int)dst.length(); + } + std::wstring wstrtemp; + char_to_wstring(wstrtemp, src, codepage_from); + wstring_to_string(wstrtemp.c_str(), dst, codepage_to); + return (unsigned int)dst.length(); +} +#else +unsigned int char_to_wstring(std::wstring& wstr, const char *str, uint32_t codepage) { + if (str == nullptr) { + wstr = L""; + return 0; + } + auto codepage_str_from = codepage_str(codepage); + if (codepage_str_from == nullptr) codepage_str_from = "UTF-8"; + auto ic = iconv_open("wchar_t", codepage_str_from); //to, from + if ((int64_t)ic == -1) { + fprintf(stderr, "iconv_error\n"); + } + auto input_len = strlen(str)+1; + std::vector buf(input_len); + strcpy(buf.data(), str); + auto output_len = (input_len + 1) * 8; + std::vector bufout(output_len, 0); + char *inbuf = buf.data(); + char *outbuf = bufout.data(); + iconv(ic, &inbuf, &input_len, &outbuf, &output_len); + iconv_close(ic); + wstr = std::wstring((WCHAR *)bufout.data()); + return wstr.length(); +} + +unsigned int char_to_string(std::string& dst, uint32_t codepage_to, const char *src, uint32_t codepage_from) { + if (src == nullptr) { + dst = ""; + return 0; + } + auto codepage_str_from = codepage_str(codepage_from); + if (codepage_str_from == nullptr) codepage_str_from = "UTF-8"; + auto codepage_str_to = codepage_str(codepage_to); + if (codepage_str_to == nullptr) codepage_str_to = "UTF-8"; + if (codepage_to == codepage_from + || strcmp(codepage_str_to, codepage_str_from) == 0) { + dst = src; + return dst.length(); + } + auto ic = iconv_open(codepage_str_to, codepage_str_from); //to, from + if ((int64_t)ic == -1) { + fprintf(stderr, "iconv_error\n"); + } + auto input_len = strlen(src)+1; + std::vector buf(input_len); + strcpy(buf.data(), src); + auto 
output_len = (input_len + 1) * 12; + std::vector bufout(output_len, 0); + char *inbuf = buf.data(); + char *outbuf = bufout.data(); + iconv(ic, &inbuf, &input_len, &outbuf, &output_len); + iconv_close(ic); + dst = std::string(bufout.data()); + return dst.length(); +} +#endif //#if defined(_WIN32) || defined(_WIN64) +std::wstring char_to_wstring(const char *str, uint32_t codepage) { + if (str == nullptr) { + return L""; + } + std::wstring wstr; + char_to_wstring(wstr, str, codepage); + return wstr; +} +std::wstring char_to_wstring(const std::string& str, uint32_t codepage) { + std::wstring wstr; + char_to_wstring(wstr, str.c_str(), codepage); + return wstr; +} +std::string char_to_string(uint32_t codepage_to, const char *src, uint32_t codepage_from) { + std::string dst; + char_to_string(dst, codepage_to, src, codepage_from); + return dst; +} + +unsigned int char_to_tstring(tstring& tstr, const char *str, uint32_t codepage) { +#if UNICODE + return char_to_wstring(tstr, str, codepage); +#else + tstr = (str) ? std::string(str) : _T(""); + return (unsigned int)tstr.length(); +#endif +} + +tstring char_to_tstring(const char *str, uint32_t codepage) { + if (str == nullptr) { + return _T(""); + } + tstring tstr; + char_to_tstring(tstr, str, codepage); + return tstr; +} +tstring char_to_tstring(const std::string& str, uint32_t codepage) { + tstring tstr; + char_to_tstring(tstr, str.c_str(), codepage); + return tstr; +} +std::string strsprintf(const char* format, ...) { + if (format == nullptr) { + return ""; + } + va_list args; + va_start(args, format); + const size_t len = _vscprintf(format, args) + 1; + + std::vector buffer(len, 0); + vsprintf(buffer.data(), format, args); + va_end(args); + std::string retStr = std::string(buffer.data()); + return retStr; +} +#if defined(_WIN32) || defined(_WIN64) +std::wstring strsprintf(const WCHAR* format, ...) 
{ + if (format == nullptr) { + return L""; + } + va_list args; + va_start(args, format); + const size_t len = _vscwprintf(format, args) + 1; + + std::vector buffer(len, 0); + vswprintf(buffer.data(), buffer.size(), format, args); + va_end(args); + std::wstring retStr = std::wstring(buffer.data()); + return retStr; +} +#endif //#if defined(_WIN32) || defined(_WIN64) + +std::string str_replace(std::string str, const std::string& from, const std::string& to) { + std::string::size_type pos = 0; + while(pos = str.find(from, pos), pos != std::string::npos) { + str.replace(pos, from.length(), to); + pos += to.length(); + } + return str; +} + +#if defined(_WIN32) || defined(_WIN64) +std::wstring str_replace(std::wstring str, const std::wstring& from, const std::wstring& to) { + std::wstring::size_type pos = 0; + while (pos = str.find(from, pos), pos != std::wstring::npos) { + str.replace(pos, from.length(), to); + pos += to.length(); + } + return str; +} +#endif //#if defined(_WIN32) || defined(_WIN64) + +#pragma warning (pop) +#if defined(_WIN32) || defined(_WIN64) +std::vector split(const std::wstring &str, const std::wstring &delim, bool bTrim) { + std::vector res; + size_t current = 0, found, delimlen = delim.size(); + while (std::wstring::npos != (found = str.find(delim, current))) { + auto segment = std::wstring(str, current, found - current); + if (bTrim) { + segment = trim(segment); + } + if (!bTrim || segment.length()) { + res.push_back(segment); + } + current = found + delimlen; + } + auto segment = std::wstring(str, current, str.size() - current); + if (bTrim) { + segment = trim(segment); + } + if (!bTrim || segment.length()) { + res.push_back(std::wstring(segment.c_str())); + } + return res; +} +#endif //#if defined(_WIN32) || defined(_WIN64) + +std::vector split(const std::string &str, const std::string &delim, bool bTrim) { + std::vector res; + size_t current = 0, found, delimlen = delim.size(); + while (std::string::npos != (found = str.find(delim, 
current))) { + auto segment = std::string(str, current, found - current); + if (bTrim) { + segment = trim(segment); + } + if (!bTrim || segment.length()) { + res.push_back(segment); + } + current = found + delimlen; + } + auto segment = std::string(str, current, str.size() - current); + if (bTrim) { + segment = trim(segment); + } + if (!bTrim || segment.length()) { + res.push_back(std::string(segment.c_str())); + } + return res; +} + +#if defined(_WIN32) || defined(_WIN64) +std::vector sep_cmd(const std::wstring& cmd) { + std::vector args; + int argc = 0; + auto ptr = CommandLineToArgvW(cmd.c_str(), &argc); + for (int i = 0; i < argc; i++) { + args.push_back(ptr[i]); + } + args.push_back(L""); + LocalFree(ptr); + return std::move(args); +} + +std::vector sep_cmd(const std::string& cmd) { + std::vector args; + std::wstring wcmd = char_to_wstring(cmd); + for (const auto &warg : sep_cmd(wcmd)) { + args.push_back(wstring_to_string(warg)); + } + return std::move(args); +} +#endif //#if defined(_WIN32) || defined(_WIN64) + +std::string lstrip(const std::string& string, const char* trim) { + auto result = string; + auto left = string.find_first_not_of(trim); + if (left != std::string::npos) { + result = string.substr(left, 0); + } + return result; +} + +std::string rstrip(const std::string& string, const char* trim) { + auto result = string; + auto right = string.find_last_not_of(trim); + if (right != std::string::npos) { + result = string.substr(0, right); + } + return result; +} + +std::string trim(const std::string& string, const char* trim) { + auto result = string; + auto left = string.find_first_not_of(trim); + if (left != std::string::npos) { + auto right = string.find_last_not_of(trim); + result = string.substr(left, right - left + 1); + } + return result; +} + +std::wstring lstrip(const std::wstring& string, const WCHAR* trim) { + auto result = string; + auto left = string.find_first_not_of(trim); + if (left != std::string::npos) { + result = string.substr(left, 
0); + } + return result; +} + +std::wstring rstrip(const std::wstring& string, const WCHAR* trim) { + auto result = string; + auto right = string.find_last_not_of(trim); + if (right != std::string::npos) { + result = string.substr(0, right); + } + return result; +} + +std::wstring trim(const std::wstring& string, const WCHAR* trim) { + auto result = string; + auto left = string.find_first_not_of(trim); + if (left != std::string::npos) { + auto right = string.find_last_not_of(trim); + result = string.substr(left, right - left + 1); + } + return result; +} + +std::string add_indent(const std::string& str, const int indentLength) { + const auto origLength = str.length(); + + std::string indent(indentLength, ' '); + + std::string ret; + ret.reserve(origLength + indentLength * origLength / 16); + + size_t current = 0, found; + while (std::string::npos != (found = str.find("\n", current))) { + auto segment = std::string(str, current, found - current); + ret.append(indent); + ret.append(segment); + ret.append("\n"); + current = found + 1; + } + return ret; +} + +std::wstring add_indent(const std::wstring& str, const int indentLength) { + const auto origLength = str.length(); + + std::wstring indent(indentLength, L' '); + + std::wstring ret; + ret.reserve(origLength + indentLength * origLength / 16); + + size_t current = 0, found; + while (std::wstring::npos != (found = str.find(L"\n", current))) { + auto segment = std::wstring(str, current, found - current); + ret.append(indent); + ret.append(segment); + ret.append(L"\n"); + current = found + 1; + } + return ret; +} + +struct RGYSIPrefix { + char prefix; + bool inverse; + int64_t pow2; + int64_t pow10; + + RGYSIPrefix(char prefix_, bool inverse_, int64_t pow2_, int64_t pow10_) : + prefix(prefix_), inverse(inverse_), pow2(pow2_), pow10(pow10_) {}; +}; + +const auto RGY_SI_PREFIX_LIST = make_array( + RGYSIPrefix{ 'a', true, rgy_pow_int<60,int64_t>(2), rgy_pow_int<18,int64_t>(10) }, + RGYSIPrefix{ 'f', true, 
rgy_pow_int<50,int64_t>(2), rgy_pow_int<15,int64_t>(10) }, + RGYSIPrefix{ 'p', true, rgy_pow_int<40,int64_t>(2), rgy_pow_int<12,int64_t>(10) }, + RGYSIPrefix{ 'n', true, rgy_pow_int<30,int64_t>(2), rgy_pow_int< 9,int64_t>(10) }, + RGYSIPrefix{ 'u', true, rgy_pow_int<20,int64_t>(2), rgy_pow_int< 6,int64_t>(10) }, + RGYSIPrefix{ 'm', true, rgy_pow_int<10,int64_t>(2), rgy_pow_int< 3,int64_t>(10) }, + RGYSIPrefix{ 'k', false, rgy_pow_int<10,int64_t>(2), rgy_pow_int< 3,int64_t>(10) }, + RGYSIPrefix{ 'K', false, rgy_pow_int<10,int64_t>(2), rgy_pow_int< 3,int64_t>(10) }, + RGYSIPrefix{ 'M', false, rgy_pow_int<20,int64_t>(2), rgy_pow_int< 6,int64_t>(10) }, + RGYSIPrefix{ 'g', false, rgy_pow_int<30,int64_t>(2), rgy_pow_int< 9,int64_t>(10) }, + RGYSIPrefix{ 'G', false, rgy_pow_int<30,int64_t>(2), rgy_pow_int< 9,int64_t>(10) }, + RGYSIPrefix{ 't', false, rgy_pow_int<40,int64_t>(2), rgy_pow_int<12,int64_t>(10) }, + RGYSIPrefix{ 'T', false, rgy_pow_int<40,int64_t>(2), rgy_pow_int<12,int64_t>(10) }, + RGYSIPrefix{ 'P', false, rgy_pow_int<50,int64_t>(2), rgy_pow_int<15,int64_t>(10) }, + RGYSIPrefix{ 'E', false, rgy_pow_int<60,int64_t>(2), rgy_pow_int<18,int64_t>(10) } + ); + +template +static void rgy_apply_si_prefix(T& val, const TCHAR *endptr) { + const auto prefix = tchar_to_string(endptr, CODE_PAGE_UTF8); + if (prefix[0] != '\0') { + auto siprefix = std::find_if(RGY_SI_PREFIX_LIST.begin(), RGY_SI_PREFIX_LIST.end(), [p = prefix[0]](const RGYSIPrefix& si) { return si.prefix == p; }); + if (siprefix != RGY_SI_PREFIX_LIST.end()) { + const bool usepow2 = prefix[1] != 'i'; + if (siprefix->inverse) { + val /= (usepow2) ? siprefix->pow2 : siprefix->pow10; + } else { + val *= (usepow2) ? 
siprefix->pow2 : siprefix->pow10; + } + } + } +} + +int rgy_parse_num(int& val, const tstring& str) { + val = 0; + try { + size_t idx = 0; + int64_t val64 = std::stoll(str, &idx, 0); + auto endptr = str.c_str() + idx; + rgy_apply_si_prefix(val64, endptr); + if (val64 < std::numeric_limits::min() || std::numeric_limits::max() < val64) { + val = 0; + return 1; + } + val = (int)val64; + } catch (...) { + return 1; + } + return 0; +} + +int rgy_parse_num(int64_t& val, const tstring& str) { + val = 0; + try { + size_t idx = 0; + val = std::stoll(str, &idx, 0); + auto endptr = str.c_str() + idx; + const auto prefix = tchar_to_string(endptr, CODE_PAGE_UTF8); + rgy_apply_si_prefix(val, endptr); + } catch (...) { + return 1; + } + return 0; +} + +int rgy_parse_num(float& val, const tstring& str) { + val = 0; + try { + size_t idx = 0; + double vald = std::stod(str, &idx); + rgy_apply_si_prefix(vald, str.c_str() + idx); + val = (float)vald; + } catch (...) { + return 1; + } + return 0; +} + +int rgy_parse_num(double& val, const tstring& str) { + val = 0; + try { + size_t idx = 0; + val = std::stod(str, &idx); + rgy_apply_si_prefix(val, str.c_str() + idx); + } catch (...) 
{ + return 1; + } + return 0; +} + +tstring rgy_print_num_with_siprefix(const int64_t value) { + const RGYSIPrefix *usePrefix = nullptr; + for (const auto& prefix : RGY_SI_PREFIX_LIST) { + if (!prefix.inverse && value > prefix.pow10) { + usePrefix = &prefix; + } + } + if (usePrefix) { + return strsprintf(_T("%.3f%c"), value / (double)usePrefix->pow10, usePrefix->prefix); + } else { + return strsprintf(_T("%lld"), value); + } +} + +tstring print_time(double time) { + int sec = (int)time; + time -= sec; + int miniute = (int)(sec / 60); + sec -= miniute * 60; + int hour = miniute / 60; + miniute -= hour * 60; + tstring frac = strsprintf(_T("%.3f"), time); + return strsprintf(_T("%d:%02d:%02d%s"), hour, miniute, sec, frac.substr(frac.find_first_of(_T("."))).c_str()); +} + +size_t malloc_degeneracy(void **ptr, size_t nSize, size_t nMinSize) { + *ptr = nullptr; + nMinSize = (std::max)(nMinSize, 1); + nSize = (std::max)(nSize, nMinSize); + //確保できなかったら、サイズを小さくして再度確保を試みる (最終的に1MBも確保できなかったら諦める) + while (nSize >= nMinSize) { + void *qtr = malloc(nSize); + if (qtr != nullptr) { + *ptr = qtr; + return nSize; + } + size_t nNextSize = 0; + for (size_t i = nMinSize; i < nSize; i<<=1) { + nNextSize = i; + } + nSize = nNextSize; + } + return 0; +} + +// convert float to half precision floating point +unsigned short float2half(float value) { + // 1 : 8 : 23 + union { + unsigned int u; + float f; + } tmp; + + tmp.f = value; + + // 1 : 8 : 23 + unsigned short sign = (tmp.u & 0x80000000) >> 31; + unsigned short exponent = (tmp.u & 0x7F800000) >> 23; + unsigned int significand = tmp.u & 0x7FFFFF; + + // fprintf(stderr, "%d %d %d\n", sign, exponent, significand); + + // 1 : 5 : 10 + unsigned short fp16; + if (exponent == 0) { + // zero or denormal, always underflow + fp16 = (sign << 15) | (0x00 << 10) | 0x00; + } else if (exponent == 0xFF) { + // infinity or NaN + fp16 = (sign << 15) | (0x1F << 10) | (significand ? 
0x200 : 0x00); + } else { + // normalized + short newexp = exponent + (-127 + 15); + if (newexp >= 31) { + // overflow, return infinity + fp16 = (sign << 15) | (0x1F << 10) | 0x00; + } else if (newexp <= 0) { + // underflow + if (newexp >= -10) { + // denormal half-precision + unsigned short sig = (unsigned short)((significand | 0x800000) >> (14 - newexp)); + fp16 = (sign << 15) | (0x00 << 10) | sig; + } else { + // underflow + fp16 = (sign << 15) | (0x00 << 10) | 0x00; + } + } else { + fp16 = (unsigned short)((sign << 15) | (newexp << 10) | (significand >> 13)); + } + } + return fp16; +} diff --git a/auoCommon/rgy_util.h b/auoCommon/rgy_util.h new file mode 100644 index 0000000..2d5105e --- /dev/null +++ b/auoCommon/rgy_util.h @@ -0,0 +1,1102 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2011-2016 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// -------------------------------------------------------------------------------------------- + +#pragma once +#ifndef __RGY_UTIL_H__ +#define __RGY_UTIL_H__ + +#include "rgy_tchar.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "rgy_osdep.h" + +#ifndef UNREFERENCED_PARAMETER +#define UNREFERENCED_PARAMETER(x) +#endif + +#if defined(_MSC_VER) +#define RGY_DO_PRAGMA(x) +#define RGY_DISABLE_WARNING_PUSH +#define RGY_DISABLE_WARNING_STR(str) +#define RGY_DISABLE_WARNING_POP +#elif defined(__clang__) +#define RGY_DO_PRAGMA_(x) _Pragma (#x) +#define RGY_DO_PRAGMA(x) RGY_DO_PRAGMA_(x) +#define RGY_DISABLE_WARNING_PUSH RGY_DO_PRAGMA(clang diagnostic push) +#define RGY_DISABLE_WARNING_NUM(num) +#define RGY_DISABLE_WARNING_STR(str) RGY_DO_PRAGMA(clang diagnostic ignored str) +#define RGY_DISABLE_WARNING_POP RGY_DO_PRAGMA(clang diagnostic pop) +#elif defined(__GNUC__) +#define RGY_DO_PRAGMA_(x) _Pragma (#x) +#define RGY_DO_PRAGMA(x) RGY_DO_PRAGMA_(x) +#define RGY_DISABLE_WARNING_PUSH RGY_DO_PRAGMA(GCC diagnostic push) +#define RGY_DISABLE_WARNING_NUM(num) +#define RGY_DISABLE_WARNING_STR(str) RGY_DO_PRAGMA(GCC diagnostic ignored str) +#define RGY_DISABLE_WARNING_POP RGY_DO_PRAGMA(GCC diagnostic pop) +#endif + +using std::vector; +using std::unique_ptr; +using std::shared_ptr; + +#ifndef MIN3 +#define MIN3(a,b,c) (min((a), min((b), (c)))) +#endif +#ifndef MAX3 +#define MAX3(a,b,c) (max((a), max((b), (c)))) +#endif + +#ifndef clamp +#define clamp(x, low, high) (((x) <= (high)) ? (((x) >= (low)) ? 
(x) : (low)) : (high)) +#endif + +#define ALIGN(x,align) (((x)+((align)-1))&(~((align)-1))) +#define ALIGN16(x) (((x)+15)&(~15)) +#define ALIGN32(x) (((x)+31)&(~31)) + +template +static bool rgy_is_pow2(T i) { + static_assert(std::is_integral::value, "rgy_is_pow2 is defined only for integer."); + return (i & (i - 1)) != 0; +} +template +static T rgy_ceil_int(T i, T div) { + static_assert(std::is_integral::value, "rgy_ceil_int is defined only for integer."); + return ((i + div - 1) / div) * div; +} + +#define MAP_PAIR_0_1_PROTO(prefix, name0, type0, name1, type1) \ + type1 prefix ## _ ## name0 ## _to_ ## name1(type0 var0); \ + type0 prefix ## _ ## name1 ## _to_ ## name0(type1 var1); + +#define MAP_PAIR_0_1(prefix, name0, type0, name1, type1, map_pair, default0, default1) \ + RGY_NOINLINE \ + type1 prefix ## _ ## name0 ## _to_ ## name1(type0 var0) {\ + auto ret = std::find_if(map_pair.begin(), map_pair.end(), [var0](std::pair a) { \ + return a.first == var0; \ + }); \ + return (ret == map_pair.end()) ? (default1) : ret->second; \ + } \ + RGY_NOINLINE \ + type0 prefix ## _ ## name1 ## _to_ ## name0(type1 var1) {\ + auto ret = std::find_if(map_pair.begin(), map_pair.end(), [var1](std::pair a) { \ + return a.second == var1; \ + }); \ + return (ret == map_pair.end()) ? (default0) : ret->first; \ + } + +typedef long long lls; +typedef unsigned long long llu; + +#define RGY_MEMSET_ZERO(x) { memset(&(x), 0, sizeof(x)); } + +template +std::vector make_vector(T(&ptr)[size]) { + return std::vector(ptr, ptr + size); +} +template +std::vector make_vector(const T(&ptr)[size]) { + return std::vector(ptr, ptr + size); +} +template +std::vector make_vector(ArgTypes... args) { + return std::vector{ reinterpret_cast(args)... }; +} +template +std::vector make_vector(const T0 *ptr, T1 size) { + static_assert(std::is_integral::value == true, "T1 should be integral"); + return (ptr && size) ? 
std::vector(ptr, ptr + size) : std::vector(); +} +template +std::vector make_vector(T0 *ptr, T1 size) { + static_assert(std::is_integral::value == true, "T1 should be integral"); + return (ptr && size) ? std::vector(ptr, ptr + size) : std::vector(); +} +template +constexpr std::array make_array(Args&&... args) { + return std::array{ static_cast(args)... }; +} +template +constexpr std::size_t array_size(const std::array&) { + return N; +} +template +constexpr std::size_t array_size(T(&)[N]) { + return N; +} +template +void vector_cat(vector& v1, const vector& v2) { + if (v2.size()) { + v1.insert(v1.end(), v2.begin(), v2.end()); + } +} +template +void vector_cat(std::vector& v1, const T *ptr, size_t nCount) { + if (nCount) { + size_t currentSize = v1.size(); + v1.resize(currentSize + nCount); + memcpy(v1.data() + currentSize, ptr, sizeof(T) * nCount); + } +} +template +void vector_move(vector& v1, vector v2) { + if (v2.size()) { + v1.insert(v1.end(), std::make_move_iterator(v2.begin()), std::make_move_iterator(v2.end())); + } +} +template +static void rgy_free(T& ptr) { + static_assert(std::is_pointer::value == true, "T should be pointer"); + if (ptr) { + free(ptr); + ptr = nullptr; + } +} + +// ------------------------------------------------------- +// RGYArgN<関数引数のインデックス, decltype(関数名)>::type で関数引数の型がとれる +template +struct RGYTypeN + { using type = typename RGYTypeN::type; }; + +template +struct RGYTypeN<0U, T0, Ts...> + { using type = T0; }; + +template +struct RGYArgN; + +template +struct RGYArgN + { using type = typename RGYTypeN::type; }; + +template +struct RGYReturnType; + +template +struct RGYReturnType + { using type = R; }; +// ------------------------------------------------------- + +#pragma warning(push) +#pragma warning(disable: 4127) +template +struct RGYPowerBase { + static T run(T x) { + if (N < 0) { + return RGYPowerBase::run(1 / x); + } else if (N % 2 != 0) { + return x * RGYPowerBase0)?N-1:0)>::run(x); + } else if (N == 0) { + return 1; + } else 
{ + return RGYPowerBase::run(x * x); + } + } +}; + + +template +T rgy_pow_int(T x) { + return RGYPowerBase::run(x); +} + +template +T rgy_pow_int(T x, int n) { + if (n < 0) { + x = T(1) / x; + n = -n; + } + T v = T(1); + for (int i = 0; i < n; i++) { + v *= x; + } + return v; +} +#pragma warning(pop) + +int rgy_parse_num(int& val, const tstring& str); +int rgy_parse_num(int64_t& val, const tstring& str); +int rgy_parse_num(float& val, const tstring& str); +int rgy_parse_num(double& val, const tstring& str); +tstring rgy_print_num_with_siprefix(const int64_t value); + +template +using unique_ptr_custom = std::unique_ptr>; + +struct aligned_malloc_deleter { + void operator()(void* ptr) const { + if (ptr) { + _aligned_free(ptr); + } + } +}; + +struct malloc_deleter { + void operator()(void* ptr) const { + if (ptr) { + free(ptr); + } + } +}; + +struct fp_deleter { + void operator()(FILE* fp) const { + if (fp) { + fflush(fp); + fclose(fp); + } + } +}; + +struct handle_deleter { + void operator()(HANDLE handle) const { + if (handle) { +#if defined(_WIN32) || defined(_WIN64) + CloseHandle(handle); +#endif //#if defined(_WIN32) || defined(_WIN64) + } + } +}; + +struct module_deleter { + void operator()(void *hmodule) const { + if (hmodule) { +#if defined(_WIN32) || defined(_WIN64) + FreeLibrary((HMODULE)hmodule); +#endif //#if defined(_WIN32) || defined(_WIN64) + } + } +}; + +template +static inline T rgy_gcd(T a, T b) { + static_assert(std::is_integral::value, "rgy_gcd is defined only for integer."); + if (a == 0) return b; + if (b == 0) return a; + T c; + while ((c = a % b) != 0) + a = b, b = c; + return b; +} + +template +static inline T rgy_gcd(std::pair int2) { + return rgy_gcd(int2.first, int2.second); +} + +template +static inline T rgy_lcm(T a, T b) { + static_assert(std::is_integral::value, "rgy_lcm is defined only for integer."); + if (a == 0) return 0; + if (b == 0) return 0; + T gcd = rgy_gcd(a, b); + a /= gcd; + b /= gcd; + return a * b * gcd; +} + +template 
+static inline T rgy_lcm(std::pair int2) { + return rgy_lcm(int2.first, int2.second); +} + +template +static inline void rgy_reduce(T& a, T& b) { + static_assert(std::is_integral::value, "rgy_reduce is defined only for integer."); + if (a == 0 || b == 0) return; + T gcd = rgy_gcd(a, b); + a /= gcd; + b /= gcd; +} + +template +static inline void rgy_reduce(std::pair& int2) { + rgy_reduce(int2.first, int2.second); +} + +template +class rgy_rational { + static_assert(std::is_integral::value, "rgy_rational is defined only for integer."); +private: + T num, den; +public: + rgy_rational() : num(0), den(1) {} + rgy_rational(T _num) : num(_num), den(1) { } + rgy_rational(T _num, T _den) : num(_num), den(_den) { reduce(); } + rgy_rational(const rgy_rational& r) : num(r.num), den(r.den) { reduce(); } + rgy_rational& operator=(const rgy_rational &r) { num = r.num; den = r.den; reduce(); return *this; } + bool is_valid() const { return den != 0; }; + T n() const { + return this->num; + } + T d() const { + return this->den; + } + float qfloat() const { + return (float)qdouble(); + } + double qdouble() const { + return (double)num / (double)den; + } + void reduce() { + if (den == 0) { + return; + } + rgy_reduce(num, den); + if (den < 0) { + num = -num; + den = -den; + } + } + rgy_rational inv() const { + rgy_rational tmp(den, num); + if (tmp.den == 0) { + tmp.den = 0; + tmp.num = 0; + } else if (tmp.den < 0) { + tmp.num = -tmp.num; + tmp.den = -tmp.den; + } + return tmp; + } + + rgy_rational operator+ () { + return *this; + } + rgy_rational operator- () { + return rgy_rational(-1 * this->num, this->den); + } + + rgy_rational& operator+= (const rgy_rational& r) { + if (r.den == 0 || den == 0) { + den = 0; + num = 0; + return *this; + } + + T gcd0 = rgy_gcd(den, r.den); + den /= gcd0; + T tmp = r.den / gcd0; + num = num * tmp + r.num * den; + T gcd1 = rgy_gcd(num, gcd0); + num /= gcd1; + tmp = r.den / gcd1; + den *= tmp; + + return *this; + } + rgy_rational& operator-= (const 
rgy_rational& r) { + rgy_rational tmp(r); + tmp.num *= -1; + *this += tmp; + return *this; + } + rgy_rational& operator*= (const rgy_rational& r) { + if (r.den == 0 || den == 0) { + den = 0; + num = 0; + return *this; + } + T gcd0 = rgy_gcd(num, r.den); + T gcd1 = rgy_gcd(den, r.num); + T a0 = num / gcd0; + T a1 = r.num / gcd1; + T b0 = den / gcd1; + T b1 = r.den / gcd0; + num = a0 * a1; + den = b0 * b1; + + if (den < 0) { + num = -num; + den = -den; + } + return *this; + } + rgy_rational& operator/= (const rgy_rational& r) { + *this *= r.inv(); + return *this; + } + + rgy_rational& operator+= (const T& i) { + num += i * den; + return *this; + } + rgy_rational& operator-= (const T& i) { + num -= i * den; + return *this; + } + rgy_rational& operator*= (const T& i) { + T gcd = rgy_gcd(i, den); + num *= i / gcd; + den /= gcd; + return *this; + } + rgy_rational& operator/= (const T& i) { + if (i == 0) { + num = 0; + den = 0; + } else if (num != 0) { + T gcd = rgy_gcd(num, i); + num /= gcd; + den *= i / gcd; + if (den < 0) { + num = -num; + den = -den; + } + } + return *this; + } + + template + rgy_rational operator + (const Arg& a) const { + rgy_rational t(*this); + t += a; + return t; + } + template + rgy_rational operator - (const Arg& a) const { + rgy_rational t(*this); + t -= a; + return t; + } + template + rgy_rational operator * (const Arg& a) const { + rgy_rational t(*this); + t *= a; + return t; + } + template + rgy_rational operator / (const Arg& a) const { + rgy_rational t(*this); + t /= a; + return t; + } + const rgy_rational& operator++() { num += den; return *this; } + const rgy_rational& operator--() { num -= den; return *this; } + + bool operator== (const rgy_rational& r) const { + return ((num == r.num) && (den == r.den)); + } + bool operator!= (const rgy_rational& r) const { + return ((num != r.num) || (den != r.den)); + } + + std::string print() const { + std::stringstream ss; + ss << num << "/" << den; + return ss.str(); + } + + std::wstring printw() 
const { + std::wstringstream ss; + ss << num << "/" << den; + return ss.str(); + } + + tstring printt() const { +#if _UNICODE + return printw(); +#else + return print(); +#endif + } +}; + +static int64_t rgy_change_scale(int64_t t, const rgy_rational& scale_in, const rgy_rational& scale_out) { + rgy_rational a = rgy_rational(scale_in.n(), scale_in.d()); + rgy_rational b = rgy_rational(scale_out.n(), scale_out.d()); + a *= t; + a /= b; + int64_t n = ((a.n() + a.d() / 2) / a.d()); + return n; +} + +template +void atomic_max(std::atomic &maximum_value, T const &value) noexcept { + T prev_value = maximum_value; + while (prev_value < value && + !maximum_value.compare_exchange_weak(prev_value, value)); +} + +#if UNICODE +#define to_tstring to_wstring +#else +#define to_tstring to_string +#endif + +typedef std::basic_stringstream TStringStream; + +#pragma warning (push) +#pragma warning (disable: 4244) +#pragma warning (disable: 4996) +static inline std::string tolowercase(const std::string& str) { + std::string str_copy = str; + std::transform(str_copy.cbegin(), str_copy.cend(), str_copy.begin(), tolower); + return str_copy; +} +static inline std::string touppercase(const std::string &str) { + std::string str_copy = str; + std::transform(str_copy.cbegin(), str_copy.cend(), str_copy.begin(), toupper); + return str_copy; +} +#if defined(_WIN32) || defined(_WIN64) +static inline std::wstring tolowercase(const std::wstring &str) { + auto temp = wcsdup(str.data()); + _wcslwr(temp); + std::wstring str_lo = temp; + free(temp); + return str_lo; +} +static inline std::wstring touppercase(const std::wstring &str) { + auto temp = wcsdup(str.data()); + _wcsupr(temp); + std::wstring str_lo = temp; + free(temp); + return str_lo; +} +#endif //#if defined(_WIN32) || defined(_WIN64) +#pragma warning (pop) + +unsigned int wstring_to_string(const wchar_t *wstr, std::string& str, uint32_t codepage = CP_THREAD_ACP); +std::string wstring_to_string(const wchar_t *wstr, uint32_t codepage = 
CP_THREAD_ACP); +std::string wstring_to_string(const std::wstring& wstr, uint32_t codepage = CP_THREAD_ACP); +unsigned int char_to_wstring(std::wstring& wstr, const char *str, uint32_t codepage = CP_THREAD_ACP); +std::wstring char_to_wstring(const char *str, uint32_t = CP_THREAD_ACP); +std::wstring char_to_wstring(const std::string& str, uint32_t codepage = CP_THREAD_ACP); +#if defined(_WIN32) || defined(_WIN64) +std::wstring strsprintf(const WCHAR* format, ...); + +std::wstring str_replace(std::wstring str, const std::wstring& from, const std::wstring& to); +tstring getACPCodepageStr(); +#endif //#if defined(_WIN32) || defined(_WIN64) + +std::wstring tchar_to_wstring(const tstring& tstr, uint32_t codepage = CP_THREAD_ACP); +std::wstring tchar_to_wstring(const TCHAR *tstr, uint32_t codepage = CP_THREAD_ACP); +unsigned int tchar_to_string(const TCHAR *tstr, std::string& str, uint32_t codepage = CP_THREAD_ACP); +std::string tchar_to_string(const TCHAR *tstr, uint32_t codepage = CP_THREAD_ACP); +std::string tchar_to_string(const tstring& tstr, uint32_t codepage = CP_THREAD_ACP); +unsigned int char_to_tstring(tstring& tstr, const char *str, uint32_t codepage = CP_THREAD_ACP); +tstring char_to_tstring(const char *str, uint32_t codepage = CP_THREAD_ACP); +tstring char_to_tstring(const std::string& str, uint32_t codepage = CP_THREAD_ACP); +unsigned int wstring_to_tstring(const WCHAR *wstr, tstring& tstr, uint32_t codepage = CP_THREAD_ACP); +tstring wstring_to_tstring(const WCHAR *wstr, uint32_t codepage = CP_THREAD_ACP); +tstring wstring_to_tstring(const std::wstring& wstr, uint32_t codepage = CP_THREAD_ACP); +unsigned int char_to_string(std::string& dst, uint32_t codepage_to, const char *src, uint32_t codepage_from = CP_THREAD_ACP); +std::string char_to_string(uint32_t codepage_to, const char *src, uint32_t codepage_from = CP_THREAD_ACP); + +std::string strsprintf(const char* format, ...); +std::vector split(const std::wstring &str, const std::wstring &delim, bool bTrim 
= false); +std::vector split(const std::string &str, const std::string &delim, bool bTrim = false); +std::string lstrip(const std::string& string, const char* trim = " \t\v\r\n"); +std::string rstrip(const std::string& string, const char* trim = " \t\v\r\n"); +std::string trim(const std::string& string, const char* trim = " \t\v\r\n"); +std::wstring lstrip(const std::wstring& string, const WCHAR* trim = L" \t\v\r\n"); +std::wstring rstrip(const std::wstring& string, const WCHAR* trim = L" \t\v\r\n"); +std::wstring trim(const std::wstring& string, const WCHAR* trim = L" \t\v\r\n"); +std::string add_indent(const std::string& str, const int indentLength); +std::wstring add_indent(const std::wstring& str, const int indentLength); + +#if defined(_WIN32) || defined(_WIN64) +std::vector sep_cmd(const std::wstring &cmd); +std::vector sep_cmd(const std::string &cmd); +#endif //#if defined(_WIN32) || defined(_WIN64) + +std::string str_replace(std::string str, const std::string& from, const std::string& to); + +tstring print_time(double time); + +static inline uint16_t readUB16(const void *ptr) { + uint16_t i = *(uint16_t *)ptr; + return (i >> 8) | (i << 8); +} + +static inline uint32_t readUB32(const void *ptr) { + uint32_t i = *(uint32_t *)ptr; + return (i >> 24) | ((i & 0xff0000) >> 8) | ((i & 0xff00) << 8) | ((i & 0xff) << 24); +} + +static inline uint32_t check_range_unsigned(uint32_t value, uint32_t min, uint32_t max) { + return (value - min) <= (max - min); +} + +static inline uint32_t popcnt32(uint32_t bits) { + bits = (bits & 0x55555555) + (bits >> 1 & 0x55555555); + bits = (bits & 0x33333333) + (bits >> 2 & 0x33333333); + bits = (bits & 0x0f0f0f0f) + (bits >> 4 & 0x0f0f0f0f); + bits = (bits & 0x00ff00ff) + (bits >> 8 & 0x00ff00ff); + return (bits & 0x0000ffff) + (bits >>16 & 0x0000ffff); +} + +static inline uint32_t popcnt64(uint64_t bits) { + bits = (bits & 0x5555555555555555) + (bits >> 1 & 0x5555555555555555); + bits = (bits & 0x3333333333333333) + (bits >> 2 & 
0x3333333333333333); + bits = (bits & 0x0f0f0f0f0f0f0f0f) + (bits >> 4 & 0x0f0f0f0f0f0f0f0f); + bits = (bits & 0x00ff00ff00ff00ff) + (bits >> 8 & 0x00ff00ff00ff00ff); + bits = (bits & 0x0000ffff0000ffff) + (bits >>16 & 0x0000ffff0000ffff); + bits = (bits & 0x00000000ffffffff) + (bits >>32 & 0x00000000ffffffff); + return (uint32_t)bits; +} + +template +static std::basic_string repeatStr(std::basic_string str, int count) { + std::basic_string ret; + for (int i = 0; i < count; i++) { + ret += str; + } + return ret; +} + +static tstring fourccToStr(uint32_t nFourCC) { + tstring fcc; + for (int i = 0; i < 4; i++) { + fcc.push_back((TCHAR)*(i + (char*)&nFourCC)); + } + return fcc; +} + +//確保できなかったら、サイズを小さくして再度確保を試みる (最終的にnMinSizeも確保できなかったら諦める) +size_t malloc_degeneracy(void **ptr, size_t nSize, size_t nMinSize); + +template +class RGYVec3 { +public: + RGYVec3() : v() { + for (int i = 0; i < 3; i++) + v[i] = (T)0.0; + } + RGYVec3(const RGYVec3 &m) { memcpy(&v[0], &m.v[0], sizeof(v)); } + RGYVec3(T a0, T a1, T a2) { + v[0] = a0; + v[1] = a1; + v[2] = a2; + } + RGYVec3 &operator=(const RGYVec3 &m) { memcpy(&v[0], &m.v[0], sizeof(v)); return *this; } + const RGYVec3 &m() const { + return *this; + } + T &operator()(int i) { + return v[i]; + } + const T &operator()(int i) const { + return v[i]; + } + RGYVec3 &operator+= (const RGYVec3 &a) { + for (int i = 0; i < 3; i++) + v[i] += a.v[i]; + return *this; + } + RGYVec3 &operator-= (const RGYVec3 &a) { + for (int i = 0; i < 3; i++) + v[i] -= a.v[i]; + return *this; + } + RGYVec3 &operator*= (const T a) { + for (int i = 0; i < 3; i++) + v[i] *= a; + return *this; + } + RGYVec3 &operator/= (const T a) { + for (int i = 0; i < 3; i++) + v[i] /= a; + return *this; + } + RGYVec3 operator + (const RGYVec3 &a) const { + RGYVec3 t(*this); + t += a; + return t; + } + RGYVec3 operator - (const RGYVec3 &a) const { + RGYVec3 t(*this); + t -= a; + return t; + } + RGYVec3 operator * (const T a) const { + RGYVec3 t(*this); + t *= a; + return t; 
+ } + RGYVec3 operator / (const T a) const { + RGYVec3 t(*this); + t /= a; + return t; + } + RGYVec3 amdal(const RGYVec3 &a) const { + return RGYVec3( + v[0] * a.v[0], + v[1] * a.v[1], + v[2] * a.v[2] + ); + } + T dot(const RGYVec3 &a) const { + return a.v[0] * v[0] + a.v[1] * v[1] + a.v[2] * v[2]; + } + RGYVec3 cross(const RGYVec3 &a) const { + return RGYVec3( + v[1] * a.v[2] - v[2] * a.v[1], + v[2] * a.v[0] - v[0] * a.v[2], + v[0] * a.v[1] - v[1] * a.v[0] + ); + } + RGYVec3 inv() const { + return RGYVec3(1.0f / v[0], 1.0f / v[1], 1.0f / v[2]); + } + bool operator== (const RGYVec3 &r) const { + return memcmp(&v[0], &r.v[0], sizeof(v)) == 0; + } + bool operator!= (const RGYVec3 &r) const { + return memcmp(&v[0], &r.v[0], sizeof(v)) != 0; + } +private: + T v[3]; +}; + +using vec3 = RGYVec3; +using vec3f = RGYVec3; + +class mat3x3 { +public: + mat3x3() : mat() { + for (int j = 0; j < 3; j++) + for (int i = 0; i < 3; i++) + mat[j][i] = 0.0; + } + mat3x3(const vec3 &col0, const vec3 &col1, const vec3 &col2) : mat() { + for (int i = 0; i < 3; i++) { + mat[0][i] = col0(i); + mat[1][i] = col1(i); + mat[2][i] = col2(i); + } + } + mat3x3(const mat3x3 &m) { memcpy(&this->mat[0][0], &m.mat[0][0], sizeof(mat)); } + mat3x3(double a00, double a01, double a02, double a10, double a11, double a12, double a20, double a21, double a22) { + mat[0][0] = a00; + mat[0][1] = a01; + mat[0][2] = a02; + mat[1][0] = a10; + mat[1][1] = a11; + mat[1][2] = a12; + mat[2][0] = a20; + mat[2][1] = a21; + mat[2][2] = a22; + } + mat3x3 &operator=(const mat3x3 &m) { memcpy(&this->mat[0][0], &m.mat[0][0], sizeof(mat)); return *this; } + + const mat3x3 &m() const { + return *this; + } + //(行,列) + double &operator()(int i, int j) { + return mat[i][j]; + } + //(行,列) + const double &operator()(int i, int j) const { + return mat[i][j]; + } + + mat3x3 &operator+= (const mat3x3& a) { + for (int j = 0; j < 3; j++) + for (int i = 0; i < 3; i++) + mat[j][i] += a.mat[j][i]; + return *this; + } + mat3x3 &operator-= 
(const mat3x3 &a) { + for (int j = 0; j < 3; j++) + for (int i = 0; i < 3; i++) + mat[j][i] -= a.mat[j][i]; + return *this; + } + mat3x3 &operator*= (const double a) { + for (int j = 0; j < 3; j++) + for (int i = 0; i < 3; i++) + mat[j][i] *= a; + return *this; + } + mat3x3 &operator*= (const mat3x3 &r) { + *this = mul(*this, r); + return *this; + } + mat3x3 &operator/= (const double a) { + *this *= (1.0 / a); + return *this; + } + mat3x3 &operator/= (const mat3x3 &r) { + *this = mul(*this, r.inv()); + return *this; + } + + template + mat3x3 operator + (const Arg &a) const { + mat3x3 t(*this); + t += a; + return t; + } + template + mat3x3 operator - (const Arg &a) const { + mat3x3 t(*this); + t -= a; + return t; + } + mat3x3 operator * (const mat3x3 &a) const { + mat3x3 t(*this); + t *= a; + return t; + } + mat3x3 operator * (const double &a) const { + mat3x3 t(*this); + t *= a; + return t; + } + vec3 operator * (const vec3 &a) const { + vec3 v; + for (int j = 0; j < 3; j++) { + double d = 0.0; + for (int i = 0; i < 3; i++) { + d += mat[j][i] * a(i); + } + v(j) = d; + } + return v; + } + template + mat3x3 operator / (const Arg &a) const { + mat3x3 t(*this); + t /= a; + return t; + } + bool operator== (const mat3x3&r) const { + return memcmp(&mat[0][0], &r.mat[0][0], sizeof(mat)) == 0; + } + bool operator!= (const mat3x3& r) const { + return memcmp(&mat[0][0], &r.mat[0][0], sizeof(mat)) != 0; + } + double det() const { + const double determinant = + +mat[0][0]*(mat[1][1]*mat[2][2]-mat[2][1]*mat[1][2]) + -mat[0][1]*(mat[1][0]*mat[2][2]-mat[1][2]*mat[2][0]) + +mat[0][2]*(mat[1][0]*mat[2][1]-mat[1][1]*mat[2][0]); + return determinant; + } + double det2(double a00, double a01, double a10, double a11) const { + return a00 * a11 - a01 * a10; + } + mat3x3 inv() const { + const double invdet = 1.0 / det(); + + mat3x3 ret; + ret.mat[0][0] = det2(mat[1][1], mat[1][2], mat[2][1], mat[2][2]) * invdet; + ret.mat[0][1] = det2(mat[0][2], mat[0][1], mat[2][2], mat[2][1]) * invdet; 
+ ret.mat[0][2] = det2(mat[0][1], mat[0][2], mat[1][1], mat[1][2]) * invdet; + ret.mat[1][0] = det2(mat[1][2], mat[1][0], mat[2][2], mat[2][0]) * invdet; + ret.mat[1][1] = det2(mat[0][0], mat[0][2], mat[2][0], mat[2][2]) * invdet; + ret.mat[1][2] = det2(mat[0][2], mat[0][0], mat[1][2], mat[1][0]) * invdet; + ret.mat[2][0] = det2(mat[1][0], mat[1][1], mat[2][0], mat[2][1]) * invdet; + ret.mat[2][1] = det2(mat[0][1], mat[0][0], mat[2][1], mat[2][0]) * invdet; + ret.mat[2][2] = det2(mat[0][0], mat[0][1], mat[1][0], mat[1][1]) * invdet; + return ret; + } + mat3x3 trans() const { + mat3x3 ret; + for (int j = 0; j < 3; j++) + for (int i = 0; i < 3; i++) + ret.mat[j][i] = mat[i][j]; + return ret; + } + mat3x3 mul(const mat3x3& a, const mat3x3& b) { + mat3x3 ret; + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + double accum = 0; + for (int k = 0; k < 3; k++) { + accum += a.mat[i][k] * b.mat[k][j]; + } + ret(i,j) = accum; + } + } + return ret; + } + static mat3x3 identity() { + mat3x3 ret; + for (int i = 0; i < 3; i++) { + ret.mat[i][i] = 1.0; + } + return ret; + } +private: + double mat[3][3]; //[行][列] +}; + +struct rgy_time { + int h, m, s, ms, us, ns; + + rgy_time() : h(0), m(0), s(0), ms(0), us(0), ns(0) {}; + rgy_time(double time_sec) : h(0), m(0), s(0), ms(0), us(0), ns(0) { + s = (int)time_sec; + time_sec -= s; + ns = (int)(time_sec * 1e9 + 0.5); + us = ns / 1000; + ns -= us * 1000; + ms = us / 1000; + us -= ms * 1000; + + m = (int)(s / 60); + s -= m * 60; + h = m / 60; + m -= h * 60; + }; + rgy_time(uint32_t millisec) : h(0), m(0), s(0), ms(0), us(0), ns(0) { + s = (int)(millisec / 1000); + ms = (int)(millisec - s * 1000); + m = s / 60; + s -= m * 60; + h = m / 60; + m -= h * 60; + }; + rgy_time(int64_t millisec) : h(0), m(0), s(0), ms(0), us(0), ns(0) { + int64_t sec = millisec / 1000; + ms = (int)(millisec - sec * 1000); + + int64_t min = sec / 60; + s = (int)(sec - min * 60); + + h = (int)(min / 60); + m = (int)(min - h * 60); + } + int64_t 
in_sec() { + return (((int64_t)h * 60 + m) * 60 + s + (((ms + ((us >= 500) ? 1 : 0)) >= 500) ? 1 : 0)); + }; + int64_t in_ms() { + return (((int64_t)h * 60 + m) * 60 + s) * 1000 + ms + ((us >= 500) ? 1 : 0); + }; + tstring print() { + auto str = strsprintf(_T("%d:%02d:%02d.%3d"), h, m, s, ms); + if (us) { +#if _UNICODE + str += std::to_wstring(us); +#else + str += std::to_string(us); +#endif + } + if (ns) { +#if _UNICODE + str += std::to_wstring(ns); +#else + str += std::to_string(ns); +#endif + } + return str; + }; +}; + +class CombinationGenerator { +public: + CombinationGenerator(int i) : m_nCombination(i) { + + } + void create(vector used) { + if ((int)used.size() == m_nCombination) { + m_nCombinationList.push_back(used); + } + for (int i = 0; i < m_nCombination; i++) { + if (std::find(used.begin(), used.end(), i) == used.end()) { + vector u = used; + u.push_back(i); + create(u); + } + } + } + vector> generate() { + vector used; + create(used); + return m_nCombinationList; + }; + int m_nCombination; + vector> m_nCombinationList; +}; + +template +class RGYListRef { +private: + std::vector> m_objs; + std::unordered_map> m_refCounts; +public: + RGYListRef() : m_objs(), m_refCounts() {}; + ~RGYListRef() { + clear(); + } + void clear() { + m_refCounts.clear(); + m_objs.clear(); + } + std::shared_ptr get(T *ptr) { + if (ptr == nullptr || m_refCounts.count(ptr) == 0) { + return std::shared_ptr(); + } + m_refCounts[ptr]++; + return std::shared_ptr(ptr, [this](T *ptr) { + m_refCounts[ptr]--; + }); + } + std::shared_ptr get(std::function initFunc = nullptr) { + for (auto &count : m_refCounts) { + if (count.second == 0) { + m_refCounts[count.first]++; + return std::shared_ptr(count.first, [this](T *ptr) { + m_refCounts[ptr]--; + }); + } + } + + auto obj = std::make_unique(); + auto ptr = obj.get(); + if (initFunc && initFunc(ptr)) { + return std::shared_ptr(); + } + m_refCounts[ptr] = 1; + m_objs.push_back(std::move(obj)); + return std::shared_ptr(ptr, [this](T *ptr) { + 
m_refCounts[ptr]--; + }); + } +}; + +unsigned short float2half(float value); + +#endif //__RGY_UTIL_H__ diff --git a/auoCommon/rgy_wav_parser.cpp b/auoCommon/rgy_wav_parser.cpp new file mode 100644 index 0000000..8c4a51b --- /dev/null +++ b/auoCommon/rgy_wav_parser.cpp @@ -0,0 +1,112 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2023 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +// -------------------------------------------------------------------------------------------- + +#include +#include "rgy_osdep.h" +#include "rgy_wav_parser.h" + +static inline uint32_t read_u32(const uint8_t* data) { + return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); +} + +static inline uint16_t read_u16(const uint8_t* data) { + return data[0] | (data[1] << 8); +} + +uint32_t RGYWAVHeader::parseHeader(const uint8_t *data) { + const uint8_t *data_ptr = data; + + strncpy_s(file_id, (const char *)data_ptr, 4); + data_ptr += 4; + + file_size = read_u32(data_ptr); + data_ptr += 4; + + strncpy_s(format, (const char *)data_ptr, 4); + data_ptr += 4; + + strncpy_s(subchunk_id, (const char *)data_ptr, 4); + data_ptr += 4; + + subchunk_size = read_u32(data_ptr); + data_ptr += 4; + + audio_format = read_u16(data_ptr); + data_ptr += 2; + + number_of_channels = read_u16(data_ptr); + data_ptr += 2; + + sample_rate = read_u32(data_ptr); + data_ptr += 4; + + byte_rate = read_u32(data_ptr); + data_ptr += 4; + + block_align = read_u16(data_ptr); + data_ptr += 2; + + bits_per_sample = read_u16(data_ptr); + data_ptr += 2; + + strncpy_s(data_id, (const char *)data_ptr, 4); + data_ptr += 4; + + data_size = read_u32(data_ptr); + data_ptr += 4; + + return (uint32_t)(data_ptr - data); +} + +std::vector RGYWAVHeader::createHeader() { + std::vector buffer(WAVE_HEADER_SIZE); + auto head = buffer.data(); + + static const char * const RIFF_HEADER = "RIFF"; + static const char * const WAVE_HEADER = "WAVE"; + static const char * const FMT_CHUNK = "fmt "; + static const char * const DATA_CHUNK = "data"; + const int32_t FMT_SIZE = 16; + const int16_t FMT_ID = 1; + const int size = bits_per_sample / 8; + + memcpy(head + 0, RIFF_HEADER, strlen(RIFF_HEADER)); + *(int32_t*)(head + 4) = data_size + WAVE_HEADER_SIZE - 8; + memcpy(head + 8, WAVE_HEADER, strlen(WAVE_HEADER)); + memcpy(head + 12, FMT_CHUNK, strlen(FMT_CHUNK)); + *(int32_t*)(head + 16) = FMT_SIZE; + *(int16_t*)(head 
+ 20) = FMT_ID; + *(int16_t*)(head + 22) = (int16_t)number_of_channels; + *(int32_t*)(head + 24) = sample_rate; + *(int32_t*)(head + 28) = sample_rate * number_of_channels * size; + *(int16_t*)(head + 32) = (int16_t)(size * number_of_channels); + *(int16_t*)(head + 34) = (int16_t)(size * 8); + memcpy(head + 36, DATA_CHUNK, strlen(DATA_CHUNK)); + *(int32_t*)(head + 40) = data_size; + //計44byte(WAVE_HEADER_SIZE) + return buffer; +} diff --git a/auoCommon/rgy_wav_parser.h b/auoCommon/rgy_wav_parser.h new file mode 100644 index 0000000..f0e7660 --- /dev/null +++ b/auoCommon/rgy_wav_parser.h @@ -0,0 +1,55 @@ +// ----------------------------------------------------------------------------------------- +// QSVEnc/NVEnc by rigaya +// ----------------------------------------------------------------------------------------- +// The MIT License +// +// Copyright (c) 2023 rigaya +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// +// -------------------------------------------------------------------------------------------- + +#ifndef __RGY_WAV_PARSER_H__ +#define __RGY_WAV_PARSER_H__ + +#include +#include + +static const uint32_t WAVE_HEADER_SIZE = 44; + +struct RGYWAVHeader { + char file_id[5]; // "RIFF" + uint32_t file_size; + char format[5]; // "WAVE" + char subchunk_id[5]; // "fmt " + uint32_t subchunk_size; // 16 for PCM + uint16_t audio_format; // PCM = 1 + uint16_t number_of_channels; + uint32_t sample_rate; + uint32_t byte_rate; // sample_rate * number of channels * bits per sample / 8 + uint16_t block_align; + uint16_t bits_per_sample; + char data_id[5]; //"data" + uint32_t data_size; // samples * number of channels * bits per sample / 8 (Actual number of bytes) + + uint32_t parseHeader(const uint8_t *data); + std::vector createHeader(); +}; + +#endif //__RGY_WAV_PARSER_H__ diff --git a/ffmpegOut.sln b/ffmpegOut.sln index ba541d1..6e44a61 100644 --- a/ffmpegOut.sln +++ b/ffmpegOut.sln @@ -7,20 +7,40 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ffmpegOut", "ffmpegOut\ffmp EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tinyxml2", "tinyxml2\tinyxml2.vcxproj", "{A34CA86D-6C2B-482F-984E-2687459E65E9}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "auoCommon", "auoCommon\auoCommon.vcxproj", "{C7C2269E-87F0-45D4-A5F3-2C6C158D65C5}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 Release|Win32 = Release|Win32 + Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {3D9A7642-26A6-4CA5-B6F6-6666C65507CC}.Debug|Win32.ActiveCfg = Debug|Win32 {3D9A7642-26A6-4CA5-B6F6-6666C65507CC}.Debug|Win32.Build.0 = Debug|Win32 + {3D9A7642-26A6-4CA5-B6F6-6666C65507CC}.Debug|x64.ActiveCfg = Debug|Win32 + {3D9A7642-26A6-4CA5-B6F6-6666C65507CC}.Debug|x64.Build.0 = Debug|Win32 
{3D9A7642-26A6-4CA5-B6F6-6666C65507CC}.Release|Win32.ActiveCfg = Release|Win32 {3D9A7642-26A6-4CA5-B6F6-6666C65507CC}.Release|Win32.Build.0 = Release|Win32 + {3D9A7642-26A6-4CA5-B6F6-6666C65507CC}.Release|x64.ActiveCfg = Release|Win32 + {3D9A7642-26A6-4CA5-B6F6-6666C65507CC}.Release|x64.Build.0 = Release|Win32 {A34CA86D-6C2B-482F-984E-2687459E65E9}.Debug|Win32.ActiveCfg = Debug|Win32 {A34CA86D-6C2B-482F-984E-2687459E65E9}.Debug|Win32.Build.0 = Debug|Win32 + {A34CA86D-6C2B-482F-984E-2687459E65E9}.Debug|x64.ActiveCfg = Debug|Win32 + {A34CA86D-6C2B-482F-984E-2687459E65E9}.Debug|x64.Build.0 = Debug|Win32 {A34CA86D-6C2B-482F-984E-2687459E65E9}.Release|Win32.ActiveCfg = Release|Win32 {A34CA86D-6C2B-482F-984E-2687459E65E9}.Release|Win32.Build.0 = Release|Win32 + {A34CA86D-6C2B-482F-984E-2687459E65E9}.Release|x64.ActiveCfg = Release|Win32 + {A34CA86D-6C2B-482F-984E-2687459E65E9}.Release|x64.Build.0 = Release|Win32 + {C7C2269E-87F0-45D4-A5F3-2C6C158D65C5}.Debug|Win32.ActiveCfg = Debug|Win32 + {C7C2269E-87F0-45D4-A5F3-2C6C158D65C5}.Debug|Win32.Build.0 = Debug|Win32 + {C7C2269E-87F0-45D4-A5F3-2C6C158D65C5}.Debug|x64.ActiveCfg = Debug|x64 + {C7C2269E-87F0-45D4-A5F3-2C6C158D65C5}.Debug|x64.Build.0 = Debug|x64 + {C7C2269E-87F0-45D4-A5F3-2C6C158D65C5}.Release|Win32.ActiveCfg = Release|Win32 + {C7C2269E-87F0-45D4-A5F3-2C6C158D65C5}.Release|Win32.Build.0 = Release|Win32 + {C7C2269E-87F0-45D4-A5F3-2C6C158D65C5}.Release|x64.ActiveCfg = Release|x64 + {C7C2269E-87F0-45D4-A5F3-2C6C158D65C5}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/ffmpegOut/auo_version.h b/ffmpegOut/auo_version.h index 7864189..1f6a87f 100644 --- a/ffmpegOut/auo_version.h +++ b/ffmpegOut/auo_version.h @@ -28,9 +28,9 @@ #ifndef _AUO_VERSION_H_ #define _AUO_VERSION_H_ -#define AUO_VERSION 0,1,8,0 -#define AUO_VERSION_STR "1.08" -#define AUO_VERSION_STR_W L"1.08" +#define AUO_VERSION 0,1,9,0 +#define AUO_VERSION_STR "1.09" 
+#define AUO_VERSION_STR_W L"1.09" #define AUO_NAME_R ffmpegOut #define AUO_NAME_WITHOUT_EXT "ffmpegOut" #define AUO_NAME_WITHOUT_EXT_W L"ffmpegOut" diff --git a/ffmpegOut/encode/auo_audio.cpp b/ffmpegOut/encode/auo_audio.cpp index 52c3b8c..848e8e7 100644 --- a/ffmpegOut/encode/auo_audio.cpp +++ b/ffmpegOut/encode/auo_audio.cpp @@ -79,7 +79,7 @@ inline void *get_audio_data(const OUTPUT_INFO *oip, PRM_ENC *pe, int start, int void auo_faw_check(CONF_AUDIO *aud, const OUTPUT_INFO *oip, PRM_ENC *pe, const guiEx_settings *ex_stg) { if (!(oip->flag & OUTPUT_INFO_FLAG_AUDIO)) return; - const int faw_index = ex_stg->s_aud_faw_index; + const int faw_index = ex_stg->get_faw_index(aud->use_internal); if (faw_index == FAW_INDEX_ERROR) { write_log_auo_line_fmt(LOG_WARNING, L"FAWCheck : %s", g_auo_mes.get(AUO_AUDIO_FAW_INDEX_ERR)); return; @@ -106,7 +106,7 @@ void auo_faw_check(CONF_AUDIO *aud, const OUTPUT_INFO *oip, PRM_ENC *pe, const g aud->ext.use_2pass = aud_stg->mode[aud->ext.enc_mode].enc_2pass; aud->ext.use_wav = !aud_stg->pipe_input; } - write_log_auo_line_fmt(LOG_INFO, L"FAWCheck : FAW, %s", FAW_TYPE_NAME[ret]); + write_log_auo_line_fmt(LOG_INFO, L"FAWCheck : FAW, %s", char_to_wstring(FAW_TYPE_NAME[ret]).c_str()); } break; case FAWCHECK_ERROR_TOO_SHORT: write_log_auo_line_fmt(LOG_WARNING, L"FAWCheck : %s", g_auo_mes.get(AUO_AUDIO_ERR_TOO_SHORT)); @@ -151,31 +151,35 @@ void check_audio_length(OUTPUT_INFO *oip) { } } -static void build_wave_header(BYTE *head, const OUTPUT_INFO *oip, BOOL use_8bit, int sample_n) { +static void build_wave_header(BYTE *head, const int audio_ch, const int audio_rate, BOOL use_8bit, int sample_n) { static const char * const RIFF_HEADER = "RIFF"; static const char * const WAVE_HEADER = "WAVE"; - static const char * const FMT_CHUNK = "fmt "; - static const char * const DATA_CHUNK = "data"; - const DWORD FMT_SIZE = 16; - const short FMT_ID = 1; - const int size = (use_8bit) ? 
sizeof(BYTE) : sizeof(short); - - memcpy( head + 0, RIFF_HEADER, strlen(RIFF_HEADER)); - *(DWORD*)(head + 4) = sample_n * (size * oip->audio_ch) + WAVE_HEADER_SIZE - 8; - memcpy( head + 8, WAVE_HEADER, strlen(WAVE_HEADER)); - memcpy( head + 12, FMT_CHUNK, strlen(FMT_CHUNK)); + static const char * const FMT_CHUNK = "fmt "; + static const char * const DATA_CHUNK = "data"; + const DWORD FMT_SIZE = 16; + const short FMT_ID = 1; + const int size = (use_8bit) ? sizeof(BYTE) : sizeof(short); + + memcpy(head + 0, RIFF_HEADER, strlen(RIFF_HEADER)); + *(DWORD*)(head + 4) = sample_n * (size * audio_ch) + WAVE_HEADER_SIZE - 8; + memcpy(head + 8, WAVE_HEADER, strlen(WAVE_HEADER)); + memcpy(head + 12, FMT_CHUNK, strlen(FMT_CHUNK)); *(DWORD*)(head + 16) = FMT_SIZE; *(short*)(head + 20) = FMT_ID; - *(short*)(head + 22) = (short)oip->audio_ch; - *(DWORD*)(head + 24) = oip->audio_rate; - *(DWORD*)(head + 28) = oip->audio_rate * oip->audio_ch * size; - *(short*)(head + 32) = (short)(size * oip->audio_ch); + *(short*)(head + 22) = (short)audio_ch; + *(DWORD*)(head + 24) = audio_rate; + *(DWORD*)(head + 28) = audio_rate * audio_ch * size; + *(short*)(head + 32) = (short)(size * audio_ch); *(short*)(head + 34) = (short)(size * 8); - memcpy( head + 36, DATA_CHUNK, strlen(DATA_CHUNK)); - *(DWORD*)(head + 40) = sample_n * (size * oip->audio_ch); + memcpy(head + 36, DATA_CHUNK, strlen(DATA_CHUNK)); + *(DWORD*)(head + 40) = sample_n * (size * audio_ch); //計44byte(WAVE_HEADER_SIZE) } +static void build_wave_header(BYTE *head, const OUTPUT_INFO *oip, BOOL use_8bit, int sample_n) { + build_wave_header(head, oip->audio_ch, oip->audio_rate, use_8bit, sample_n); +} + static void correct_header(FILE *f_out, int data_size) { //2箇所の出力データサイズ部分を書き換え int riff_size = data_size + (WAVE_SIZE_POS - RIFF_SIZE_POS); @@ -276,7 +280,7 @@ static void show_progressbar(BOOL use_pipe, const wchar_t *enc_name, int progres static void show_audio_delay_cut_info(int delay_cut, const PRM_ENC *pe) { if 
(AUDIO_DELAY_CUT_EDTS == delay_cut) { - write_log_auo_line_fmt(LOG_INFO, L"%s - %s", g_auo_mes.get(AUO_AUDIO_DELAY_CUT), AUDIO_DELAY_CUT_MODE[AUDIO_DELAY_CUT_EDTS].desc); + write_log_auo_line_fmt(LOG_INFO, L"%s - %s", g_auo_mes.get(AUO_AUDIO_DELAY_CUT), g_auo_mes.get(AUDIO_DELAY_CUT_MODE[AUDIO_DELAY_CUT_EDTS].mes)); } else if (0 != pe->delay_cut_additional_aframe || 0 != pe->delay_cut_additional_vframe) { wchar_t message[1024] = { 0 }; int mes_len = 0; @@ -643,3 +647,98 @@ AUO_RESULT audio_output(CONF_GUIEX *conf, const OUTPUT_INFO *oip, PRM_ENC *pe, c return ret; } + +BOOL check_audenc_output(const AUDIO_SETTINGS *aud_stg, std::wstring& exe_message) { + //実行ファイルチェック(filenameが空文字列なら実行しない) + if (!str_has_char(aud_stg->filename)) { + return TRUE; + } + if (str_has_char(aud_stg->filename) && !PathFileExists(aud_stg->fullpath)) { + return FALSE; + } + + //qaac以外はチェックしない + if (wcsstr(aud_stg->dispname, L"qaac") == nullptr) { + return TRUE; + } + + exe_message.clear(); + + char fullargs[8192]; + sprintf_s(fullargs, "\"%s\" -o nul -", aud_stg->fullpath); + + const int audio_rate = 48000; + const int audio_n = audio_rate; + const int audio_ch = 2; + const int audio_use_8bit = FALSE; + const int audio_elem_size = (audio_use_8bit) ? 
1 : 2; + std::vector test_buffer(audio_n * audio_ch * audio_elem_size, 0); + + PROCESS_INFORMATION pi = { 0 }; + PIPE_SET pipes = { 0 }; + InitPipes(&pipes); + pipes.stdIn.mode = AUO_PIPE_ENABLE; + pipes.stdOut.mode = AUO_PIPE_DISABLE; + pipes.stdErr.mode = AUO_PIPE_ENABLE; + pipes.stdIn.bufferSize = test_buffer.size(); + + char exe_dir[1024] = { 0 }; + strcpy_s(exe_dir, _countof(exe_dir), aud_stg->fullpath); + PathRemoveFileSpecFixed(exe_dir); + + BOOL ret = FALSE; + if ((ret = RunProcess(fullargs, exe_dir, &pi, &pipes, NORMAL_PRIORITY_CLASS, TRUE, FALSE)) == RP_SUCCESS) { + + while (WAIT_TIMEOUT == WaitForInputIdle(pi.hProcess, LOG_UPDATE_INTERVAL)) + log_process_events(); + + BYTE head[WAVE_HEADER_SIZE]; + build_wave_header(head, audio_ch, audio_rate, audio_use_8bit, audio_n); + _fwrite_nolock(&head, 1, sizeof(head), pipes.f_stdin); + _fwrite_nolock(&test_buffer[0], 1, test_buffer.size(), pipes.f_stdin); + CloseStdIn(&pipes); + + auto read_stderr = [](PIPE_SET *pipes) { + DWORD pipe_read = 0; + if (!PeekNamedPipe(pipes->stdErr.h_read, NULL, 0, NULL, &pipe_read, NULL)) + return -1; + if (pipe_read) { + ReadFile(pipes->stdErr.h_read, pipes->read_buf + pipes->buf_len, sizeof(pipes->read_buf) - pipes->buf_len - 1, &pipe_read, NULL); + pipes->buf_len += pipe_read; + pipes->read_buf[pipes->buf_len] = '\0'; + } + return (int)pipe_read; + }; + + while (WAIT_TIMEOUT == WaitForSingleObject(pi.hProcess, 10)) { + if (read_stderr(&pipes)) { + exe_message += char_to_wstring(pipes.read_buf, CP_UTF8); + pipes.buf_len = 0; + } else { + log_process_events(); + } + } + + while (read_stderr(&pipes) > 0) { + exe_message += char_to_wstring(pipes.read_buf, CP_UTF8); + pipes.buf_len = 0; + } + log_process_events(); + + DWORD exitCode = 0; + GetExitCodeProcess(pi.hProcess, &exitCode); + + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + + ret = exitCode == 0 ? 
TRUE : FALSE; + //if (exe_message.find("CoreAudioToolbox.dll") == std::string::npos) { + // ret = TRUE; + //} + } + + if (pipes.stdIn.mode) CloseHandle(pipes.stdIn.h_read); + if (pipes.stdOut.mode) CloseHandle(pipes.stdOut.h_read); + if (pipes.stdErr.mode) CloseHandle(pipes.stdErr.h_read); + return ret; +} diff --git a/ffmpegOut/encode/auo_audio.h b/ffmpegOut/encode/auo_audio.h index 5112263..e802ad5 100644 --- a/ffmpegOut/encode/auo_audio.h +++ b/ffmpegOut/encode/auo_audio.h @@ -43,4 +43,6 @@ void check_audio_length(OUTPUT_INFO *oip); AUO_RESULT audio_output(CONF_GUIEX *conf, const OUTPUT_INFO *oip, PRM_ENC *pe, const SYSTEM_DATA *sys_dat); //音声処理を実行 AUO_RESULT audio_output_parallel(CONF_GUIEX *conf, const OUTPUT_INFO *oip, PRM_ENC *pe, const SYSTEM_DATA *sys_dat); +BOOL check_audenc_output(const AUDIO_SETTINGS *aud_stg, std::wstring& exe_message); + #endif //_AUO_AUDIO_H_ \ No newline at end of file diff --git a/ffmpegOut/encode/auo_encode.cpp b/ffmpegOut/encode/auo_encode.cpp index 4b976b0..185a247 100644 --- a/ffmpegOut/encode/auo_encode.cpp +++ b/ffmpegOut/encode/auo_encode.cpp @@ -592,8 +592,38 @@ BOOL check_output(CONF_GUIEX *conf, OUTPUT_INFO *oip, const PRM_ENC *pe, guiEx_s warning_use_default_audio_encoder(exstg->s_aud_ext[cnf_aud->encoder].dispname); } } - if (0 <= cnf_aud->encoder && cnf_aud->encoder < exstg->s_aud_ext_count) { + for (;;) { + if (cnf_aud->encoder < 0 || exstg->s_aud_ext_count <= cnf_aud->encoder) { + error_invalid_ini_file(); + check = FALSE; + break; + } AUDIO_SETTINGS *aud_stg = &exstg->s_aud_ext[cnf_aud->encoder]; + if (!muxer_supports_audio_format(pe->muxer_to_be_used, aud_stg)) { + const bool retry_with_default_audenc = false; // ffmpeg_audencを配布していないQSV/NV/VCEEncではここのretryは無効化する + const int orig_encoder = cnf_aud->encoder; + if (retry_with_default_audenc) { + if (default_audenc_cnf_avail + && orig_encoder != exstg->s_local.default_audio_encoder_ext + && 0 <= exstg->s_local.default_audio_encoder_ext && 
exstg->s_local.default_audio_encoder_ext < exstg->s_aud_ext_count + && muxer_supports_audio_format(pe->muxer_to_be_used, &exstg->s_aud_ext[exstg->s_local.default_audio_encoder_ext])) { + cnf_aud->encoder = exstg->s_local.default_audio_encoder_ext; + } else if (default_audenc_auo_avail) { + cnf_aud->encoder = DEFAULT_AUDIO_ENCODER_EXT; + } + } + error_unsupported_audio_format_by_muxer(pe->video_out_type, + exstg->s_aud_ext[orig_encoder].dispname, + (orig_encoder != cnf_aud->encoder) ? exstg->s_aud_ext[cnf_aud->encoder].dispname : nullptr); + // 同じエンコーダあるいはデフォルトエンコーダがうまく取得できな場合は再チェックしても意味がない + if (orig_encoder == cnf_aud->encoder) { + check = FALSE; + break; + } + // デフォルトエンコーダに戻して再チェック + warning_use_default_audio_encoder(exstg->s_aud_ext[cnf_aud->encoder].dispname); + continue; + } if (!audio_encoder_exe_exists(conf, exstg)) { //とりあえず、exe_filesを探す { @@ -638,31 +668,48 @@ BOOL check_output(CONF_GUIEX *conf, OUTPUT_INFO *oip, const PRM_ENC *pe, guiEx_s } } if (!PathFileExists(aud_stg->fullpath)) { - //fawの場合はfaw2aacがあればOKだが、それもなければエラー - if (!(cnf_aud->encoder == exstg->get_faw_index(conf->aud.use_internal) && check_if_faw2aac_exists())) { + //fawの場合はOK + if (cnf_aud->encoder != exstg->get_faw_index(conf->aud.use_internal)) { error_no_exe_file(aud_stg->dispname, aud_stg->fullpath); check = FALSE; + break; } } } - if (str_has_char(aud_stg->filename) && (cnf_aud->encoder != exstg->get_faw_index(conf->aud.use_internal) || !check_if_faw2aac_exists())) { - info_use_exe_found(aud_stg->dispname, aud_stg->fullpath); - } #if !ENCODER_FFMPEG - if (!muxer_supports_audio_format(pe->muxer_to_be_used, aud_stg)) { - AUDIO_SETTINGS *aud_default = nullptr; - if (default_audenc_cnf_avail) { - aud_default = &exstg->s_aud_ext[exstg->s_local.default_audio_encoder_ext]; - } else if (default_audenc_auo_avail) { - aud_default = &exstg->s_aud_ext[DEFAULT_AUDIO_ENCODER_EXT]; + if (str_has_char(aud_stg->filename) && (cnf_aud->encoder != exstg->get_faw_index(conf->aud.use_internal))) { + 
std::wstring exe_message; + if (!check_audenc_output(aud_stg, exe_message)) { + const bool retry_with_default_audenc = false; // ffmpeg_audencを配布していないQSV/NV/VCEEncではここのretryは無効化する + const int orig_encoder = cnf_aud->encoder; + if (retry_with_default_audenc) { + if (default_audenc_cnf_avail + && orig_encoder != exstg->s_local.default_audio_encoder_ext + && 0 <= exstg->s_local.default_audio_encoder_ext && exstg->s_local.default_audio_encoder_ext < exstg->s_aud_ext_count + && muxer_supports_audio_format(pe->muxer_to_be_used, &exstg->s_aud_ext[exstg->s_local.default_audio_encoder_ext])) { + cnf_aud->encoder = exstg->s_local.default_audio_encoder_ext; + } else if (default_audenc_auo_avail) { + cnf_aud->encoder = DEFAULT_AUDIO_ENCODER_EXT; + } + error_failed_to_run_audio_encoder( + exstg->s_aud_ext[orig_encoder].dispname, + exe_message.c_str(), + (orig_encoder != cnf_aud->encoder) ? exstg->s_aud_ext[cnf_aud->encoder].dispname : nullptr); + } + // 同じエンコーダあるいはデフォルトエンコーダがうまく取得できな場合は再チェックしても意味がない + if (orig_encoder == cnf_aud->encoder) { + check = FALSE; + break; + } + // デフォルトエンコーダに戻して再チェック + warning_use_default_audio_encoder(exstg->s_aud_ext[cnf_aud->encoder].dispname); + continue; } - error_unsupported_audio_format_by_muxer(pe->video_out_type, aud_stg->dispname, (aud_default) ? 
aud_default->dispname : nullptr); - check = FALSE; + info_use_exe_found(aud_stg->dispname, aud_stg->fullpath); } -#endif - } else { - error_invalid_ini_file(); - check = FALSE; +#endif + // ここまで来たらエンコーダの確認終了なのでbreak + break; } } } @@ -689,11 +736,11 @@ BOOL check_output(CONF_GUIEX *conf, OUTPUT_INFO *oip, const PRM_ENC *pe, guiEx_s return check; } -void open_log_window(const char *savefile, const SYSTEM_DATA *sys_dat, int current_pass, int total_pass, bool amp_crf_reenc) { +void open_log_window(const OUTPUT_INFO *oip, const SYSTEM_DATA *sys_dat, int current_pass, int total_pass, bool amp_crf_reenc) { wchar_t mes[MAX_PATH_LEN + 512]; const wchar_t *newLine = (get_current_log_len(current_pass == 1 && !amp_crf_reenc)) ? L"\r\n\r\n" : L""; //必要なら行送り static const wchar_t *SEPARATOR = L"------------------------------------------------------------------------------------------------------------------------------"; - const std::wstring savefile_w = char_to_wstring(savefile); + const std::wstring savefile_w = char_to_wstring(oip->savefile); if (total_pass < 2 || current_pass > total_pass) swprintf_s(mes, L"%s%s\r\n[%s]\r\n%s", newLine, SEPARATOR, savefile_w.c_str(), SEPARATOR); else @@ -701,14 +748,37 @@ void open_log_window(const char *savefile, const SYSTEM_DATA *sys_dat, int curre show_log_window(sys_dat->aviutl_dir, sys_dat->exstg->s_local.disable_visual_styles); write_log_line(LOG_INFO, mes); - + char cpu_info[256]; getCPUInfo(cpu_info); DWORD buildNumber = 0; - const TCHAR *osver = getOSVersion(&buildNumber); + const auto osver = char_to_wstring(getOSVersion(&buildNumber)); write_log_auo_line_fmt(LOG_INFO, L"%s %s / %s %s (%d) / %s", - AUO_NAME_WITHOUT_EXT_W, AUO_VERSION_STR_W, char_to_wstring(osver).c_str(), - is_64bit_os() ? L"x64" : L"x86", buildNumber, char_to_wstring(cpu_info).c_str()); + AUO_NAME_WITHOUT_EXT_W, AUO_VERSION_STR_W, osver.c_str(), is_64bit_os() ? 
L"x64" : L"x86", buildNumber, char_to_wstring(cpu_info).c_str()); + + if (oip->flag & OUTPUT_INFO_FLAG_VIDEO) { + const double video_length = oip->n * (double)oip->scale / oip->rate; + + const int vid_h = (int)(video_length / 3600); + const int vid_m = (int)(video_length - vid_h * 3600) / 60; + const int vid_s = (int)(video_length - vid_h * 3600 - vid_m * 60); + const int vid_ms = std::min((int)((video_length - (double)(vid_h * 3600 + vid_m * 60 + vid_s)) * 1000.0), 999); + + write_log_auo_line_fmt(LOG_INFO, L"video: %d:%02d:%02d.%03d %d/%d(%.3f) fps", + vid_h, vid_m, vid_s, vid_ms, oip->rate, oip->scale, oip->rate / (double)oip->scale); + } + + if (oip->flag & OUTPUT_INFO_FLAG_AUDIO) { + const double audio_length = oip->audio_n / (double)oip->audio_rate; + + const int aud_h = (int)audio_length / 3600; + const int aud_m = (int)(audio_length - aud_h * 3600) / 60; + const int aud_s = (int)(audio_length - aud_h * 3600 - aud_m * 60); + const int aud_ms = std::min((int)((audio_length - (double)(aud_h * 3600 + aud_m * 60 + aud_s)) * 1000.0), 999); + + write_log_auo_line_fmt(LOG_INFO, L"audio: %d:%02d:%02d.%03d %dch %.1fkHz %d samples", + aud_h, aud_m, aud_s, aud_ms, oip->audio_ch, oip->audio_rate / 1000.0, oip->audio_n); + } } static void set_tmpdir(PRM_ENC *pe, int tmp_dir_index, const char *savefile, const SYSTEM_DATA *sys_dat) { diff --git a/ffmpegOut/encode/auo_encode.h b/ffmpegOut/encode/auo_encode.h index 3ca05f4..e82bef0 100644 --- a/ffmpegOut/encode/auo_encode.h +++ b/ffmpegOut/encode/auo_encode.h @@ -58,7 +58,7 @@ bool video_is_last_pass(const PRM_ENC *pe); BOOL check_if_exedit_is_used(); BOOL check_output(CONF_GUIEX *conf, OUTPUT_INFO *oip, const PRM_ENC *pe, guiEx_settings *exstg); -void open_log_window(const char *savefile, const SYSTEM_DATA *sys_dat, int current_pass, int total_pass, bool amp_crf_reenc = false); +void open_log_window(const OUTPUT_INFO *oip, const SYSTEM_DATA *sys_dat, int current_pass, int total_pass, bool amp_crf_reenc = false); void 
auto_save_log(const CONF_GUIEX *conf, const OUTPUT_INFO *oip, const PRM_ENC *pe, const SYSTEM_DATA *sys_dat); void warn_video_length(const OUTPUT_INFO *oip); int get_total_path(const CONF_GUIEX *conf); diff --git a/ffmpegOut/encode/auo_faw2aac.cpp b/ffmpegOut/encode/auo_faw2aac.cpp index 6435b44..b947c9a 100644 --- a/ffmpegOut/encode/auo_faw2aac.cpp +++ b/ffmpegOut/encode/auo_faw2aac.cpp @@ -38,6 +38,7 @@ #include #include "output.h" +#include "rgy_faw.h" #include "auo.h" #include "auo_version.h" #include "auo_util.h" @@ -52,475 +53,226 @@ #include "auo_faw2aac.h" #include "auo_mes.h" -typedef OUTPUT_PLUGIN_TABLE* (*func_get_auo_table)(void); - -typedef void *(*auo_func_get_audio)( int start,int length,int *readed ); - -BOOL check_if_faw2aac_exists() { - char aviutl_dir[MAX_PATH_LEN]; - get_aviutl_dir(aviutl_dir, _countof(aviutl_dir)); - - for (int i = 0; i < _countof(FAW2AAC_NAME); i++) { - char faw2aac_path[MAX_PATH_LEN]; - PathCombineLong(faw2aac_path, _countof(faw2aac_path), aviutl_dir, FAW2AAC_NAME[i]); - if (PathFileExists(faw2aac_path)) - return TRUE; - } - return FALSE; -} - -static const OUTPUT_INFO *g_oip; -static PRM_ENC *g_pe; -static BOOL auo_rest_time_disp(int now, int total) { - if (!g_pe || !g_pe->aud_parallel.th_aud) { //並列処理時には進捗表示をスキップ - if (g_oip) - g_oip->func_rest_time_disp(now, total); - //進捗表示 - static auto tm_last = std::chrono::system_clock::now(); - decltype(tm_last) tm; - if (std::chrono::duration_cast((tm = std::chrono::system_clock::now()) - tm_last).count() > LOG_UPDATE_INTERVAL * 5) { - set_log_progress(now / (double)total); - tm_last = tm; - } - } - return TRUE; +struct faw2aac_data_t { + int id; + char audfile[MAX_PATH_LEN]; + BOOL is_internal; + HANDLE h_aud_namedpipe; + HANDLE he_ov_aud_namedpipe; + std::vector outBuffer; + std::future thOut; + bool thAbort; + HANDLE heOutputDataPushed; + HANDLE heOutputDataWritten; + FILE *fp_out; }; -static void __forceinline audio_pass_upper8bit(short *data, int length) { - for (int i = 0; i < 
length; i++) - data[i] &= 0xff00; -} -static void __forceinline audio_pass_lower8bit(short *data, int length) { - for (int i = 0; i < length; i++) - data[i] <<= 8; -} -static void __forceinline audio_pass_upper8bit_sse2(short *data, int length) { - short *data_fin = (short *)(((size_t)data + 15) & ~15); - length -= (data_fin - data); - for ( ; data < data_fin; data++) - *data &= 0xff00; - //メインループ - data_fin = data + (length & ~15); - __m128i x0, x1; - __m128i xMask = _mm_slli_epi16(_mm_cmpeq_epi8(_mm_setzero_si128(), _mm_setzero_si128()), 8); //0xff00 - for ( ; data < data_fin; data += 16) { - x0 = _mm_load_si128((__m128i*)(data + 0)); - x1 = _mm_load_si128((__m128i*)(data + 8)); - x0 = _mm_and_si128(x0, xMask); - x1 = _mm_and_si128(x1, xMask); - _mm_store_si128((__m128i*)(data + 0), x0); - _mm_store_si128((__m128i*)(data + 8), x1); - } - data = data_fin + (length & 15) - 16; - x0 = _mm_loadu_si128((__m128i*)(data + 0)); - x1 = _mm_loadu_si128((__m128i*)(data + 8)); - x0 = _mm_and_si128(x0, xMask); - x1 = _mm_and_si128(x1, xMask); - _mm_storeu_si128((__m128i*)(data + 0), x0); - _mm_storeu_si128((__m128i*)(data + 8), x1); -} -static void __forceinline audio_pass_lower8bit_sse2(short *data, int length) { - short *data_fin = (short *)(((size_t)data + 15) & ~15); - length -= (data_fin - data); - for ( ; data < data_fin; data++) - *data <<= 8; - //メインループ - data_fin = data + (length & ~15); - __m128i x0, x1; - for ( ; data < data_fin; data += 16) { - x0 = _mm_load_si128((__m128i*)(data + 0)); - x1 = _mm_load_si128((__m128i*)(data + 8)); - x0 = _mm_slli_epi16(x0, 8); - x1 = _mm_slli_epi16(x1, 8); - _mm_store_si128((__m128i*)(data + 0), x0); - _mm_store_si128((__m128i*)(data + 8), x1); - } - data_fin += (length & 15); - for ( ; data < data_fin; data++) - *data <<= 8; -} - -//音声通常処理用 -static void *auo_get_audio_normal_upper8bit(int start, int length, int *readed) { - auto oip = g_oip; - if (oip == nullptr) { - throw std::exception(); - } - short *dat = (short 
*)oip->func_get_audio(start, length, readed); - audio_pass_upper8bit(dat, *readed * oip->audio_ch); - return dat; -} -static void *auo_get_audio_normal_lower8bit(int start, int length, int *readed) { - auto oip = g_oip; - if (oip == nullptr) { - throw std::exception(); - } - short *dat = (short *)oip->func_get_audio(start, length, readed); - audio_pass_lower8bit(dat, *readed * oip->audio_ch); - return dat; -} -static void *auo_get_audio_normal_upper8bit_sse2(int start, int length, int *readed) { - auto oip = g_oip; - if (oip == nullptr) { - throw std::exception(); - } - short *dat = (short *)oip->func_get_audio(start, length, readed); - audio_pass_upper8bit_sse2(dat, *readed * oip->audio_ch); - return dat; -} -static void *auo_get_audio_normal_lower8bit_sse2(int start, int length, int *readed) { - auto oip = g_oip; - if (oip == nullptr) { - throw std::exception(); - } - short *dat = (short *)oip->func_get_audio(start, length, readed); - audio_pass_lower8bit_sse2(dat, *readed * oip->audio_ch); - return dat; -} -//音声並列処理用 -static void *auo_get_audio_parallel_buf(int length) { - const auto thid = GetCurrentThreadId(); - int thidx = -1; - for (int i = 0; i < 2 && thidx < 0; i++) { - if (g_pe->aud_parallel.faw2aac[i].threadid == 0) { - g_pe->aud_parallel.faw2aac[i].threadid = thid; - } - if (g_pe->aud_parallel.faw2aac[i].threadid == thid) { - thidx = i; - } - } - if (thidx < 0) { - return nullptr; - } - if ((int)g_pe->aud_parallel.faw2aac[thidx].buf_len < length) { - if (g_pe->aud_parallel.faw2aac[thidx].buffer) { - _aligned_free(g_pe->aud_parallel.faw2aac[thidx].buffer); +static size_t write_file(faw2aac_data_t *aud_dat, const PRM_ENC *pe, const void *buf, size_t size) { + if (aud_dat->is_internal) { + while (WaitForSingleObject(aud_dat->heOutputDataWritten, 50) != WAIT_OBJECT_0) { + if (pe->aud_parallel.abort) { + return 0; + } } - g_pe->aud_parallel.faw2aac[thidx].buffer = _aligned_malloc(length, 32); - g_pe->aud_parallel.faw2aac[thidx].buf_len = length; - } - return 
g_pe->aud_parallel.faw2aac[thidx].buffer; -} -static void *auo_get_audio_parallel(int start, int length, int *readed) { - auto pe = g_pe; - auto oip = g_oip; - if (oip == nullptr || pe == nullptr) { - throw std::exception(); - } - std::lock_guard lock(*pe->aud_parallel.mtx_aud); - short *dat = (short *)get_audio_data(oip, pe, start, length, readed); - short *buf = (short *)auo_get_audio_parallel_buf(*readed * oip->audio_size); - memcpy(buf, dat, *readed * oip->audio_size); - return buf; -} -static void *auo_get_audio_parallel_upper8bit(int start, int length, int *readed) { - auto pe = g_pe; - auto oip = g_oip; - if (oip == nullptr || pe == nullptr) { - throw std::exception(); - } - std::lock_guard lock(*pe->aud_parallel.mtx_aud); - short *dat = (short *)get_audio_data(oip, pe, start, length, readed); - short *buf = (short *)auo_get_audio_parallel_buf(*readed * oip->audio_size); - memcpy(buf, dat, *readed * oip->audio_size); - audio_pass_upper8bit(buf, *readed * oip->audio_ch); - return buf; -} -static void *auo_get_audio_parallel_lower8bit(int start, int length, int *readed) { - auto pe = g_pe; - auto oip = g_oip; - if (oip == nullptr || pe == nullptr) { - throw std::exception(); - } - std::lock_guard lock(*pe->aud_parallel.mtx_aud); - short *dat = (short *)get_audio_data(g_oip, pe, start, length, readed); - short *buf = (short *)auo_get_audio_parallel_buf(*readed * oip->audio_size); - memcpy(buf, dat, *readed * oip->audio_size); - audio_pass_lower8bit(buf, *readed * oip->audio_ch); - return buf; -} -static void *auo_get_audio_parallel_upper8bit_sse2(int start, int length, int *readed) { - auto pe = g_pe; - auto oip = g_oip; - if (oip == nullptr || pe == nullptr) { - throw std::exception(); - } - std::lock_guard lock(*pe->aud_parallel.mtx_aud); - short *dat = (short *)get_audio_data(g_oip, pe, start, length, readed); - short *buf = (short *)auo_get_audio_parallel_buf(*readed * oip->audio_size); - memcpy(buf, dat, *readed * oip->audio_size); - 
audio_pass_upper8bit_sse2(buf, *readed * oip->audio_ch); - return buf; -} -static void *auo_get_audio_parallel_lower8bit_sse2(int start, int length, int *readed) { - auto pe = g_pe; - auto oip = g_oip; - if (oip == nullptr || pe == nullptr) { - throw std::exception(); - } - std::lock_guard lock(*pe->aud_parallel.mtx_aud); - short *dat = (short *)get_audio_data(g_oip, pe, start, length, readed); - short *buf = (short *)auo_get_audio_parallel_buf(*readed * oip->audio_size); - memcpy(buf, dat, *readed * oip->audio_size); - audio_pass_lower8bit_sse2(buf, *readed * oip->audio_ch); - return buf; -} -static const auo_func_get_audio FAW2AAC_AUDIO_NORMAL[][2] = { - { auo_get_audio_normal_upper8bit, auo_get_audio_normal_upper8bit_sse2 }, - { auo_get_audio_normal_lower8bit, auo_get_audio_normal_lower8bit_sse2 }, -}; -static const auo_func_get_audio FAW2AAC_AUDIO_PARALLEL[][2] = { - { auo_get_audio_parallel_upper8bit, auo_get_audio_parallel_upper8bit_sse2 }, - { auo_get_audio_parallel_lower8bit, auo_get_audio_parallel_lower8bit_sse2 }, -}; - - -static BOOL auo_get_if_abort() { - return (g_pe) ? 
g_pe->aud_parallel.abort : FALSE; -} -static int auo_kill_update_preview() { - return TRUE; -} - -static AUO_RESULT audio_faw2aac_check(const char *audfile) { - AUO_RESULT ret = AUO_RESULT_SUCCESS; - UINT64 audfilesize = 0; - if (!PathFileExists(audfile) || - (GetFileSizeUInt64(audfile, &audfilesize) && audfilesize == 0)) { - //エラーが発生した場合 - ret |= AUO_RESULT_ERROR; error_audenc_failed(L"faw2aac.auo", NULL); + const auto origSize = aud_dat->outBuffer.size(); + aud_dat->outBuffer.resize(origSize + size); + memcpy(aud_dat->outBuffer.data() + origSize, buf, size); + SetEvent(aud_dat->heOutputDataPushed); + return size; + } else { + return _fwrite_nolock(buf, 1, size, aud_dat->fp_out); } - return ret; } -typedef struct { - char name[MAX_PATH_LEN]; - HANDLE h_pipe; - HANDLE he_ov_aud_namedpipe; -} faw2aac_named_pipeset_t; - -typedef struct { - OUTPUT_INFO oip; - std::future th_faw2aac; - std::future th_transfer; - bool th_transfer_started; - faw2aac_named_pipeset_t from_auo; - faw2aac_named_pipeset_t to_exe; - char audfile[MAX_PATH_LEN]; -} faw2aac_data_t; - AUO_RESULT audio_faw2aac(CONF_GUIEX *conf, const OUTPUT_INFO *oip, PRM_ENC *pe, const SYSTEM_DATA *sys_dat) { AUO_RESULT ret = AUO_RESULT_SUCCESS; - HMODULE hModule = NULL; - func_get_auo_table getFAW2AACTable = NULL; - OUTPUT_PLUGIN_TABLE *opt = NULL; - char aviutl_dir[MAX_PATH_LEN]; - get_aviutl_dir(aviutl_dir, _countof(aviutl_dir)); - for (int i = 0; i < _countof(FAW2AAC_NAME); i++) { - char faw2aac_path[MAX_PATH_LEN]; - PathCombineLong(faw2aac_path, _countof(faw2aac_path), aviutl_dir, FAW2AAC_NAME[i]); - if (PathFileExists(faw2aac_path)) { - hModule = LoadLibrary(faw2aac_path); - break; + set_window_title(L"faw2aac", PROGRESSBAR_CONTINUOUS); + write_log_auo_line_fmt(LOG_INFO, L"faw2aac %s", g_auo_mes.get(AUO_AUDIO_START_ENCODE)); + const int bufsize = sys_dat->exstg->s_local.audio_buffer_size; + + faw2aac_data_t aud_dat[2]; + //パイプ or ファイルオープン + for (int i_aud = 0; !ret && i_aud < pe->aud_count; i_aud++) { + // 
初期化 + aud_dat[i_aud].id = i_aud; + memset(aud_dat[i_aud].audfile, 0, sizeof(aud_dat[i_aud].audfile)); + aud_dat[i_aud].is_internal = conf->aud.use_internal; + aud_dat[i_aud].h_aud_namedpipe = nullptr; + aud_dat[i_aud].he_ov_aud_namedpipe = nullptr; + aud_dat[i_aud].thAbort = false; + aud_dat[i_aud].heOutputDataPushed = nullptr; + aud_dat[i_aud].heOutputDataWritten = nullptr; + aud_dat[i_aud].fp_out = nullptr; + if (conf->aud.use_internal) { + char pipename[MAX_PATH_LEN]; + get_audio_pipe_name(pipename, _countof(pipename), i_aud); + aud_dat[i_aud].h_aud_namedpipe = CreateNamedPipeA(pipename, PIPE_ACCESS_OUTBOUND | FILE_FLAG_OVERLAPPED, PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT, 1, 4096, 4096, 0, NULL); + aud_dat[i_aud].he_ov_aud_namedpipe = CreateEvent(NULL, FALSE, FALSE, NULL); + aud_dat[i_aud].heOutputDataPushed = CreateEvent(NULL, FALSE, FALSE, NULL); + aud_dat[i_aud].heOutputDataWritten = CreateEvent(NULL, FALSE, TRUE, NULL); } } - if (hModule == NULL) { - ret = AUO_RESULT_WARNING; write_log_auo_line(LOG_INFO, g_auo_mes.get(AUO_FAW2AAC_NOT_FOUND)); - } else if ( - NULL == (getFAW2AACTable = (func_get_auo_table)GetProcAddress(hModule, "GetOutputPluginTable")) - || NULL == (opt = getFAW2AACTable()) - || NULL == opt->func_output) { - ret = AUO_RESULT_WARNING; write_log_auo_line(LOG_WARNING, g_auo_mes.get(AUO_FAW2AAC_NOT_LOADED)); - } else { - //進捗表示の取り込み - g_oip = oip; - g_pe = pe; - - set_window_title(L"faw2aac", PROGRESSBAR_CONTINUOUS); - write_log_auo_line_fmt(LOG_INFO, L"faw2aac %s", g_auo_mes.get(AUO_AUDIO_START_ENCODE)); - static const int PIPE_BUF = 4096; + //確実なfcloseのために何故か一度ここで待機する必要あり + if_valid_set_event(pe->aud_parallel.he_vid_start); + if_valid_wait_for_single_object(pe->aud_parallel.he_aud_start, INFINITE); - faw2aac_data_t aud_dat[2]; - for (int i_aud = 0; !ret && i_aud < pe->aud_count; i_aud++) { - aud_dat[i_aud].oip = *oip; - aud_dat[i_aud].from_auo.h_pipe = NULL; - aud_dat[i_aud].to_exe.h_pipe = NULL; - aud_dat[i_aud].th_transfer_started = 
false; - pe->aud_parallel.faw2aac[i_aud].threadid = 0; - pe->aud_parallel.faw2aac[i_aud].buffer = 0; - pe->aud_parallel.faw2aac[i_aud].buf_len = 0; - if (conf->aud.use_internal) { - static const char *const FAW2AAC_NAMED_PIPE_BASE = "\\\\.\\pipe\\Aviutl%08x_AuoFAW2AACPipe%d.aac"; - sprintf_s(aud_dat[i_aud].from_auo.name, FAW2AAC_NAMED_PIPE_BASE, GetCurrentProcessId(), i_aud); - aud_dat[i_aud].from_auo.h_pipe = CreateNamedPipeA(aud_dat[i_aud].from_auo.name, PIPE_ACCESS_INBOUND, PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT, 1, PIPE_BUF, PIPE_BUF, 0, NULL); - aud_dat[i_aud].oip.savefile = aud_dat[i_aud].from_auo.name; - - get_audio_pipe_name(aud_dat[i_aud].to_exe.name, _countof(aud_dat[i_aud].to_exe.name), i_aud); - aud_dat[i_aud].to_exe.h_pipe = CreateNamedPipeA(aud_dat[i_aud].to_exe.name, PIPE_ACCESS_OUTBOUND | FILE_FLAG_OVERLAPPED, PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT, 1, PIPE_BUF, PIPE_BUF, 0, NULL); - aud_dat[i_aud].to_exe.he_ov_aud_namedpipe = CreateEvent(NULL, FALSE, FALSE, NULL); - } else { - //audfile名作成 - const CONF_AUDIO_BASE *cnf_aud = (conf->aud.use_internal) ? &conf->aud.in : &conf->aud.ext; - const AUDIO_SETTINGS *aud_stg = (conf->aud.use_internal) ? &sys_dat->exstg->s_aud_int[cnf_aud->encoder] : &sys_dat->exstg->s_aud_ext[cnf_aud->encoder]; - strcpy_s(pe->append.aud[i_aud], _countof(pe->append.aud[i_aud]), aud_stg->aud_appendix); //pe一時パラメータにコピーしておく - if (i_aud) - insert_before_ext(pe->append.aud[i_aud], _countof(pe->append.aud[i_aud]), i_aud); - get_aud_filename(aud_dat[i_aud].audfile, _countof(aud_dat[i_aud].audfile), pe, i_aud); - aud_dat[i_aud].oip.savefile = aud_dat[i_aud].audfile; - } - aud_dat[i_aud].oip.func_rest_time_disp = auo_rest_time_disp; - //並列処理制御用 - if (pe->aud_parallel.th_aud) { - aud_dat[i_aud].oip.func_get_audio = (pe->aud_count > 1) ? 
FAW2AAC_AUDIO_PARALLEL[!!i_aud][!!check_sse2()] : auo_get_audio_parallel; - aud_dat[i_aud].oip.func_is_abort = auo_get_if_abort; - aud_dat[i_aud].oip.func_update_preview = auo_kill_update_preview; - //通常処理用 - } else if (pe->aud_count > 1) { - aud_dat[i_aud].oip.func_get_audio = FAW2AAC_AUDIO_NORMAL[!!i_aud][!!check_sse2()]; - } - } - if_valid_set_event(pe->aud_parallel.he_vid_start); - if_valid_wait_for_single_object(pe->aud_parallel.he_aud_start, INFINITE); - HANDLE threadStarted[2] = { NULL, NULL }; - auto run_faw2aac = [&](int audio_idx) { - int ret = AUO_RESULT_SUCCESS; - //開始 - if (opt->func_init && !opt->func_init()) { - ret = AUO_RESULT_ERROR; write_log_auo_line(LOG_WARNING, g_auo_mes.get(AUO_FAW2AAC_ERR_INIT)); - } else { - //faw2aac用の処理スレッドが処理を開始したことを通知 - if (threadStarted[audio_idx]) SetEvent(threadStarted[audio_idx]); - if (FALSE == opt->func_output(&aud_dat[audio_idx].oip)) { - ret = AUO_RESULT_ERROR; write_log_auo_line(LOG_WARNING, g_auo_mes.get(AUO_FAW2AAC_ERR_RUN)); - } - } - if (opt->func_exit) - opt->func_exit(); - return ret; - }; - - if (conf->aud.use_internal) { - auto th_faw2aac_finished = [&]() { - for (int i_aud = 0; i_aud < pe->aud_count; i_aud++) { - if (aud_dat[i_aud].th_faw2aac.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { - return false; - } - } - return true; - }; - auto th_transfer_finished = [&]() { - for (int i_aud = 0; i_aud < pe->aud_count; i_aud++) { - if (aud_dat[i_aud].th_transfer.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) { - return false; - } - } - return true; - }; - auto run_transfer_pipe = [&](int audio_idx) { - int ret = 0; - auto aud_track = &aud_dat[audio_idx]; - //faw2aacを実行するスレッドの起動を確認する - while (WaitForSingleObject(threadStarted[audio_idx], 10) == WAIT_TIMEOUT) { - if (pe->aud_parallel.abort || aud_track->th_faw2aac.wait_for(std::chrono::milliseconds(0)) == std::future_status::ready) { - return 1; //faw2aacを実行するスレッドが異常終了した場合 - } - } - 
//少し待って様子を見る(func_outputが失敗しないかどうか) - std::this_thread::sleep_for(std::chrono::milliseconds(50)); - if (aud_track->th_faw2aac.wait_for(std::chrono::milliseconds(0)) == std::future_status::ready) { - return 1; //faw2aacを実行するスレッドが異常終了した場合 - } - //エンコーダプロセスの起動を確認 + //パイプ or ファイルオープン + if (conf->aud.use_internal) { + auto run_transfer_pipe = [&](int audio_idx) { + int ret = 0; + auto aud_track = &aud_dat[audio_idx]; + //エンコーダプロセスの起動を確認 + { OVERLAPPED overlapped; memset(&overlapped, 0, sizeof(overlapped)); - overlapped.hEvent = aud_track->to_exe.he_ov_aud_namedpipe; - ConnectNamedPipe(aud_track->to_exe.h_pipe, &overlapped); - while ((ret = WaitForSingleObject(aud_track->to_exe.he_ov_aud_namedpipe, 50)) != WAIT_OBJECT_0) { - if (pe->aud_parallel.abort || aud_track->th_faw2aac.wait_for(std::chrono::milliseconds(0)) == std::future_status::ready) { + overlapped.hEvent = aud_track->he_ov_aud_namedpipe; + ConnectNamedPipe(aud_track->h_aud_namedpipe, &overlapped); + while ((ret = WaitForSingleObject(overlapped.hEvent, 50)) != WAIT_OBJECT_0) { + if (pe->aud_parallel.abort) { return 1; } } - //転送を実行 - while (!pe->aud_parallel.abort) { - bool faw2aac_finished = th_faw2aac_finished(); - bool readFinished = true; - DWORD sizeRead = 0; - char buffer[PIPE_BUF]; - if (ReadFile(aud_track->from_auo.h_pipe, buffer, sizeof(buffer), &sizeRead, NULL) == 0) { - auto err = GetLastError(); - if (err == ERROR_BROKEN_PIPE || err == ERROR_HANDLE_EOF || err == ERROR_OPERATION_ABORTED) { - faw2aac_finished = true; - } else if (err != ERROR_IO_PENDING) { - return 1; - } - } - if (sizeRead > 0) { - readFinished = false; - DWORD sizeWritten = 0; + } + //転送を実行 + while (!pe->aud_parallel.abort) { + if (WaitForSingleObject(aud_track->heOutputDataPushed, 50) == WAIT_OBJECT_0) { + if (aud_track->outBuffer.size() > 0) { + OVERLAPPED overlapped; memset(&overlapped, 0, sizeof(overlapped)); - overlapped.hEvent = aud_track->to_exe.he_ov_aud_namedpipe; + overlapped.hEvent = aud_track->he_ov_aud_namedpipe; + 
DWORD sizeWritten = 0; //非同期処理中は0を返すことがある - WriteFile(aud_track->to_exe.h_pipe, buffer, sizeRead, &sizeWritten, &overlapped); - while (WaitForSingleObject(aud_track->to_exe.he_ov_aud_namedpipe, 50) != WAIT_OBJECT_0) { + WriteFile(aud_track->h_aud_namedpipe, aud_track->outBuffer.data(), aud_track->outBuffer.size(), &sizeWritten, &overlapped); + while (WaitForSingleObject(overlapped.hEvent, 1000) != WAIT_OBJECT_0) { if (pe->aud_parallel.abort) { - return 1; + return 0; } } - } - //faw2aacが終了し、かつ転送するものがなくなったら終了 - if (faw2aac_finished && readFinished) { - break; + aud_track->outBuffer.clear(); } } - return pe->aud_parallel.abort ? 1 : 0; - }; - //スレッドを起動 - for (int i_aud = 0; !ret && i_aud < pe->aud_count; i_aud++) { - threadStarted[i_aud] = CreateEvent(NULL, FALSE, FALSE, NULL); - aud_dat[i_aud].th_faw2aac = std::async(run_faw2aac, i_aud); - aud_dat[i_aud].th_transfer = std::async(run_transfer_pipe, i_aud); - aud_dat[i_aud].th_transfer_started = true; + SetEvent(aud_track->heOutputDataWritten); + if (aud_track->thAbort && aud_track->outBuffer.size() == 0) { + break; + } + } + return pe->aud_parallel.abort ? 1 : 0; + }; + for (int i_aud = 0; !ret && i_aud < pe->aud_count; i_aud++) { + aud_dat[i_aud].thOut = std::async(run_transfer_pipe, i_aud); + } + } else { + for (int i_aud = 0; !ret && i_aud < pe->aud_count; i_aud++) { + const CONF_AUDIO_BASE *cnf_aud = (conf->aud.use_internal) ? &conf->aud.in : &conf->aud.ext; + const AUDIO_SETTINGS *aud_stg = (conf->aud.use_internal) ? 
&sys_dat->exstg->s_aud_int[cnf_aud->encoder] : &sys_dat->exstg->s_aud_ext[cnf_aud->encoder]; + strcpy_s(pe->append.aud[i_aud], _countof(pe->append.aud[i_aud]), aud_stg->aud_appendix); //pe一時パラメータにコピーしておく + if (i_aud) + insert_before_ext(pe->append.aud[i_aud], _countof(pe->append.aud[i_aud]), i_aud); + get_aud_filename(aud_dat[i_aud].audfile, _countof(aud_dat[i_aud].audfile), pe, i_aud); + if (fopen_s(&aud_dat[i_aud].fp_out, aud_dat[i_aud].audfile, "wbS")) { + ret |= AUO_RESULT_ABORT; + break; } + } + } + + if (!ret) { + const int elemsize = sizeof(short); + const int wav_sample_size = oip->audio_ch * elemsize; + + RGYWAVHeader wavheader = { 0 }; + wavheader.file_size = 0; + wavheader.subchunk_size = 16; + wavheader.audio_format = 1; + wavheader.number_of_channels = oip->audio_ch; + wavheader.sample_rate = oip->audio_rate; + wavheader.byte_rate = oip->audio_rate * oip->audio_ch * elemsize; + wavheader.block_align = wav_sample_size; + wavheader.bits_per_sample = elemsize * 8; + wavheader.data_size = oip->audio_n * wavheader.number_of_channels * elemsize; + + RGYFAWDecoder fawdec; + fawdec.init(&wavheader); + + RGYFAWDecoderOutput output; + int samples_read = 0; + int samples_get = bufsize; + + //wav出力ループ + while (oip->audio_n - samples_read > 0 && samples_get) { + //中断 + if ((pe->aud_parallel.he_aud_start) ? 
pe->aud_parallel.abort : oip->func_is_abort()) { + ret |= AUO_RESULT_ABORT; + break; + } + uint8_t *audio_dat = (uint8_t *)get_audio_data(oip, pe, samples_read, std::min(oip->audio_n - samples_read, bufsize), &samples_get); + samples_read += samples_get; + set_log_progress(samples_read / (double)oip->audio_n); + + fawdec.decode(output, audio_dat, samples_get * wav_sample_size); for (int i_aud = 0; i_aud < pe->aud_count; i_aud++) { - if (aud_dat[i_aud].th_transfer_started) { - if (aud_dat[i_aud].th_transfer.get() != AUO_RESULT_SUCCESS) { - ret = AUO_RESULT_ERROR; + if (output[i_aud].size() > 0) { + if (write_file(&aud_dat[i_aud], pe, output[i_aud].data(), output[i_aud].size()) == 0) { + ret |= AUO_RESULT_ABORT; + break; } } } - //あと片付け - for (int i_aud = 0; i_aud < pe->aud_count; i_aud++) { - if (aud_dat[i_aud].from_auo.h_pipe) { - DisconnectNamedPipe(aud_dat[i_aud].from_auo.h_pipe); - CloseHandle(aud_dat[i_aud].from_auo.h_pipe); - } - if (aud_dat[i_aud].to_exe.h_pipe) { - FlushFileBuffers(aud_dat[i_aud].to_exe.h_pipe); - //DisconnectNamedPipe(aud_dat[i_aud].to_exe.h_pipe); //これをするとなぜかInvalid argumentというメッセージが出てしまう - CloseHandle(aud_dat[i_aud].to_exe.h_pipe); + } + + fawdec.fin(output); + for (int i_aud = 0; i_aud < pe->aud_count; i_aud++) { + if (output[i_aud].size() > 0) { + if (write_file(&aud_dat[i_aud], pe, output[i_aud].data(), output[i_aud].size()) == 0) { + ret |= AUO_RESULT_ABORT; + break; } - if (aud_dat[i_aud].to_exe.he_ov_aud_namedpipe) { - CloseHandle(aud_dat[i_aud].to_exe.he_ov_aud_namedpipe); + } + aud_dat[i_aud].thAbort = true; + } + for (int i_aud = 0; i_aud < pe->aud_count; i_aud++) { + if (conf->aud.use_internal) { + if (aud_dat[i_aud].thOut.get() != AUO_RESULT_SUCCESS) { + ret = AUO_RESULT_ERROR; } } - } else { - for (int i_aud = 0; !ret && i_aud < pe->aud_count; i_aud++) { - ret = run_faw2aac(i_aud); - if (!ret) - ret |= audio_faw2aac_check(aud_dat[i_aud].audfile); + } + + //動画との音声との同時処理が終了 + release_audio_parallel_events(pe); + + //ファイルクローズ + 
for (int i_aud = 0; i_aud < pe->aud_count; i_aud++) { + if (aud_dat[i_aud].fp_out) { + fclose(aud_dat[i_aud].fp_out); + aud_dat[i_aud].fp_out = nullptr; } } + } else { + //これをやっておかないとプラグインがフリーズしてしまう + //動画との音声との同時処理が終了 release_audio_parallel_events(pe); - g_oip = NULL; - g_pe = NULL; } - if (hModule) - FreeLibrary(hModule); + for (int i_aud = 0; !ret && i_aud < pe->aud_count; i_aud++) { + if (aud_dat[i_aud].he_ov_aud_namedpipe) { + CloseHandle(aud_dat[i_aud].he_ov_aud_namedpipe); + } + if (aud_dat[i_aud].h_aud_namedpipe) { + FlushFileBuffers(aud_dat->h_aud_namedpipe); + //DisconnectNamedPipe(aud_dat->h_aud_namedpipe); //これをするとなぜかInvalid argumentというメッセージが出てしまう + CloseHandle(aud_dat[i_aud].h_aud_namedpipe); + } + if (aud_dat[i_aud].heOutputDataPushed) { + CloseHandle(aud_dat[i_aud].heOutputDataPushed); + } + if (aud_dat[i_aud].heOutputDataWritten) { + CloseHandle(aud_dat[i_aud].heOutputDataWritten); + } + } set_window_title(g_auo_mes.get(AUO_GUIEX_FULL_NAME), PROGRESSBAR_DISABLED); - return ret; } diff --git a/ffmpegOut/encode/auo_video.cpp b/ffmpegOut/encode/auo_video.cpp index e2b27b1..ef27970 100644 --- a/ffmpegOut/encode/auo_video.cpp +++ b/ffmpegOut/encode/auo_video.cpp @@ -64,6 +64,7 @@ #include "auo_video.h" #include "auo_audio_parallel.h" #include "cpu_info.h" +#include "rgy_thread_affinity.h" typedef struct video_output_thread_t { CONVERT_CF_DATA *pixel_data; @@ -566,7 +567,7 @@ static AUO_RESULT ffmpeg_out(CONF_GUIEX *conf, const OUTPUT_INFO *oip, PRM_ENC * write_args(enc_cmd); sprintf_s(enc_args, _countof(enc_args), "\"%s\" %s", enc_path, enc_cmd); - if ((jitter = (int *)calloc(oip->n + 1, sizeof(int))) == NULL) { + if (afs && (jitter = (int *)calloc(oip->n + 1, sizeof(int))) == NULL) { ret |= AUO_RESULT_ERROR; error_malloc_tc(); //Aviutl(afs)からのフレーム読み込み } else if (!setup_afsvideo(oip, sys_dat, conf, pe)) { @@ -590,6 +591,12 @@ static AUO_RESULT ffmpeg_out(CONF_GUIEX *conf, const OUTPUT_INFO *oip, PRM_ENC * GetProcessTime(pe->h_p_aviutl, &time_aviutl); 
pe->h_p_videnc = pi_enc.hProcess; + //Aviutlのpower throttlingを設定 + const auto thread_pthrottling_mode = (RGYThreadPowerThrottlingMode)sys_dat->exstg->s_local.thread_pthrottling_mode; + if (thread_pthrottling_mode != RGYThreadPowerThrottlingMode::Unset) { + SetThreadPowerThrottolingModeForModule(GetCurrentProcessId(), nullptr, thread_pthrottling_mode); + } + //x264が待機に入るまでこちらも待機 while (WaitForInputIdle(pi_enc.hProcess, LOG_UPDATE_INTERVAL) == WAIT_TIMEOUT) log_process_events(); @@ -619,6 +626,12 @@ static AUO_RESULT ffmpeg_out(CONF_GUIEX *conf, const OUTPUT_INFO *oip, PRM_ENC * //x264優先度 check_enc_priority(pe->h_p_aviutl, pi_enc.hProcess, set_priority); + if (!(i & 255)) { + if (thread_pthrottling_mode != RGYThreadPowerThrottlingMode::Unset) { + SetThreadPowerThrottolingModeForModule(pi_enc.dwProcessId, nullptr, thread_pthrottling_mode); + } + } + //音声同時処理 if (!conf->aud.use_internal) { //音声同時処理 @@ -682,7 +695,7 @@ static AUO_RESULT ffmpeg_out(CONF_GUIEX *conf, const OUTPUT_INFO *oip, PRM_ENC * //標準入力への書き込みを開始 SetEvent(thread_data.he_out_start); } else { - *(next_jitter - 1) = DROP_FRAME_FLAG; + if (jitter) *(next_jitter - 1) = DROP_FRAME_FLAG; pe->drop_count++; //次のフレームの変換を許可 SetEvent(thread_data.he_out_fin); @@ -722,6 +735,10 @@ static AUO_RESULT ffmpeg_out(CONF_GUIEX *conf, const OUTPUT_INFO *oip, PRM_ENC * DWORD tm_vid_enc_fin = timeGetTime(); + if (thread_pthrottling_mode != RGYThreadPowerThrottlingMode::Unset) { + SetThreadPowerThrottolingModeForModule(GetCurrentProcessId(), nullptr, RGYThreadPowerThrottlingMode::Unset); + } + //最後にメッセージを取得 while (ReadLogEnc(&pipes, pe->drop_count, i) > 0); @@ -760,7 +777,7 @@ static AUO_RESULT video_output_inside(CONF_GUIEX *conf, const OUTPUT_INFO *oip, for (; !ret && pe->current_x264_pass <= pe->total_x264_pass; pe->current_x264_pass++) { if (pe->current_x264_pass > 1) - open_log_window(oip->savefile, sys_dat, pe->current_x264_pass, pe->total_x264_pass); + open_log_window(oip, sys_dat, pe->current_x264_pass, 
pe->total_x264_pass); set_window_title_ffmpegout(pe); ret |= ffmpeg_out(conf, oip, pe, sys_dat); set_window_title(AUO_FULL_NAME_W, PROGRESSBAR_DISABLED); diff --git a/ffmpegOut/ffmpegOut.cpp b/ffmpegOut/ffmpegOut.cpp index fca91c0..63bddc9 100644 --- a/ffmpegOut/ffmpegOut.cpp +++ b/ffmpegOut/ffmpegOut.cpp @@ -194,7 +194,7 @@ BOOL func_output( OUTPUT_INFO *oip ) make_outfilename_and_set_to_oipsavefile(oip, outfilename, _countof(outfilename), &conf_out); //ログウィンドウを開く - open_log_window(oip->savefile, &g_sys_dat, 1, (conf_out.enc.use_auto_npass) ? conf_out.enc.auto_npass : 1); + open_log_window(oip, &g_sys_dat, 1, (conf_out.enc.use_auto_npass) ? conf_out.enc.auto_npass : 1); if (conf_not_initialized) { warning_conf_not_initialized(default_stg_file); } diff --git a/ffmpegOut/ffmpegOut.en.ini b/ffmpegOut/ffmpegOut.en.ini index 3974e7c..d08152c 100644 --- a/ffmpegOut/ffmpegOut.en.ini +++ b/ffmpegOut/ffmpegOut.en.ini @@ -80,12 +80,10 @@ mode_4_delay=2624 [SETTING_AAC_FAW] dispname="FakeAacWav(FAW)" -filename="fawcl.exe" +filename="" aud_appendix="_audio.aac" pipe_input=0 -base_cmd=" "%{wavpath}" "%{audpath}" " -help_cmd="-h" -ver_cmd="-h" +base_cmd="" [MODE_AAC_FAW] diff --git a/ffmpegOut/ffmpegOut.en.lng b/ffmpegOut/ffmpegOut.en.lng index 4448707..59f504e 100644 --- a/ffmpegOut/ffmpegOut.en.lng +++ b/ffmpegOut/ffmpegOut.en.lng @@ -87,6 +87,9 @@ AUO_ERR_INVALID_INI_FILE=Settings in plugin (auo) and ini file does not match. AUO_ERR_UNSUPPORTED_AUDIO_FORMAT_BY_MUXER1=Audio encoder %s is not supported in %s format. AUO_ERR_UNSUPPORTED_AUDIO_FORMAT_BY_MUXER2=Please select other audio encoder such as %s, and try again. AUO_ERR_UNSUPPORTED_AUDIO_FORMAT_BY_MUXER3=Please select other audio encoder and try again. +AUO_ERR_FAILED_TO_RUN_AUDIO_ENCODER1=Audio encoder %s cannot be run properly. +AUO_ERR_FAILED_TO_RUN_AUDIO_ENCODER2=Default audio encoder %s will be used. +AUO_ERR_FAILED_TO_RUN_AUDIO_ENCODER3=Audio encoder might not be installed properly. 
AUO_ERR_MP4_MUXER_ERROR=Using mp4box as muxer is not supported in ffmpeg 1.xx or later. Please update ffmpegOut.ini to the latest version. AUO_ERR_AFS_AUTO_DISABLE1=Video input initialization has failed and retried AUO_ERR_AFS_AUTO_DISABLE2=after changing auto field shift (afs) disabled. @@ -655,6 +658,7 @@ AuofosCBAutoAFSDisable=When failed initialization when Auto field shift (afs) on AuofosCBAutoDelChap=Delete chapter file after muxing AuofostabPageGeneral=Encode AuofosLBDefaultAudioEncoder=Default audio encoder +AuofosLBPowerThrottling=Power Throttling AuofosCBAutoRefLimitByLevel=Limit ref by H.264 level AuofosCBChapConvertToUTF8=Convert to UTF-8 when muxing chapter AuofosCBKeepQPFile=Save keyframe list when keyframe detection is enabled diff --git a/ffmpegOut/ffmpegOut.ini b/ffmpegOut/ffmpegOut.ini index 258cd56..049a354 100644 --- a/ffmpegOut/ffmpegOut.ini +++ b/ffmpegOut/ffmpegOut.ini @@ -107,12 +107,10 @@ mode_4_delay=2624 [SETTING_AAC_FAW] dispname="FakeAacWav(FAW)" -filename="fawcl.exe" +filename="" aud_appendix="_audio.aac" pipe_input=0 -base_cmd=" "%{wavpath}" "%{audpath}" " -help_cmd="-h" -ver_cmd="-h" +base_cmd="" [MODE_AAC_FAW] diff --git a/ffmpegOut/ffmpegOut.ja.lng b/ffmpegOut/ffmpegOut.ja.lng index 3545c2c..578c4b4 100644 --- a/ffmpegOut/ffmpegOut.ja.lng +++ b/ffmpegOut/ffmpegOut.ja.lng @@ -645,6 +645,7 @@ AuofosCBAutoAFSDisable=自動フィールドシフト(afs)オンで初期化に AuofosCBAutoDelChap=mux正常終了後、チャプターファイルを自動的に削除する AuofostabPageGeneral=エンコード AuofosLBDefaultAudioEncoder=デフォルトの音声エンコーダ +AuofosLBPowerThrottling=電力スロットリング AuofosCBAutoRefLimitByLevel=参照距離をH.264のレベルに応じて自動的に制限する AuofosCBChapConvertToUTF8=チャプターmux時、UTF-8に変換する AuofosCBKeepQPFile=キーフレーム検出時、キーフレームリストを保存する diff --git a/ffmpegOut/ffmpegOut.vcxproj b/ffmpegOut/ffmpegOut.vcxproj index ef36f1a..d8c43eb 100644 --- a/ffmpegOut/ffmpegOut.vcxproj +++ b/ffmpegOut/ffmpegOut.vcxproj @@ -57,7 +57,7 @@ Disabled - .\;.\encode;.\prm;.\frm;..\tinyxml2;%(AdditionalIncludeDirectories) + 
.\;.\encode;.\prm;.\frm;..\tinyxml2;..\auoCommon;%(AdditionalIncludeDirectories) WIN32;_DEBUG;%(PreprocessorDefinitions) MultiThreadedDebugDLL Level4 @@ -94,7 +94,7 @@ copy /y ffmpegOut.zh.ini "$(OutDir)" Speed false false - .\;.\encode;.\prm;.\frm;..\tinyxml2;%(AdditionalIncludeDirectories) + .\;.\encode;.\prm;.\frm;..\tinyxml2;..\auoCommon;%(AdditionalIncludeDirectories) WIN32;NDEBUG;%(PreprocessorDefinitions) true MultiThreadedDLL @@ -405,6 +405,9 @@ copy /y ffmpegOut.zh.ini "$(OutDir)" + + {c7c2269e-87f0-45d4-a5f3-2c6c158d65c5} + {a34ca86d-6c2b-482f-984e-2687459e65e9} diff --git a/ffmpegOut/ffmpegOut.zh.ini b/ffmpegOut/ffmpegOut.zh.ini index 90acbc8..fc79c9d 100644 --- a/ffmpegOut/ffmpegOut.zh.ini +++ b/ffmpegOut/ffmpegOut.zh.ini @@ -80,12 +80,10 @@ mode_4_delay=2624 [SETTING_AAC_FAW] dispname="FakeAacWav(FAW)" -filename="fawcl.exe" +filename="" aud_appendix="_audio.aac" pipe_input=0 -base_cmd=" "%{wavpath}" "%{audpath}" " -help_cmd="-h" -ver_cmd="-h" +base_cmd="" [MODE_AAC_FAW] diff --git a/ffmpegOut/ffmpegOut.zh.lng b/ffmpegOut/ffmpegOut.zh.lng index 2465846..280c388 100644 --- a/ffmpegOut/ffmpegOut.zh.lng +++ b/ffmpegOut/ffmpegOut.zh.lng @@ -86,6 +86,9 @@ AUO_ERR_INVALID_INI_FILE=插件(auo)与ini文件的音频编码器描述不匹 AUO_ERR_UNSUPPORTED_AUDIO_FORMAT_BY_MUXER1=音频编码器 %s 不支持以 %s 格式导出。 AUO_ERR_UNSUPPORTED_AUDIO_FORMAT_BY_MUXER2=请选择其他音频编码器如 %s 进行导出。 AUO_ERR_UNSUPPORTED_AUDIO_FORMAT_BY_MUXER3=请另选音频编码器导出。 +AUO_ERR_FAILED_TO_RUN_AUDIO_ENCODER1=无法执行音频编码器 %s。 +AUO_ERR_FAILED_TO_RUN_AUDIO_ENCODER2=使用默认音频编码器 %s。 +AUO_ERR_FAILED_TO_RUN_AUDIO_ENCODER3=音频编码器可能未正确安装。 AUO_ERR_MP4_MUXER_ERROR=ffmpegOut 1.xx版本后,不再支持mp4box。请务必将ffmpegOut.ini更新至最新版本。 AUO_ERR_AFS_AUTO_DISABLE1=从Aviutl中导入的视频初始化失败, AUO_ERR_AFS_AUTO_DISABLE2=请关闭自动场移位(afs)后,重新初始化。 @@ -653,6 +656,7 @@ AuofosCBAutoAFSDisable=若启用自动场移位(afs)后初始化失败,请关 AuofosCBAutoDelChap=mux正常完成后将自动删除章节文件 AuofostabPageGeneral=编码 AuofosLBDefaultAudioEncoder=默认音频编码器 +AuofosLBPowerThrottling=功率节流 AuofosCBAutoRefLimitByLevel=根据H.264级别自动限制参考距离 
AuofosCBChapConvertToUTF8=章节mux时,将其转换为UTF-8 AuofosCBKeepQPFile=检测到关键帧时保存关键帧列表 diff --git a/ffmpegOut/frm/auo_mes.cpp b/ffmpegOut/frm/auo_mes.cpp index d50c77c..1fb0398 100644 --- a/ffmpegOut/frm/auo_mes.cpp +++ b/ffmpegOut/frm/auo_mes.cpp @@ -686,6 +686,7 @@ static const char * AUO_MES_ID_NAME_STR[] = { "AuofosCBAutoDelChap", "AuofostabPageGeneral", "AuofosLBDefaultAudioEncoder", +"AuofosLBPowerThrottling", "AuofosCBAutoRefLimitByLevel", "AuofosCBChapConvertToUTF8", "AuofosCBKeepQPFile", diff --git a/ffmpegOut/frm/auo_mes.h b/ffmpegOut/frm/auo_mes.h index e2c935c..d4b9630 100644 --- a/ffmpegOut/frm/auo_mes.h +++ b/ffmpegOut/frm/auo_mes.h @@ -792,6 +792,7 @@ enum AuoMes { AuofosCBAutoDelChap, AuofostabPageGeneral, AuofosLBDefaultAudioEncoder, + AuofosLBPowerThrottling, AuofosCBAutoRefLimitByLevel, AuofosCBChapConvertToUTF8, AuofosCBKeepQPFile, diff --git a/ffmpegOut/frm/frmConfig.cpp b/ffmpegOut/frm/frmConfig.cpp index 7d0ba56..6d20b30 100644 --- a/ffmpegOut/frm/frmConfig.cpp +++ b/ffmpegOut/frm/frmConfig.cpp @@ -249,8 +249,7 @@ System::Boolean frmConfig::CheckLocalStg() { String^ AudioEncoderPath = LocalStg.audEncPath[fcgCXAudioEncoder->SelectedIndex]; if (AudioEncoderPath->Length > 0 && !File::Exists(AudioEncoderPath) - && (fcgCXAudioEncoder->SelectedIndex != sys_dat->exstg->get_faw_index(fcgCBAudioUseInternal->Checked) - || !check_if_faw2aac_exists()) ) { + && (fcgCXAudioEncoder->SelectedIndex != sys_dat->exstg->get_faw_index(fcgCBAudioUseInternal->Checked)) ) { //音声実行ファイルがない かつ //選択された音声がfawでない または fawであってもfaw2aacがない if (error) err += L"\n\n"; @@ -266,13 +265,6 @@ System::Boolean frmConfig::CheckLocalStg() { err += LOAD_CLI_STRING(AUO_CONFIG_FAW_STG_NOT_FOUND_IN_INI1) + L"\n" + LOAD_CLI_STRING(AUO_CONFIG_FAW_STG_NOT_FOUND_IN_INI2) + L"\n" + LOAD_CLI_STRING(AUO_CONFIG_FAW_STG_NOT_FOUND_IN_INI3); - } else if (!File::Exists(LocalStg.audEncPath[sys_dat->exstg->get_faw_index(fcgCBAudioUseInternal->Checked)]) - && !check_if_faw2aac_exists()) { - //fawの実行ファイルが存在しない かつ 
faw2aacも存在しない - if (error) err += L"\n\n"; - error = true; - err += LOAD_CLI_STRING(AUO_CONFIG_FAW_PATH_UNSET1) + L"\n" - + LOAD_CLI_STRING(AUO_CONFIG_FAW_PATH_UNSET2); } } if (error) diff --git a/ffmpegOut/frm/frmOtherSettings.h b/ffmpegOut/frm/frmOtherSettings.h index 8de6f61..234a75a 100644 --- a/ffmpegOut/frm/frmOtherSettings.h +++ b/ffmpegOut/frm/frmOtherSettings.h @@ -30,6 +30,7 @@ #include "auo_version.h" #include "auo_settings.h" #include "auo_mes.h" +#include "rgy_thread_affinity.h" using namespace System; using namespace System::ComponentModel; @@ -120,6 +121,8 @@ namespace AUO_NAME_R { private: System::Windows::Forms::Label^ fosLBStgDir; private: System::Windows::Forms::Button^ fosBTStgDir; private: System::Windows::Forms::TextBox^ fosTXStgDir; + private: System::Windows::Forms::ComboBox^ fosCXPowerThrottling; + private: System::Windows::Forms::Label^ fosLBPowerThrottling; @@ -191,9 +194,14 @@ namespace AUO_NAME_R { this->fosfontDialog = (gcnew System::Windows::Forms::FontDialog()); this->fosTabControl = (gcnew System::Windows::Forms::TabControl()); this->fostabPageGeneral = (gcnew System::Windows::Forms::TabPage()); + this->fosCXPowerThrottling = (gcnew System::Windows::Forms::ComboBox()); + this->fosLBPowerThrottling = (gcnew System::Windows::Forms::Label()); this->fosCXDefaultAudioEncoder = (gcnew System::Windows::Forms::ComboBox()); this->fosLBDefaultAudioEncoder = (gcnew System::Windows::Forms::Label()); this->fostabPageGUI = (gcnew System::Windows::Forms::TabPage()); + this->fosLBStgDir = (gcnew System::Windows::Forms::Label()); + this->fosBTStgDir = (gcnew System::Windows::Forms::Button()); + this->fosTXStgDir = (gcnew System::Windows::Forms::TextBox()); this->fosCBOutputMoreLog = (gcnew System::Windows::Forms::CheckBox()); this->fosCBRunBatMinimized = (gcnew System::Windows::Forms::CheckBox()); this->fosCBGetRelativePath = (gcnew System::Windows::Forms::CheckBox()); @@ -205,9 +213,6 @@ namespace AUO_NAME_R { this->fosLBDisableVisualStyles = 
(gcnew System::Windows::Forms::Label()); this->fosCBLogStartMinimized = (gcnew System::Windows::Forms::CheckBox()); this->fosPNHideTabPage = (gcnew System::Windows::Forms::Panel()); - this->fosLBStgDir = (gcnew System::Windows::Forms::Label()); - this->fosBTStgDir = (gcnew System::Windows::Forms::Button()); - this->fosTXStgDir = (gcnew System::Windows::Forms::TextBox()); this->fosTabControl->SuspendLayout(); this->fostabPageGeneral->SuspendLayout(); this->fostabPageGUI->SuspendLayout(); @@ -259,6 +264,8 @@ namespace AUO_NAME_R { // // fostabPageGeneral // + this->fostabPageGeneral->Controls->Add(this->fosCXPowerThrottling); + this->fostabPageGeneral->Controls->Add(this->fosLBPowerThrottling); this->fostabPageGeneral->Controls->Add(this->fosCXDefaultAudioEncoder); this->fostabPageGeneral->Controls->Add(this->fosLBDefaultAudioEncoder); this->fostabPageGeneral->Location = System::Drawing::Point(4, 24); @@ -269,6 +276,24 @@ namespace AUO_NAME_R { this->fostabPageGeneral->Text = L"一般設定"; this->fostabPageGeneral->UseVisualStyleBackColor = true; // + // fosCXPowerThrottling + // + this->fosCXPowerThrottling->DropDownStyle = System::Windows::Forms::ComboBoxStyle::DropDownList; + this->fosCXPowerThrottling->FormattingEnabled = true; + this->fosCXPowerThrottling->Location = System::Drawing::Point(41, 100); + this->fosCXPowerThrottling->Name = L"fosCXPowerThrottling"; + this->fosCXPowerThrottling->Size = System::Drawing::Size(190, 23); + this->fosCXPowerThrottling->TabIndex = 27; + // + // fosLBPowerThrottling + // + this->fosLBPowerThrottling->AutoSize = true; + this->fosLBPowerThrottling->Location = System::Drawing::Point(14, 77); + this->fosLBPowerThrottling->Name = L"fosLBPowerThrottling"; + this->fosLBPowerThrottling->Size = System::Drawing::Size(89, 15); + this->fosLBPowerThrottling->TabIndex = 26; + this->fosLBPowerThrottling->Text = L"電力スロットリング"; + // // fosCXDefaultAudioEncoder // this->fosCXDefaultAudioEncoder->DropDownStyle = 
System::Windows::Forms::ComboBoxStyle::DropDownList; @@ -309,6 +334,32 @@ namespace AUO_NAME_R { this->fostabPageGUI->Text = L"ログ・設定画面"; this->fostabPageGUI->UseVisualStyleBackColor = true; // + // fosLBStgDir + // + this->fosLBStgDir->AutoSize = true; + this->fosLBStgDir->Location = System::Drawing::Point(7, 7); + this->fosLBStgDir->Name = L"fosLBStgDir"; + this->fosLBStgDir->Size = System::Drawing::Size(123, 15); + this->fosLBStgDir->TabIndex = 34; + this->fosLBStgDir->Text = L"設定ファイルの保存場所"; + // + // fosBTStgDir + // + this->fosBTStgDir->Location = System::Drawing::Point(334, 27); + this->fosBTStgDir->Name = L"fosBTStgDir"; + this->fosBTStgDir->Size = System::Drawing::Size(35, 23); + this->fosBTStgDir->TabIndex = 35; + this->fosBTStgDir->Text = L"..."; + this->fosBTStgDir->UseVisualStyleBackColor = true; + this->fosBTStgDir->Click += gcnew System::EventHandler(this, &frmOtherSettings::fosBTStgDir_Click); + // + // fosTXStgDir + // + this->fosTXStgDir->Location = System::Drawing::Point(34, 27); + this->fosTXStgDir->Name = L"fosTXStgDir"; + this->fosTXStgDir->Size = System::Drawing::Size(294, 23); + this->fosTXStgDir->TabIndex = 33; + // // fosCBOutputMoreLog // this->fosCBOutputMoreLog->AutoSize = true; @@ -419,32 +470,6 @@ namespace AUO_NAME_R { this->fosPNHideTabPage->Size = System::Drawing::Size(392, 415); this->fosPNHideTabPage->TabIndex = 18; // - // fosLBStgDir - // - this->fosLBStgDir->AutoSize = true; - this->fosLBStgDir->Location = System::Drawing::Point(7, 7); - this->fosLBStgDir->Name = L"fosLBStgDir"; - this->fosLBStgDir->Size = System::Drawing::Size(123, 15); - this->fosLBStgDir->TabIndex = 34; - this->fosLBStgDir->Text = L"設定ファイルの保存場所"; - // - // fosBTStgDir - // - this->fosBTStgDir->Location = System::Drawing::Point(334, 27); - this->fosBTStgDir->Name = L"fosBTStgDir"; - this->fosBTStgDir->Size = System::Drawing::Size(35, 23); - this->fosBTStgDir->TabIndex = 35; - this->fosBTStgDir->Text = L"..."; - this->fosBTStgDir->UseVisualStyleBackColor = 
true; - this->fosBTStgDir->Click += gcnew System::EventHandler(this, &frmOtherSettings::fosBTStgDir_Click); - // - // fosTXStgDir - // - this->fosTXStgDir->Location = System::Drawing::Point(34, 27); - this->fosTXStgDir->Name = L"fosTXStgDir"; - this->fosTXStgDir->Size = System::Drawing::Size(294, 23); - this->fosTXStgDir->TabIndex = 33; - // // frmOtherSettings // this->AcceptButton = this->fosCBOK; @@ -486,6 +511,7 @@ namespace AUO_NAME_R { //LOAD_CLI_TEXT(fosCBAutoDelChap); LOAD_CLI_TEXT(fostabPageGeneral); LOAD_CLI_TEXT(fosLBDefaultAudioEncoder); + LOAD_CLI_TEXT(fosLBPowerThrottling); //LOAD_CLI_TEXT(fosCBAutoRefLimitByLevel); //LOAD_CLI_TEXT(fosCBChapConvertToUTF8); //LOAD_CLI_TEXT(fosCBKeepQPFile); @@ -544,6 +570,7 @@ namespace AUO_NAME_R { fos_ex_stg->s_local.get_relative_path = fosCBGetRelativePath->Checked; fos_ex_stg->s_local.run_bat_minimized = fosCBRunBatMinimized->Checked; fos_ex_stg->s_local.default_audio_encoder_ext= fosCXDefaultAudioEncoder->SelectedIndex; + fos_ex_stg->s_local.thread_pthrottling_mode = (int)RGY_THREAD_POWER_THROTTOLING_MODE_STR[fosCXPowerThrottling->SelectedIndex].first; fos_ex_stg->save_local(); fos_ex_stg->save_log_win(); this->Close(); @@ -555,6 +582,12 @@ namespace AUO_NAME_R { for (int i = 0; i < fos_ex_stg->s_aud_ext_count; i++) fosCXDefaultAudioEncoder->Items->Add(LOAD_CLI_STRING(AUO_OTHER_SETTINGS_AUDIO_ENCODER_EXTERNAL) + L": " + String(fos_ex_stg->s_aud_ext[i].dispname).ToString()); fosCXDefaultAudioEncoder->ResumeLayout(); + + fosCXPowerThrottling->SuspendLayout(); + fosCXPowerThrottling->Items->Clear(); + for (int i = 0; i < RGY_THREAD_POWER_THROTTOLING_MODE_STR.size(); i++) + fosCXPowerThrottling->Items->Add(String(RGY_THREAD_POWER_THROTTOLING_MODE_STR[i].second).ToString()); + fosCXPowerThrottling->ResumeLayout(); } private: System::Void frmOtherSettings_Load(System::Object^ sender, System::EventArgs^ e) { @@ -574,6 +607,12 @@ namespace AUO_NAME_R { fosCBGetRelativePath->Checked = fos_ex_stg->s_local.get_relative_path 
!= 0; fosCBRunBatMinimized->Checked = fos_ex_stg->s_local.run_bat_minimized != 0; fosCXDefaultAudioEncoder->SelectedIndex = clamp(fos_ex_stg->s_local.default_audio_encoder_ext, 0, fosCXDefaultAudioEncoder->Items->Count); + for (int i = 0; i < RGY_THREAD_POWER_THROTTOLING_MODE_STR.size(); i++) { + if ((int)RGY_THREAD_POWER_THROTTOLING_MODE_STR[i].first == fos_ex_stg->s_local.thread_pthrottling_mode) { + fosCXPowerThrottling->SelectedIndex = i; + break; + } + } if (str_has_char(fos_ex_stg->s_local.conf_font.name)) SetFontFamilyToForm(this, gcnew FontFamily(String(fos_ex_stg->s_local.conf_font.name).ToString()), this->Font->FontFamily); } diff --git a/ffmpegOut/prm/auo_settings.cpp b/ffmpegOut/prm/auo_settings.cpp index 33d509e..e3642e2 100644 --- a/ffmpegOut/prm/auo_settings.cpp +++ b/ffmpegOut/prm/auo_settings.cpp @@ -359,7 +359,13 @@ void guiEx_settings::set_last_out_stg(const char *stg) { } BOOL guiEx_settings::is_faw(const AUDIO_SETTINGS *aud_stg) const { - return stristr((aud_stg->is_internal) ? aud_stg->codec : aud_stg->filename, "faw") ? TRUE : FALSE; + if (stristr(aud_stg->codec, "faw")) { + return TRUE; + } + if (!aud_stg->is_internal) { + return wcsstr(aud_stg->dispname, L"FAW") ? 
TRUE : FALSE; + } + return FALSE; } int guiEx_settings::get_faw_index(BOOL internal) const { @@ -624,6 +630,7 @@ void guiEx_settings::load_local() { s_local.default_audio_encoder_ext = GetPrivateProfileInt( ini_section_main, "default_audio_encoder", DEFAULT_AUDIO_ENCODER_EXT, conf_fileName); s_local.default_audio_encoder_in = GetPrivateProfileInt( ini_section_main, "default_audio_encoder_in", DEFAULT_AUDIO_ENCODER_IN, conf_fileName); s_local.default_audenc_use_in = GetPrivateProfileInt( ini_section_main, "default_audenc_use_in", DEFAULT_AUDIO_ENCODER_USE_IN, conf_fileName); + s_local.thread_pthrottling_mode = GetPrivateProfileInt( ini_section_main, "thread_pthrottling_mode", DEFAULT_THREAD_PTHROTTLING, conf_fileName); //s_local.amp_retry_limit = GetPrivateProfileInt( INI_SECTION_AMP, "amp_retry_limit", DEFAULT_AMP_RETRY_LIMIT, conf_fileName); //s_local.amp_bitrate_margin_multi = GetPrivateProfileDouble(INI_SECTION_AMP, "amp_bitrate_margin_multi", DEFAULT_AMP_MARGIN, conf_fileName); @@ -715,6 +722,7 @@ void guiEx_settings::save_local() { WritePrivateProfileIntWithDefault( ini_section_main, "default_audio_encoder", s_local.default_audio_encoder_ext, DEFAULT_AUDIO_ENCODER_EXT, conf_fileName); WritePrivateProfileIntWithDefault( ini_section_main, "default_audio_encoder_in", s_local.default_audio_encoder_in, DEFAULT_AUDIO_ENCODER_IN, conf_fileName); WritePrivateProfileIntWithDefault( ini_section_main, "default_audenc_use_in", s_local.default_audenc_use_in, DEFAULT_AUDIO_ENCODER_USE_IN, conf_fileName); + WritePrivateProfileIntWithDefault( ini_section_main, "thread_pthrottling_mode", s_local.thread_pthrottling_mode, DEFAULT_THREAD_PTHROTTLING, conf_fileName); //WritePrivateProfileIntWithDefault( INI_SECTION_AMP, "amp_retry_limit", s_local.amp_retry_limit, DEFAULT_AMP_RETRY_LIMIT, conf_fileName); //WritePrivateProfileDoubleWithDefault(INI_SECTION_AMP, "amp_bitrate_margin_multi", s_local.amp_bitrate_margin_multi, DEFAULT_AMP_MARGIN, conf_fileName); diff --git 
a/ffmpegOut/prm/auo_settings.h b/ffmpegOut/prm/auo_settings.h index c104b72..83dcb59 100644 --- a/ffmpegOut/prm/auo_settings.h +++ b/ffmpegOut/prm/auo_settings.h @@ -53,6 +53,7 @@ static const BOOL DEFAULT_CHAP_NERO_TO_UTF8 = 0; static const BOOL DEFAULT_AUDIO_ENCODER_EXT = 0; static const BOOL DEFAULT_AUDIO_ENCODER_IN = 1; static const BOOL DEFAULT_AUDIO_ENCODER_USE_IN = 1; +static const int DEFAULT_THREAD_PTHROTTLING = 0; static const int DEFAULT_AMP_RETRY_LIMIT = 2; static const double DEFAULT_AMP_MARGIN = 0.05; static const double DEFAULT_AMP_REENC_AUDIO_MULTI = 0.15; @@ -346,6 +347,7 @@ typedef struct LOCAL_SETTINGS { BOOL default_audenc_use_in; //デフォルトの音声エンコーダとして、内蔵エンコーダを選択する int default_audio_encoder_ext; //デフォルトの外部音声エンコーダ int default_audio_encoder_in; //デフォルトの内蔵音声エンコーダ + int thread_pthrottling_mode; //スレッドの電力スロットリングモード //int amp_retry_limit; //自動マルチパス試行回数制限 //double amp_bitrate_margin_multi; //自動マルチパスで、上限ファイルサイズからビットレートを再計算するときの倍率 //double amp_reenc_audio_multi; //自動マルチパスで、音声側を再エンコしてビットレート調整をする上限倍率 diff --git a/ffmpegOut_readme.txt b/ffmpegOut_readme.txt index 1c6c378..8934adf 100644 --- a/ffmpegOut_readme.txt +++ b/ffmpegOut_readme.txt @@ -216,6 +216,13 @@ ini VC++ 2022 Community yǂłz +2023.10.25 (1.09) +- fawB +- d̓XbgO𖳌AAviutl̗DxႭĂERÂ݂Ŏsɂ悤ύXB +- muxt@CȂAt@CTCY铙̃G[bZ[WŁAt@C\悤ɁB +- avx512yuy2/yc48yv12ϊŁA[32x2pixelقǗΐF̐oꍇCB +- mۂplaneƂɍs悤ɂāAsG[oÂ炢悤ɁB + 2023.05.09 (1.08) - |XVB