diff --git a/mypyc/build.py b/mypyc/build.py index 8505a2d95701..02f427c83426 100644 --- a/mypyc/build.py +++ b/mypyc/build.py @@ -36,7 +36,7 @@ from mypy.util import write_junit_xml from mypyc.annotate import generate_annotated_html from mypyc.codegen import emitmodule -from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, shared_lib_name +from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, X86_64, shared_lib_name from mypyc.errors import Errors from mypyc.ir.pprint import format_modules from mypyc.namegen import exported_name @@ -77,6 +77,12 @@ class ModDesc(NamedTuple): "base64/arch/generic/enc_tail.c", "base64/arch/generic/dec_head.c", "base64/arch/generic/dec_tail.c", + "base64/arch/ssse3/dec_reshuffle.c", + "base64/arch/ssse3/dec_loop.c", + "base64/arch/ssse3/enc_loop_asm.c", + "base64/arch/ssse3/enc_translate.c", + "base64/arch/ssse3/enc_reshuffle.c", + "base64/arch/ssse3/enc_loop.c", "base64/arch/neon64/dec_loop.c", "base64/arch/neon64/enc_loop_asm.c", "base64/codecs.h", @@ -655,6 +661,9 @@ def mypycify( # See https://github.com/mypyc/mypyc/issues/956 "-Wno-cpp", ] + if X86_64: + # Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2. + cflags.append("-msse4.2") if log_trace: cflags.append("-DMYPYC_LOG_TRACE") if experimental_features: @@ -683,6 +692,10 @@ def mypycify( # that we actually get the compilation speed and memory # use wins that multi-file mode is intended for. cflags += ["/GL-", "/wd9025"] # warning about overriding /GL + if X86_64: + # Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2. + # Also Windows 11 requires SSE4.2 since 24H2. + cflags.append("/arch:SSE4.2") if log_trace: cflags.append("/DMYPYC_LOG_TRACE") if experimental_features: diff --git a/mypyc/common.py b/mypyc/common.py index 2de63c09bb2c..98f8a89f6fcb 100644 --- a/mypyc/common.py +++ b/mypyc/common.py @@ -1,5 +1,6 @@ from __future__ import annotations +import platform import sys import sysconfig from typing import Any, Final @@ -44,6 +45,8 @@ IS_32_BIT_PLATFORM: Final = int(SIZEOF_SIZE_T) == 4 +X86_64: Final = platform.machine() in ("x86_64", "AMD64", "amd64") + PLATFORM_SIZE = 4 if IS_32_BIT_PLATFORM else 8 # Maximum value for a short tagged integer. diff --git a/mypyc/lib-rt/base64/config.h b/mypyc/lib-rt/base64/config.h index fd516c4be2d6..b5e47fb04e75 100644 --- a/mypyc/lib-rt/base64/config.h +++ b/mypyc/lib-rt/base64/config.h @@ -7,7 +7,12 @@ #define BASE64_WITH_SSE41 0 #define HAVE_SSE41 BASE64_WITH_SSE41 +#if defined(__x86_64__) || defined(_M_X64) +#define BASE64_WITH_SSE42 1 +#else #define BASE64_WITH_SSE42 0 +#endif + #define HAVE_SSE42 BASE64_WITH_SSE42 #define BASE64_WITH_AVX 0 diff --git a/mypyc/lib-rt/setup.py b/mypyc/lib-rt/setup.py index acd61458e516..6a56c65306ae 100644 --- a/mypyc/lib-rt/setup.py +++ b/mypyc/lib-rt/setup.py @@ -6,6 +6,7 @@ from __future__ import annotations import os +import platform import subprocess import sys from distutils import ccompiler, sysconfig @@ -24,6 +25,8 @@ "pythonsupport.c", ] +X86_64 = platform.machine() in ("x86_64", "AMD64", "amd64") + class BuildExtGtest(build_ext): def get_library_names(self) -> list[str]: @@ -79,8 +82,12 @@ def run(self) -> None: cflags: list[str] = [] if compiler.compiler_type == "unix": cflags += ["-O3"] + if X86_64: + cflags.append("-msse4.2") # Enable SIMD (see also mypyc/build.py) elif compiler.compiler_type == "msvc": cflags += ["/O2"] + if X86_64: + cflags.append("/arch:SSE4.2") # Enable SIMD (see also mypyc/build.py) setup( ext_modules=[