From 076ba4d6f9f57e56815859e9f4624c07dbf3d05f Mon Sep 17 00:00:00 2001 From: Leo White Date: Thu, 19 May 2022 20:59:30 +0100 Subject: [PATCH] Squashed 'ocaml/' changes from da6ff04d2..fe8a98b0c fe8a98b0c flambda-backend: Save Mach as Cfg after Selection (#624) 2b205d886 flambda-backend: Clean up algorithms (#611) 524f0b435 flambda-backend: Initial refactoring of To_cmm (#619) 0bf75de86 flambda-backend: Refactor and correct the "is pure" and "can raise" (port upstream PR#10354 and PR#10387) (#555) d234bfdbe flambda-backend: Cpp mangling is now a configuration option (#614) 20fc614bf flambda-backend: Check that stack frames are not too large (#10085) (#561) 5fc2e9503 flambda-backend: Allow CSE of immutable loads across stores (port upstream PR#9562) (#562) 2a650deec flambda-backend: Backport commit fc9534746bf5d08a4c109f22e344cf49d5d46d54 from trunk (#584) 31651b87e flambda-backend: Improved ARM64 code generation (port upstream PR#9937) (#556) f0b6d68e8 flambda-backend: Simplify processing and remove dead code (error paths) in asmlink (port upstream PR#9943) (#557) 90c674687 flambda-backend: Improve code-generation for inlined comparisons (port upstream PR#10228) (#563) git-subtree-dir: ocaml git-subtree-split: fe8a98b0cd38bf1872b8faf3b93542caebabad18 --- .depend | 11 ++- Makefile.config.in | 1 + asmcomp/CSEgen.ml | 37 +++---- asmcomp/CSEgen.mli | 2 +- asmcomp/amd64/CSE.ml | 2 +- asmcomp/amd64/arch.ml | 13 +++ asmcomp/amd64/emit.mlp | 2 +- asmcomp/amd64/proc.ml | 13 --- asmcomp/arm/arch.ml | 12 +++ asmcomp/arm/emit.mlp | 6 +- asmcomp/arm/proc.ml | 15 +-- asmcomp/arm/scheduling.ml | 2 +- asmcomp/arm64/arch.ml | 93 +++++++++++++++++ asmcomp/arm64/emit.mlp | 99 +++++++++---------- asmcomp/arm64/proc.ml | 16 +-- asmcomp/arm64/selection.ml | 49 ++------- asmcomp/asmgen.ml | 15 ++- asmcomp/asmgen.mli | 1 + asmcomp/asmlink.ml | 84 +++++++--------- asmcomp/cmmgen.ml | 15 +-- asmcomp/deadcode.ml | 2 +- asmcomp/emitaux.ml | 21 +++- asmcomp/emitaux.mli | 7 +- asmcomp/i386/CSE.ml | 2 +- asmcomp/i386/arch.ml | 12 +++ asmcomp/i386/emit.mlp | 2 +- asmcomp/i386/proc.ml | 12 --- asmcomp/i386/selection.ml | 2 +- asmcomp/liveness.ml | 23 ++--- asmcomp/mach.ml | 13 ++- asmcomp/mach.mli | 9 +- asmcomp/power/arch.ml | 8 ++ asmcomp/power/emit.mlp | 4 +- asmcomp/power/proc.ml | 11 --- asmcomp/power/scheduling.ml | 4 +- asmcomp/printmach.ml | 5 +- asmcomp/proc.mli | 3 - asmcomp/riscv/arch.ml | 8 ++ asmcomp/riscv/emit.mlp | 4 +- asmcomp/riscv/proc.ml | 10 -- asmcomp/s390x/arch.ml | 8 ++ asmcomp/s390x/emit.mlp | 2 +- asmcomp/s390x/proc.ml | 10 -- asmcomp/s390x/scheduling.ml | 2 +- asmcomp/schedgen.ml | 4 +- asmcomp/selectgen.ml | 4 +- asmcomp/spill.ml | 12 +-- configure | 20 ++++ configure.ac | 12 +++ driver/maindriver.ml | 3 +- driver/optmaindriver.ml | 2 +- runtime/caml/signals.h | 2 +- runtime/signals_byt.c | 2 +- runtime/signals_nat.c | 25 ++--- testsuite/tests/asmcomp/try_checkbound.ml | 12 +++ .../save_ir_after_typing.compilers.reference | 2 +- utils/Makefile | 1 + utils/clflags.ml | 11 ++- utils/clflags.mli | 2 +- utils/config.mli | 3 + utils/config.mlp | 2 + utils/strongly_connected_components.ml | 55 ++++++++++- utils/strongly_connected_components.mli | 48 ++++++++- utils/target_system.ml | 12 +++ utils/target_system.mli | 4 + 65 files changed, 580 insertions(+), 330 deletions(-) create mode 100644 testsuite/tests/asmcomp/try_checkbound.ml diff --git a/.depend b/.depend index 085a0f0cd5..92e3a68775 100644 --- a/.depend +++ b/.depend @@ -2093,15 +2093,18 @@ asmcomp/CSEgen.cmo : \ asmcomp/proc.cmi \ asmcomp/mach.cmi \ asmcomp/cmm.cmi \ + parsing/asttypes.cmi \ asmcomp/CSEgen.cmi asmcomp/CSEgen.cmx : \ asmcomp/reg.cmx \ asmcomp/proc.cmx \ asmcomp/mach.cmx \ asmcomp/cmm.cmx \ + parsing/asttypes.cmi \ asmcomp/CSEgen.cmi asmcomp/CSEgen.cmi : \ - asmcomp/mach.cmi + asmcomp/mach.cmi \ + parsing/asttypes.cmi asmcomp/afl_instrument.cmo : \ lambda/lambda.cmi \ asmcomp/cmm.cmi \ @@ -2146,6 +2149,7 @@ asmcomp/asmgen.cmo : \ asmcomp/linscan.cmi \ asmcomp/linearize.cmi \ file_formats/linear_format.cmi \ + asmcomp/linear.cmi \ lambda/lambda.cmi \ asmcomp/interval.cmi \ asmcomp/interf.cmi \ @@ -2188,6 +2192,7 @@ asmcomp/asmgen.cmx : \ asmcomp/linscan.cmx \ asmcomp/linearize.cmx \ file_formats/linear_format.cmx \ + asmcomp/linear.cmx \ lambda/lambda.cmx \ asmcomp/interval.cmx \ asmcomp/interf.cmx \ @@ -2211,6 +2216,7 @@ asmcomp/asmgen.cmx : \ asmcomp/asmgen.cmi asmcomp/asmgen.cmi : \ lambda/lambda.cmi \ + asmcomp/emitaux.cmi \ asmcomp/cmm.cmi \ middle_end/clambda.cmi \ middle_end/backend_intf.cmi @@ -2719,6 +2725,7 @@ asmcomp/mach.cmo : \ lambda/debuginfo.cmi \ asmcomp/cmm.cmi \ middle_end/backend_var.cmi \ + parsing/asttypes.cmi \ asmcomp/arch.cmo \ asmcomp/mach.cmi asmcomp/mach.cmx : \ @@ -2729,6 +2736,7 @@ asmcomp/mach.cmx : \ lambda/debuginfo.cmx \ asmcomp/cmm.cmx \ middle_end/backend_var.cmx \ + parsing/asttypes.cmi \ asmcomp/arch.cmx \ asmcomp/mach.cmi asmcomp/mach.cmi : \ @@ -2738,6 +2746,7 @@ asmcomp/mach.cmi : \ lambda/debuginfo.cmi \ asmcomp/cmm.cmi \ middle_end/backend_var.cmi \ + parsing/asttypes.cmi \ asmcomp/arch.cmo asmcomp/printcmm.cmo : \ utils/targetint.cmi \ diff --git a/Makefile.config.in b/Makefile.config.in index 542cbcefcb..3972a6f4be 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -225,6 +225,7 @@ WITH_OCAMLDOC=@ocamldoc@ WITH_OCAMLTEST=@ocamltest@ ASM_CFI_SUPPORTED=@asm_cfi_supported@ WITH_FRAME_POINTERS=@frame_pointers@ +WITH_CPP_MANGLING=@cpp_mangling@ WITH_PROFINFO=@profinfo@ PROFINFO_WIDTH=@profinfo_width@ WITH_FPIC=@fpic@ diff --git a/asmcomp/CSEgen.ml b/asmcomp/CSEgen.ml index f788bf9a58..dd5efde13b 100644 --- a/asmcomp/CSEgen.ml +++ b/asmcomp/CSEgen.ml @@ -25,7 +25,7 @@ type valnum = int type op_class = | Op_pure (* pure arithmetic, produce one or several result *) | Op_checkbound (* checkbound-style: no result, can raise an exn *) - | Op_load (* memory load *) + | Op_load of Asttypes.mutable_flag (* memory load *) | Op_store of bool (* memory store, false = init, true = assign *) | Op_other (* anything else that does not allocate nor store in memory *) @@ -40,29 +40,30 @@ module Equations = struct Map.Make(struct type t = rhs let compare = Stdlib.compare end) type 'a t = - { load_equations : 'a Rhs_map.t; + { mutable_load_equations : 'a Rhs_map.t; other_equations : 'a Rhs_map.t } let empty = - { load_equations = Rhs_map.empty; + { mutable_load_equations = Rhs_map.empty; other_equations = Rhs_map.empty } let add op_class op v m = match op_class with - | Op_load -> - { m with load_equations = Rhs_map.add op v m.load_equations } + | Op_load Mutable -> + { m with mutable_load_equations = + Rhs_map.add op v m.mutable_load_equations } | _ -> { m with other_equations = Rhs_map.add op v m.other_equations } let find op_class op m = match op_class with - | Op_load -> - Rhs_map.find op m.load_equations + | Op_load Mutable -> + Rhs_map.find op m.mutable_load_equations | _ -> Rhs_map.find op m.other_equations - let remove_loads m = - { load_equations = Rhs_map.empty; + let remove_mutable_loads m = + { mutable_load_equations = Rhs_map.empty; other_equations = m.other_equations } end @@ -190,8 +191,8 @@ let set_unknown_regs n rs = (* Keep only the equations satisfying the given predicate. *) -let remove_load_numbering n = - { n with num_eqs = Equations.remove_loads n.num_eqs } +let remove_mutable_load_numbering n = + { n with num_eqs = Equations.remove_mutable_loads n.num_eqs } (* Forget everything we know about registers of type [Addr]. *) @@ -225,7 +226,7 @@ method class_of_operation op = | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ | Iextcall _ | Iprobe _ | Iopaque -> assert false (* treated specially *) | Istackoffset _ -> Op_other - | Iload(_,_) -> Op_load + | Iload(_,_,mut) -> Op_load mut | Istore(_,_,asg) -> Op_store asg | Ialloc _ -> assert false (* treated specially *) | Iintop(Icheckbound) -> Op_checkbound @@ -246,11 +247,11 @@ method is_cheap_operation op = | Iconst_int _ -> true | _ -> false -(* Forget all equations involving memory loads. Performed after a - non-initializing store *) +(* Forget all equations involving mutable memory loads. + Performed after a non-initializing store *) method private kill_loads n = - remove_load_numbering n + remove_mutable_load_numbering n (* Perform CSE on the given instruction [i] and its successors. [n] is the value numbering current at the beginning of [i]. *) @@ -292,13 +293,13 @@ method private cse n i = Moreover, allocation can trigger the asynchronous execution of arbitrary Caml code (finalizer, signal handler, context switch), which can contain non-initializing stores. - Hence, all equations over loads must be removed. *) + Hence, all equations over mutable loads must be removed. *) let n1 = kill_addr_regs (self#kill_loads n) in let n2 = set_unknown_regs n1 i.res in {i with next = self#cse n2 i.next} | Iop op -> begin match self#class_of_operation op with - | (Op_pure | Op_checkbound | Op_load) as op_class -> + | (Op_pure | Op_checkbound | Op_load _) as op_class -> let (n1, varg) = valnum_regs n i.arg in let n2 = set_unknown_regs n1 (Proc.destroyed_at_oper i.desc) in begin match find_equation op_class n1 (op, varg) with @@ -336,7 +337,7 @@ method private cse n i = {i with next = self#cse n2 i.next} | Op_store true -> (* A non-initializing store can invalidate - anything we know about prior loads. *) + anything we know about prior mutable loads. *) let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in let n2 = set_unknown_regs n1 i.res in let n3 = self#kill_loads n2 in diff --git a/asmcomp/CSEgen.mli b/asmcomp/CSEgen.mli index c80e7b4c4a..26e93e913c 100644 --- a/asmcomp/CSEgen.mli +++ b/asmcomp/CSEgen.mli @@ -19,7 +19,7 @@ type op_class = | Op_pure (* pure, produce one result *) | Op_checkbound (* checkbound-style: no result, can raise an exn *) - | Op_load (* memory load *) + | Op_load of Asttypes.mutable_flag (* memory load *) | Op_store of bool (* memory store, false = init, true = assign *) | Op_other (* anything else that does not allocate nor store in memory *) diff --git a/asmcomp/amd64/CSE.ml b/asmcomp/amd64/CSE.ml index 60503d69ce..b473e7b212 100644 --- a/asmcomp/amd64/CSE.ml +++ b/asmcomp/amd64/CSE.ml @@ -30,7 +30,7 @@ method! class_of_operation op = | Ilea _ | Isextend32 | Izextend32 -> Op_pure | Istore_int(_, _, is_asg) -> Op_store is_asg | Ioffset_loc(_, _) -> Op_store true - | Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load + | Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load Mutable | Ibswap _ | Isqrtf -> super#class_of_operation op end | _ -> super#class_of_operation op diff --git a/asmcomp/amd64/arch.ml b/asmcomp/amd64/arch.ml index 581db3dbbc..0d0c540643 100644 --- a/asmcomp/amd64/arch.ml +++ b/asmcomp/amd64/arch.ml @@ -134,7 +134,20 @@ let print_specific_operation printreg op ppf arg = | Izextend32 -> fprintf ppf "zextend32 %a" printreg arg.(0) +(* Are we using the Windows 64-bit ABI? *) + let win64 = match Config.system with | "win64" | "mingw64" | "cygwin" -> true | _ -> false + +(* Specific operations that are pure *) + +let operation_is_pure = function + | Ilea _ | Ibswap _ | Isqrtf | Isextend32 | Izextend32 -> true + | Ifloatarithmem _ | Ifloatsqrtf _ -> true + | _ -> false + +(* Specific operations that can raise *) + +let operation_can_raise _ = false diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp index 2094774ce3..e55a739901 100644 --- a/asmcomp/amd64/emit.mlp +++ b/asmcomp/amd64/emit.mlp @@ -717,7 +717,7 @@ let emit_instr fallthrough i = end | Lop(Istackoffset n) -> emit_stack_offset n - | Lop(Iload(chunk, addr)) -> + | Lop(Iload(chunk, addr, _mut)) -> let dest = res i 0 in begin match chunk with | Word_int | Word_val -> diff --git a/asmcomp/amd64/proc.ml b/asmcomp/amd64/proc.ml index 53fd080fd5..9441995180 100644 --- a/asmcomp/amd64/proc.ml +++ b/asmcomp/amd64/proc.ml @@ -360,19 +360,6 @@ let max_register_pressure = function if fp then [| 12; 15 |] else [| 13; 15 |] | _ -> if fp then [| 12; 16 |] else [| 13; 16 |] -(* Pure operations (without any side effect besides updating their result - registers). *) - -let op_is_pure = function - | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ - | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ - | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) | Iopaque -> false - | Ispecific(Ilea _|Isextend32|Izextend32) -> true - | Ispecific _ -> false - | Iprobe _ | Iprobe_is_enabled _-> false - | Ibeginregion | Iendregion -> false - | _ -> true - (* Layout of the stack frame *) let frame_required fd = diff --git a/asmcomp/arm/arch.ml b/asmcomp/arm/arch.ml index 4b884da6e0..ee4ca76d44 100644 --- a/asmcomp/arm/arch.ml +++ b/asmcomp/arm/arch.ml @@ -262,3 +262,15 @@ let is_immediate n = s := !s + 2 done; !s <= m + +(* Specific operations that are pure *) + +let operation_is_pure = function + | Ishiftcheckbound _ -> false + | _ -> true + +(* Specific operations that can raise *) + +let operation_can_raise = function + | Ishiftcheckbound _ -> true + | _ -> false diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp index a460ddce78..220b679c6b 100644 --- a/asmcomp/arm/emit.mlp +++ b/asmcomp/arm/emit.mlp @@ -580,10 +580,10 @@ let emit_instr i = let ninstr = emit_stack_adjustment (-n) in stack_offset := !stack_offset + n; ninstr - | Lop(Iload(Single, addr)) when !fpu >= VFPv2 -> + | Lop(Iload(Single, addr, _mut)) when !fpu >= VFPv2 -> ` flds s14, {emit_addressing addr i.arg 0}\n`; ` fcvtds {emit_reg i.res.(0)}, s14\n`; 2 - | Lop(Iload(Double, addr)) when !fpu = Soft -> + | Lop(Iload(Double, addr, _mut)) when !fpu = Soft -> (* Use LDM or LDRD if possible *) begin match i.res.(0), i.res.(1), addr with {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0 @@ -602,7 +602,7 @@ let emit_instr i = ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n` end; 2 end - | Lop(Iload(size, addr)) -> + | Lop(Iload(size, addr, _mut)) -> let r = i.res.(0) in let instr = match size with diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml index c5589b0c96..02e2f3e505 100644 --- a/asmcomp/arm/proc.ml +++ b/asmcomp/arm/proc.ml @@ -311,7 +311,8 @@ let destroyed_at_oper = function | Iop(Iintop (Icomp _) | Iintop_imm(Icomp _, _)) when !arch >= ARMv8 && !thumb -> [| phys_reg 3 |] (* r3 destroyed *) - | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) -> + | Iop(Iintoffloat | Ifloatofint + | Iload(Single, _, _) | Istore(Single, _, _)) -> [| phys_reg 107 |] (* d7 (s14-s15) destroyed *) | _ -> [||] @@ -335,20 +336,10 @@ let max_register_pressure = function | Ialloc _ -> if abi = EABI then [| 7; 0; 0 |] else [| 7; 8; 8 |] | Iconst_symbol _ when !Clflags.pic_code -> [| 7; 16; 32 |] | Iintoffloat | Ifloatofint - | Iload(Single, _) | Istore(Single, _, _) -> [| 9; 15; 31 |] + | Iload(Single, _, _) | Istore(Single, _, _) -> [| 9; 15; 31 |] | Iintop Imulh when !arch < ARMv6 -> [| 8; 16; 32 |] | _ -> [| 9; 16; 32 |] -(* Pure operations (without any side effect besides updating their result - registers). *) - -let op_is_pure = function - | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ - | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ - | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) | Iopaque - | Ispecific(Ishiftcheckbound _) -> false - | _ -> true - (* Layout of the stack *) let frame_required fd = diff --git a/asmcomp/arm/scheduling.ml b/asmcomp/arm/scheduling.ml index 9d847d4cef..e7fd744a9d 100644 --- a/asmcomp/arm/scheduling.ml +++ b/asmcomp/arm/scheduling.ml @@ -29,7 +29,7 @@ method oper_latency = function (* Loads have a latency of two cycles in general *) Iconst_symbol _ | Iconst_float _ - | Iload(_, _) + | Iload(_, _, _) | Ireload | Ifloatofint (* mcr/mrc count as memory access *) | Iintoffloat -> 2 diff --git a/asmcomp/arm64/arch.ml b/asmcomp/arm64/arch.ml index 8d8561bca5..766cee05c7 100644 --- a/asmcomp/arm64/arch.ml +++ b/asmcomp/arm64/arch.ml @@ -58,6 +58,7 @@ type specific_operation = | Isqrtf (* floating-point square root *) | Ibswap of int (* endianness conversion *) | Imove32 (* 32-bit integer move *) + | Isignext of int (* sign extension *) and arith_operation = Ishiftadd @@ -169,3 +170,95 @@ let print_specific_operation printreg op ppf arg = | Imove32 -> fprintf ppf "move32 %a" printreg arg.(0) + | Isignext n -> + fprintf ppf "signext%d %a" + n printreg arg.(0) + +(* Recognition of logical immediate arguments *) + +(* An automaton to recognize ( 0+1+0* | 1+0+1* ) + + 0 1 0 + / \ / \ / \ + \ / \ / \ / + -0--> [1] --1--> [2] --0--> [3] + / + [0] + \ + -1--> [4] --0--> [5] --1--> [6] + / \ / \ / \ + \ / \ / \ / + 1 0 1 + +The accepting states are 2, 3, 5 and 6. *) + +let auto_table = [| (* accepting?, next on 0, next on 1 *) + (* state 0 *) (false, 1, 4); + (* state 1 *) (false, 1, 2); + (* state 2 *) (true, 3, 2); + (* state 3 *) (true, 3, 7); + (* state 4 *) (false, 5, 4); + (* state 5 *) (true, 5, 6); + (* state 6 *) (true, 7, 6); + (* state 7 *) (false, 7, 7) (* error state *) +|] + +let rec run_automata nbits state input = + let (acc, next0, next1) = auto_table.(state) in + if nbits <= 0 + then acc + else run_automata (nbits - 1) + (if Nativeint.logand input 1n = 0n then next0 else next1) + (Nativeint.shift_right_logical input 1) + +(* The following function determines a length [e] + such that [x] is a repetition [BB...B] of a bit pattern [B] of length [e]. + [e] ranges over 64, 32, 16, 8, 4, 2. The smaller [e] the better. *) + +let logical_imm_length x = + (* [test n] checks that the low [2n] bits of [x] are of the + form [BB], that is, two occurrences of the same [n] bits *) + let test n = + let mask = Nativeint.(sub (shift_left 1n n) 1n) in + let low_n_bits = Nativeint.(logand x mask) in + let next_n_bits = Nativeint.(logand (shift_right_logical x n) mask) in + low_n_bits = next_n_bits in + (* If [test n] fails, we know that the length [e] is + at least [2n]. Hence we test with decreasing values of [n]: + 32, 16, 8, 4, 2. *) + if not (test 32) then 64 + else if not (test 16) then 32 + else if not (test 8) then 16 + else if not (test 4) then 8 + else if not (test 2) then 4 + else 2 + +(* A valid logical immediate is +- neither [0] nor [-1]; +- composed of a repetition [BBBBB] of a bit-pattern [B] of length [e] +- the low [e] bits of the number, that is, [B], match [0+1+0*] or [1+0+1*]. +*) + +let is_logical_immediate x = + x <> 0n && x <> -1n && run_automata (logical_imm_length x) 0 x + +(* Specific operations that are pure *) + +let operation_is_pure = function + | Ifar_alloc _ + | Ifar_intop_checkbound + | Ifar_intop_imm_checkbound _ + | Ishiftcheckbound _ + | Ifar_shiftcheckbound _ -> false + | _ -> true + +(* Specific operations that can raise *) + +let operation_can_raise = function + | Ifar_alloc _ + | Ifar_intop_checkbound + | Ifar_intop_imm_checkbound _ + | Ishiftcheckbound _ + | Ifar_shiftcheckbound _ -> true + | _ -> false + diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp index 0f80a1c1c9..f275bc67c9 100644 --- a/asmcomp/arm64/emit.mlp +++ b/asmcomp/arm64/emit.mlp @@ -230,63 +230,55 @@ let name_for_int_operation = function | Iasr -> "asr" | _ -> assert false +(* Decompose an integer constant into four 16-bit shifted fragments. + Omit the fragments that are equal to "default" (16 zeros or 16 ones). *) + +let decompose_int default n = + let rec decomp n pos = + if pos >= 64 then [] else begin + let frag = Nativeint.logand n 0xFFFFn + and rem = Nativeint.shift_right_logical n 16 in + if frag = default + then decomp rem (pos + 16) + else (frag, pos) :: decomp rem (pos + 16) + end + in decomp n 0 + (* Load an integer constant into a register *) +let emit_movk dst (f, p) = + ` movk {emit_reg dst}, #{emit_nativeint f}, lsl #{emit_int p}\n` + let emit_intconst dst n = - let rec emit_pos first shift = - if shift < 0 then begin - if first then ` mov {emit_reg dst}, xzr\n` - end else begin - let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in - if s = 0n then emit_pos first (shift - 16) else begin - if first then - ` movz {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n` - else - ` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`; - emit_pos false (shift - 16) - end - end - and emit_neg first shift = - if shift < 0 then begin - if first then ` movn {emit_reg dst}, #0\n` + if is_logical_immediate n then + ` orr {emit_reg dst}, xzr, #{emit_nativeint n}\n` + else begin + let dz = decompose_int 0x0000n n + and dn = decompose_int 0xFFFFn n in + if List.length dz <= List.length dn then begin + match dz with + | [] -> + ` mov {emit_reg dst}, xzr\n` + | (f, p) :: l -> + ` movz {emit_reg dst}, #{emit_nativeint f}, lsl #{emit_int p}\n`; + List.iter (emit_movk dst) l end else begin - let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in - if s = 0xFFFFn then emit_neg first (shift - 16) else begin - if first then - ` movn {emit_reg dst}, #{emit_nativeint (Nativeint.logxor s 0xFFFFn)}, lsl #{emit_int shift}\n` - else - ` movk {emit_reg dst}, #{emit_nativeint s}, lsl #{emit_int shift}\n`; - emit_neg false (shift - 16) - end + match dn with + | [] -> + ` movn {emit_reg dst}, #0\n` + | (f, p) :: l -> + let nf = Nativeint.logxor f 0xFFFFn in + ` movn {emit_reg dst}, #{emit_nativeint nf}, lsl #{emit_int p}\n`; + List.iter (emit_movk dst) l end - in - if n < 0n then emit_neg true 48 else emit_pos true 48 + end let num_instructions_for_intconst n = - let num_instructions = ref 0 in - let rec count_pos first shift = - if shift < 0 then begin - if first then incr num_instructions - end else begin - let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in - if s = 0n then count_pos first (shift - 16) else begin - incr num_instructions; - count_pos false (shift - 16) - end - end - and count_neg first shift = - if shift < 0 then begin - if first then incr num_instructions - end else begin - let s = Nativeint.(logand (shift_right_logical n shift) 0xFFFFn) in - if s = 0xFFFFn then count_neg first (shift - 16) else begin - incr num_instructions; - count_neg false (shift - 16) - end - end - in - if n < 0n then count_neg true 48 else count_pos true 48; - !num_instructions + if is_logical_immediate n then 1 else begin + let dz = decompose_int 0x0000n n + and dn = decompose_int 0xFFFFn n in + max 1 (min (List.length dz) (List.length dn)) + end (* Recognize float constants appropriate for FMOV dst, #fpimm instruction: "a normalized binary floating point encoding with 1 sign bit, 4 @@ -505,7 +497,7 @@ module BR = Branch_relaxation.Make (struct | Lop (Iextcall { alloc = false; }) -> 1 | Lop (Iextcall { alloc = true; }) -> 3 | Lop (Istackoffset _) -> 2 - | Lop (Iload (size, addr)) | Lop (Istore (size, addr, _)) -> + | Lop (Iload (size, addr, _)) | Lop (Istore (size, addr, _)) -> let based = match addr with Iindexed _ -> 0 | Ibased _ -> 1 in based + begin match size with Single -> 2 | _ -> 1 end | Lop (Ialloc _) when !fastcode_flag -> 5 @@ -539,6 +531,7 @@ module BR = Branch_relaxation.Make (struct | Lop (Ispecific (Ibswap 16)) -> 2 | Lop (Ispecific (Ibswap _)) -> 1 | Lop (Ispecific Imove32) -> 1 + | Lop (Ispecific (Isignext _)) -> 1 | Lop (Iname_for_debugger _) -> 0 | Lop (Iprobe _ |Iprobe_is_enabled _) -> Misc.fatal_error ("Probes not supported.") @@ -728,7 +721,7 @@ let emit_instr i = assert (n mod 16 = 0); emit_stack_adjustment (-n); stack_offset := !stack_offset + n - | Lop(Iload(size, addr)) -> + | Lop(Iload(size, addr, _mut)) -> let dst = i.res.(0) in let base = match addr with @@ -893,6 +886,8 @@ let emit_instr i = | _ -> assert false end + | Lop(Ispecific(Isignext size)) -> + ` sbfm {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #0, #{emit_int (size - 1)}\n` | Lop (Iname_for_debugger _) -> () | Lop (Iprobe _ |Iprobe_is_enabled _) -> Misc.fatal_error ("Probes not supported.") diff --git a/asmcomp/arm64/proc.ml b/asmcomp/arm64/proc.ml index e50a34e168..b20467ad17 100644 --- a/asmcomp/arm64/proc.ml +++ b/asmcomp/arm64/proc.ml @@ -269,7 +269,8 @@ let destroyed_at_oper = function destroyed_at_c_call | Iop(Ialloc _) -> [| reg_x8 |] - | Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) -> + | Iop( Iintoffloat | Ifloatofint + | Iload(Single, _, _) | Istore(Single, _, _)) -> [| reg_d7 |] (* d7 / s7 destroyed *) | _ -> [||] @@ -288,20 +289,9 @@ let max_register_pressure = function | Iextcall _ -> [| 7; 8 |] (* 7 integer callee-saves, 8 FP callee-saves *) | Ialloc _ -> [| 22; 32 |] | Iintoffloat | Ifloatofint - | Iload(Single, _) | Istore(Single, _, _) -> [| 23; 31 |] + | Iload(Single, _, _) | Istore(Single, _, _) -> [| 23; 31 |] | _ -> [| 23; 32 |] -(* Pure operations (without any side effect besides updating their result - registers). *) - -let op_is_pure = function - | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ - | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ - | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) | Iopaque - | Ibeginregion | Iendregion - | Ispecific(Ishiftcheckbound _) -> false - | _ -> true - (* Layout of the stack *) let frame_required fd = fd.fun_contains_calls diff --git a/asmcomp/arm64/selection.ml b/asmcomp/arm64/selection.ml index a271a5dfea..c8e92598d5 100644 --- a/asmcomp/arm64/selection.ml +++ b/asmcomp/arm64/selection.ml @@ -34,47 +34,8 @@ let is_offset chunk n = | Word_int | Word_val | Double -> n land 7 = 0 && n lsr 3 < 0x1000) -(* An automaton to recognize ( 0+1+0* | 1+0+1* ) - - 0 1 0 - / \ / \ / \ - \ / \ / \ / - -0--> [1] --1--> [2] --0--> [3] - / - [0] - \ - -1--> [4] --0--> [5] --1--> [6] - / \ / \ / \ - \ / \ / \ / - 1 0 1 - -The accepting states are 2, 3, 5 and 6. *) - -let auto_table = [| (* accepting?, next on 0, next on 1 *) - (* state 0 *) (false, 1, 4); - (* state 1 *) (false, 1, 2); - (* state 2 *) (true, 3, 2); - (* state 3 *) (true, 3, 7); - (* state 4 *) (false, 5, 4); - (* state 5 *) (true, 5, 6); - (* state 6 *) (true, 7, 6); - (* state 7 *) (false, 7, 7) (* error state *) -|] - -let rec run_automata nbits state input = - let (acc, next0, next1) = auto_table.(state) in - if nbits <= 0 - then acc - else run_automata (nbits - 1) - (if input land 1 = 0 then next0 else next1) - (input asr 1) - -(* We are very conservative wrt what ARM64 supports: we don't support - repetitions of a 000111000 or 1110000111 pattern, just a single - pattern of this kind. *) - let is_logical_immediate n = - n <> 0 && n <> -1 && run_automata 64 0 n + Arch.is_logical_immediate (Nativeint.of_int n) (* Signed immediates are simpler *) @@ -199,6 +160,14 @@ method! select_operation op args dbg = | _ -> super#select_operation op args dbg end + (* Recognize sign extension *) + | Casr -> + begin match args with + [Cop(Clsl, [k; Cconst_int (n, _)], _); Cconst_int (n', _)] + when n' = n && 0 < n && n < 64 -> + (Ispecific (Isignext (64 - n)), [k]) + | _ -> super#select_operation op args dbg + end (* Recognize floating-point negate and multiply *) | Cnegf -> begin match args with diff --git a/asmcomp/asmgen.ml b/asmcomp/asmgen.ml index 18a5793dd2..bab9aab501 100644 --- a/asmcomp/asmgen.ml +++ b/asmcomp/asmgen.ml @@ -26,6 +26,7 @@ open Cmm type error = | Assembler_error of string | Mismatched_for_pack of string option + | Asm_generation of string * Emitaux.error exception Error of error @@ -96,9 +97,13 @@ let if_emit_do f x = if should_emit () then f x else () let emit_begin_assembly = if_emit_do Emit.begin_assembly let emit_end_assembly = if_emit_do Emit.end_assembly let emit_data = if_emit_do Emit.data -let emit_fundecl = - if_emit_do - (Profile.record ~accumulate:true "emit" Emit.fundecl) +let emit_fundecl fd = + if should_emit() then begin + try + Profile.record ~accumulate:true "emit" Emit.fundecl fd + with Emitaux.Error e -> + raise (Error (Asm_generation(fd.Linear.fun_name, e))) + end let rec regalloc ~ppf_dump round fd = if round > 50 then @@ -290,6 +295,10 @@ let report_error ppf = function fprintf ppf "This input file cannot be compiled %s: it was generated %s." (msg !Clflags.for_package) (msg saved) + | Asm_generation(fn, err) -> + fprintf ppf + "Error producing assembly code for function %s: %a" + fn Emitaux.report_error err let () = Location.register_error_of_exn diff --git a/asmcomp/asmgen.mli b/asmcomp/asmgen.mli index f86bd67375..c258e24c9f 100644 --- a/asmcomp/asmgen.mli +++ b/asmcomp/asmgen.mli @@ -44,6 +44,7 @@ val compile_phrase : type error = | Assembler_error of string | Mismatched_for_pack of string option + | Asm_generation of string * Emitaux.error exception Error of error val report_error: Format.formatter -> error -> unit diff --git a/asmcomp/asmlink.ml b/asmcomp/asmlink.ml index 697eeb3c07..d9b5753d46 100644 --- a/asmcomp/asmlink.ml +++ b/asmcomp/asmlink.ml @@ -124,19 +124,6 @@ let runtime_lib () = with Not_found -> raise(Error(File_not_found libname)) -let object_file_name name = - let file_name = - try - Load_path.find name - with Not_found -> - fatal_errorf "Asmlink.object_file_name: %s not found" name in - if Filename.check_suffix file_name ".cmx" then - Filename.chop_suffix file_name ".cmx" ^ ext_obj - else if Filename.check_suffix file_name ".cmxa" then - Filename.chop_suffix file_name ".cmxa" ^ ext_lib - else - fatal_error "Asmlink.object_file_name: bad ext" - (* First pass: determine which units are needed *) let missing_globals = (Hashtbl.create 17 : (string, string list ref) Hashtbl.t) @@ -164,6 +151,17 @@ type file = | Unit of string * unit_infos * Digest.t | Library of string * library_infos +let object_file_name_of_file = function + | Unit (fname, _, _) -> Some (Filename.chop_suffix fname ".cmx" ^ ext_obj) + | Library (fname, infos) -> + let obj_file = Filename.chop_suffix fname ".cmxa" ^ ext_lib in + (* MSVC doesn't support empty .lib files, and macOS struggles to make + them (#6550), so there shouldn't be one if the .cmxa contains no + units. The file_exists check is added to be ultra-defensive for the + case where a user has manually added things to the .a/.lib file *) + if infos.lib_units = [] && not (Sys.file_exists obj_file) then None else + Some obj_file + let read_file obj_name = let file_name = try @@ -186,42 +184,30 @@ let read_file obj_name = end else raise(Error(Not_an_object_file file_name)) -let scan_file obj_name (tolink, objfiles) = match read_file obj_name with +let scan_file file tolink = match file with | Unit (file_name,info,crc) -> (* This is a .cmx file. It must be linked in any case. *) remove_required info.ui_name; List.iter (add_required file_name) info.ui_imports_cmx; - ((info, file_name, crc) :: tolink, obj_name :: objfiles) + (info, file_name, crc) :: tolink | Library (file_name,infos) -> (* This is an archive file. Each unit contained in it will be linked in only if needed. *) add_ccobjs (Filename.dirname file_name) infos; - let tolink = - List.fold_right - (fun (info, crc) reqd -> - if info.ui_force_link - || !Clflags.link_everything - || is_required info.ui_name - then begin - remove_required info.ui_name; - List.iter (add_required (Printf.sprintf "%s(%s)" - file_name info.ui_name)) - info.ui_imports_cmx; - (info, file_name, crc) :: reqd - end else - reqd) - infos.lib_units tolink - and objfiles = - if infos.lib_units = [] - && not (Sys.file_exists (object_file_name obj_name)) then - (* MSVC doesn't support empty .lib files, and macOS struggles to make - them (#6550), so there shouldn't be one if the .cmxa contains no - units. The file_exists check is added to be ultra-defensive for the - case where a user has manually added things to the .a/.lib file *) - objfiles - else - obj_name :: objfiles - in (tolink, objfiles) + List.fold_right + (fun (info, crc) reqd -> + if info.ui_force_link + || !Clflags.link_everything + || is_required info.ui_name + then begin + remove_required info.ui_name; + List.iter (add_required (Printf.sprintf "%s(%s)" + file_name info.ui_name)) + info.ui_imports_cmx; + (info, file_name, crc) :: reqd + end else + reqd) + infos.lib_units tolink (* Second pass: generate the startup file and link it with everything else *) @@ -295,17 +281,16 @@ let call_linker_shared file_list output_name = let link_shared ~ppf_dump objfiles output_name = Profile.record_call output_name (fun () -> - let units_tolink, objfiles = - List.fold_right scan_file objfiles ([], []) - in + let obj_infos = List.map read_file objfiles in + let units_tolink = List.fold_right scan_file obj_infos [] in List.iter (fun (info, file_name, crc) -> check_consistency file_name info crc) units_tolink; Clflags.ccobjs := !Clflags.ccobjs @ !lib_ccobjs; Clflags.all_ccopts := !lib_ccopts @ !Clflags.all_ccopts; - let objfiles = List.rev_map object_file_name objfiles @ + let objfiles = + List.rev (List.filter_map object_file_name_of_file obj_infos) @ (List.rev !Clflags.ccobjs) in - let startup = if !Clflags.keep_startup_file || !Emitaux.binary_backend_available then output_name ^ ".startup" ^ ext_asm @@ -355,9 +340,8 @@ let link ~ppf_dump objfiles output_name = if !Clflags.nopervasives then objfiles else if !Clflags.output_c_object then stdlib :: objfiles else stdlib :: (objfiles @ [stdexit]) in - let units_tolink, objfiles = - List.fold_right scan_file objfiles ([], []) - in + let obj_infos = List.map read_file objfiles in + let units_tolink = List.fold_right scan_file obj_infos [] in Array.iter remove_required Runtimedef.builtin_exceptions; begin match extract_missing_globals() with [] -> () @@ -381,7 +365,7 @@ let link ~ppf_dump objfiles output_name = (fun () -> make_startup_file ~ppf_dump units_tolink ~crc_interfaces); Misc.try_finally (fun () -> - call_linker (List.map object_file_name objfiles) + call_linker (List.filter_map object_file_name_of_file obj_infos) startup_obj output_name) ~always:(fun () -> remove_file startup_obj) ) diff --git a/asmcomp/cmmgen.ml b/asmcomp/cmmgen.ml index aa80de56a6..a129cf2e12 100644 --- a/asmcomp/cmmgen.ml +++ b/asmcomp/cmmgen.ml @@ -443,7 +443,7 @@ let rec transl env e = let args = List.map (transl env) args in send kind met obj args pos dbg | Ulet(str, kind, id, exp, body) -> - transl_let env str kind id exp body + transl_let env str kind id exp (fun env -> transl env body) | Uphantom_let (var, defining_expr, body) -> let defining_expr = match defining_expr with @@ -1155,7 +1155,7 @@ and transl_unbox_sized size dbg env exp = | Thirty_two -> transl_unbox_int dbg env Pint32 exp | Sixty_four -> transl_unbox_int dbg env Pint64 exp -and transl_let env str kind id exp body = +and transl_let env str kind id exp transl_body = let dbg = Debuginfo.none in let cexp = transl env exp in let unboxing = @@ -1192,16 +1192,16 @@ and transl_let env str kind id exp body = (* N.B. [body] must still be traversed even if [exp] will never return: there may be constant closures inside that need lifting out. *) begin match str, kind with - | (Immutable | Immutable_unique), _ -> Clet(id, cexp, transl env body) - | Mutable, Pintval -> Clet_mut(id, typ_int, cexp, transl env body) - | Mutable, _ -> Clet_mut(id, typ_val, cexp, transl env body) + | (Immutable | Immutable_unique), _ -> Clet(id, cexp, transl_body env) + | Mutable, Pintval -> Clet_mut(id, typ_int, cexp, transl_body env) + | Mutable, _ -> Clet_mut(id, typ_val, cexp, transl_body env) end | Boxed (boxed_number, false) -> let unboxed_id = V.create_local (VP.name id) in let v = VP.create unboxed_id in let cexp = unbox_number dbg boxed_number cexp in let body = - transl (add_unboxed_id (VP.var id) unboxed_id boxed_number env) body in + transl_body (add_unboxed_id (VP.var id) unboxed_id boxed_number env) in begin match str, boxed_number with | (Immutable | Immutable_unique), _ -> Clet (v, cexp, body) | Mutable, bn -> Clet_mut (v, typ_of_boxed_number bn, cexp, body) @@ -1243,6 +1243,9 @@ and transl_if env (approx : then_else) ifso_dbg arg2 then_dbg then_ else_dbg else_ + | Ulet(str, kind, id, exp, cond) -> + transl_let env str kind id exp (fun env -> + transl_if env approx dbg cond then_dbg then_ else_dbg else_) | Uprim (Psequand, [arg1; arg2], inner_dbg) -> transl_sequand env approx inner_dbg arg1 diff --git a/asmcomp/deadcode.ml b/asmcomp/deadcode.ml index 887580fa74..28fe153fac 100644 --- a/asmcomp/deadcode.ml +++ b/asmcomp/deadcode.ml @@ -43,7 +43,7 @@ let rec deadcode i = { i; regs; exits = Int.Set.empty; } | Iop op -> let s = deadcode i.next in - if Proc.op_is_pure op (* no side effects *) + if operation_is_pure op (* no side effects *) && Reg.disjoint_set_array s.regs i.res (* results are not used after *) && not (Proc.regs_are_volatile i.arg) (* no stack-like hard reg *) && not (Proc.regs_are_volatile i.res) (* is involved *) diff --git a/asmcomp/emitaux.ml b/asmcomp/emitaux.ml index 2e4664e879..c0f7f8053d 100644 --- a/asmcomp/emitaux.ml +++ b/asmcomp/emitaux.ml @@ -15,6 +15,11 @@ (* Common functions for emitting assembly code *) +type error = + | Stack_frame_too_large of int + +exception Error of error + let output_channel = ref stdout let emit_string s = output_string !output_channel s @@ -178,6 +183,12 @@ let emit_frames a = Label_table.add debuginfos key lbl; lbl in + let efa_16_checked n = + assert (n >= 0); + if n < 0x1_0000 + then a.efa_16 n + else raise (Error(Stack_frame_too_large n)) + in let emit_frame fd = assert (fd.fd_frame_size land 3 = 0); let flags = @@ -191,9 +202,9 @@ let emit_frames a = then 3 else 2 in a.efa_code_label fd.fd_lbl; - a.efa_16 (fd.fd_frame_size + flags); - a.efa_16 (List.length fd.fd_live_offset); - List.iter a.efa_16 fd.fd_live_offset; + efa_16_checked (fd.fd_frame_size + flags); + efa_16_checked (List.length fd.fd_live_offset); + List.iter efa_16_checked fd.fd_live_offset; begin match fd.fd_debuginfo with | _ when flags = 0 -> () @@ -370,3 +381,7 @@ let reset () = let binary_backend_available = ref false let create_asm_file = ref true + +let report_error ppf = function + | Stack_frame_too_large n -> + Format.fprintf ppf "stack frame too large (%d bytes)" n diff --git a/asmcomp/emitaux.mli b/asmcomp/emitaux.mli index 2b4867d0b8..ad27a95e2d 100644 --- a/asmcomp/emitaux.mli +++ b/asmcomp/emitaux.mli @@ -71,7 +71,6 @@ val cfi_endproc : unit -> unit val cfi_adjust_cfa_offset : int -> unit val cfi_offset : reg:int -> offset:int -> unit - val binary_backend_available: bool ref (** Is a binary backend available. If yes, we don't need to generate the textual assembly file (unless the user @@ -79,3 +78,9 @@ val binary_backend_available: bool ref val create_asm_file: bool ref (** Are we actually generating the textual assembly file? *) + +type error = + | Stack_frame_too_large of int + +exception Error of error +val report_error: Format.formatter -> error -> unit diff --git a/asmcomp/i386/CSE.ml b/asmcomp/i386/CSE.ml index bdab0f59bb..4aba4db698 100644 --- a/asmcomp/i386/CSE.ml +++ b/asmcomp/i386/CSE.ml @@ -29,7 +29,7 @@ method! class_of_operation op = (* Operations that affect the floating-point stack cannot be factored *) | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf | Iintoffloat | Ifloatofint - | Iload((Single | Double), _) -> Op_other + | Iload((Single | Double), _, _) -> Op_other (* Specific ops *) | Ispecific(Ilea _) -> Op_pure | Ispecific(Istore_int(_, _, is_asg)) -> Op_store is_asg diff --git a/asmcomp/i386/arch.ml b/asmcomp/i386/arch.ml index 17876c46f1..21057b36a0 100644 --- a/asmcomp/i386/arch.ml +++ b/asmcomp/i386/arch.ml @@ -162,3 +162,15 @@ let stack_alignment = | "win32" -> 4 (* MSVC *) | _ -> 16 (* PR#6038: GCC and Clang seem to require 16-byte alignment nowadays *) + +(* Specific operations that are pure *) + +let operation_is_pure = function + | Ilea _ -> true + | _ -> false +(* x87 floating-point operations are not pure because they push and pop + on the FP stack as a side effect *) + +(* Specific operations that can raise *) + +let operation_can_raise _ = false diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp index 0bacc0e727..b16d36d41d 100644 --- a/asmcomp/i386/emit.mlp +++ b/asmcomp/i386/emit.mlp @@ -589,7 +589,7 @@ let emit_instr fallthrough i = else I.sub (int n) esp; cfi_adjust_cfa_offset n; stack_offset := !stack_offset + n - | Lop(Iload(chunk, addr)) -> + | Lop(Iload(chunk, addr, _mut)) -> let dest = i.res.(0) in begin match chunk with | Word_int | Word_val | Thirtytwo_signed | Thirtytwo_unsigned -> diff --git a/asmcomp/i386/proc.ml b/asmcomp/i386/proc.ml index 310012f25a..18a7ad12ab 100644 --- a/asmcomp/i386/proc.ml +++ b/asmcomp/i386/proc.ml @@ -225,18 +225,6 @@ let max_register_pressure = function Iintoffloat -> [| 6; max_int |] | _ -> [|7; max_int |] -(* Pure operations (without any side effect besides updating their result - registers). *) - -let op_is_pure = function - | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ - | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ - | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) | Iopaque -> false - | Ispecific(Ilea _) -> true - | Ispecific _ -> false - | Ibeginregion | Iendregion -> false - | _ -> true - (* Layout of the stack frame *) let frame_required fd = diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml index be8608f883..c1be7b2832 100644 --- a/asmcomp/i386/selection.ml +++ b/asmcomp/i386/selection.ml @@ -133,7 +133,7 @@ let pseudoregs_for_operation op arg res = (* For floating-point operations and floating-point loads, the result is always left at the top of the floating-point stack *) | Iconst_float _ | Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf - | Ifloatofint | Iload((Single | Double ), _) + | Ifloatofint | Iload((Single | Double ), _, _) | Ispecific(Isubfrev | Idivfrev | Ifloatarithmem _ | Ifloatspecial _) -> (arg, [| tos |], false) (* don't move it immediately *) (* For storing a byte, the argument must be in eax...edx. diff --git a/asmcomp/liveness.ml b/asmcomp/liveness.ml index 926df45603..b7f549bd7e 100644 --- a/asmcomp/liveness.ml +++ b/asmcomp/liveness.ml @@ -44,7 +44,7 @@ let rec live i finally = Reg.set_of_array i.arg | Iop op -> let after = live i.next finally in - if Proc.op_is_pure op (* no side effects *) + if operation_is_pure op (* no side effects *) && Reg.disjoint_set_array after i.res (* results are not used after *) && not (Proc.regs_are_volatile i.arg) (* no stack-like hard reg *) && not (Proc.regs_are_volatile i.res) (* is involved *) @@ -55,19 +55,14 @@ let rec live i finally = end else begin let across_after = Reg.diff_set_array after i.res in let across = - match op with - | Icall_ind | Icall_imm _ | Iextcall _ | Ialloc _ - | Iprobe _ - | Iintop (Icheckbound) | Iintop_imm(Icheckbound, _) -> - (* The function call may raise an exception, branching to the - nearest enclosing try ... with. Similarly for bounds checks, - probes and allocation (for the latter: finalizers may throw - exceptions, as may signal handlers). - Hence, everything that must be live at the beginning of - the exception handler must also be live across this instr. *) - Reg.Set.union across_after !live_at_raise - | _ -> - across_after in + (* Operations that can raise an exception (function calls, + bounds checks, allocations) can branch to the + nearest enclosing try ... with. + Hence, everything that must be live at the beginning of + the exception handler must also be live across this instr. *) + if operation_can_raise op + then Reg.Set.union across_after !live_at_raise + else across_after in i.live <- across; Reg.add_set_array across i.arg end diff --git a/asmcomp/mach.ml b/asmcomp/mach.ml index f2d7b5840a..92a0726eb5 100644 --- a/asmcomp/mach.ml +++ b/asmcomp/mach.ml @@ -51,7 +51,7 @@ type operation = ty_res : Cmm.machtype; ty_args : Cmm.exttype list; alloc : bool; } | Istackoffset of int - | Iload of Cmm.memory_chunk * Arch.addressing_mode + | Iload of Cmm.memory_chunk * Arch.addressing_mode * Asttypes.mutable_flag | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool | Ialloc of { bytes : int; dbginfo : Debuginfo.alloc_dbginfo; mode : Lambda.alloc_mode } @@ -160,10 +160,21 @@ let rec instr_iter f i = | _ -> instr_iter f i.next +let operation_is_pure = function + | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ + | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ + | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) | Iopaque -> false + | Ibeginregion | Iendregion -> false + | Iprobe _ -> false + | Iprobe_is_enabled _-> true + | Ispecific sop -> Arch.operation_is_pure sop + | _ -> true + let operation_can_raise op = match op with | Icall_ind | Icall_imm _ | Iextcall _ | Iintop (Icheckbound) | Iintop_imm (Icheckbound, _) | Iprobe _ | Ialloc _ -> true + | Ispecific sop -> Arch.operation_can_raise sop | _ -> false diff --git a/asmcomp/mach.mli b/asmcomp/mach.mli index 8d14173c11..89c0b164f0 100644 --- a/asmcomp/mach.mli +++ b/asmcomp/mach.mli @@ -51,7 +51,7 @@ type operation = ty_res : Cmm.machtype; ty_args : Cmm.exttype list; alloc : bool; } | Istackoffset of int - | Iload of Cmm.memory_chunk * Arch.addressing_mode + | Iload of Cmm.memory_chunk * Arch.addressing_mode * Asttypes.mutable_flag | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool (* false = initialization, true = assignment *) | Ialloc of { bytes : int; dbginfo : Debuginfo.alloc_dbginfo; @@ -116,4 +116,11 @@ val instr_cons_debug: instruction -> instruction val instr_iter: (instruction -> unit) -> instruction -> unit +val operation_is_pure : operation -> bool + (** Returns [true] if the given operation only produces a result + in its destination registers, but has no side effects whatsoever: + it doesn't raise exceptions, it doesn't modify already-allocated + blocks, it doesn't adjust the stack frame, etc. *) + val operation_can_raise : operation -> bool + (** Returns [true] if the given operation can raise an exception. *) diff --git a/asmcomp/power/arch.ml b/asmcomp/power/arch.ml index 6f5898ed54..9cd74e4931 100644 --- a/asmcomp/power/arch.ml +++ b/asmcomp/power/arch.ml @@ -115,3 +115,11 @@ let print_specific_operation printreg op ppf arg = printreg arg.(0) printreg arg.(1) printreg arg.(2) | Ialloc_far { bytes; _ } -> fprintf ppf "alloc_far %d" bytes + +(* Specific operations that are pure *) + +let operation_is_pure _ = true + +(* Specific operations that can raise *) + +let operation_can_raise _ = false diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp index a9837b48ba..61389be277 100644 --- a/asmcomp/power/emit.mlp +++ b/asmcomp/power/emit.mlp @@ -486,7 +486,7 @@ module BR = Branch_relaxation.Make (struct size 3 (2 + tocload_size()) (2 + tocload_size()) | Lop(Iextcall { alloc = false; _}) -> size 1 2 2 | Lop(Istackoffset _) -> 1 - | Lop(Iload(chunk, addr)) -> + | Lop(Iload(chunk, addr, _mut)) -> if chunk = Byte_signed then load_store_size addr + 1 else load_store_size addr @@ -759,7 +759,7 @@ let emit_instr i = | Lop(Istackoffset n) -> ` addi 1, 1, {emit_int (-n)}\n`; adjust_stack_offset n - | Lop(Iload(chunk, addr)) -> + | Lop(Iload(chunk, addr, _mut)) -> let loadinstr = match chunk with | Byte_unsigned -> "lbz" diff --git a/asmcomp/power/proc.ml b/asmcomp/power/proc.ml index 1ad600ab7c..0ca32f5745 100644 --- a/asmcomp/power/proc.ml +++ b/asmcomp/power/proc.ml @@ -328,17 +328,6 @@ let max_register_pressure = function Iextcall _ -> [| 14; 18 |] | _ -> [| 23; 30 |] -(* Pure operations (without any side effect besides updating their result - registers). *) - -let op_is_pure = function - | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ - | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ - | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) | Iopaque -> false - | Ispecific(Imultaddf | Imultsubf) -> true - | Ispecific _ -> false - | _ -> true - (* Layout of the stack *) (* See [reserved_stack_space] in emit.mlp. *) diff --git a/asmcomp/power/scheduling.ml b/asmcomp/power/scheduling.ml index dcbfca79f0..8438a7d00f 100644 --- a/asmcomp/power/scheduling.ml +++ b/asmcomp/power/scheduling.ml @@ -26,7 +26,7 @@ inherit Schedgen.scheduler_generic method oper_latency = function Ireload -> 2 - | Iload(_, _) -> 2 + | Iload(_, _, _) -> 2 | Iconst_float _ -> 2 (* turned into a load *) | Iconst_symbol _ -> 1 | Iintop(Imul | Imulh) -> 9 @@ -46,7 +46,7 @@ method! reload_retaddr_latency = 12 method oper_issue_cycles = function Iconst_float _ | Iconst_symbol _ -> 2 - | Iload(_, Ibased(_, _)) -> 2 + | Iload(_, Ibased(_, _), _) -> 2 | Istore(_, Ibased(_, _), _) -> 2 | Ialloc _ -> 4 | Iintop(Imod) -> 40 (* assuming full stall *) diff --git a/asmcomp/printmach.ml b/asmcomp/printmach.ml index d3d5037725..fd7da21a69 100644 --- a/asmcomp/printmach.ml +++ b/asmcomp/printmach.ml @@ -124,9 +124,12 @@ let operation op arg ppf res = (if alloc then "" else " (noalloc)") | Istackoffset n -> fprintf ppf "offset stack %i" n - | Iload(chunk, addr) -> + | Iload(chunk, addr, Immutable) -> fprintf ppf "%s[%a]" (Printcmm.chunk chunk) (Arch.print_addressing reg addr) arg + | Iload(chunk, addr, Mutable) -> + fprintf ppf "%s mut[%a]" + (Printcmm.chunk chunk) (Arch.print_addressing reg addr) arg | Istore(chunk, addr, is_assign) -> fprintf ppf "%s[%a] := %a %s" (Printcmm.chunk chunk) diff --git a/asmcomp/proc.mli b/asmcomp/proc.mli index a92b1e9c91..c1692c069d 100644 --- a/asmcomp/proc.mli +++ b/asmcomp/proc.mli @@ -58,9 +58,6 @@ val destroyed_at_reloadretaddr : Reg.t array (* Volatile registers: those that change value when read *) val regs_are_volatile: Reg.t array -> bool -(* Pure operations *) -val op_is_pure: Mach.operation -> bool - (* Info for laying out the stack frame *) val frame_required : Mach.fundecl -> bool diff --git a/asmcomp/riscv/arch.ml b/asmcomp/riscv/arch.ml index 415c479258..3c4bb94331 100644 --- a/asmcomp/riscv/arch.ml +++ b/asmcomp/riscv/arch.ml @@ -82,3 +82,11 @@ let print_specific_operation printreg op ppf arg = | Imultsubf true -> fprintf ppf "-f (%a *f %a -f %a)" printreg arg.(0) printreg arg.(1) printreg arg.(2) + +(* Specific operations that are pure *) + +let operation_is_pure _ = true + +(* Specific operations that can raise *) + +let operation_can_raise _ = false diff --git a/asmcomp/riscv/emit.mlp b/asmcomp/riscv/emit.mlp index 20d55b0cf3..b1ddf87b79 100644 --- a/asmcomp/riscv/emit.mlp +++ b/asmcomp/riscv/emit.mlp @@ -338,10 +338,10 @@ let emit_instr i = assert (n mod 16 = 0); emit_stack_adjustment (-n); stack_offset := !stack_offset + n - | Lop(Iload(Single, Iindexed ofs)) -> + | Lop(Iload(Single, Iindexed ofs, _mut)) -> ` flw {emit_reg i.res.(0)}, {emit_int ofs}({emit_reg i.arg.(0)})\n`; ` fcvt.d.s {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` - | Lop(Iload(chunk, Iindexed ofs)) -> + | Lop(Iload(chunk, Iindexed ofs, _mut)) -> let instr = match chunk with | Byte_unsigned -> "lbu" diff --git a/asmcomp/riscv/proc.ml b/asmcomp/riscv/proc.ml index 89b960241b..1dfa663a60 100644 --- a/asmcomp/riscv/proc.ml +++ b/asmcomp/riscv/proc.ml @@ -274,16 +274,6 @@ let max_register_pressure = function | Iextcall _ -> [| 9; 12 |] | _ -> [| 23; 30 |] -(* Pure operations (without any side effect besides updating their result - registers). *) - -let op_is_pure = function - | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ - | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ - | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) -> false - | Ispecific(Imultaddf _ | Imultsubf _) -> true - | _ -> true - (* Layout of the stack *) let frame_required fd = diff --git a/asmcomp/s390x/arch.ml b/asmcomp/s390x/arch.ml index a6353fdf98..d755a6de8d 100644 --- a/asmcomp/s390x/arch.ml +++ b/asmcomp/s390x/arch.ml @@ -87,3 +87,11 @@ let print_specific_operation printreg op ppf arg = | Imultsubf -> fprintf ppf "%a *f %a -f %a" printreg arg.(0) printreg arg.(1) printreg arg.(2) + +(* Specific operations that are pure *) + +let operation_is_pure _ = true + +(* Specific operations that can raise *) + +let operation_can_raise _ = false diff --git a/asmcomp/s390x/emit.mlp b/asmcomp/s390x/emit.mlp index 2989c74c0b..8bf18ac566 100644 --- a/asmcomp/s390x/emit.mlp +++ b/asmcomp/s390x/emit.mlp @@ -395,7 +395,7 @@ let emit_instr i = emit_stack_adjust n; stack_offset := !stack_offset + n - | Lop(Iload(chunk, addr)) -> + | Lop(Iload(chunk, addr, _mut)) -> let loadinstr = match chunk with Byte_unsigned -> "llgc" diff --git a/asmcomp/s390x/proc.ml b/asmcomp/s390x/proc.ml index e928edae2e..89862dcc5f 100644 --- a/asmcomp/s390x/proc.ml +++ b/asmcomp/s390x/proc.ml @@ -219,16 +219,6 @@ let max_register_pressure = function Iextcall _ -> [| 4; 7 |] | _ -> [| 9; 15 |] -(* Pure operations (without any side effect besides updating their result - registers). *) - -let op_is_pure = function - | Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ - | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _ - | Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) | Iopaque -> false - | Ispecific(Imultaddf | Imultsubf) -> true - | _ -> true - (* Layout of the stack *) let frame_required fd = diff --git a/asmcomp/s390x/scheduling.ml b/asmcomp/s390x/scheduling.ml index a766d6a34f..cca0fb83bc 100644 --- a/asmcomp/s390x/scheduling.ml +++ b/asmcomp/s390x/scheduling.ml @@ -35,7 +35,7 @@ inherit Schedgen.scheduler_generic method oper_latency = function Ireload -> 4 - | Iload(_, _) -> 4 + | Iload(_, _, _) -> 4 | Iconst_float _ -> 4 (* turned into a load *) | Iintop(Imul) -> 10 | Iintop_imm(Imul, _) -> 10 diff --git a/asmcomp/schedgen.ml b/asmcomp/schedgen.ml index f33078250d..8125b73d9c 100644 --- a/asmcomp/schedgen.ml +++ b/asmcomp/schedgen.ml @@ -135,7 +135,7 @@ let rec remove_instr node = function (* We treat Lreloadretaddr as a word-sized load *) -let some_load = (Iload(Cmm.Word_int, Arch.identity_addressing)) +let some_load = (Iload(Cmm.Word_int, Arch.identity_addressing, Mutable)) (* The generic scheduler *) @@ -182,7 +182,7 @@ method is_store = function | _ -> false method is_load = function - Iload(_, _) -> true + Iload(_, _, _) -> true | _ -> false method is_checkbound = function diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml index a4c1562c1e..bcb7244b22 100644 --- a/asmcomp/selectgen.ml +++ b/asmcomp/selectgen.ml @@ -478,9 +478,9 @@ method select_operation op args _dbg = (Icall_ind, args) | (Cextcall(func, ty_res, ty_args, alloc), _) -> Iextcall { func; ty_res; ty_args; alloc; }, args - | (Cload (chunk, _mut), [arg]) -> + | (Cload (chunk, mut), [arg]) -> let (addr, eloc) = self#select_addressing chunk arg in - (Iload(chunk, addr), [eloc]) + (Iload(chunk, addr, mut), [eloc]) | (Cstore (chunk, init), [arg1; arg2]) -> let (addr, eloc) = self#select_addressing chunk arg1 in let is_assign = diff --git a/asmcomp/spill.ml b/asmcomp/spill.ml index 578b26aec6..bfba2650f8 100644 --- a/asmcomp/spill.ml +++ b/asmcomp/spill.ml @@ -301,17 +301,13 @@ let rec spill i finally = let before1 = Reg.diff_set_array after i.res in (instr_cons i.desc i.arg i.res new_next, Reg.add_set_array before1 i.res) - | Iop _ -> + | Iop op -> let (new_next, after) = spill i.next finally in let before1 = Reg.diff_set_array after i.res in let before = - match i.desc with - Iop(Icall_ind) | Iop(Icall_imm _) | Iop(Iextcall _) | Iop(Ialloc _) - | Iop (Iprobe _) - | Iop(Iintop (Icheckbound)) | Iop(Iintop_imm(Icheckbound, _)) -> - Reg.Set.union before1 !spill_at_raise - | _ -> - before1 in + if operation_can_raise op + then Reg.Set.union before1 !spill_at_raise + else before1 in (instr_cons_debug i.desc i.arg i.res i.dbg (add_spills (Reg.inter_set_array after i.res) new_next), before) diff --git a/configure b/configure index 15b0ab3d4c..628adf7fdc 100755 --- a/configure +++ b/configure @@ -710,6 +710,7 @@ cmm_invariants flambda_invariants flambda2 flambda +cpp_mangling frame_pointers profinfo_width profinfo @@ -839,6 +840,7 @@ enable_bigarray_lib enable_ocamldoc enable_ocamltest enable_frame_pointers +enable_cpp_mangling enable_naked_pointers enable_naked_pointers_checker enable_spacetime @@ -1511,6 +1513,7 @@ Optional Features: --disable-ocamldoc do not build the ocamldoc documentation system --disable-ocamltest do not build the ocamltest driver --enable-frame-pointers use frame pointers in runtime and generated code + --enable-cpp-mangling use cpp mangling for exported symbols --disable-naked-pointers do not allow naked pointers --enable-naked-pointers-checker @@ -2882,6 +2885,7 @@ VERSION=4.12.0 + ## Generated files @@ -3129,6 +3133,12 @@ if test "${enable_frame_pointers+set}" = set; then : fi +# Check whether --enable-cpp-mangling was given. +if test "${enable_cpp_mangling+set}" = set; then : + enableval=$enable_cpp_mangling; +fi + + # Check whether --enable-naked-pointers was given. if test "${enable_naked_pointers+set}" = set; then : enableval=$enable_naked_pointers; @@ -16817,6 +16827,16 @@ $as_echo "$as_me: not using frame pointers" >&6;} frame_pointers=false fi +## CPP mangling + +if test x"$enable_cpp_mangling" = "xyes"; then : + cpp_mangling=true + $as_echo "#define WITH_CPP_MANGLING 1" >>confdefs.h + +else + cpp_mangling=false +fi + ## No naked pointers if test x"$enable_naked_pointers" = "xno" ; then : diff --git a/configure.ac b/configure.ac index 914d6069a7..0cebbc8a26 100644 --- a/configure.ac +++ b/configure.ac @@ -152,6 +152,7 @@ AC_SUBST([install_source_artifacts]) AC_SUBST([profinfo]) AC_SUBST([profinfo_width]) AC_SUBST([frame_pointers]) +AC_SUBST([cpp_mangling]) AC_SUBST([flambda]) AC_SUBST([flambda2]) AC_SUBST([flambda_invariants]) @@ -278,6 +279,10 @@ AC_ARG_ENABLE([frame-pointers], [AS_HELP_STRING([--enable-frame-pointers], [use frame pointers in runtime and generated code])]) +AC_ARG_ENABLE([cpp-mangling], + [AS_HELP_STRING([--enable-cpp-mangling], + [use cpp mangling for exported symbols])]) + AC_ARG_ENABLE([naked-pointers], [AS_HELP_STRING([--disable-naked-pointers], [do not allow naked pointers])]) @@ -1738,6 +1743,13 @@ AS_IF([test x"$enable_frame_pointers" = "xyes"], [AC_MSG_NOTICE([not using frame pointers]) frame_pointers=false]) +## CPP mangling + +AS_IF([test x"$enable_cpp_mangling" = "xyes"], + [cpp_mangling=true + AC_DEFINE([WITH_CPP_MANGLING])], + [cpp_mangling=false]) + ## No naked pointers AS_IF([test x"$enable_naked_pointers" = "xno" ], diff --git a/driver/maindriver.ml b/driver/maindriver.ml index 4def9fef8f..1d1f0af82d 100644 --- a/driver/maindriver.ml +++ b/driver/maindriver.ml @@ -63,7 +63,8 @@ let main argv ppf = are incompatible with -pack, -a, -output-obj" (String.concat "|" (P.available_pass_names ~filter:(fun _ -> true) ~native:false)) - | Some (P.Scheduling | P.Simplify_cfg | P.Emit) -> assert false (* native only *) + | Some (P.Scheduling | P.Simplify_cfg | P.Emit | P.Selection) -> + assert false (* native only *) end; if !make_archive then begin Compmisc.init_path (); diff --git a/driver/optmaindriver.ml b/driver/optmaindriver.ml index c66774515e..d176b1418d 100644 --- a/driver/optmaindriver.ml +++ b/driver/optmaindriver.ml @@ -76,7 +76,7 @@ let main argv ppf = Compenv.fatal "Please specify at most one of -pack, -a, -shared, -c, \ -output-obj"; | Some ((P.Parsing | P.Typing | P.Scheduling - | P.Simplify_cfg | P.Emit) as p) -> + | P.Simplify_cfg | P.Emit | P.Selection) as p) -> assert (P.is_compilation_pass p); Printf.ksprintf Compenv.fatal "Options -i and -stop-after (%s) \ diff --git a/runtime/caml/signals.h b/runtime/caml/signals.h index 3ff152c269..285dbd7feb 100644 --- a/runtime/caml/signals.h +++ b/runtime/caml/signals.h @@ -87,7 +87,7 @@ value caml_do_pending_actions_exn (void); value caml_process_pending_actions_with_root (value extra_root); // raises value caml_process_pending_actions_with_root_exn (value extra_root); int caml_set_signal_action(int signo, int action); -CAMLextern void caml_setup_stack_overflow_detection(void); +CAMLextern int caml_setup_stack_overflow_detection(void); CAMLextern void (*caml_enter_blocking_section_hook)(void); CAMLextern void (*caml_leave_blocking_section_hook)(void); diff --git a/runtime/signals_byt.c b/runtime/signals_byt.c index 2183142da1..38eb5e3a47 100644 --- a/runtime/signals_byt.c +++ b/runtime/signals_byt.c @@ -81,4 +81,4 @@ int caml_set_signal_action(int signo, int action) return 0; } -CAMLexport void caml_setup_stack_overflow_detection(void) {} +CAMLexport int caml_setup_stack_overflow_detection(void) { return 0; } diff --git a/runtime/signals_nat.c b/runtime/signals_nat.c index 1be1b45d42..ca86956783 100644 --- a/runtime/signals_nat.c +++ b/runtime/signals_nat.c @@ -174,8 +174,6 @@ DECLARE_SIGNAL_HANDLER(trap_handler) #error "CONTEXT_SP is required if HAS_STACK_OVERFLOW_DETECTION is defined" #endif -static char sig_alt_stack[SIGSTKSZ]; - /* Code compiled with ocamlopt never accesses more than EXTRA_STACK bytes below the stack pointer. */ #define EXTRA_STACK 256 @@ -269,28 +267,33 @@ void caml_init_signals(void) #endif #ifdef HAS_STACK_OVERFLOW_DETECTION - { - stack_t stk; + if (caml_setup_stack_overflow_detection() != -1) { struct sigaction act; - stk.ss_sp = sig_alt_stack; - stk.ss_size = SIGSTKSZ; - stk.ss_flags = 0; SET_SIGACT(act, segv_handler); act.sa_flags |= SA_ONSTACK | SA_NODEFER; sigemptyset(&act.sa_mask); - if (sigaltstack(&stk, NULL) == 0) { sigaction(SIGSEGV, &act, NULL); } + sigaction(SIGSEGV, &act, NULL); } #endif } -CAMLexport void caml_setup_stack_overflow_detection(void) +/* Allocate and select an alternate stack for handling signals, + especially SIGSEGV signals. + Each thread needs its own alternate stack. + The alternate stack used to be statically-allocated for the main thread, + but this is incompatible with Glibc 2.34 and newer, where SIGSTKSZ + may not be a compile-time constant (issue #10250). */ + +CAMLexport int caml_setup_stack_overflow_detection(void) { #ifdef HAS_STACK_OVERFLOW_DETECTION stack_t stk; stk.ss_sp = malloc(SIGSTKSZ); + if (stk.ss_sp == NULL) return -1; stk.ss_size = SIGSTKSZ; stk.ss_flags = 0; - if (stk.ss_sp) - sigaltstack(&stk, NULL); + return sigaltstack(&stk, NULL); +#else + return 0; #endif } diff --git a/testsuite/tests/asmcomp/try_checkbound.ml b/testsuite/tests/asmcomp/try_checkbound.ml new file mode 100644 index 0000000000..8dd980ce15 --- /dev/null +++ b/testsuite/tests/asmcomp/try_checkbound.ml @@ -0,0 +1,12 @@ +(* TEST *) + +(* See PR#10339 *) + +let access (a: string array) n = + try + ignore (a.(n)); -1 + with _ -> + n + +let _ = + assert (access [||] 1 = 1) diff --git a/testsuite/tests/tool-ocamlopt-save-ir/save_ir_after_typing.compilers.reference b/testsuite/tests/tool-ocamlopt-save-ir/save_ir_after_typing.compilers.reference index a562e51d3e..ed417eb204 100644 --- a/testsuite/tests/tool-ocamlopt-save-ir/save_ir_after_typing.compilers.reference +++ b/testsuite/tests/tool-ocamlopt-save-ir/save_ir_after_typing.compilers.reference @@ -1 +1 @@ -wrong argument 'typing'; option '-save-ir-after' expects one of: scheduling simplify_cfg. +wrong argument 'typing'; option '-save-ir-after' expects one of: scheduling simplify_cfg selection. diff --git a/utils/Makefile b/utils/Makefile index 3e268dd315..01a09c146d 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -86,6 +86,7 @@ config.ml: config.mlp $(ROOTDIR)/Makefile.config Makefile $(call SUBST,SYSTHREAD_SUPPORT) \ $(call SUBST,TARGET) \ $(call SUBST,WITH_FRAME_POINTERS) \ + $(call SUBST,WITH_CPP_MANGLING) \ $(call SUBST,WITH_PROFINFO) \ $(call SUBST,FLAT_FLOAT_ARRAY) \ $(call SUBST,FUNCTION_SECTIONS) \ diff --git a/utils/clflags.ml b/utils/clflags.ml index 42ab66a20e..a27dfcda1e 100644 --- a/utils/clflags.ml +++ b/utils/clflags.ml @@ -546,7 +546,7 @@ module Compiler_pass = struct - the manpages in man/ocaml{c,opt}.m - the manual manual/manual/cmds/unified-options.etex *) - type t = Parsing | Typing | Scheduling | Emit | Simplify_cfg + type t = Parsing | Typing | Scheduling | Emit | Simplify_cfg | Selection let to_string = function | Parsing -> "parsing" @@ -554,6 +554,7 @@ module Compiler_pass = struct | Scheduling -> "scheduling" | Emit -> "emit" | Simplify_cfg -> "simplify_cfg" + | Selection -> "selection" let of_string = function | "parsing" -> Some Parsing @@ -561,11 +562,13 @@ module Compiler_pass = struct | "scheduling" -> Some Scheduling | "emit" -> Some Emit | "simplify_cfg" -> Some Simplify_cfg + | "selection" -> Some Selection | _ -> None let rank = function | Parsing -> 0 | Typing -> 1 + | Selection -> 20 | Simplify_cfg -> 49 | Scheduling -> 50 | Emit -> 60 @@ -576,19 +579,22 @@ module Compiler_pass = struct Scheduling; Emit; Simplify_cfg; + Selection; ] let is_compilation_pass _ = true let is_native_only = function | Scheduling -> true | Emit -> true | Simplify_cfg -> true + | Selection -> true | Parsing | Typing -> false let enabled is_native t = not (is_native_only t) || is_native let can_save_ir_after = function | Scheduling -> true | Simplify_cfg -> true - | _ -> false + | Selection -> true + | Parsing | Typing | Emit -> false let available_pass_names ~filter ~native = passes @@ -603,6 +609,7 @@ module Compiler_pass = struct match t with | Scheduling -> prefix ^ Compiler_ir.(extension Linear) | Simplify_cfg -> prefix ^ Compiler_ir.(extension Cfg) + | Selection -> prefix ^ Compiler_ir.(extension Cfg) ^ "-sel" | Emit | Parsing | Typing -> Misc.fatal_error "Not supported" let of_input_filename name = diff --git a/utils/clflags.mli b/utils/clflags.mli index a346986872..2ba3e21a53 100644 --- a/utils/clflags.mli +++ b/utils/clflags.mli @@ -254,7 +254,7 @@ module Compiler_ir : sig end module Compiler_pass : sig - type t = Parsing | Typing | Scheduling | Emit | Simplify_cfg + type t = Parsing | Typing | Scheduling | Emit | Simplify_cfg | Selection val of_string : string -> t option val to_string : t -> string val is_compilation_pass : t -> bool diff --git a/utils/config.mli b/utils/config.mli index b3f38ae39e..a2c5967e85 100644 --- a/utils/config.mli +++ b/utils/config.mli @@ -164,6 +164,9 @@ val asm_cfi_supported: bool val with_frame_pointers : bool (** Whether assembler should maintain frame pointers *) +val with_cpp_mangling : bool +(** Whether symbol names should be following the cpp mangling convention *) + val ext_obj: string (** Extension for object files, e.g. [.o] under Unix. *) diff --git a/utils/config.mlp b/utils/config.mlp index 96c92b0402..83fe13dafb 100644 --- a/utils/config.mlp +++ b/utils/config.mlp @@ -135,6 +135,7 @@ let system = "%%SYSTEM%%" let asm = "%%ASM%%" let asm_cfi_supported = %%ASM_CFI_SUPPORTED%% let with_frame_pointers = %%WITH_FRAME_POINTERS%% +let with_cpp_mangling = %%WITH_CPP_MANGLING%% let profinfo = %%WITH_PROFINFO%% let profinfo_width = %%PROFINFO_WIDTH%% @@ -190,6 +191,7 @@ let configuration_variables = p "asm" asm; p_bool "asm_cfi_supported" asm_cfi_supported; p_bool "with_frame_pointers" with_frame_pointers; + p_bool "with_cpp_mangling" with_cpp_mangling; p "ext_exe" ext_exe; p "ext_obj" ext_obj; p "ext_asm" ext_asm; diff --git a/utils/strongly_connected_components.ml b/utils/strongly_connected_components.ml index a11f6987f4..52d4666cc1 100644 --- a/utils/strongly_connected_components.ml +++ b/utils/strongly_connected_components.ml @@ -107,8 +107,52 @@ end = struct } end +module type Id = sig + type t + + module Set : sig + type elt = t + + type t + + val empty : t + + val add : elt -> t -> t + + val elements : t -> elt list + + val iter : (elt -> unit) -> t -> unit + + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + end + + module Map : sig + type key = t + + type 'a t + + val empty : _ t + + val add : key -> 'a -> 'a t -> 'a t + + val cardinal : _ t -> int + + val bindings : 'a t -> (key * 'a) list + + val find : key -> 'a t -> 'a + + val iter : (key -> 'a -> unit) -> 'a t -> unit + + val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b + + val mem : key -> 'a t -> bool + end + + val print : Format.formatter -> t -> unit +end + module type S = sig - module Id : Identifiable.S + module Id : Id type directed_graph = Id.Set.t Id.Map.t @@ -123,7 +167,7 @@ module type S = sig val component_graph : directed_graph -> (component * int list) array end -module Make (Id : Identifiable.S) = struct +module Make (Id : Id) = struct type directed_graph = Id.Set.t Id.Map.t type component = @@ -176,6 +220,11 @@ module Make (Id : Identifiable.S) = struct in { back; forth }, integer_graph + let rec int_list_mem x xs = + match xs with + | [] -> false + | x' :: xs -> if Int.equal x x' then true else int_list_mem x xs + let component_graph graph = let numbering, integer_graph = number graph in let { Kosaraju. sorted_connected_components; @@ -186,7 +235,7 @@ module Make (Id : Identifiable.S) = struct match nodes with | [] -> assert false | [node] -> - (if List.mem node integer_graph.(node) + (if int_list_mem node integer_graph.(node) then Has_loop [numbering.forth.(node)] else No_loop numbering.forth.(node)), component_edges.(component) diff --git a/utils/strongly_connected_components.mli b/utils/strongly_connected_components.mli index e700952792..ecbae3df0b 100644 --- a/utils/strongly_connected_components.mli +++ b/utils/strongly_connected_components.mli @@ -21,8 +21,52 @@ *) +module type Id = sig + type t + + module Set : sig + type elt = t + + type t + + val empty : t + + val add : elt -> t -> t + + val elements : t -> elt list + + val iter : (elt -> unit) -> t -> unit + + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + end + + module Map : sig + type key = t + + type 'a t + + val empty : _ t + + val add : key -> 'a -> 'a t -> 'a t + + val cardinal : _ t -> int + + val bindings : 'a t -> (key * 'a) list + + val find : key -> 'a t -> 'a + + val iter : (key -> 'a -> unit) -> 'a t -> unit + + val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b + + val mem : key -> 'a t -> bool + end + + val print : Format.formatter -> t -> unit +end + module type S = sig - module Id : Identifiable.S + module Id : Id type directed_graph = Id.Set.t Id.Map.t (** If (a -> set) belongs to the map, it means that there are edges @@ -40,4 +84,4 @@ module type S = sig val component_graph : directed_graph -> (component * int list) array end -module Make (Id : Identifiable.S) : S with module Id := Id +module Make (Id : Id) : S with module Id := Id diff --git a/utils/target_system.ml b/utils/target_system.ml index d9afee0c87..1ac62d8b35 100644 --- a/utils/target_system.ml +++ b/utils/target_system.ml @@ -20,6 +20,18 @@ let architecture () : architecture = | "riscv" -> Riscv | arch -> Misc.fatal_errorf "Unknown architecture `%s'" arch +let is_64_bit = + match architecture () with + | X86_64 + | AArch64 + | POWER + | Z + | Riscv -> true + | IA32 + | ARM -> false + +let is_32_bit = not is_64_bit + type derived_system = | Linux | MinGW_32 diff --git a/utils/target_system.mli b/utils/target_system.mli index 458fded091..0bbbdefec4 100644 --- a/utils/target_system.mli +++ b/utils/target_system.mli @@ -9,6 +9,10 @@ type architecture = val architecture : unit -> architecture +val is_64_bit : bool + +val is_32_bit : bool + type derived_system = | Linux | MinGW_32