Skip to content

Commit

Permalink
Squashed 'ocaml/' changes from da6ff04d2..fe8a98b0c
Browse files Browse the repository at this point in the history
fe8a98b0c flambda-backend: Save Mach as Cfg after Selection (#624)
2b205d886 flambda-backend: Clean up algorithms (#611)
524f0b435 flambda-backend: Initial refactoring of To_cmm (#619)
0bf75de86 flambda-backend: Refactor and correct the "is pure" and "can raise" (port upstream PR#10354 and PR#10387) (#555)
d234bfdbe flambda-backend: Cpp mangling is now a configuration option (#614)
20fc614bf flambda-backend: Check that stack frames are not too large (#10085) (#561)
5fc2e9503 flambda-backend: Allow CSE of immutable loads across stores (port upstream PR#9562) (#562)
2a650deec flambda-backend: Backport commit fc9534746bf5d08a4c109f22e344cf49d5d46d54 from trunk (#584)
31651b87e flambda-backend: Improved ARM64 code generation (port upstream PR#9937) (#556)
f0b6d68e8 flambda-backend: Simplify processing and remove dead code (error paths) in asmlink (port upstream PR#9943) (#557)
90c674687 flambda-backend: Improve code-generation for inlined comparisons (port upstream PR#10228) (#563)

git-subtree-dir: ocaml
git-subtree-split: fe8a98b0cd38bf1872b8faf3b93542caebabad18
  • Loading branch information
lpw25 committed May 19, 2022
1 parent d491210 commit 076ba4d
Show file tree
Hide file tree
Showing 65 changed files with 580 additions and 330 deletions.
11 changes: 10 additions & 1 deletion .depend
Original file line number Diff line number Diff line change
Expand Up @@ -2093,15 +2093,18 @@ asmcomp/CSEgen.cmo : \
asmcomp/proc.cmi \
asmcomp/mach.cmi \
asmcomp/cmm.cmi \
parsing/asttypes.cmi \
asmcomp/CSEgen.cmi
asmcomp/CSEgen.cmx : \
asmcomp/reg.cmx \
asmcomp/proc.cmx \
asmcomp/mach.cmx \
asmcomp/cmm.cmx \
parsing/asttypes.cmi \
asmcomp/CSEgen.cmi
asmcomp/CSEgen.cmi : \
asmcomp/mach.cmi
asmcomp/mach.cmi \
parsing/asttypes.cmi
asmcomp/afl_instrument.cmo : \
lambda/lambda.cmi \
asmcomp/cmm.cmi \
Expand Down Expand Up @@ -2146,6 +2149,7 @@ asmcomp/asmgen.cmo : \
asmcomp/linscan.cmi \
asmcomp/linearize.cmi \
file_formats/linear_format.cmi \
asmcomp/linear.cmi \
lambda/lambda.cmi \
asmcomp/interval.cmi \
asmcomp/interf.cmi \
Expand Down Expand Up @@ -2188,6 +2192,7 @@ asmcomp/asmgen.cmx : \
asmcomp/linscan.cmx \
asmcomp/linearize.cmx \
file_formats/linear_format.cmx \
asmcomp/linear.cmx \
lambda/lambda.cmx \
asmcomp/interval.cmx \
asmcomp/interf.cmx \
Expand All @@ -2211,6 +2216,7 @@ asmcomp/asmgen.cmx : \
asmcomp/asmgen.cmi
asmcomp/asmgen.cmi : \
lambda/lambda.cmi \
asmcomp/emitaux.cmi \
asmcomp/cmm.cmi \
middle_end/clambda.cmi \
middle_end/backend_intf.cmi
Expand Down Expand Up @@ -2719,6 +2725,7 @@ asmcomp/mach.cmo : \
lambda/debuginfo.cmi \
asmcomp/cmm.cmi \
middle_end/backend_var.cmi \
parsing/asttypes.cmi \
asmcomp/arch.cmo \
asmcomp/mach.cmi
asmcomp/mach.cmx : \
Expand All @@ -2729,6 +2736,7 @@ asmcomp/mach.cmx : \
lambda/debuginfo.cmx \
asmcomp/cmm.cmx \
middle_end/backend_var.cmx \
parsing/asttypes.cmi \
asmcomp/arch.cmx \
asmcomp/mach.cmi
asmcomp/mach.cmi : \
Expand All @@ -2738,6 +2746,7 @@ asmcomp/mach.cmi : \
lambda/debuginfo.cmi \
asmcomp/cmm.cmi \
middle_end/backend_var.cmi \
parsing/asttypes.cmi \
asmcomp/arch.cmo
asmcomp/printcmm.cmo : \
utils/targetint.cmi \
Expand Down
1 change: 1 addition & 0 deletions Makefile.config.in
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ WITH_OCAMLDOC=@ocamldoc@
WITH_OCAMLTEST=@ocamltest@
ASM_CFI_SUPPORTED=@asm_cfi_supported@
WITH_FRAME_POINTERS=@frame_pointers@
WITH_CPP_MANGLING=@cpp_mangling@
WITH_PROFINFO=@profinfo@
PROFINFO_WIDTH=@profinfo_width@
WITH_FPIC=@fpic@
Expand Down
37 changes: 19 additions & 18 deletions asmcomp/CSEgen.ml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ type valnum = int
type op_class =
| Op_pure (* pure arithmetic, produce one or several result *)
| Op_checkbound (* checkbound-style: no result, can raise an exn *)
| Op_load (* memory load *)
| Op_load of Asttypes.mutable_flag (* memory load *)
| Op_store of bool (* memory store, false = init, true = assign *)
| Op_other (* anything else that does not allocate nor store in memory *)

Expand All @@ -40,29 +40,30 @@ module Equations = struct
Map.Make(struct type t = rhs let compare = Stdlib.compare end)

type 'a t =
{ load_equations : 'a Rhs_map.t;
{ mutable_load_equations : 'a Rhs_map.t;
other_equations : 'a Rhs_map.t }

let empty =
{ load_equations = Rhs_map.empty;
{ mutable_load_equations = Rhs_map.empty;
other_equations = Rhs_map.empty }

let add op_class op v m =
match op_class with
| Op_load ->
{ m with load_equations = Rhs_map.add op v m.load_equations }
| Op_load Mutable ->
{ m with mutable_load_equations =
Rhs_map.add op v m.mutable_load_equations }
| _ ->
{ m with other_equations = Rhs_map.add op v m.other_equations }

let find op_class op m =
match op_class with
| Op_load ->
Rhs_map.find op m.load_equations
| Op_load Mutable ->
Rhs_map.find op m.mutable_load_equations
| _ ->
Rhs_map.find op m.other_equations

let remove_loads m =
{ load_equations = Rhs_map.empty;
let remove_mutable_loads m =
{ mutable_load_equations = Rhs_map.empty;
other_equations = m.other_equations }
end

Expand Down Expand Up @@ -190,8 +191,8 @@ let set_unknown_regs n rs =

(* Keep only the equations satisfying the given predicate. *)

let remove_load_numbering n =
{ n with num_eqs = Equations.remove_loads n.num_eqs }
let remove_mutable_load_numbering n =
{ n with num_eqs = Equations.remove_mutable_loads n.num_eqs }

(* Forget everything we know about registers of type [Addr]. *)

Expand Down Expand Up @@ -225,7 +226,7 @@ method class_of_operation op =
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
| Iextcall _ | Iprobe _ | Iopaque -> assert false (* treated specially *)
| Istackoffset _ -> Op_other
| Iload(_,_) -> Op_load
| Iload(_,_,mut) -> Op_load mut
| Istore(_,_,asg) -> Op_store asg
| Ialloc _ -> assert false (* treated specially *)
| Iintop(Icheckbound) -> Op_checkbound
Expand All @@ -246,11 +247,11 @@ method is_cheap_operation op =
| Iconst_int _ -> true
| _ -> false

(* Forget all equations involving memory loads. Performed after a
non-initializing store *)
(* Forget all equations involving mutable memory loads.
Performed after a non-initializing store *)

method private kill_loads n =
remove_load_numbering n
remove_mutable_load_numbering n

(* Perform CSE on the given instruction [i] and its successors.
[n] is the value numbering current at the beginning of [i]. *)
Expand Down Expand Up @@ -292,13 +293,13 @@ method private cse n i =
Moreover, allocation can trigger the asynchronous execution
of arbitrary Caml code (finalizer, signal handler, context
switch), which can contain non-initializing stores.
Hence, all equations over loads must be removed. *)
Hence, all equations over mutable loads must be removed. *)
let n1 = kill_addr_regs (self#kill_loads n) in
let n2 = set_unknown_regs n1 i.res in
{i with next = self#cse n2 i.next}
| Iop op ->
begin match self#class_of_operation op with
| (Op_pure | Op_checkbound | Op_load) as op_class ->
| (Op_pure | Op_checkbound | Op_load _) as op_class ->
let (n1, varg) = valnum_regs n i.arg in
let n2 = set_unknown_regs n1 (Proc.destroyed_at_oper i.desc) in
begin match find_equation op_class n1 (op, varg) with
Expand Down Expand Up @@ -336,7 +337,7 @@ method private cse n i =
{i with next = self#cse n2 i.next}
| Op_store true ->
(* A non-initializing store can invalidate
anything we know about prior loads. *)
anything we know about prior mutable loads. *)
let n1 = set_unknown_regs n (Proc.destroyed_at_oper i.desc) in
let n2 = set_unknown_regs n1 i.res in
let n3 = self#kill_loads n2 in
Expand Down
2 changes: 1 addition & 1 deletion asmcomp/CSEgen.mli
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
type op_class =
| Op_pure (* pure, produce one result *)
| Op_checkbound (* checkbound-style: no result, can raise an exn *)
| Op_load (* memory load *)
| Op_load of Asttypes.mutable_flag (* memory load *)
| Op_store of bool (* memory store, false = init, true = assign *)
| Op_other (* anything else that does not allocate nor store in memory *)

Expand Down
2 changes: 1 addition & 1 deletion asmcomp/amd64/CSE.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ method! class_of_operation op =
| Ilea _ | Isextend32 | Izextend32 -> Op_pure
| Istore_int(_, _, is_asg) -> Op_store is_asg
| Ioffset_loc(_, _) -> Op_store true
| Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load
| Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load Mutable
| Ibswap _ | Isqrtf -> super#class_of_operation op
end
| _ -> super#class_of_operation op
Expand Down
13 changes: 13 additions & 0 deletions asmcomp/amd64/arch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,20 @@ let print_specific_operation printreg op ppf arg =
| Izextend32 ->
fprintf ppf "zextend32 %a" printreg arg.(0)

(* Are we using the Windows 64-bit ABI?
   [win64] is [true] exactly when [Config.system] is one of the
   system names listed below, and [false] for any other system. *)

let win64 =
  List.mem Config.system ["win64"; "mingw64"; "cygwin"]

(* Specific operations that are pure.
   [operation_is_pure op] is [true] for the address, byte-swap,
   square-root and extension operations, and also for the
   [Ifloatarithmem] / [Ifloatsqrtf] operations; every other specific
   operation is reported as not pure. *)

let operation_is_pure op =
  match op with
  | Ilea _ | Ibswap _ | Isqrtf | Isextend32 | Izextend32
  | Ifloatarithmem _ | Ifloatsqrtf _ -> true
  | _ -> false

(* Specific operations that can raise.
   On this target the answer is [false] whatever the operation is,
   so the argument is ignored. *)

let operation_can_raise _op = false
2 changes: 1 addition & 1 deletion asmcomp/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,7 @@ let emit_instr fallthrough i =
end
| Lop(Istackoffset n) ->
emit_stack_offset n
| Lop(Iload(chunk, addr)) ->
| Lop(Iload(chunk, addr, _mut)) ->
let dest = res i 0 in
begin match chunk with
| Word_int | Word_val ->
Expand Down
13 changes: 0 additions & 13 deletions asmcomp/amd64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -360,19 +360,6 @@ let max_register_pressure = function
if fp then [| 12; 15 |] else [| 13; 15 |]
| _ -> if fp then [| 12; 16 |] else [| 13; 16 |]

(* Pure operations (without any side effect besides updating their result
registers). *)

let op_is_pure = function
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
| Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
| Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) | Iopaque -> false
| Ispecific(Ilea _|Isextend32|Izextend32) -> true
| Ispecific _ -> false
| Iprobe _ | Iprobe_is_enabled _-> false
| Ibeginregion | Iendregion -> false
| _ -> true

(* Layout of the stack frame *)

let frame_required fd =
Expand Down
12 changes: 12 additions & 0 deletions asmcomp/arm/arch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -262,3 +262,15 @@ let is_immediate n =
s := !s + 2
done;
!s <= m

(* Specific operations that are pure.
   [operation_is_pure op] is [false] only for [Ishiftcheckbound];
   all other specific operations are reported as pure. *)

let operation_is_pure op =
  match op with
  | Ishiftcheckbound _ -> false
  | _ -> true

(* Specific operations that can raise.
   [operation_can_raise op] is [true] only for [Ishiftcheckbound];
   all other specific operations are reported as non-raising. *)

let operation_can_raise op =
  match op with
  | Ishiftcheckbound _ -> true
  | _ -> false
6 changes: 3 additions & 3 deletions asmcomp/arm/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -580,10 +580,10 @@ let emit_instr i =
let ninstr = emit_stack_adjustment (-n) in
stack_offset := !stack_offset + n;
ninstr
| Lop(Iload(Single, addr)) when !fpu >= VFPv2 ->
| Lop(Iload(Single, addr, _mut)) when !fpu >= VFPv2 ->
` flds s14, {emit_addressing addr i.arg 0}\n`;
` fcvtds {emit_reg i.res.(0)}, s14\n`; 2
| Lop(Iload(Double, addr)) when !fpu = Soft ->
| Lop(Iload(Double, addr, _mut)) when !fpu = Soft ->
(* Use LDM or LDRD if possible *)
begin match i.res.(0), i.res.(1), addr with
{loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
Expand All @@ -602,7 +602,7 @@ let emit_instr i =
` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
end; 2
end
| Lop(Iload(size, addr)) ->
| Lop(Iload(size, addr, _mut)) ->
let r = i.res.(0) in
let instr =
match size with
Expand Down
15 changes: 3 additions & 12 deletions asmcomp/arm/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,8 @@ let destroyed_at_oper = function
| Iop(Iintop (Icomp _) | Iintop_imm(Icomp _, _))
when !arch >= ARMv8 && !thumb ->
[| phys_reg 3 |] (* r3 destroyed *)
| Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _, _)) ->
| Iop(Iintoffloat | Ifloatofint
| Iload(Single, _, _) | Istore(Single, _, _)) ->
[| phys_reg 107 |] (* d7 (s14-s15) destroyed *)
| _ -> [||]

Expand All @@ -335,20 +336,10 @@ let max_register_pressure = function
| Ialloc _ -> if abi = EABI then [| 7; 0; 0 |] else [| 7; 8; 8 |]
| Iconst_symbol _ when !Clflags.pic_code -> [| 7; 16; 32 |]
| Iintoffloat | Ifloatofint
| Iload(Single, _) | Istore(Single, _, _) -> [| 9; 15; 31 |]
| Iload(Single, _, _) | Istore(Single, _, _) -> [| 9; 15; 31 |]
| Iintop Imulh when !arch < ARMv6 -> [| 8; 16; 32 |]
| _ -> [| 9; 16; 32 |]

(* Pure operations (without any side effect besides updating their result
registers). *)

let op_is_pure = function
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
| Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
| Iintop(Icheckbound) | Iintop_imm(Icheckbound, _) | Iopaque
| Ispecific(Ishiftcheckbound _) -> false
| _ -> true

(* Layout of the stack *)

let frame_required fd =
Expand Down
2 changes: 1 addition & 1 deletion asmcomp/arm/scheduling.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ method oper_latency = function
(* Loads have a latency of two cycles in general *)
Iconst_symbol _
| Iconst_float _
| Iload(_, _)
| Iload(_, _, _)
| Ireload
| Ifloatofint (* mcr/mrc count as memory access *)
| Iintoffloat -> 2
Expand Down

0 comments on commit 076ba4d

Please sign in to comment.