Permalink
Browse files

Merge pull request #47 from ocamllabs/native

Native code backend for multicore
  • Loading branch information...
stedolan committed Jun 8, 2016
2 parents 31b4f3d + 185ad3a commit fc366191ff17fffa24aac34fad64c398d462af6d
Showing with 1,823 additions and 668 deletions.
  1. +0 −1 Makefile
  2. +1 −2 asmcomp/CSEgen.ml
  3. +113 −11 asmcomp/amd64/emit.mlp
  4. +7 −4 asmcomp/amd64/proc.ml
  5. +9 −0 asmcomp/closure.ml
  6. +0 −3 asmcomp/cmm.ml
  7. +0 −3 asmcomp/cmm.mli
  8. +0 −7 asmcomp/cmmgen.ml
  9. +5 −2 asmcomp/emitaux.ml
  10. +2 −0 asmcomp/linearize.ml
  11. +1 −0 asmcomp/linearize.mli
  12. +1 −5 asmcomp/mach.ml
  13. +1 −5 asmcomp/mach.mli
  14. +0 −3 asmcomp/printcmm.ml
  15. +1 −1 asmcomp/printlinear.ml
  16. +3 −7 asmcomp/printmach.ml
  17. +15 −0 asmcomp/reg.ml
  18. +1 −0 asmcomp/reg.mli
  19. +1 −0 asmcomp/schedgen.ml
  20. +5 −11 asmcomp/selectgen.ml
  21. +1 −1 asmcomp/spill.ml
  22. +551 −235 asmrun/amd64.S
  23. +17 −11 asmrun/backtrace.c
  24. +5 −2 asmrun/fail.c
  25. +12 −95 asmrun/frame_descriptors.c
  26. +5 −3 asmrun/frame_descriptors.h
  27. +7 −4 asmrun/signals_asm.c
  28. +8 −9 asmrun/stack.h
  29. +7 −1 asmrun/startup.c
  30. BIN boot/ocamlc
  31. BIN boot/ocamldep
  32. BIN boot/ocamllex
  33. +1 −1 bytecomp/bytegen.ml
  34. +1 −0 byterun/alloc.c
  35. +4 −4 byterun/backtrace.c
  36. +5 −5 byterun/callback.c
  37. +3 −0 byterun/config.h
  38. +6 −6 byterun/debugger.c
  39. +0 −5 byterun/domain.c
  40. +0 −7 byterun/domain.h
  41. +9 −7 byterun/domain_state.tbl
  42. +328 −141 byterun/fiber.c
  43. +9 −20 byterun/fiber.h
  44. +3 −1 byterun/gc_ctrl.c
  45. +2 −1 byterun/gc_ctrl.h
  46. +4 −4 byterun/instrtrace.c
  47. +21 −21 byterun/interp.c
  48. +21 −7 byterun/memory.c
  49. +7 −2 byterun/memory.h
  50. +6 −0 byterun/params.c
  51. +1 −0 byterun/params.h
  52. +19 −9 byterun/roots.c
  53. +1 −0 driver/compenv.ml
  54. +6 −0 driver/main_args.ml
  55. +1 −0 driver/main_args.mli
  56. +1 −0 driver/optmain.ml
  57. +66 −0 native-tests/Makefile
  58. +1 −0 native-tests/Status.txt
  59. +77 −0 native-tests/expected.out
  60. +3 −0 native-tests/test1.ml
  61. +3 −0 native-tests/test10.ml
  62. +20 −0 native-tests/test11.ml
  63. +11 −0 native-tests/test12.ml
  64. +12 −0 native-tests/test13.ml
  65. +15 −0 native-tests/test14.ml
  66. +2 −0 native-tests/test15.ml
  67. +21 −0 native-tests/test16.c
  68. +26 −0 native-tests/test16.ml
  69. +1 −0 native-tests/test16.mli
  70. +21 −0 native-tests/test17.c
  71. +28 −0 native-tests/test17.ml
  72. +1 −0 native-tests/test17.mli
  73. +8 −0 native-tests/test18.ml
  74. +15 −0 native-tests/test19.ml
  75. +6 −0 native-tests/test2.ml
  76. +14 −0 native-tests/test20.ml
  77. +4 −0 native-tests/test3.ml
  78. +9 −0 native-tests/test4.ml
  79. +12 −0 native-tests/test5.ml
  80. +22 −0 native-tests/test6.c
  81. +17 −0 native-tests/test6.ml
  82. +1 −0 native-tests/test6.mli
  83. +21 −0 native-tests/test7.c
  84. +29 −0 native-tests/test7.ml
  85. +1 −0 native-tests/test7.mli
  86. +21 −0 native-tests/test8.c
  87. +23 −0 native-tests/test8.ml
  88. +1 −0 native-tests/test8.mli
  89. +36 −0 native-tests/test9.c
  90. +21 −0 native-tests/test9.ml
  91. +3 −0 native-tests/test9.mli
  92. +2 −0 stdlib/obj.ml
  93. +3 −0 stdlib/obj.mli
  94. +1 −0 tools/ocamloptp.ml
  95. +1 −0 utils/clflags.ml
  96. +1 −0 utils/clflags.mli
  97. +4 −1 utils/config.mli
  98. +1 −0 utils/config.mlp
View
@@ -137,7 +137,6 @@ world:
# Compile also native code compiler and libraries, fast
world.opt:
$(error Multicore doesn't yet support native code compiler. `make world` will build the bytecode compiler)
$(MAKE) coldstart
$(MAKE) opt.opt
View
@@ -187,7 +187,6 @@ method class_of_operation op =
| Iconst_int _ | Iconst_float _ | Iconst_symbol _
| Iconst_blockheader _ -> Op_pure
| Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _
| Iperform | Ireperform | Iresume_ind | Itail_resume_ind
| Iextcall _ -> assert false (* treated specially *)
| Istackoffset _ -> Op_other
| Iload(_,_) -> Op_load
@@ -227,7 +226,7 @@ method private cse n i =
as to the argument reg. *)
let n1 = set_move n i.arg.(0) i.res.(0) in
{i with next = self#cse n1 i.next}
| Iop (Icall_ind | Icall_imm _ | Iextcall _ | Iperform | Iresume_ind | Ireperform) ->
| Iop (Icall_ind | Icall_imm _ | Iextcall _) ->
(* For function calls and context switches, we should at least forget:
- equations involving memory loads, since the callee can
perform arbitrary memory stores;
View
@@ -34,6 +34,8 @@ let stack_offset = ref 0
(* Layout of the stack frame *)
let stack_threshold_size = Config.stack_threshold * 8 (* bytes *)
let frame_required () =
fp || !contains_calls || num_stack_slots.(0) > 0 || num_stack_slots.(1) > 0
@@ -414,8 +416,6 @@ let emit_instr fallthrough i =
output_epilogue begin fun () ->
` jmp *{emit_reg i.arg.(0)}\n`
end
| Lop(Iperform | Ireperform | Iresume_ind | Itail_resume_ind) ->
` /* this was a bad idea to begin with */`
| Lop(Itailcall_imm s) ->
if s = !function_name then
` jmp {emit_label !tailrec_entry_point}\n`
@@ -424,8 +424,20 @@ let emit_instr fallthrough i =
` {emit_jump s}\n`
end
end
| Lop(Iextcall(s, alloc)) ->
if alloc then begin
| Lop(Iextcall(s, alloc, stack_off)) ->
if stack_off > 0 then
begin
` leaq (%rsp), %r13\n`;
` leaq {emit_int stack_off}(%rsp), %r12\n`;
` {load_symbol_addr s}, %rax\n`;
` {emit_call "caml_c_call_stack_args"}\n`;
record_frame i.live i.dbg;
(* caml_c_call preserves the old value of %r15, but this may no longer
be the correct minor heap pointer, so we reload young_ptr *)
` {locate_domain_state ()}, %r15\n`;
` movq {domain_field Domainstate.Domain_young_ptr}(%r15), %r15\n`;
end
else if alloc then begin
` {load_symbol_addr s}, %rax\n`;
` {emit_call "caml_c_call"}\n`;
record_frame i.live i.dbg;
@@ -434,7 +446,14 @@ let emit_instr fallthrough i =
` {locate_domain_state ()}, %r15\n`;
` movq {domain_field Domainstate.Domain_young_ptr}(%r15), %r15\n`;
end else begin
` {emit_call s}\n`
` movq %rsp, %r10\n`;
` movq %r15, %r11\n`;
` {locate_domain_state ()}, %r11\n`;
` movq {domain_field Domainstate.Domain_system_sp}(%r11), %rsp\n`;
` pushq %r10\n`;
` {emit_call s}\n`;
` popq %r10\n`;
` movq %r10, %rsp\n`
end
| Lop(Istackoffset n) ->
if n < 0
@@ -653,7 +672,7 @@ let emit_instr fallthrough i =
if macosx then
` .const\n`
else if mingw64 || cygwin then
` .section .rdata,\"dr\"\n`
` .section .rdata,\"dr\"\n`
else
` .section .rodata\n`;
emit_align 4;
@@ -668,7 +687,10 @@ let emit_instr fallthrough i =
cfi_adjust_cfa_offset 8;
` pushq %r14\n`;
cfi_adjust_cfa_offset 8;
` movq %rsp, %r14\n`;
` movq %r15, %r14\n`;
` {locate_domain_state ()}, %r14\n`;
` movq {domain_field Domainstate.Domain_stack_high}(%r14), %r14\n`;
` subq %rsp, %r14\n`;
stack_offset := !stack_offset + 16
| Lpoptrap ->
` popq %r14\n`;
@@ -686,7 +708,10 @@ let emit_instr fallthrough i =
record_frame Reg.Set.empty i.dbg
| false, _
| true, Lambda.Raise_notrace ->
` movq %r14, %rsp\n`;
` movq %r15, %rsp\n`;
` {locate_domain_state ()}, %rsp\n`;
` movq {domain_field Domainstate.Domain_stack_high}(%rsp), %rsp\n`;
` subq %r14, %rsp\n`;
` popq %r14\n`;
` ret\n`
end
@@ -716,6 +741,39 @@ let emit_profile () =
| _ ->
() (*unsupported yet*)
type preproc_fun_result =
{max_stack_size : int;
contains_nontail_calls : bool;
contains_external_calls : bool}
let preproc_fun fun_body fun_name =
let rec proc_instr r a i =
if i.desc = Lend then r else
let upd_size r delta =
{r with max_stack_size = max r.max_stack_size (a+delta)}
in
let (r',a') = match i.desc with
| Lop (Istackoffset n) -> (upd_size r n, a+n)
| Lpushtrap -> (upd_size r 16, a+16)
| Lpoptrap -> (r, a-16)
| Lop (Iextcall _ | Ialloc _ | Iintop Icheckbound
| Iintop_imm (Icheckbound, _)) ->
({r with contains_external_calls = true}, a)
| Lop (Icall_ind | Icall_imm _ ) ->
({r with contains_nontail_calls = true}, a)
| _ -> (r, a)
in
proc_instr r' a' i.next
in
let fs = frame_size () in
let r =
{max_stack_size = fs;
contains_nontail_calls = false;
contains_external_calls = false}
in
proc_instr r fs fun_body
(* Emission of a function declaration *)
let fundecl fundecl =
@@ -752,9 +810,53 @@ let fundecl fundecl =
` subq ${emit_int n}, %rsp\n`;
cfi_adjust_cfa_offset n;
end;
`{emit_label !tailrec_entry_point}:\n`;
emit_all true fundecl.fun_body;
List.iter emit_call_gc !call_gc_sites;
let preproc_res = preproc_fun fundecl.fun_body fundecl.fun_name in
let s = !Clflags.stack_slop + preproc_res.max_stack_size in
(* Extra 1k bytes slop space if profiling is enabled. Ensures that `mcount`
* and signal handlers do not stack overflow. *)
let profile_slop =
if !Clflags.gprofile then Config.profile_slop * 8 else 0
in
let s = s + profile_slop in
(* Extra 24 bytes for caml_context at the bottom of OCaml stack if the
* function makes external calls *)
let s = if preproc_res.contains_external_calls then s + 24 else s in
if not preproc_res.contains_nontail_calls &&
s < stack_threshold_size + profile_slop
then begin
`{emit_label !tailrec_entry_point}:\n`;
emit_all true fundecl.fun_body;
List.iter emit_call_gc !call_gc_sites;
end
else begin
(* XXX KC: fundecl.fun_body.live does not capture liveness at the start of
the function properly. *)
let live_initially =
Reg.Set.(fold (fun r s -> add {r with typ = Addr} s)
fundecl.fun_args empty)
in
let handle_overflow = new_label() in
let after_overflow = record_frame_label live_initially Debuginfo.none in
` movq %r15, %r11\n`;
` {locate_domain_state ()}, %r11\n`;
` movq {domain_field Domainstate.Domain_stack_threshold}(%r11), %r11\n`;
` addq ${emit_int s}, %r11\n`;
` cmpq %r11, %rsp\n`;
` jb {emit_label handle_overflow}\n`;
`{emit_label !tailrec_entry_point}:\n`;
emit_all true fundecl.fun_body;
List.iter emit_call_gc !call_gc_sites;
`{emit_label handle_overflow}:\n`;
` {emit_call "caml_call_realloc_stack"}\n`;
`{emit_label after_overflow}:\n`;
` jmp {emit_label !tailrec_entry_point}\n`;
end;
emit_call_bound_errors ();
cfi_endproc ();
begin match Config.system with
View
@@ -259,8 +259,11 @@ let destroyed_at_c_call =
108;109;110;111;112;113;114;115])
let destroyed_at_oper = function
Iop(Icall_ind | Icall_imm _ | Iextcall(_, true)) -> all_phys_regs
| Iop(Iextcall(_, false)) -> destroyed_at_c_call
Iop(Icall_ind | Icall_imm _) -> all_phys_regs
| Iop(Iextcall(_, alloc, stack_ofs)) ->
assert (stack_ofs >= 0);
if alloc || stack_ofs > 0 then all_phys_regs
else destroyed_at_c_call
| Iop(Iintop(Idiv | Imod)) | Iop(Iintop_imm((Idiv | Imod), _))
-> [| rax; rdx |]
| Iop(Istore(Single, _, _)) -> [| rxmm15 |]
@@ -281,11 +284,11 @@ let destroyed_at_raise = all_phys_regs
let safe_register_pressure = function
Iextcall(_,_) -> if win64 then if fp then 7 else 8 else 0
Iextcall(_,_,_) -> if win64 then if fp then 7 else 8 else 0
| _ -> if fp then 10 else 11
let max_register_pressure = function
Iextcall(_, _) ->
Iextcall(_, _,_) ->
if win64 then
if fp then [| 7; 10 |] else [| 8; 10 |]
else
View
@@ -940,6 +940,15 @@ let rec close fenv cenv = function
let (ulam, approx) = close fenv cenv arg in
(Uprim(Praise k, [ulam], Debuginfo.from_raise ev),
Value_unknown)
| Lprim(Pperform, args) ->
let args = close_list fenv cenv args in
(Udirect_apply ("caml_perform", args, Debuginfo.none), Value_unknown)
| Lprim(Presume, args) ->
let args = close_list fenv cenv args in
(Udirect_apply ("caml_resume", args, Debuginfo.none), Value_unknown)
| Lprim(Preperform, args) ->
let args = close_list fenv cenv args in
(Udirect_apply ("caml_reperform", args, Debuginfo.none), Value_unknown)
| Lprim(p, args) ->
simplif_prim !Clflags.float_const_prop
p (close_list_approx fenv cenv args) Debuginfo.none
View
@@ -81,9 +81,6 @@ type operation =
| Ccmpf of comparison
| Craise of Lambda.raise_kind * Debuginfo.t
| Ccheckbound of Debuginfo.t
| Cperform
| Cresume
| Creperform
type expression =
Cconst_int of int
View
@@ -67,9 +67,6 @@ type operation =
| Ccmpf of comparison
| Craise of Lambda.raise_kind * Debuginfo.t
| Ccheckbound of Debuginfo.t
| Cperform
| Cresume
| Creperform
type expression =
Cconst_int of int
View
@@ -1682,8 +1682,6 @@ and transl_prim_1 p arg dbg =
tag_int (Cop(Cextcall("caml_bswap16_direct", typ_int, false,
Debuginfo.none),
[untag_int (transl arg)]))
| Pperform ->
Cop(Cperform, [transl arg])
| prim ->
let (_ : string) = Format.flush_str_formatter () in
Printlambda.primitive Format.str_formatter prim;
@@ -1921,8 +1919,6 @@ and transl_prim_2 p arg1 arg2 dbg =
| Pbintcomp(bi, cmp) ->
tag_int (Cop(Ccmpi(transl_comparison cmp),
[transl_unbox_int bi arg1; transl_unbox_int bi arg2]))
| Preperform ->
Cop(Creperform, [transl arg1; transl arg2])
| prim ->
let (_ : string) = Format.flush_str_formatter () in
Printlambda.primitive Format.str_formatter prim;
@@ -2056,9 +2052,6 @@ and transl_prim_3 p arg1 arg2 arg3 dbg =
(Cconst_int 7)) idx
(unaligned_set_64 ba_data idx newval))))))
| Presume ->
Cop(Cresume, [transl arg1; transl arg2; transl arg3])
| _ ->
fatal_error "Cmmgen.transl_prim_3"
View
@@ -136,8 +136,11 @@ let emit_frames a =
a.efa_16 (if Debuginfo.is_none fd.fd_debuginfo
then fd.fd_frame_size
else fd.fd_frame_size + 1);
a.efa_16 (List.length fd.fd_live_offset);
List.iter a.efa_16 fd.fd_live_offset;
let uniq_fd_live_offset =
List.sort_uniq compare fd.fd_live_offset
in
a.efa_16 (List.length uniq_fd_live_offset);
List.iter a.efa_16 uniq_fd_live_offset;
a.efa_align Arch.size_addr;
if not (Debuginfo.is_none fd.fd_debuginfo) then begin
let d = fd.fd_debuginfo in
View
@@ -51,6 +51,7 @@ let has_fallthrough = function
type fundecl =
{ fun_name: string;
fun_args: Reg.Set.t;
fun_body: instruction;
fun_fast: bool;
fun_dbg : Debuginfo.t }
@@ -292,6 +293,7 @@ let reset () =
let fundecl f =
{ fun_name = f.Mach.fun_name;
fun_args = Reg.set_of_array f.Mach.fun_args;
fun_body = linear f.Mach.fun_body end_instr;
fun_fast = f.Mach.fun_fast;
fun_dbg = f.Mach.fun_dbg }
View
@@ -46,6 +46,7 @@ val invert_test: Mach.test -> Mach.test
type fundecl =
{ fun_name: string;
fun_args: Reg.Set.t;
fun_body: instruction;
fun_fast: bool;
fun_dbg : Debuginfo.t }
View
@@ -43,11 +43,7 @@ type operation =
| Icall_imm of string
| Itailcall_ind
| Itailcall_imm of string
| Iperform
| Ireperform
| Iresume_ind
| Itail_resume_ind
| Iextcall of string * bool
| Iextcall of string * bool * int
| Istackoffset of int
| Iload of Cmm.memory_chunk * Arch.addressing_mode
| Istore of Cmm.memory_chunk * Arch.addressing_mode * bool
View
@@ -43,11 +43,7 @@ type operation =
| Icall_imm of string
| Itailcall_ind
| Itailcall_imm of string
| Iperform
| Ireperform
| Iresume_ind
| Itail_resume_ind
| Iextcall of string * bool (* false = noalloc, true = alloc *)
| Iextcall of string * bool * int (* false = noalloc, true = alloc, n = stack_args *)
| Istackoffset of int
| Iload of Cmm.memory_chunk * Arch.addressing_mode
| Istore of Cmm.memory_chunk * Arch.addressing_mode * bool
View
@@ -84,9 +84,6 @@ let operation = function
| Ccmpf c -> Printf.sprintf "%sf" (comparison c)
| Craise (k, d) -> Lambda.raise_kind k ^ Debuginfo.to_string d
| Ccheckbound d -> "checkbound" ^ Debuginfo.to_string d
| Cperform -> "perform"
| Cresume -> "resume"
| Creperform -> "reperform"
let rec expr ppf = function
| Cconst_int n -> fprintf ppf "%i" n
View
@@ -25,7 +25,7 @@ let instr ppf i =
| Lend -> ()
| Lop op ->
begin match op with
| Ialloc _ | Icall_ind | Icall_imm _ | Iextcall(_, _) ->
| Ialloc _ | Icall_ind | Icall_imm _ | Iextcall _ ->
fprintf ppf "@[<1>{%a}@]@," regsetaddr i.live
| _ -> ()
end;
Oops, something went wrong.

0 comments on commit fc36619

Please sign in to comment.