docs/dev/jit_i386.dev

# Copyright: 2001-2004 The Perl Foundation.  All Rights Reserved.
# $Id$

=head1 NAME

docs/dev/jit_i386.dev - Parrot JIT (i386/gcc)

=head1 ABSTRACT

This document describes the i386 gcc JIT implementation.

=head1 DESCRIPTION

JIT i386/gcc is a combination of unrolled assembly instructions and the
Computed Goto Predereferenced (CGP) run loop. For branch instructions the
function implementation in the standard core is called.

Another difference of JIT/i386 is that most vtable functions are JITed
instructions which use register mappings.

For a better understanding of the control flow between what are basically three
run loop cores, an example shows the gory details.

=head1 EXAMPLE

Given the following PASM program, the right-hand three columns show where each
opcode gets executed:

        PASM                 JIT ops   Normal     CGP ops

	                     (call cgp_core)      (jmp back)

	set I0, 10           set_i_ic
	print I0             (call)               print_i
	print "\n"                                print_sc
	bsr inc              (call)     bsr_ic    cpu_ret
	end                  (jmp) HALT           end (ret)
	                     end (ret)
  inc:
	inc I0               inc_i
	new P0, .PerlString  new_p_ic
	set P0, I0           set_p_i
	print P0             (call)               print_p
	print "\n"                                print_sc
	ret                  (call)     ret       cpu_ret

=head2 Startup sequence

In B<runops_jit> a prederefed copy of the opcode stream is built by
B<init_prederef>. Then B<build_asm> generates the assembler code sequence as
usual. This generated code (shown as B<runops_jit> in B<ddd>) is then executed.

Generate minimal stack frame, save %ebx

    0x812c510 <jit_func>:	push   %ebp
    0x812c511 <jit_func+1>:	mov    %esp,%ebp
    0x812c513 <jit_func+3>:	push   %ebx

Get the program counter to %ebx

    0x812c514 <jit_func+4>:	mov    0xc(%ebp),%ebx

Push B<interpreter> and B<(opcode_t*) 1> and call B<cgp_core>

    0x812c517 <jit_func+7>:	push   $0x8113db8
    0x812c51c <jit_func+12>:	push   $0x1
    0x812c521 <jit_func+17>:	mov    $0x1,%eax
    0x812c526 <jit_func+22>:	call   0x80b5830 <cgp_core>

In B<cgp_core> all callee saved registers are saved.

    0x80b5830 <cgp_core>:	push   %ebp
    0x80b5831 <cgp_core+1>:	mov    %esp,%ebp
    0x80b5833 <cgp_core+3>:	sub    $0xdc,%esp
    0x80b5839 <cgp_core+9>:	lea    0x8(%ebp),%eax
    0x80b583c <cgp_core+12>:	push   %edi
    0x80b583d <cgp_core+13>:	push   %esi
    0x80b583e <cgp_core+14>:	push   %ebx

In B<%eax> the init flag is set to B<-1>

    0x80b583f <cgp_core+15>:	mov    %eax,0xfffffff

The parameter B<*cur_op> (the program counter) is put into B<%esi> and ...

    0x80b5842 <cgp_core+18>:	mov    0x8(%ebp),%esi
    0x80b5845 <cgp_core+21>:	test   %esi,%esi
    0x80b5847 <cgp_core+23>:	jne    0x80b5853 <cgp_core+35>
    0x80b5849 <cgp_core+25>:	mov    $0x810ca60,%eax
    0x80b584e <cgp_core+30>:	jmp    0x80bb470 <cgp_core+23616>

... compared to B<1>

    0x80b5853 <cgp_core+35>:	cmp    $0x1,%esi
    0x80b5856 <cgp_core+38>:	jne    0x80b5860 <cgp_core+48>

If true, the program jumps to the return address of the above function call,
i.e. it jumps back again to JIT code.

    0x80b5858 <cgp_core+40>:	jmp    *0x4(%ebp)

Back again in JIT code, the init flag is checked

    0x812c52b <jit_func+27>:	test   %eax,%eax
    0x812c52d <jit_func+29>:	jne    0x812c536 <jit_func+38>

... and if zero, the function would be left.

 [   0x812c52f <jit_func+31>:	pop    %ebx          ]
 [   0x812c531 <jit_func+33>:	mov    %ebp,%esp     ]
 [   0x812c533 <jit_func+35>:	pop    %ebp          ]
 [   0x812c535 <jit_func+37>:	ret                  ]

When coming from the init sequence, program flow continues by checking the
B<resume_offset> and jumping to the desired instruction

    0x812c536 <jit_func+38>:	mov    %ebx,%eax
    0x812c538 <jit_func+40>:	sub    $0x400140c0,%eax
    0x812c53e <jit_func+46>:	mov    $0x812c4a8,%edx
    0x812c543 <jit_func+51>:	jmp    *(%edx,%eax,1)

B<set I0, 10> and save_registers

    0x812c546 <jit_func+54>:	mov    $0xa,%ebx
    0x812c54b <jit_func+59>:	mov    %ebx,0x8113db8

Now non-JITed code follows -- get the address from the prederefed op_func_table
and call it:

    0x812c551 <jit_func+65>:	mov    $0x812ac0c,%esi
    0x812c556 <jit_func+70>:	call   *(%esi)

    inline op print(in INT) {
      printf(INTVAL_FMT, (INTVAL)$1);
      goto NEXT();
    }

where the B<goto NEXT()> is a simple:

    0x80b5b49 <cgp_core+793>:	jmp    *(%esi)

    op print(in STR) {
     ...
      goto NEXT();
    }

As the last instruction of the non-JITed code sequence is a branch, this is not
executed in CGP, but the opcode:

    inline op cpu_ret() {
    #ifdef __GNUC__
    # ifdef I386
       asm("ret")

is executed. This opcode is patched into the prederefed code stream by
Parrot_jit_normal_op at the end of a non-JITed code sequence. This returns to
JIT code again, where the next instruction gets called as a function in the
standard core ...

    0x812c558 <jit_func+72>:	push   $0x8113db8
    0x812c55d <jit_func+77>:	push   $0x400140dc
    0x812c562 <jit_func+82>:	call   0x805be60 <Parrot_bsr_ic>
    0x812c567 <jit_func+87>:	add    $0x8,%esp

... and from the return result in B<%eax>, the new code position in JIT is
calculated and gets jumped to:

    0x812c56a <jit_func+90>:	sub    $0x400140c0,%eax
    0x812c570 <jit_func+96>:	mov    $0x812c4a8,%edx
    0x812c575 <jit_func+101>:	jmp    *(%edx,%eax,1)

Now in the subroutine B<inc>:

    0x812c580 <jit_func+112>:	mov    0x8113db8,%ebx
    0x812c586 <jit_func+118>:	inc    %ebx

Save register and arguments and call B<pmc_new_noinit>:

    0x812c587 <jit_func+119>:	push   %edx
    0x812c588 <jit_func+120>:	push   $0x11
    0x812c58d <jit_func+125>:	push   $0x8113db8
    0x812c592 <jit_func+130>:	call   0x806fc60 <pmc_new_noinit>

put the PMC* into Parrot's register:

    0x812c597 <jit_func+135>:	mov    %eax,0x8113fb8

and prepare arguments for a VTABLE call:

    0x812c59d <jit_func+141>:	push   %eax
    0x812c59e <jit_func+142>:	push   $0x8113db8
    0x812c5a3 <jit_func+147>:	mov    0x10(%eax),%eax
    0x812c5a6 <jit_func+150>:	call   *0x18(%eax)
    0x812c5a9 <jit_func+153>:	add    $0x10,%esp
    0x812c5ac <jit_func+156>:	pop    %edx

and another one:

    0x812c5ae <jit_func+158>:	push   %edx

Here, with the mapped register in B<%ebx>, push B<I0>, the PMC and the
interpreter:

    0x812c5af <jit_func+159>:	push   %ebx
    0x812c5b0 <jit_func+160>:	mov    0x8113fb8,%eax
    0x812c5b6 <jit_func+166>:	push   %eax
    0x812c5b7 <jit_func+167>:	push   $0x8113db8

and call the vtable:

    0x812c5bc <jit_func+172>:	mov    0x10(%eax),%eax
    0x812c5bf <jit_func+175>:	call   *0xdc(%eax)
    0x812c5c5 <jit_func+181>:	add    $0xc,%esp
    0x812c5c8 <jit_func+184>:	pop    %edx

As this ends the JITed section, used registers are saved back to Parrot's
registers:

    0x812c5ca <jit_func+186>:	mov    %ebx,0x8113db8

and again the code in B<cgp_core> gets called:

    0x812c5d0 <jit_func+192>:	mov    $0x812ac48,%esi
    0x812c5d5 <jit_func+197>:	call   *(%esi)

which after executing the B<print> returns back here in JIT, where the B<ret>
is called:

    0x812c5d7 <jit_func+199>:	push   $0x8113db8
    0x812c5dc <jit_func+204>:	push   $0x40014118
    0x812c5e1 <jit_func+209>:	call   0x805d5e0 <Parrot_ret>
    0x812c5e6 <jit_func+214>:	add    $0x8,%esp

From the returned PC a JIT address is calculated, which gets executed:

    0x812c5e9 <jit_func+217>:	sub    $0x400140c0,%eax
    0x812c5ef <jit_func+223>:	mov    $0x812c4a8,%edx
    0x812c5f4 <jit_func+228>:	jmp    *(%edx,%eax,1)

Now at the B<end> opcode, the CGP code for HALT() gets jumped to:

    0x812c578 <jit_func+104>:	mov    $0x80b5877,%esi
    0x812c57d <jit_func+109>:	jmp    *%esi

which is:

    inline op end() {
      HALT();
    }

or, set return result:

    0x80b8b6f <cgp_core+13119>:	xor    %eax,%eax
    ...

and clean up stack frame and ret:

    0x80bb470 <cgp_core+23616>:	lea    0xffffff18(%ebp),%esp
    0x80bb476 <cgp_core+23622>:	pop    %ebx
    0x80bb477 <cgp_core+23623>:	pop    %esi
    0x80bb478 <cgp_core+23624>:	pop    %edi
    0x80bb479 <cgp_core+23625>:	mov    %ebp,%esp
    0x80bb47b <cgp_core+23627>:	pop    %ebp
    0x80bb47c <cgp_core+23628>:	ret

This returns after the position where B<cgp_core> was called during the init
sequence, but now the return value B<%eax> is zero and the ...

    0x812c52b <jit_func+27>:	test   %eax,%eax
    0x812c52d <jit_func+29>:	jne    0x812c536 <jit_func+38>
    0x812c52f <jit_func+31>:	pop    %ebx
    0x812c531 <jit_func+33>:	mov    %ebp,%esp
    0x812c533 <jit_func+35>:	pop    %ebp
    0x812c535 <jit_func+37>:	ret

... whole story ends here, we are back again in B<runops_jit>.

So this is rather simple once it gets going.

=head1 BUGS

The floating point registers do not get saved to Parrot before vtable calls.
This assumes that external routines preserve the FP stack pointer and don't use
more than 4 floating point registers at once.

=head1 AUTHOR

Leopold Toetsch C<lt@toetsch.at>

=head1 VERSION

=head2 CURRENT

14.02.2003 by Leopold Toetsch