- 词法分析
- 基于递归下降的语法分析
- 基于
LL(1)
的语法分析 - 抽象语法树
AST
的生成 - 符号检查
- 类型检查与类型推导
-
LLVM IR
中间代码生成 - 利用
LLVM
进行JIT
即时编译 - 三元式生成
- 目标代码生成
- 表达式优化
- 基于
AST
的代码优化
extern crate inkwell;
extern crate parser;
use self::inkwell::targets::{Target, InitializationConfig};
use self::inkwell::execution_engine::Symbol;
use parser::parser::*;
use parser::parser::recursive_descent::*;
use parser::parser::llvm_ir_generater::*;
use parser::lexer::*;
fn main () {
Target::initialize_native(&InitializationConfig::default()).unwrap();
let src = "
int f(int a, int b)
{
if (a >= b)
return a;
return a + b;
}
";
let mut parser = RecursiveDescentParser::new(Lexer::new(src.as_bytes()));
parser.run().unwrap();
let mut generater = LLVMIRGenerater::new(parser.syntax_tree());
let module = generater.ir_gen();
let ee = generater.execution_engine().unwrap();
let f: Symbol<unsafe extern "C" fn(i64, i64) -> i64> = unsafe {
ee.get_function("f").unwrap()
};
assert_eq!(5, unsafe { f(2, 3) });
assert_eq!(6, unsafe { f(6, 5) });
assert_eq!(7, unsafe { f(3, 4) });
assert_eq!(5, unsafe { f(5, 5) });
}
let src = "
int func_add(int a, int b)
{
return a + b;
}
int main()
{
return 0;
}
";
let lexer = Lexer::new(src.as_bytes());
let mut parser = RecursiveDescentParser::new(lexer);
parser.run();
parser.dump();
output AST:
SyntaxTree
FuncDefine
Terminal(KeyWord(Int))
Terminal(Identifier("func_add"))
FuncArg
Terminal(KeyWord(Int))
Terminal(Identifier("a"))
FuncArg
Terminal(KeyWord(Int))
Terminal(Identifier("b"))
ReturnStmt
Expr
Terminal(Identifier("a"))
Terminal(Operator(Add))
Terminal(Identifier("b"))
FuncDefine
Terminal(KeyWord(Int))
Terminal(Identifier("main"))
ReturnStmt
Terminal(Number("0"))
let src = "
struct S { int a, b; };
struct S1 { double S1; int b; };
int a(int a, char b) { }
";
let mut parser = RecursiveDescentParser::new(Lexer::new(src.as_bytes()));
println!("result: {:?}\n", parser.run());
output: result: Ok(())
let src = "
// multi define of variable S::a
struct S { int a, b; char a; };
";
let mut parser = RecursiveDescentParser::new(Lexer::new(src.as_bytes()));
println!("result: {:?}\n", parser.run());
output: result: Err(ParseErrInfo { err_type: MultiDefineError })
if, else, for, ...
short, int, long, unsigned, ...
- number =
\d+
- identifier =
[a-z][a-z|0-9]*
- ident =
number
|identifier
-
expr_opt:
bool_expr
epsilon
-
expr:
expr
add_op
expr_mul
- =>
expr_mul
expr_fix
(消除左递归后的产生式,下同)
-
expr_fix:
add_op
expr_mul
expr_fix
|epsilon
-
expr_mul:
expr_mul
mul_op
expr_factor
- =>
expr_factor
expr_mul_fix
-
expr_mul_fix =
mul_op
expr_factor
expr_mul_fix
|epsilon
-
expr_factor =
(
expr
)
|ident
引入的 Tokens
- add_op =
+
|-
- mul_op =
*
|/
- single_op =
!
|~
原始定义
- bool_expr:
bool_expr
||
bool_expr
bool_expr
&&
bool_expr
bool_expr
equal_op
bool_expr
bool_expr
cmp_op
bool_expr
!
expr
expr
消除左递归及添加优先级后的定义
-
bool_expr:
bool_expr
||
bool_expr_and
- =>
bool_expr_and
bool_expr_fix
-
bool_expr_fix:
||
bool_expr_and
bool_expr_fix
|epsilon
-
bool_expr_and:
bool_expr_and
&&
bool_expr_equal
- =>
bool_expr_equal
bool_expr_and_fix
-
bool_expr_and_fix:
&&
bool_expr_equal
bool_expr_and_fix
|epsilon
-
bool_expr_equal:
bool_expr_equal
equal_op
bool_expr_cmp
- =>
bool_expr_cmp
bool_expr_equal_fix
-
bool_expr_equal_fix:
equal_op
bool_expr_cmp
bool_expr_equal_fix
|epsilon
-
bool_expr_cmp:
bool_expr_cmp
cmp_op
bool_expr_factor
- =>
bool_expr_factor
bool_expr_cmp_fix
-
bool_expr_cmp_fix:
cmp_op
bool_expr_factor
bool_expr_cmp_fix
|epsilon
-
bool_expr_factor:
!
bool_expr
(
bool_expr
)
expr
引入的 Tokens
- cmp_op:
>
>=
<
<=
- equal_op:
==
!=
-
stmt:
stmt_factor
-
stmt_factor:
stmt_single
;
stmt_block
stmt_control
;
-
stmt_single
assign_stmt
break_stmt
return_stmt
-
stmt_control
if_stmt
while_loop
for_loop
-
stmt_list:
stmt
stmt_list
|epsilon
-
stmt_block:
{
stmt_list
}
-
assign_stmt:
left_value
=
right_value
-
if_stmt:
if
(
bool_expr
)
stmt
else
stmt
-
while_loop:
while
(
bool_expr
)
stmt
-
for_loop:
for
(
expr_opt
;
expr_opt
;
expr_opt
)
stmt
-
break_stmt:
break
-
return_stmt:
return
return_expr
-
return_expr:
bool_expr
epsilon
-
left_value:
identifier
-
right_value:
bool_expr
-
variable_define:
variable_type
variable_def_list
-
variable_def_list:
identifier
;
identifier
,
variable_def_list
-
variable_type:
variable_prefix
variable_suffix
float
double
-
variable_prefix:
unsigned
signed
long
long long
-
variable_suffix:
int
-
func_declare:
func_ret_type
func_name
(
func_param_list
)
;
-
func_name
identifier
-
func_ret_type:
type
-
func_param_list:
func_param
func_param_list_tail
epsilon
-
func_param_list_tail:
,
func_param
func_param_list_tail
epsilon
-
func_param:
func_param_type
func_param_name
-
func_param_type:
type
-
func_param_name:
identifier
-
struct_define:
struct
{
struct_vars
}
;
struct
identifier
{
struct_vars
}
;
-
struct_vars
struct_var
;
struct_var
struct_vars
epsilon
-
struct_var
variable_define
-
function:
func_ret_type
func_name
(
func_param_list
)
{
func_body
}
-
func_body:
stmt_list
-
func_call:
func_name
(
func_arg_list
)
-
func_arg_list:
func_arg
,
func_arg_list
epsilon
-
func_arg:
identifier