Skip to content
This repository has been archived by the owner on Jun 3, 2021. It is now read-only.

Commit

Permalink
Use Cranelift instead of LLVM
Browse files Browse the repository at this point in the history
This was a lot of work for seemingly little benefit, so here's the
rationale (in vague order of importance):

- Cranelift is written in Rust, not C++, so there's no FFI
- Cranelift does not have segmentation faults or undefined behavior if
you use the crate improperly
- Cranelift has releases instead of running from a Git branch, so things
don't break unexpectedly (I had to redo a lot of code when Inkwell
removed `Void::ptr_type`).
- Cranelift can compile directly to an object file instead of IR (using
cranelift_faerie)
- Cranelift runs significantly faster, although it performs fewer
optimizations.
  • Loading branch information
jyn514 committed Jul 27, 2019
1 parent 469d2c8 commit 9f5573d
Show file tree
Hide file tree
Showing 6 changed files with 265 additions and 236 deletions.
8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@ license = "BSD-3-Clause"
[dependencies]
lazy_static = "1"
ansi_term = "0.11"
inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "llvm6-0" }
cranelift = "0.36"
cranelift-codegen = "0.36"
cranelift-entity = "0.36"
cranelift-faerie = "0.36"
cranelift-module = "0.36"
failure = "0.1"
target-lexicon = "0.4"

structopt = { version = "0.2", optional = true }

Expand Down
231 changes: 97 additions & 134 deletions src/backend/mod.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
use std::cmp::max;
use std::convert::TryInto;

use inkwell::context::Context;
use inkwell::types::{self, AnyType, AnyTypeEnum, BasicType, BasicTypeEnum};
use inkwell::AddressSpace;
use cranelift_codegen::ir::types::{self, Type as IrType};
use cranelift_codegen::ir::{AbiParam, Signature};
use cranelift_codegen::isa::CallConv;
use target_lexicon::Triple;

use crate::data::{Type, INT_POINTER};
use crate::data::{FunctionType, Locatable, Location, Type};
use Type::*;

// NOTE: the C standard requires `sizeof(char)` to always be 1
const CHAR_SIZE: u32 = 1;
const CHAR_SIZE: u16 = 1;

// TODO: allow this to be configured at runtime
lazy_static! {
// TODO: make this `const` when
// https://github.com/CraneStation/target-lexicon/pull/19 is merged
/// Target triple that code is generated for; currently always `Triple::host()`.
pub static ref TARGET: Triple = Triple::host();
/// Calling convention obtained from `CallConv::triple_default` for `TARGET`;
/// `FunctionType::signature` uses it for every signature it builds.
pub static ref CALLING_CONVENTION: CallConv = CallConv::triple_default(&TARGET);
}
mod x64;
pub use x64::*;

Expand All @@ -23,18 +30,19 @@ impl Type {
other
)
}

// TODO: instead of doing this manually,
// convert to LLVM type and call t.size_of()
pub fn sizeof(&self) -> Result<u32, &'static str> {
pub fn sizeof(&self) -> Result<SIZE_T, &'static str> {
match self {
Bool => Ok(BOOL_SIZE * CHAR_BIT),
Char(_) => Ok(CHAR_SIZE * CHAR_BIT),
Short(_) => Ok(SHORT_SIZE * CHAR_BIT),
Int(_) => Ok(INT_SIZE * CHAR_BIT),
Long(_) => Ok(LONG_SIZE * CHAR_BIT),
Float => Ok(FLOAT_SIZE * CHAR_BIT),
Double => Ok(DOUBLE_SIZE * CHAR_BIT),
Pointer(_, _) => Ok(PTR_SIZE * CHAR_BIT),
Bool => Ok(BOOL_SIZE.into()),
Char(_) => Ok(CHAR_SIZE.into()),
Short(_) => Ok(SHORT_SIZE.into()),
Int(_) => Ok(INT_SIZE.into()),
Long(_) => Ok(LONG_SIZE.into()),
Float => Ok(FLOAT_SIZE.into()),
Double => Ok(DOUBLE_SIZE.into()),
Pointer(_, _) => Ok(PTR_SIZE.into()),
// now for the hard ones
Array(t, l) => t.sizeof().and_then(|n| Ok(n * l.length()?)),
Enum(symbols) => {
Expand Down Expand Up @@ -66,7 +74,7 @@ impl Type {
}
// TODO: instead of doing this manually,
// convert to LLVM type and call t.size_of()
pub fn alignof(&self) -> Result<u32, &'static str> {
pub fn alignof(&self) -> Result<SIZE_T, &'static str> {
match self {
Bool
| Char(_)
Expand All @@ -86,136 +94,52 @@ impl Type {
Void => Err("cannot take `alignof` void"),
}
}
}

// Given an enum value `$enum` whose variants each wrap a value that has a method
// named `$method`, expand to a `match` that calls that method on the wrapped value
// of whichever variant is present.
// Useful if each variant's payload has the method but the enum itself doesn't
// implement a trait giving you access to it.
//
// Every listed variant is matched as `$variant(t)`, so it must be a single-field
// tuple variant, and the generated `match` has no fallback arm, so the listed
// variants have to cover the whole enum.
macro_rules! gen_calls {
// an enum to match and a method to call on all variants
( $enum: expr, $method: ident,
// the argument passed to the method: exactly one token tree
$args: tt,
// for an arbitrary number of variants
$( $variant: path ),*
) => {
match $enum {
// one arm per variant, each forwarding the same argument
$( $variant(t) => t.$method($args), )*
}
}
}

/// Gives a uniform `ptr_type` method to inkwell types that do not share one through
/// a common trait; implemented in this module for `BasicTypeEnum`, `VoidType` and
/// `AnyTypeEnum`.
trait ToPointerType {
/// Returns an inkwell pointer type for `self`.
/// NOTE(review): presumably a pointer *to* `self` in `address_space` — confirm against inkwell.
fn ptr_type(&self, address_space: AddressSpace) -> types::PointerType;
}
/// Gives a uniform `array_type` method to inkwell types that do not share one
/// through a common trait; implemented in this module for `BasicTypeEnum`.
trait ToArrayType {
/// Returns an inkwell array type built from `self` and `array_size`.
/// NOTE(review): presumably `array_size` is the element count — confirm against inkwell.
fn array_type(&self, array_size: u32) -> types::ArrayType;
}
impl ToPointerType for BasicTypeEnum {
fn ptr_type(&self, addr: AddressSpace) -> types::PointerType {
use BasicTypeEnum::*;
gen_calls!(
self,
ptr_type,
addr,
FloatType,
IntType,
PointerType,
StructType,
VectorType,
ArrayType
)
}
}
impl ToPointerType for types::VoidType {
    /// Builds a stand-in for `void *`.
    ///
    /// `VoidType` is given no `ptr_type` of its own here, so the pointer is modelled as a
    /// pointer to a custom-width integer whose width comes from `INT_POINTER.sizeof()`.
    ///
    /// The pointer is created in the caller's `addr` address space, matching the other
    /// `ToPointerType` impls (previously `addr` was ignored and `AddressSpace::Generic`
    /// was always used).
    ///
    /// # Panics
    /// Panics if `INT_POINTER.sizeof()` returns an error.
    fn ptr_type(&self, addr: AddressSpace) -> types::PointerType {
        let pointee_width = INT_POINTER
            .sizeof()
            .expect("pointers should always have a valid size");
        self.get_context()
            .custom_width_int_type(pointee_width)
            .ptr_type(addr)
    }
}
impl ToPointerType for AnyTypeEnum {
fn ptr_type(&self, addr: AddressSpace) -> types::PointerType {
use AnyTypeEnum::*;
gen_calls!(
self,
ptr_type,
addr,
FloatType,
IntType,
PointerType,
StructType,
VectorType,
ArrayType,
FunctionType,
VoidType
)
}
}
impl ToArrayType for BasicTypeEnum {
fn array_type(&self, array_size: u32) -> types::ArrayType {
use BasicTypeEnum::*;
gen_calls!(
self,
array_type,
array_size,
PointerType,
FloatType,
IntType,
StructType,
VectorType,
ArrayType
)
}
}

impl Type {
pub fn into_llvm_basic(self, context: &Context) -> Result<BasicTypeEnum, String> {
pub fn into_llvm_basic(self) -> Result<IrType, String> {
match self {
Bool | Char(_) | Short(_) | Int(_) | Long(_) | Enum(_) => Ok(context
.custom_width_int_type(self.sizeof()?)
.as_basic_type_enum()),
// Integers
Bool | Char(_) | Short(_) | Int(_) | Long(_) | Pointer(_, _) | Enum(_) => {
let int_size = SIZE_T::from(CHAR_BIT)
* self
.sizeof()
.expect("integers should always have a valid size");
Ok(IrType::int(int_size.try_into().unwrap_or_else(|_| {
panic!(
"integers should never have a size larger than {}",
i16::max_value()
)
}))
.unwrap_or_else(|| panic!("unsupported size for IR: {}", int_size)))
}

// Floats
// TODO: this is hard-coded for x64 because LLVM doesn't allow specifying a
// custom type
Float => Ok(context.f32_type().as_basic_type_enum()),
Double => Ok(context.f64_type().as_basic_type_enum()),
Float => Ok(types::F32),
Double => Ok(types::F64),

// derived types
Pointer(t, _) => Ok(t
.into_llvm(context)?
.ptr_type(AddressSpace::Generic)
.as_basic_type_enum()),
Array(t, l) => Ok(t
.into_llvm_basic(context)?
.array_type(l.length()?)
.as_basic_type_enum()),
// Aggregates
// arrays decay to pointers at the assembly level
Array(t, l) => Ok(IrType::int(PTR_SIZE * CHAR_BIT)
.unwrap_or_else(|| panic!("unsupported size of IR: {}", PTR_SIZE))),
Struct(members) => {
let llvm_elements: Vec<BasicTypeEnum> = members
let llvm_elements: Vec<_> = members
.into_iter()
.map(|m| m.ctype.into_llvm_basic(context))
.map(|m| m.ctype.into_llvm_basic())
.collect::<Result<_, String>>()?;
// TODO: allow struct packing
Ok(context
.struct_type(&llvm_elements, false)
.as_basic_type_enum())
unimplemented!("struct type -> IR");
}
// LLVM does not have a union type.
// What Clang does is cast it to the type of the largest member,
// and then cast every element of the union as it is accessed.
// See https://stackoverflow.com/questions/19549942/extracting-a-value-from-an-union#19550613
Union(members) => try_max_by_key(members.into_iter().map(|m| m.ctype), Type::sizeof)
.expect("parser should ensure all unions have at least one member")?
.into_llvm_basic(context),
Void | Bitfield(_) | Function(_) => Err(format!("{} is not a basic type", self)),
.into_llvm_basic(),
Bitfield(_) => unimplemented!("bitfield to llvm type"),
Void | Function(_) => Err(format!("{} is not a basic type", self)),
}
}
pub fn into_llvm(self, context: &Context) -> Result<AnyTypeEnum, String> {
pub fn into_llvm(self) -> Result<IrType, String> {
match self {
// basic types (according to LLVM)
Bool
Expand All @@ -229,18 +153,57 @@ impl Type {
| Pointer(_, _)
| Array(_, _)
| Struct(_)
| Union(_) => Ok(self.into_llvm_basic(context)?.as_any_type_enum()),
// any type
Void => Ok(context.void_type().as_any_type_enum()),
| Bitfield(_)
| Union(_) => self.into_llvm_basic(),
// void cannot be loaded or stored
Void => Ok(types::INVALID),
// I don't think Cranelift IR has a representation for functions
Function(_) => unimplemented!("functions to LLVM type"),
//Function(func_type) => Ok(ty.to_llvm_basic()?.func_type())
// It looks like LLVM has a bitfield type but it isn't exposed by the
// Inkwell API? See https://stackoverflow.com/questions/25058213/how-to-spot-a-bit-field-with-clang
Bitfield(_) => unimplemented!("bitfield to llvm type"),
}
}
}

impl FunctionType {
    /// Lowers this function type to a Cranelift `Signature`.
    ///
    /// A parameter list consisting of exactly one `void` parameter yields no IR
    /// parameters, and a `void` return type yields no IR return values. Every other
    /// parameter, and a non-`void` return type, is converted with
    /// `Type::into_llvm_basic` and wrapped in `AbiParam::new`. The calling convention
    /// is always `CALLING_CONVENTION`.
    ///
    /// # Errors
    /// If a type cannot be converted, the conversion's message is returned in a
    /// `Locatable` carrying `location`. Parameters are converted in order, before the
    /// return type, so the first failure encountered is the one reported.
    pub fn signature(self, location: Location) -> Result<Signature, Locatable<String>> {
        // `f(void)` declares a function that takes no arguments at all
        let takes_nothing = self.params.len() == 1 && self.params[0].ctype == Type::Void;

        let mut params = Vec::new();
        if !takes_nothing {
            for param in self.params {
                match param.ctype.into_llvm_basic() {
                    Ok(ir_type) => params.push(AbiParam::new(ir_type)),
                    Err(err) => {
                        return Err(Locatable {
                            data: err,
                            location,
                        })
                    }
                }
            }
        }

        let mut returns = Vec::new();
        if *self.return_type != Type::Void {
            match self.return_type.into_llvm_basic() {
                Ok(ir_type) => returns.push(AbiParam::new(ir_type)),
                Err(err) => {
                    return Err(Locatable {
                        data: err,
                        location,
                    })
                }
            }
        }

        Ok(Signature {
            call_conv: *CALLING_CONVENTION,
            params,
            returns,
        })
    }
}

/// partially taken from
/// https://doc.rust-lang.org/src/core/iter/traits/iterator.rs.html#2591
/// short-circuiting version of iter.max_by_key
Expand Down
20 changes: 12 additions & 8 deletions src/backend/x64.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
// https://en.wikipedia.org/wiki/64-bit_computing#64-bit_data_models
pub const FLOAT_SIZE: u32 = 4;
pub const DOUBLE_SIZE: u32 = 8;
#[allow(non_camel_case_types)]
pub type SIZE_T = u64;
pub const SIZE_MAX: SIZE_T = SIZE_T::max_value();

pub const LONG_SIZE: u32 = 8;
pub const INT_SIZE: u32 = 4;
pub const SHORT_SIZE: u32 = 2;
pub const BOOL_SIZE: u32 = 1;
pub const FLOAT_SIZE: u16 = 4;
pub const DOUBLE_SIZE: u16 = 8;

pub const PTR_SIZE: u32 = 8;
pub const LONG_SIZE: u16 = 8;
pub const INT_SIZE: u16 = 4;
pub const SHORT_SIZE: u16 = 2;
pub const BOOL_SIZE: u16 = 1;

pub const CHAR_BIT: u32 = 8; // number of bits in a byte
pub const PTR_SIZE: u16 = 8;

pub const CHAR_BIT: u16 = 8; // number of bits in a byte
4 changes: 3 additions & 1 deletion src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ use std::collections::HashMap;
use std::convert::TryFrom;
use std::fmt::{self, Display, Formatter};

use crate::backend::SIZE_T;

#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Keyword {
// keywords
Expand Down Expand Up @@ -398,7 +400,7 @@ pub enum LengthError {
}

impl ArrayType {
pub fn length(&self) -> Result<u32, LengthError> {
pub fn length(&self) -> Result<SIZE_T, LengthError> {
match self {
ArrayType::Unbounded => Err(LengthError::Unbounded),
ArrayType::Fixed(expr) => {
Expand Down
Loading

0 comments on commit 9f5573d

Please sign in to comment.