Skip to content

Commit

Permalink
[FEATURE] Add preliminary support for iterators and built-in iterator…
Browse files Browse the repository at this point in the history
… functions (tuplex#25)

Add support for builtin iterator functions `iter, zip, enumerate, next, reversed`. Fix bug when calling `len` on an empty list `[]`. Add support for multiple identifiers in loop, e.g. `for a, b in (t1, t2), (t3, t4)`

Details:

- Add iterator type and related functions for typing
- Add symbol for iterator-related functions (iter, zip, enumerate, next, reversed)
- Add iterator-specific annotation
- Add iterator-related helping functions in LLVMEnvironment
- Fix getListType for list of tuples
- Refactor code for error handling for unsupported types
- Add iterator core class
- Add functions for creating iterator-related calls in FunctionRegistry
- Fix the case when expression (also called testlist) in for loop contains multiple elements, i.e. "for a, b in (t1, t2), (t3, t4)" should work now
- Update BlockGeneratorVisitor
   1. Update visit NCall, declareVariables, assignToSingleVariable to make iterator related calls work
   2. Fix codegen for list of tuples
   3. Add support for for loops with iterator as expression
- Use fallback mode for mixed AST node types in for loop exprlist for now
- Add tests about iterators
- Fix len() call on EMPTYLIST
- Fix bug in UnrollLoopsVisitor. Add tests

authored by: Yunzhi Shao (yunzhi_shao@brown.edu)
  • Loading branch information
yunzhi-jake committed Oct 1, 2021
1 parent be085b3 commit b0a7d35
Show file tree
Hide file tree
Showing 29 changed files with 3,211 additions and 167 deletions.
24 changes: 21 additions & 3 deletions tuplex/codegen/include/ASTAnnotation.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ class Symbol : public std::enable_shared_from_this<Symbol> {
* @return true if a specialized function type could be generated, false else.
*/
inline bool findFunctionTypeBasedOnParameterType(const python::Type& parameterType, python::Type& specializedFunctionType) {

// check if typer function is there?
auto generic_result = functionTyper(parameterType);
if(generic_result != python::Type::UNKNOWN) {
Expand Down Expand Up @@ -352,13 +351,29 @@ class Symbol : public std::enable_shared_from_this<Symbol> {
}
};

/*!
* iterator-specific annotation for NIdentifier (identifiers with iteratorType) and NCall (iterator related function calls including iter(), zip(), enumerate(), next()).
* - For an iterator generating NCall (iter(), zip() or enumerate()), its IteratorInfo saves info about the current call.
* - For an NIdentifier with _name=x, its IteratorInfo reveals how x was generated.
* - For an NCall next() with _positionalArguments=x, its IteratorInfo is the same as x's.
* Example:
* x = iter("abcd") // both NIdentifier x and NCall iter() are annotated with *info1 = {"iter", str, {nullptr}}
* y = zip(x, [1, 2]) // both NIdentifier y and NCall zip() are annotated with *info3 = {"zip", (Iterator[str], [I64]), {info1, info2}} where *info2 = {"iter", [I64], {nullptr}} since zip() implicitly converts any non-iteratorType member to an iterator
* z = next(y) // NCall next() is annotated with info4 = info3
*/
struct IteratorInfo {
std::string iteratorName; // from which built-in function the iterator was generated, currently can be "iter", "zip", "enumerate".
python::Type argsType; // concrete type of arguments of the iterator generating function.
std::vector<std::shared_ptr<IteratorInfo>> argsIteratorInfo; // pointers to IteratorInfo of each argument; in the example above an argument that is not itself an iterator is represented by nullptr.
};

// simple class used to annotate ast nodes
class ASTAnnotation {
public:

ASTAnnotation() : numTimesVisited(0), symbol(nullptr), iMin(0), iMax(0), negativeValueCount(0), positiveValueCount(0) {}
ASTAnnotation() : numTimesVisited(0), symbol(nullptr), iMin(0), iMax(0), negativeValueCount(0), positiveValueCount(0), iteratorInfo(nullptr) {}
ASTAnnotation(const ASTAnnotation& other) : numTimesVisited(other.numTimesVisited), iMin(other.iMin), iMax(other.iMax),
negativeValueCount(other.negativeValueCount), positiveValueCount(other.positiveValueCount), symbol(other.symbol), types(other.types) {}
negativeValueCount(other.negativeValueCount), positiveValueCount(other.positiveValueCount), symbol(other.symbol), types(other.types), iteratorInfo(other.iteratorInfo) {}

///! how often was node visited? Helpful annotation for if-branches
size_t numTimesVisited;
Expand All @@ -382,6 +397,9 @@ class ASTAnnotation {
///! traced types
std::vector<python::Type> types;

///! iterator-specific info
std::shared_ptr<IteratorInfo> iteratorInfo;

inline python::Type majorityType() const {
if(types.empty())
return python::Type::UNKNOWN;
Expand Down
13 changes: 0 additions & 13 deletions tuplex/codegen/include/ASTHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,6 @@ namespace tuplex {
* @return vector of identifiers
*/
extern std::vector<ASTNode *> getForLoopMultiTarget(ASTNode* target);

/*!
* error codes used for error handling of unsupported types.
* AnnotatedAST keeps one value of this enum in its _typeError member, initialized to TYPE_ERROR_NONE.
*/
enum class CompileError {
TYPE_ERROR_NONE, // initial state of AnnotatedAST::_typeError, i.e. no unsupported type recorded
TYPE_ERROR_LIST_OF_LISTS, // NOTE(review): the remaining names suggest the unsupported list element kind (lists/tuples/dicts/mixed types) -- confirm against the code that sets them
TYPE_ERROR_LIST_OF_TUPLES,
TYPE_ERROR_LIST_OF_DICTS,
TYPE_ERROR_LIST_OF_MULTITYPES
};

extern std::string compileErrorToStr(const CompileError& err);
}

#endif //TUPLEX_ASTHELPERS_H
15 changes: 8 additions & 7 deletions tuplex/codegen/include/AnnotatedAST.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <Logger.h>
#include <SymbolTable.h>
#include <ClosureEnvironment.h>
#include <ASTHelpers.h>
#include <IFailable.h>

#include <llvm/ADT/APFloat.h>
#include <llvm/ADT/STLExtras.h>
Expand All @@ -38,7 +38,7 @@ namespace tuplex {
};

// class holding an abstract syntax tree
class AnnotatedAST {
class AnnotatedAST : public IFailable {
private:

// name of the function/last statement within the IR module
Expand All @@ -52,7 +52,6 @@ namespace tuplex {
// holds the AST tree after successful parsing
ASTNode *_root;
bool _typesDefined; // lazy check variable whether types are already defined or not
CompileError _typeError; // temporary variable for dealing with unsupported type

ClosureEnvironment _globals; // global variables + modules

Expand All @@ -74,9 +73,9 @@ namespace tuplex {
// updates function ast with type & also updates the param nodes...
void setFunctionType(ASTNode* node, const python::Type& type);
public:
AnnotatedAST(): _root(nullptr), _typesDefined(false), _allowNumericTypeUnification(false), _typeError(CompileError::TYPE_ERROR_NONE) {}
AnnotatedAST(): _root(nullptr), _typesDefined(false), _allowNumericTypeUnification(false) {}

AnnotatedAST(const AnnotatedAST& other) : _root(nullptr), _typesDefined(other._typesDefined), _globals(other._globals), _allowNumericTypeUnification(other._allowNumericTypeUnification), _typeError(other._typeError) {
AnnotatedAST(const AnnotatedAST& other) : _root(nullptr), _typesDefined(other._typesDefined), _globals(other._globals), _allowNumericTypeUnification(other._allowNumericTypeUnification) {
cloneFrom(other);
}

Expand Down Expand Up @@ -163,9 +162,11 @@ namespace tuplex {
AnnotatedAST& removeParameterTypes();

/*!
* throw exception for unsupported types
* checks _compileErrors in IFailable and throws an exception if return type is not supported and not resolved through fallback mode.
* currently returning list of lists/tuples/dicts/multi-types will raise exception.
* TODO: Add support for returning list of tuples/dicts and use fallback mode for other cases
*/
void checkTypeError();
void checkReturnError();

/*!
* set/upcast return type to target type
Expand Down
22 changes: 21 additions & 1 deletion tuplex/codegen/include/BlockGeneratorVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <LambdaFunction.h>
#include <FunctionRegistry.h>
#include <stack>
#include <IteratorContextProxy.h>

namespace tuplex {

Expand Down Expand Up @@ -107,6 +108,7 @@ namespace codegen {
// assert(llvm::isa<llvm::Constant>(nullPtr));
// }

// iterator slot may not have ptr yet
return codegen::SerializableValue(builder.CreateLoad(ptr), builder.CreateLoad(sizePtr),
nullPtr ? builder.CreateLoad(nullPtr) : nullptr);
}
Expand All @@ -115,7 +117,6 @@ namespace codegen {
assert(ptr && sizePtr);

if(val.val) {

// if tuples etc. are used, then there could be a pointer. When this happens, load & then assign
if(val.val->getType() == ptr->getType()) {
// load val
Expand Down Expand Up @@ -344,6 +345,8 @@ namespace codegen {
// store current iteration ending block and loop ending block for for and while loops
std::deque<llvm::BasicBlock*> _loopBlockStack;

std::shared_ptr<IteratorContextProxy> _iteratorContextProxy;

void init() {

if (!_blockStack.empty()) {
Expand All @@ -360,6 +363,7 @@ namespace codegen {
//_block = nullptr;
_funcNames = std::stack<std::string>();
_numLambdaFunctionsEncountered = 0;
_iteratorContextProxy = std::make_shared<IteratorContextProxy>(_env);
}

/*!
Expand Down Expand Up @@ -634,6 +638,22 @@ namespace codegen {
const std::unordered_map<std::string, VariableRealization> &else_var_realizations);

llvm::Value *generateConstantIntegerPower(llvm::IRBuilder<>& builder, llvm::Value *base, int64_t exponent);

/*!
* should get called when targetType is iteratorType.
* uses targetType and the iteratorInfo annotation to get the concrete LLVM type for the iterator variable,
* then allocates an iterator struct and updates the slot ptr if the current slot ptr type is different from that concrete LLVM type.
* @param builder LLVM IR builder (presumably used to emit the allocation -- confirm in the definition)
* @param slot variable slot whose ptr may get updated
* @param val value belonging to the variable (exact use is not visible in this header -- see the definition)
* @param targetType type of the variable, expected to be an iteratorType
* @param iteratorInfo iterator annotation used together with targetType to derive the concrete LLVM type
*/
void updateIteratorVariableSlot(llvm::IRBuilder<> &builder,
VariableSlot *slot,
const SerializableValue &val,
const python::Type &targetType,
const std::shared_ptr<IteratorInfo> &iteratorInfo);
};
}
}
Expand Down
59 changes: 58 additions & 1 deletion tuplex/codegen/include/FunctionRegistry.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <Token.h>
#include <LambdaFunction.h>
#include <unordered_map>
#include <IteratorContextProxy.h>

#include <Utils.h>

Expand All @@ -46,7 +47,9 @@ namespace tuplex {
*/
class FunctionRegistry {
public:
FunctionRegistry(LLVMEnvironment& env, bool sharedObjectPropagation) : _env(env), _sharedObjectPropagation(sharedObjectPropagation) {}
// Binds the registry to env, stores the sharedObjectPropagation flag and creates the
// IteratorContextProxy for the same environment (it receives the address of env).
FunctionRegistry(LLVMEnvironment& env, bool sharedObjectPropagation)
    : _env(env),
      _sharedObjectPropagation(sharedObjectPropagation),
      _iteratorContextProxy(std::make_shared<IteratorContextProxy>(&env)) {}

codegen::SerializableValue createGlobalSymbolCall(LambdaFunctionBuilder& lfb,
llvm::IRBuilder<>& builder,
Expand Down Expand Up @@ -101,6 +104,59 @@ namespace tuplex {

SerializableValue createRandomChoiceCall(LambdaFunctionBuilder &lfb, llvm::IRBuilder<> &builder, const python::Type &argType, const SerializableValue &arg);

// Declarations of the per-builtin helpers for iterator-related calls.
// NOTE(review): the descriptions below are derived from the function names only -- confirm against the definitions.

// presumably creates the call for the builtin iter()
SerializableValue createIterCall(LambdaFunctionBuilder &lfb,
llvm::IRBuilder<>& builder,
const python::Type &argsType,
const python::Type &retType,
const std::vector<tuplex::codegen::SerializableValue> &args);

// presumably creates the call for the builtin reversed()
SerializableValue createReversedCall(LambdaFunctionBuilder &lfb,
llvm::IRBuilder<>& builder,
const python::Type &argsType,
const python::Type &retType,
const std::vector<tuplex::codegen::SerializableValue> &args);

// presumably creates the call for the builtin next(); additionally receives the iterator annotation
SerializableValue createNextCall(LambdaFunctionBuilder &lfb,
llvm::IRBuilder<>& builder,
const python::Type &argsType,
const python::Type &retType,
const std::vector<tuplex::codegen::SerializableValue> &args,
const std::shared_ptr<IteratorInfo> &iteratorInfo);

// presumably creates the call for the builtin zip(); additionally receives the iterator annotation
SerializableValue createZipCall(LambdaFunctionBuilder &lfb,
llvm::IRBuilder<>& builder,
const python::Type &argsType,
const python::Type &retType,
const std::vector<tuplex::codegen::SerializableValue> &args,
const std::shared_ptr<IteratorInfo> &iteratorInfo);

// presumably creates the call for the builtin enumerate(); additionally receives the iterator annotation
SerializableValue createEnumerateCall(LambdaFunctionBuilder &lfb,
llvm::IRBuilder<>& builder,
const python::Type &argsType,
const python::Type &retType,
const std::vector<tuplex::codegen::SerializableValue> &args,
const std::shared_ptr<IteratorInfo> &iteratorInfo);

/*!
* Create calls related to iterators: iterator generating calls (iter(), zip(), enumerate())
* as well as function calls that take iteratorType as argument (next()).
* @param lfb lambda function builder
* @param builder LLVM IR builder
* @param symbol name of the function to call (presumably one of the builtins listed above -- confirm in the definition)
* @param argsType type of the call arguments
* @param retType return type of the call
* @param args values of the call arguments
* @param iteratorInfo iterator-specific annotation belonging to the call
* @return result of the created call as SerializableValue
*/
SerializableValue createIteratorRelatedSymbolCall(tuplex::codegen::LambdaFunctionBuilder &lfb,
llvm::IRBuilder<> &builder,
const std::string &symbol,
const python::Type &argsType,
const python::Type &retType,
const std::vector<tuplex::codegen::SerializableValue> &args,
const std::shared_ptr<IteratorInfo> &iteratorInfo);

SerializableValue createDictConstructor(LambdaFunctionBuilder& lfb, llvm::IRBuilder<>& builder, python::Type argsType, const std::vector<tuplex::codegen::SerializableValue> &args);
void getValueFromcJSON(llvm::IRBuilder<> &builder, llvm::Value *cjson_val, python::Type retType,
llvm::Value *retval,
Expand Down Expand Up @@ -139,6 +195,7 @@ namespace tuplex {
private:
LLVMEnvironment& _env;
bool _sharedObjectPropagation;
std::shared_ptr<IteratorContextProxy> _iteratorContextProxy;

// lookup (symbolname, typehash)
std::unordered_map<std::tuple<std::string, python::Type>, llvm::Function*> _funcMap;
Expand Down
57 changes: 57 additions & 0 deletions tuplex/codegen/include/IFailable.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,24 @@
#include <Base.h>
#include <Logger.h>

/*!
* error handling for unsupported language features (i.e. valid python UDF code that is not supported yet in Tuplex).
* IFailable collects values of this enum in its _compileErrors member.
*/
enum class CompileError {
COMPILE_ERROR_NONE, // no error; documented return value of getReturnError() when no return-type error exists
TYPE_ERROR_LIST_OF_LISTS,
TYPE_ERROR_RETURN_LIST_OF_TUPLES, // NOTE(review): the TYPE_ERROR_RETURN_* values presumably describe unsupported UDF return types -- confirm against the code that adds them
TYPE_ERROR_RETURN_LIST_OF_DICTS,
TYPE_ERROR_RETURN_LIST_OF_LISTS,
TYPE_ERROR_RETURN_LIST_OF_MULTITYPES,
TYPE_ERROR_LIST_OF_MULTITYPES,
TYPE_ERROR_ITER_CALL_WITH_NONHOMOGENEOUS_TUPLE,
TYPE_ERROR_ITER_CALL_WITH_DICTIONARY,
TYPE_ERROR_RETURN_ITERATOR,
TYPE_ERROR_NEXT_CALL_DIFFERENT_DEFAULT_TYPE,
TYPE_ERROR_MIXED_ASTNODETYPE_IN_FOR_LOOP_EXPRLIST, // exprlist contains a mix of tuple/list of identifiers and single identifier
};

/*!
* helper interface/trait especially useful for visitors that may or may not fail
* when executed. Provides a silent and an explicit mode for logging errors/warnings/etc.
Expand All @@ -23,6 +41,8 @@ class IFailable {
bool _succeeded;
bool _silentMode; // don't issue warnings
std::vector<std::tuple<std::string, std::string>> _messages; //! stores messages in silent mode
std::vector<CompileError> _compileErrors;

protected:
/*!
* logs an error. this will automatically set the status to failure
Expand All @@ -39,7 +59,21 @@ class IFailable {
void reset() {
    // drop everything recorded so far: compile errors and stored messages
    _compileErrors.clear();
    _messages.clear();

    // back to the succeeded state; _silentMode is intentionally left as it is
    _succeeded = true;
}

/*!
* add all CompileErrors in err to _compileErrors.
* The new errors are inserted in front of the already recorded ones; their relative order within err is kept.
* @param err compile errors to record
*/
void addCompileErrors(const std::vector<CompileError> &err) {
    const auto insertPos = _compileErrors.begin();
    _compileErrors.insert(insertPos, err.begin(), err.end());
}

/*!
* add a single CompileError to _compileErrors (appended after the already recorded ones).
* @param err compile error to record
*/
void addCompileError(const CompileError& err) {
    _compileErrors.emplace_back(err);
}

public:

// starts in the succeeded state; silentMode is stored in _silentMode ("don't issue warnings")
IFailable(bool silentMode=false) : _succeeded(true), _silentMode(silentMode) {}
Expand All @@ -56,6 +90,29 @@ class IFailable {

// returns a copy of _messages, i.e. the messages stored in silent mode
std::vector<std::tuple<std::string, std::string>> getErrorMessages() const { return _messages; }

/*!
* return all type errors (errors generated from unsupported types) encountered for the current class instance.
* @return
*/
std::vector<CompileError> getCompileErrors() {return _compileErrors;}

/*!
* return the CompileError that describes returning a list of lists/tuples/dicts/multi-types.
* If no such error exists, return COMPILE_ERROR_NONE.
* @return the return-related CompileError, or CompileError::COMPILE_ERROR_NONE if there is none
*/
CompileError getReturnError();

/*!
* remove every compile error (error generated from an unsupported language feature)
* recorded so far for the current class instance.
*/
void clearCompileErrors() {
    _compileErrors.clear();
}

/*!
* return detailed error message of a CompileError.
* @param err compile error to describe
* @return detailed error message belonging to err
*/
std::string compileErrorToStr(const CompileError& err);
};

#endif //TUPLEX_IFAILABLE_H
Loading

0 comments on commit b0a7d35

Please sign in to comment.