diff --git a/clang/docs/CMakeLists.txt b/clang/docs/CMakeLists.txt index 356814f994c32..4163dd2d90ad5 100644 --- a/clang/docs/CMakeLists.txt +++ b/clang/docs/CMakeLists.txt @@ -103,13 +103,6 @@ function (gen_rst_file_from_td output_file td_option source docs_targets) endfunction() if (LLVM_ENABLE_SPHINX) - llvm_find_program(dot) - if (HAVE_DOT) - set(DOT ${LLVM_PATH_DOT}) - else() - message(FATAL_ERROR "Cannot find DOT") - endif() - include(AddSphinxTarget) if (SPHINX_FOUND AND (${SPHINX_OUTPUT_HTML} OR ${SPHINX_OUTPUT_MAN})) # Copy rst files to build directory before generating the html diff --git a/clang/docs/ClangRepl.rst b/clang/docs/ClangRepl.rst index 5399036c123fb..aaaabd99bc82f 100644 --- a/clang/docs/ClangRepl.rst +++ b/clang/docs/ClangRepl.rst @@ -213,411 +213,6 @@ concept helps support advanced use cases such as template instantiations on dema automatic language interoperability. It also helps static languages such as C/C++ become apt for data science. -Execution Results Handling in Clang-Repl -======================================== - -Execution Results Handling features discussed below help extend the Clang-Repl -functionality by creating an interface between the execution results of a -program and the compiled program. - -1. **Capture Execution Results**: This feature helps capture the execution results -of a program and bring them back to the compiled program. - -2. **Dump Captured Execution Results**: This feature helps create a temporary dump -for Value Printing/Automatic Printf, that is, to display the value and type of -the captured data. - - -1. Capture Execution Results -============================ - -In many cases, it is useful to bring back the program execution result to the -compiled program. This result can be stored in an object of type **Value**. - -How Execution Results are captured (Value Synthesis): ------------------------------------------------------ - -The synthesizer chooses which expression to synthesize, and then it replaces -the original expression with the synthesized expression. Depending on the -expression type, it may choose to save an object (``LastValue``) of type 'value' -while allocating memory to it (``SetValueWithAlloc()``), or not ( -``SetValueNoAlloc()``). - -.. graphviz:: - :name: valuesynthesis - :caption: Value Synthesis - :alt: Shows how an object of type 'Value' is synthesized - :align: center - - digraph "valuesynthesis" { - rankdir="LR"; - graph [fontname="Verdana", fontsize="12"]; - node [fontname="Verdana", fontsize="12"]; - edge [fontname="Sans", fontsize="9"]; - - start [label=" Create an Object \n 'Last Value' \n of type 'Value' ", shape="note", fontcolor=white, fillcolor="#3333ff", style=filled]; - assign [label=" Assign the result \n to the 'LastValue' \n (based on respective \n Memory Allocation \n scenario) ", shape="box"] - print [label=" Pretty Print \n the Value Object ", shape="Msquare", fillcolor="yellow", style=filled]; - start -> assign; - assign -> print; - - subgraph SynthesizeExpression { - synth [label=" SynthesizeExpr() ", shape="note", fontcolor=white, fillcolor="#3333ff", style=filled]; - mem [label=" New Memory \n Allocation? ", shape="diamond"]; - withaloc [label=" SetValueWithAlloc() ", shape="box"]; - noaloc [label=" SetValueNoAlloc() ", shape="box"]; - right [label=" 1. RValue Structure \n (a temporary value)", shape="box"]; - left2 [label=" 2. LValue Structure \n (a variable with \n an address)", shape="box"]; - left3 [label=" 3. Built-In Type \n (int, float, etc.)", shape="box"]; - output [label=" move to 'Assign' step ", shape="box"]; - - synth -> mem; - mem -> withaloc [label="Yes"]; - mem -> noaloc [label="No"]; - withaloc -> right; - noaloc -> left2; - noaloc -> left3; - right -> output; - left2 -> output; - left3 -> output; - } - output -> assign - } - -Where is the captured result stored? ------------------------------------- - -``LastValue`` holds the last result of the value printing. It is a class member -because it can be accessed even after subsequent inputs. - -**Note:** If no value printing happens, then it is in an invalid state. - -Improving Efficiency and User Experience ----------------------------------------- - -The Value object is essentially used to create a mapping between an expression -'type' and the allocated 'memory'. Built-in types (bool, char, int, -float, double, etc.) are copyable. Their memory allocation size is known -and the Value object can introduce a small-buffer optimization. -In case of objects, the ``Value`` class provides reference-counted memory -management. - -The implementation maps the type as written and the Clang Type to be able to use -the preprocessor to synthesize the relevant cast operations. For example, -``X(char, Char_S)``, where ``char`` is the type from the language's type system -and ``Char_S`` is the Clang builtin type which represents it. This mapping helps -to import execution results from the interpreter in a compiled program and vice -versa. The ``Value.h`` header file can be included at runtime and this is why it -has a very low token count and was developed with strict constraints in mind. - -This also enables the user to receive the computed 'type' back in their code -and then transform the type into something else (e.g., re-cast a double into -a float). Normally, the compiler can handle these conversions transparently, -but in interpreter mode, the compiler cannot see all the 'from' and 'to' types, -so it cannot implicitly do the conversions. So this logic enables providing -these conversions on request. - -On-request conversions can help improve the user experience, by allowing -conversion to a desired 'to' type, when the 'from' type is unknown or unclear. - -Significance of this Feature ----------------------------- - -The 'Value' object enables wrapping a memory region that comes from the -JIT, and bringing it back to the compiled code (and vice versa). -This is a very useful functionality when: - -- connecting an interpreter to the compiled code, or -- connecting an interpreter in another language. - -For example, this feature helps transport values across boundaries. A notable -example is the cppyy project code makes use of this feature to enable running C++ -within Python. It enables transporting values/information between C++ -and Python. - -Note: `cppyy `_ is an automatic, run-time, -Python-to-C++ bindings generator, for calling C++ from Python and Python from C++. -It uses LLVM along with a C++ interpreter (e.g., Cling) to enable features like -run-time instantiation of C++ templates, cross-inheritance, callbacks, -auto-casting, transparent use of smart pointers, etc. - -In a nutshell, this feature enables a new way of developing code, paving the -way for language interoperability and easier interactive programming. - -Implementation Details -====================== - -Interpreter as a REPL vs. as a Library --------------------------------------- - -1 - If we're using the interpreter in interactive (REPL) mode, it will dump -the value (i.e., value printing). - -.. code-block:: console - - if (LastValue.isValid()) { - if (!V) { - LastValue.dump(); - LastValue.clear(); - } else - *V = std::move(LastValue); - } - - -2 - If we're using the interpreter as a library, then it will pass the value -to the user. - -Incremental AST Consumer ------------------------- - -The ``IncrementalASTConsumer`` class wraps the original code generator -``ASTConsumer`` and it performs a hook, to traverse all the top-level decls, to -look for expressions to synthesize, based on the ``isSemiMissing()`` condition. - -If this condition is found to be true, then ``Interp.SynthesizeExpr()`` will be -invoked. - -**Note:** Following is a sample code snippet. Actual code may vary over time. - -.. code-block:: console - - for (Decl *D : DGR) - if (auto *TSD = llvm::dyn_cast(D); - TSD && TSD->isSemiMissing()) - TSD->setStmt(Interp.SynthesizeExpr(cast(TSD->getStmt()))); - - return Consumer->HandleTopLevelDecl(DGR); - -The synthesizer will then choose the relevant expression, based on its type. - -Communication between Compiled Code and Interpreted Code --------------------------------------------------------- - -In Clang-Repl there is **interpreted code**, and this feature adds a 'value' -runtime that can talk to the **compiled code**. - -Following is an example where the compiled code interacts with the interpreter -code. The execution results of an expression are stored in the object 'V' of -type Value. This value is then printed, effectively helping the interpreter -use a value from the compiled code. - -.. code-block:: console - - int Global = 42; - void setGlobal(int val) { Global = val; } - int getGlobal() { return Global; } - Interp.ParseAndExecute(“void setGlobal(int val);”); - Interp.ParseAndExecute(“int getGlobal();”); - Value V; - Interp.ParseAndExecute(“getGlobal()”, &V); - std::cout << V.getAs() << “\n”; // Prints 42 - - -**Note:** Above is an example of interoperability between the compiled code and -the interpreted code. Interoperability between languages (e.g., C++ and Python) -works similarly. - - -2. Dump Captured Execution Results -================================== - -This feature helps create a temporary dump to display the value and type -(pretty print) of the desired data. This is a good way to interact with the -interpreter during interactive programming. - -How value printing is simplified (Automatic Printf) ---------------------------------------------------- - -The ``Automatic Printf`` feature makes it easy to display variable values during -program execution. Using the ``printf`` function repeatedly is not required. -This is achieved using an extension in the ``libclangInterpreter`` library. - -To automatically print the value of an expression, simply write the expression -in the global scope **without a semicolon**. - -.. graphviz:: - :name: automaticprintf - :caption: Automatic PrintF - :alt: Shows how Automatic PrintF can be used - :align: center - - digraph "AutomaticPrintF" { - size="6,4"; - rankdir="LR"; - graph [fontname="Verdana", fontsize="12"]; - node [fontname="Verdana", fontsize="12"]; - edge [fontname="Sans", fontsize="9"]; - - manual [label=" Manual PrintF ", shape="box"]; - int1 [label=" int ( &) 42 ", shape="box"] - auto [label=" Automatic PrintF ", shape="box"]; - int2 [label=" int ( &) 42 ", shape="box"] - - auto -> int2 [label="int x = 42; \n x"]; - manual -> int1 [label="int x = 42; \n printf("(int &) %d \\n", x);"]; - } - - -Significance of this feature ----------------------------- - -Inspired by a similar implementation in `Cling `_, -this feature added to upstream Clang repo has essentially extended the syntax of -C++, so that it can be more helpful for people that are writing code for data -science applications. - -This is useful, for example, when you want to experiment with a set of values -against a set of functions, and you'd like to know the results right away. -This is similar to how Python works (hence its popularity in data science -research), but the superior performance of C++, along with this flexibility -makes it a more attractive option. - -Implementation Details -====================== - -Parsing mechanism: ------------------- - -The Interpreter in Clang-Repl (``Interpreter.cpp``) includes the function -``ParseAndExecute()`` that can accept a 'Value' parameter to capture the result. -But if the value parameter is made optional and it is omitted (i.e., that the -user does not want to utilize it elsewhere), then the last value can be -validated and pushed into the ``dump()`` function. - -.. graphviz:: - :name: parsing - :caption: Parsing Mechanism - :alt: Shows the Parsing Mechanism for Pretty Printing - :align: center - - - digraph "prettyprint" { - rankdir="LR"; - graph [fontname="Verdana", fontsize="12"]; - node [fontname="Verdana", fontsize="12"]; - edge [fontname="Verdana", fontsize="9"]; - - parse [label=" ParseAndExecute() \n in Clang ", shape="box"]; - capture [label=" Capture 'Value' parameter \n for processing? ", shape="diamond"]; - use [label=" Use for processing ", shape="box"]; - dump [label=" Validate and push \n to dump()", shape="box"]; - callp [label=" call print() function ", shape="box"]; - type [label=" Print the Type \n ReplPrintTypeImpl()", shape="box"]; - data [label=" Print the Data \n ReplPrintDataImpl() ", shape="box"]; - output [label=" Output Pretty Print \n to the user ", shape="box", fontcolor=white, fillcolor="#3333ff", style=filled]; - - parse -> capture [label="Optional 'Value' Parameter"]; - capture -> use [label="Yes"]; - use -> End; - capture -> dump [label="No"]; - dump -> callp; - callp -> type; - callp -> data; - type -> output; - data -> output; - } - -**Note:** Following is a sample code snippet. Actual code may vary over time. - -.. code-block:: console - - llvm::Error Interpreter::ParseAndExecute(llvm::StringRef Code, Value *V) { - - auto PTU = Parse(Code); - if (!PTU) - return PTU.takeError(); - if (PTU->TheModule) - if (llvm::Error Err = Execute(*PTU)) - return Err; - - if (LastValue.isValid()) { - if (!V) { - LastValue.dump(); - LastValue.clear(); - } else - *V = std::move(LastValue); - } - return llvm::Error::success(); - } - -The ``dump()`` function (in ``value.cpp``) calls the ``print()`` function. - -Printing the Data and Type are handled in their respective functions: -``ReplPrintDataImpl()`` and ``ReplPrintTypeImpl()``. - -Annotation Token (annot_repl_input_end) ---------------------------------------- - -This feature uses a new token (``annot_repl_input_end``) to consider printing the -value of an expression if it doesn't end with a semicolon. When parsing an -Expression Statement, if the last semicolon is missing, then the code will -pretend that there one and set a marker there for later utilization, and -continue parsing. - -A semicolon is normally required in C++, but this feature expands the C++ -syntax to handle cases where a missing semicolon is expected (i.e., when -handling an expression statement). It also makes sure that an error is not -generated for the missing semicolon in this specific case. - -This is accomplished by identifying the end position of the user input -(expression statement). This helps store and return the expression statement -effectively, so that it can be printed (displayed to the user automatically). - -**Note:** This logic is only available for C++ for now, since part of the -implementation itself requires C++ features. Future versions may support more -languages. - -.. code-block:: console - - Token *CurTok = nullptr; - // If the semicolon is missing at the end of REPL input, consider if - // we want to do value printing. Note this is only enabled in C++ mode - // since part of the implementation requires C++ language features. - // Note we shouldn't eat the token since the callback needs it. - if (Tok.is(tok::annot_repl_input_end) && Actions.getLangOpts().CPlusPlus) - CurTok = &Tok; - else - // Otherwise, eat the semicolon. - ExpectAndConsumeSemi(diag::err_expected_semi_after_expr); - - StmtResult R = handleExprStmt(Expr, StmtCtx); - if (CurTok && !R.isInvalid()) - CurTok->setAnnotationValue(R.get()); - - return R; - } - -AST Transformation -------------------- - -When Sema encounters the ``annot_repl_input_end`` token, it knows to transform -the AST before the real CodeGen process. It will consume the token and set a -'semi missing' bit in the respective decl. - -.. code-block:: console - - if (Tok.is(tok::annot_repl_input_end) && - Tok.getAnnotationValue() != nullptr) { - ConsumeAnnotationToken(); - cast(DeclsInGroup.back())->setSemiMissing(); - } - -In the AST Consumer, traverse all the Top Level Decls, to look for expressions -to synthesize. If the current Decl is the Top Level Statement -Decl(``TopLevelStmtDecl``) and has a semicolon missing, then ask the interpreter -to synthesize another expression (an internal function call) to replace this -original expression. - - -Detailed RFC and Discussion: ----------------------------- - -For more technical details, community discussion and links to patches related -to these features, -Please visit: `RFC on LLVM Discourse `_. - -Some logic presented in the RFC (e.g. ValueGetter()) may be outdated, -compared to the final developed solution. Related Reading =============== diff --git a/clang/docs/conf.py b/clang/docs/conf.py index 31a4daa39d5b8..ca310026f53e2 100644 --- a/clang/docs/conf.py +++ b/clang/docs/conf.py @@ -27,7 +27,7 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ["sphinx.ext.todo", "sphinx.ext.mathjax", "sphinx.ext.graphviz"] +extensions = ["sphinx.ext.todo", "sphinx.ext.mathjax"] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"]