-
Notifications
You must be signed in to change notification settings - Fork 243
/
verifier.h
511 lines (380 loc) · 17.9 KB
/
verifier.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
/*
* Copyright 2014 The Kythe Authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef KYTHE_CXX_VERIFIER_H_
#define KYTHE_CXX_VERIFIER_H_
#include <functional>
#include <optional>
#include <string>
#include "absl/container/flat_hash_map.h"
#include "absl/types/span.h"
#include "assertions.h"
#include "kythe/proto/common.pb.h"
#include "kythe/proto/storage.pb.h"
namespace kythe {
namespace verifier {
/// \brief Runs logic programs.
///
/// The `Verifier` combines an `AssertionContext` with a database of Kythe
/// facts. It can be used to determine whether the goals specified in the
/// `AssertionContext` are satisfiable.
class Verifier {
public:
/// \param trace_lex Dump lexing debug information
/// \param trace_parse Dump parsing debug information
explicit Verifier(bool trace_lex = false, bool trace_parse = false);
/// \brief Loads an in-memory source file.
/// \param filename The name to use for the file; may be blank.
/// \param vname The AstNode of the vname for the file.
/// \param text The symbol for the text to load
/// \return false if we failed.
bool LoadInMemoryRuleFile(const std::string& filename, AstNode* vname,
Symbol text);
/// \brief Loads a source file with goal comments indicating rules and data.
/// The VName for the source file will be determined by matching its content
/// against file nodes.
/// \param filename The filename to load
/// \return false if we failed
bool LoadInlineRuleFile(const std::string& filename);
/// \brief Loads a text proto with goal comments indicating rules and data.
/// The VName for the source file will be blank.
/// \param file_data The data to load
/// \param path the path to use for anchors
/// \param root the root to use for anchors
/// \param corpus the corpus to use for anchors
/// \return false if we failed
bool LoadInlineProtoFile(const std::string& file_data,
absl::string_view path = "",
absl::string_view root = "",
absl::string_view corpus = "");
/// \brief During verification, ignore duplicate facts.
void IgnoreDuplicateFacts();
/// \brief During verification, ignore conflicting /kythe/code facts.
void IgnoreCodeConflicts();
/// \brief Save results of verification keyed by inspection label.
void SaveEVarAssignments();
/// \brief Dump all goals to standard out.
void ShowGoals();
/// \brief Prints out a particular goal with its original source location
/// to standard error.
/// \param group_index The index of the goal's group.
/// \param goal_index The index of the goal to print
/// \sa highest_goal_reached, highest_group_reached
void DumpErrorGoal(size_t group_index, size_t goal_index);
/// \brief Dump known facts to standard out as a GraphViz graph.
void DumpAsDot();
/// \brief Dump known facts to standard out as JSON.
void DumpAsJson();
/// \brief Attempts to satisfy all goals from all loaded rule files and facts.
/// \param inspect function to call on any inspection request
/// \return true if all goals could be satisfied.
bool VerifyAllGoals(std::function<bool(Verifier* context, const Inspection&,
std::string_view)>
inspect);
/// \brief Attempts to satisfy all goals from all loaded rule files and facts.
/// \return true if all goals could be satisfied.
bool VerifyAllGoals();
/// \brief Adds a single Kythe fact to the database.
/// \param database_name some name used to define the database; should live
/// as long as the `Verifier`. Used only for diagnostics.
/// \param fact_id some identifier for the fact. Used only for diagnostics.
/// \return false if something went wrong.
bool AssertSingleFact(std::string* database_name, unsigned int fact_id,
const kythe::proto::Entry& entry);
/// \brief Perform basic well-formedness checks on the input database.
/// \pre The database contains only fact-shaped terms, as generated by
/// `AssertSingleFact`.
/// \return false if the database was not well-formed.
bool PrepareDatabase();
/// Arena for allocating memory for both static data loaded from the database
/// and dynamic data allocated during the course of evaluation.
Arena* arena() { return &arena_; }
/// Symbol table for uniquing strings.
SymbolTable* symbol_table() { return &symbol_table_; }
/// \brief Allocates an identifier for some token.
/// \param location The source location for the identifier.
/// \param token The text of the identifier.
/// \return An `Identifier`. This may not be unique.
Identifier* IdentifierFor(const yy::location& location,
const std::string& token);
/// \brief Stringifies an integer, then makes an identifier out of it.
/// \param location The source location for the identifier.
/// \param integer The integer to stringify.
/// \return An `Identifier`. This may not be unique.
Identifier* IdentifierFor(const yy::location& location, int integer);
/// \brief Convenience function to make `(App head (Tuple values))`.
/// \param location The source location for the predicate.
/// \param head The lhs of the `App` to allocate.
/// \param values The body of the `Tuple` to allocate.
AstNode* MakePredicate(const yy::location& location, AstNode* head,
absl::Span<AstNode* const> values);
/// \brief The head used for equality predicates.
Identifier* eq_id() { return eq_id_; }
/// \brief The head used for any VName predicate.
AstNode* vname_id() { return vname_id_; }
/// \brief The head used for any Fact predicate.
AstNode* fact_id() { return fact_id_; }
/// \brief The fact kind for an root/empty fact label.
AstNode* root_id() { return root_id_; }
/// \brief The empty string as an identifier.
AstNode* empty_string_id() { return empty_string_id_; }
/// \brief The fact kind for an edge ordinal.
AstNode* ordinal_id() { return ordinal_id_; }
/// \brief The fact kind used to assign a node its kind (eg /kythe/node/kind).
AstNode* kind_id() { return kind_id_; }
/// \brief The fact kind used for an anchor.
AstNode* anchor_id() { return anchor_id_; }
/// \brief The fact kind used for a file.
AstNode* file_id() { return file_id_; }
/// \brief Object for parsing and storing assertions.
AssertionParser* parser() { return &parser_; }
/// \brief Returns the highest group index the verifier reached during
/// solving.
size_t highest_group_reached() const { return highest_group_reached_; }
/// \brief Returns the highest goal index the verifier reached during
/// solving.
size_t highest_goal_reached() const { return highest_goal_reached_; }
/// \brief Change the regex used to identify goals in source text.
/// \return false on failure.
bool SetGoalCommentRegex(const std::string& regex, std::string* error);
/// \brief Use a prefix to match goals in source text.
void SetGoalCommentPrefix(const std::string& it);
/// \brief Look for assertions in file node text.
void UseFileNodes() { assertions_from_file_nodes_ = true; }
/// \brief Only raise a warning if a file VName is missing.
void AllowMissingFileVNames() { allow_missing_file_vnames_ = true; }
/// \brief Convert MarkedSource-valued facts to graphs.
void ConvertMarkedSource() { convert_marked_source_ = true; }
/// \brief Show anchor locations in graph dumps (instead of @).
void ShowAnchors() { show_anchors_ = true; }
/// \brief Show VNames for nodes which also have labels in graph dumps.
void ShowLabeledVnames() { show_labeled_vnames_ = true; }
/// \brief Show the /kythe and /kythe/edge prefixes in graph dumps.
void ShowFactPrefix() { show_fact_prefix_ = true; }
/// \brief Elide unlabeled nodes from graph dumps.
void ElideUnlabeled() { show_unlabeled_ = false; }
/// \brief Check for singleton EVars.
/// \return true if there were singletons.
bool CheckForSingletonEVars() { return parser_.CheckForSingletonEVars(); }
/// \brief Don't search for file vnames.
void IgnoreFileVnames() { file_vnames_ = false; }
/// \brief Use the fast solver.
void UseFastSolver(bool value) { use_fast_solver_ = value; }
/// \brief Gets a string representation of `i`.
/// \deprecated Inspection callbacks will be provided with strings and
/// will no longer have access to the internal AST.
std::string InspectionString(const Inspection& i);
/// \brief Use `corpus` for file nodes without a corpus set.
void UseDefaultFileCorpus(const std::string& corpus) {
default_file_corpus_ = IdentifierFor(builtin_location_, corpus);
}
private:
using InternedVName = std::tuple<Symbol, Symbol, Symbol, Symbol, Symbol>;
/// \brief Interns an AST node known to be a VName.
/// \param node the node to intern.
InternedVName InternVName(AstNode* node);
/// \return a new vname with its corpus filled with the default file corpus
/// if `node` is a vname without a corpus set; otherwise `node`.
AstNode* FixFileVName(AstNode* node);
/// \brief Generate a VName that will not conflict with any other VName.
AstNode* NewUniqueVName(const yy::location& loc);
/// \brief Converts an encoded /kythe/code fact to a form that's useful
/// to the verifier.
/// \param loc The location to use in diagnostics.
/// \return null if something went wrong; otherwise, an AstNode corresponding
/// to a VName of a synthetic node for `code_data`.
AstNode* ConvertCodeFact(const yy::location& loc,
const std::string& code_data);
/// \brief Converts an encoded /kythe/code/json fact to a form that's useful
/// to the verifier.
/// \param loc The location to use in diagnostics.
/// \return null if something went wrong; otherwise, an AstNode corresponding
/// to a VName of a synthetic node for `code_data`.
AstNode* ConvertCodeJsonFact(const yy::location& loc,
const std::string& code_data);
/// \brief Converts a MarkedSource message to a form that's useful
/// to the verifier.
/// \param loc The location to use in diagnostics.
/// \return null if something went wrong; otherwise, an AstNode corresponding
/// to a VName of a synthetic node for `marked_source`.
AstNode* ConvertMarkedSource(
const yy::location& loc,
const kythe::proto::common::MarkedSource& marked_source);
/// \brief Converts a VName proto to its AST representation.
AstNode* ConvertVName(const yy::location& location,
const kythe::proto::VName& vname);
/// \brief Adds an anchor VName.
void AddAnchor(AstNode* vname, size_t begin, size_t end) {
anchors_.emplace(std::make_pair(begin, end), vname);
}
/// \brief Processes a fact tuple for the fast solver.
/// \param tuple the five-tuple representation of a fact
/// \return true if successful.
bool ProcessFactTupleForFastSolver(Tuple* tuple);
/// \sa parser()
AssertionParser parser_;
/// \sa arena()
Arena arena_;
/// \sa symbol_table()
SymbolTable symbol_table_;
/// All known facts.
Database facts_;
/// Maps anchor offsets to anchor VName tuples.
AnchorMap anchors_;
/// Has the database been prepared?
bool database_prepared_ = false;
/// Ignore duplicate facts during verification?
bool ignore_dups_ = false;
/// Ignore conflicting /kythe/code facts during verification?
bool ignore_code_conflicts_ = false;
/// Filename to use for builtin constants.
std::string builtin_location_name_;
/// Location to use for builtin constants.
yy::location builtin_location_;
/// Node to use for the `=` identifier.
Identifier* eq_id_;
/// Node to use for the `vname` constant.
AstNode* vname_id_;
/// Node to use for the `fact` constant.
AstNode* fact_id_;
/// Node to use for the `/` constant.
AstNode* root_id_;
/// Node to use for the empty string constant.
AstNode* empty_string_id_;
/// Node to use for the `/kythe/ordinal` constant.
AstNode* ordinal_id_;
/// Node to use for the `/kythe/node/kind` constant.
AstNode* kind_id_;
/// Node to use for the `anchor` constant.
AstNode* anchor_id_;
/// Node to use for the `/kythe/loc/start` constant.
AstNode* start_id_;
/// Node to use for the `/kythe/loc/end` constant.
AstNode* end_id_;
/// Node to use for the `file` node kind.
AstNode* file_id_;
/// Node to use for the `text` fact kind.
AstNode* text_id_;
/// Node to use for the `code` fact kind. The fact value should be a
/// serialized kythe.proto.MarkedSource message.
AstNode* code_id_;
/// Node to use for the `code/json` fact kind. The fact value should be a
/// JSON-serialized kythe.proto.MarkedSource message.
AstNode* code_json_id_;
/// The highest goal group reached during solving (often the culprit for why
/// the solution failed).
size_t highest_group_reached_ = 0;
/// The highest goal reached during solving (often the culprit for why
/// the solution failed).
size_t highest_goal_reached_ = 0;
/// Whether we save assignments to EVars (by inspection label).
bool saving_assignments_ = false;
/// A map from inspection label to saved assignment. Note that
/// duplicate labels will overwrite one another. This means that
/// it's important to disambiguate cases where this is likely
/// (e.g., we add line and column information to labels we generate
/// for anchors).
std::map<std::string, AstNode*> saved_assignments_;
/// Maps from pretty-printed vnames to (parsed) file node text.
std::map<std::string, Symbol> fake_files_;
/// Read assertions from file nodes.
bool assertions_from_file_nodes_ = false;
/// The regex to look for to identify goal comments. Should have one match
/// group.
std::unique_ptr<RE2> goal_comment_regex_;
/// If true, convert MarkedSource-valued facts to subgraphs. If false,
/// MarkedSource-valued facts will be replaced with opaque but unique
/// identifiers.
bool convert_marked_source_ = false;
/// If true, show anchor locations in graph dumps (instead of @).
bool show_anchors_ = false;
/// If true, show unlabeled nodes in graph dumps.
bool show_unlabeled_ = true;
/// If true, show VNames for labeled nodes in graph dumps.
bool show_labeled_vnames_ = false;
/// If true, include the /kythe and /kythe/edge prefix on facts and edges.
bool show_fact_prefix_ = false;
/// Identifier for MarkedSource child edges.
AstNode* marked_source_child_id_;
/// Identifier for MarkedSource code edges.
AstNode* marked_source_code_edge_id_;
/// Identifier for MarkedSource BOX kinds.
AstNode* marked_source_box_id_;
/// Identifier for MarkedSource TYPE kinds.
AstNode* marked_source_type_id_;
/// Identifier for MarkedSource PARAMETER kinds.
AstNode* marked_source_parameter_id_;
/// Identifier for MarkedSource IDENTIFIER kinds.
AstNode* marked_source_identifier_id_;
/// Identifier for MarkedSource CONTEXT kinds.
AstNode* marked_source_context_id_;
/// Identifier for MarkedSource INITIALIZER kinds.
AstNode* marked_source_initializer_id_;
/// Identifier for MarkedSource MODIFIER kinds.
AstNode* marked_source_modifier_id_;
/// Identifier for MarkedSource PARAMETER_LOOKUP_BY_PARAM kinds.
AstNode* marked_source_parameter_lookup_by_param_id_;
/// Identifier for MarkedSource LOOKUP_BY_PARAM kinds.
AstNode* marked_source_lookup_by_param_id_;
/// Identifier for MarkedSource PARAMETER_LOOKUP_BY_TPARAM kinds.
AstNode* marked_source_parameter_lookup_by_tparam_id_;
/// Identifier for MarkedSource LOOKUP_BY_TPARAM kinds.
AstNode* marked_source_lookup_by_tparam_id_;
/// Identifier for MarkedSource LOOKUP_BY_PARAM_WITH_DEFAULTS kinds.
AstNode* marked_source_parameter_lookup_by_param_with_defaults_id_;
/// Identifier for MarkedSource LOOKUP_BY_TYPED kinds.
AstNode* marked_source_lookup_by_typed_id_;
/// Identifier for MarkedSource kind facts.
AstNode* marked_source_kind_id_;
/// Identifier for MarkedSource pre_text facts.
AstNode* marked_source_pre_text_id_;
/// Identifier for MarkedSource post_child_text facts.
AstNode* marked_source_post_child_text_id_;
/// Identifier for MarkedSource post_text facts.
AstNode* marked_source_post_text_id_;
/// Identifier for MarkedSource lookup_index facts.
AstNode* marked_source_lookup_index_id_;
/// Identifier for MarkedSource default_children_count facts.
AstNode* marked_source_default_children_count_id_;
/// Identifier for MarkedSource add_final_list_token facts.
AstNode* marked_source_add_final_list_token_id_;
/// Identifier for MarkedSource link edges.
AstNode* marked_source_link_id_;
/// Identifier for MarkedSource true values.
AstNode* marked_source_true_id_;
/// Identifier for MarkedSource false values.
AstNode* marked_source_false_id_;
/// Maps from file content to (verified) VName.
absl::flat_hash_map<Symbol, AstNode*> content_to_vname_;
/// Find file vnames by examining file content.
bool file_vnames_ = true;
/// If file_vnames_ is true, only warn if we can't find a file's VName.
bool allow_missing_file_vnames_ = false;
/// Use the fast solver.
bool use_fast_solver_ = false;
/// Sentinel value for a known file.
Symbol known_file_sym_;
/// Sentinel value for a known nonfile.
Symbol known_not_file_sym_;
/// Maps VNames to known_file_sym_, known_not_file_sym_, or file text.
absl::flat_hash_map<InternedVName, Symbol> fast_solver_files_;
/// File corpus to use if none is set on a file node.
AstNode* default_file_corpus_;
/// The symbol for the empty string.
Symbol empty_string_sym_;
};
} // namespace verifier
} // namespace kythe
#endif // KYTHE_CXX_VERIFIER_H_