/
Generic_vs_generic.ml
2870 lines (2723 loc) · 104 KB
/
Generic_vs_generic.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(* Yoann Padioleau
*
* Copyright (C) 2019-2021 r2c
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)
open Common
(* G is the pattern, and B the concrete source code. For now
* we both use the same module but they may differ later
* as the expressivity of the pattern language grows.
*
* subtle: use 'b' to report errors, because 'a' is the sgrep pattern and it
* has no file information usually.
*)
module B = AST_generic
module G = AST_generic
module MV = Metavariable
module Flag = Flag_semgrep
module Config = Config_semgrep_t
module H = AST_generic_helpers
(* optimisations *)
module CK = Caching.Cache_key
module Env = Metavariable_capture
module F = Bloom_filter
open Matching_generic
let logger = Logging.get_logger [ __MODULE__ ]
(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
(* AST generic vs AST generic code matcher.
*
* This module allows to match some AST elements against other AST elements in
* a flexible way, providing a kind of grep but at a syntactical level.
*
* Most of the boilerplate code was generated by
* $ pfff/meta/gen_code -matcher_gen_all
* using OCaml pad-style reflection (see commons/OCaml.ml) on
* h_program-lang/AST_generic.ml.
*
* See pfff/matcher/fuzzy_vs_fuzzy.ml for another approach.
*
* There are four main features allowing a "pattern" to match some "code":
* - metavariables can match anything (see metavar: tag in this file)
* - '...' can match any sequence (see dots: tag)
* - simple constructs match complex constructs having more details
* (e.g., the absence of attribute in a pattern will still match functions
* having many attributes) (see less-is-ok: tag)
* - the underlying AST uses some normalization (!= is transformed in !(..=))
* to support certain code equivalences (see equivalence: tag)
* - we do not care about differences in spaces/indentations/comments.
* we work at the AST-level.
*
* alternatives:
* - would it be simpler to work on a simpler AST, like a Term language,
* or even a Node/Leaf? or Ast_fuzzy? the "less-is-ok" would be
* difficult with that approach, because you need to know that some
* parts of the AST are attributes/annotations that can be skipped.
* In the same way, code equivalences like name resolution on the AST
* would be more difficult with an untyped-general tree.
*)
(*****************************************************************************)
(* Extra Helpers *)
(*****************************************************************************)
let env_add_matched_stmt rightmost_stmt (tin : tin) =
[ extend_stmts_match_span rightmost_stmt tin ]
(* equivalence: on different indentation
* todo? work? was copy-pasted from XHP sgrep matcher
*)
let m_string_xhp_text sa sb =
if sa =$= sb || (sa =~ "^[\n ]+$" && sb =~ "^[\n ]+$") then return ()
else fail ()
let has_ellipsis_and_filter_ellipsis_gen f xs =
let has_ellipsis = ref false in
let ys =
xs
|> Common.exclude (fun x ->
let res = f x in
if res then has_ellipsis := true;
res)
in
(!has_ellipsis, ys)
let has_ellipsis_and_filter_ellipsis xs =
has_ellipsis_and_filter_ellipsis_gen
(function
| { G.e = G.Ellipsis _; _ } -> true
| _ -> false)
xs
let has_xml_ellipsis_and_filter_ellipsis xs =
has_ellipsis_and_filter_ellipsis_gen
(function
| G.XmlEllipsis _ -> true
| _ -> false)
xs
let has_case_ellipsis_and_filter_ellipsis xs =
has_ellipsis_and_filter_ellipsis_gen
(function
| G.CaseEllipsis _ -> true
| _ -> false)
xs
let has_match_case_ellipsis_and_filter_ellipsis xs =
has_ellipsis_and_filter_ellipsis_gen
(fun (pat, body) ->
match (pat, body) with
| G.PatEllipsis _, { G.e = G.Ellipsis _; _ } -> true
| _ -> false)
xs
let rec obj_and_method_calls_of_expr e =
match e.G.e with
| B.Call ({ e = B.DotAccess (e, tok, fld); _ }, args) ->
let o, xs = obj_and_method_calls_of_expr e in
(o, (fld, tok, args) :: xs)
| _ -> (e, [])
let rec expr_of_obj_and_method_calls (obj, xs) =
match xs with
| [] -> obj
| (fld, tok, args) :: xs ->
let e = expr_of_obj_and_method_calls (obj, xs) in
B.Call (B.DotAccess (e, tok, fld) |> G.e, args) |> G.e
let rec all_suffix_of_list xs =
xs
::
(match xs with
| [] -> []
| _x :: xs -> all_suffix_of_list xs)
let _ =
Common2.example
(all_suffix_of_list [ 1; 2; 3 ] = [ [ 1; 2; 3 ]; [ 2; 3 ]; [ 3 ]; [] ])
(* copy paste of pfff/lang_ml/analyze/module_ml.ml *)
let module_name_of_filename file =
let _d, b, _e = Common2.dbe_of_filename file in
let module_name = String.capitalize_ascii b in
module_name
(*****************************************************************************)
(* Optimisations (caching, bloom filter) *)
(*****************************************************************************)
(* Getters and setters that were left abstract in the cache implementation. *)
let cache_access : tin Caching.Cache.access =
{
get_span_field = (fun tin -> tin.stmts_match_span);
set_span_field = (fun tin x -> { tin with stmts_match_span = x });
get_mv_field = (fun tin -> tin.mv);
set_mv_field = (fun tin mv -> { tin with mv });
}
let stmts_may_match pattern_stmts (stmts : G.stmt list) =
(* We could gather all the strings from the stmts
and perform one intersection, but this is quite slow.
By iterating over each stmt, we can shortcircuit *)
if not !Flag.use_bloom_filter then true
else
let pattern_strs =
Bloom_annotation.set_of_pattern_strings (Ss pattern_stmts)
in
let pats_in_stmt pats (stmt : AST_generic.stmt) =
match stmt.s_bf with
| None -> true
| Some bf -> Set_.subset pats (F.set_of_filter bf)
in
let rec patterns_in_any_stmt pats stmts acc =
match stmts with
| [] -> acc
| stmt :: rest -> (
match acc with
| false -> patterns_in_any_stmt pats rest (pats_in_stmt pats stmt)
| true -> acc)
in
patterns_in_any_stmt pattern_strs stmts true
[@@profiling]
(*****************************************************************************)
(* Name *)
(*****************************************************************************)
(* coupling: modify also m_ident_and_id_info *)
(* You should prefer to use m_ident_and_id_info if you can *)
let m_ident a b =
match (a, b) with
(* metavar: *)
| (str, tok), b when MV.is_metavar_name str ->
envf (str, tok) (MV.Id (b, None))
(* in some languages such as Javascript certain entities like
* fields can use strings for identifiers (e.g., {"myfield": 1}),
* which gives the opportunity to use regexp string for fields
* (e.g., {"=~/.*field/": $X}).
*)
| (stra, _), (strb, _) when Pattern.is_regexp_string stra ->
let re_match = Matching_generic.regexp_matcher_of_regexp_string stra in
if re_match strb then return () else fail ()
(* general case *)
| a, b -> (m_wrap m_string) a b
let m_dotted_name a b =
match (a, b) with
(* TODO: [$X] should match any list *)
| a, b -> (m_list m_ident) a b
(* This is for languages like Python where foo.arg.func is not parsed
* as a qualified name but as a chain of DotAccess.
*)
let make_dotted xs =
match xs with
| [] -> raise Impossible
| x :: xs ->
let base = B.N (B.Id (x, B.empty_id_info ())) |> G.e in
List.fold_left
(fun acc e ->
let tok = Parse_info.fake_info (snd x) "." in
B.DotAccess (acc, tok, B.FN (B.Id (e, B.empty_id_info ()))) |> G.e)
base xs
(* similar to m_list_prefix but binding $X to the whole list *)
let rec m_dotted_name_prefix_ok a b =
match (a, b) with
| [], [] -> return ()
| [ (s, t) ], [ x ] when MV.is_metavar_name s -> envf (s, t) (MV.Id (x, None))
| [ (s, t) ], _ :: _ when MV.is_metavar_name s ->
(* TODO: should we bind it instead to a MV.N IdQualified?
* but it is actually just the qualifier part; the last id
* is not here (even though make_dottd will not care about that)
*)
envf (s, t) (MV.E (make_dotted b))
| xa :: aas, xb :: bbs ->
let* () = m_ident xa xb in
m_dotted_name_prefix_ok aas bbs
(* prefix is ok *)
| [], _ -> return ()
| _ :: _, _ -> fail ()
(* less-is-ok: prefix matching is supported for imports, eg.:
* pattern: import foo.x should match: from foo.x.z.y
*)
let m_module_name_prefix a b =
match (a, b) with
(* metavariable case *)
| G.FileName ((a_str, _) as a1), B.FileName b1 when MV.is_metavar_name a_str
->
(* Bind as a literal string expression so that pretty-printing works.
* This also means that this metavar can match both literal strings and
* filenames with the same string content. *)
envf a1 (MV.E (B.L (B.String b1) |> G.e))
(* dots: '...' on string or regexp *)
| G.FileName a, B.FileName b ->
m_string_ellipsis_or_metavar_or_default
(* TODO figure out what prefix support means here *)
~m_string_for_default:m_filepath_prefix a b
| G.DottedName a1, B.DottedName b1 -> m_dotted_name_prefix_ok a1 b1
| G.FileName _, _
| G.DottedName _, _ ->
fail ()
let m_module_name a b =
match (a, b) with
| G.FileName a1, B.FileName b1 -> (m_wrap m_string) a1 b1
| G.DottedName a1, B.DottedName b1 -> m_dotted_name a1 b1
| G.FileName _, _
| G.DottedName _, _ ->
fail ()
let m_sid a b = if a =|= b then return () else fail ()
let m_resolved_name_kind a b =
match (a, b) with
| G.Local, B.Local -> return ()
| G.EnclosedVar, B.EnclosedVar -> return ()
| G.Param, B.Param -> return ()
| G.Global, B.Global -> return ()
| G.ImportedEntity a1, B.ImportedEntity b1 -> m_dotted_name a1 b1
| G.ImportedModule a1, B.ImportedModule b1 -> m_module_name a1 b1
| G.Macro, B.Macro -> return ()
| G.EnumConstant, B.EnumConstant -> return ()
| G.TypeName, B.TypeName -> return ()
| G.Local, _
| G.Param, _
| G.Global, _
| G.EnclosedVar, _
| G.Macro, _
| G.EnumConstant, _
| G.TypeName, _
| G.ImportedEntity _, _
| G.ImportedModule _, _ ->
fail ()
let _m_resolved_name (a1, a2) (b1, b2) =
let* () = m_resolved_name_kind a1 b1 in
m_sid a2 b2
(* Supports deep expression matching, either when done explicitly (e.g. with deep ellipsis) or implicitly
*
* If "go_deeper_expr" is not enabled, reduces to `first_fun a b`.
* If "go_deeper_expr" is enabled, will first check `first_fun a b`, then, if that match fails, will
* match against all sub-expressions of b.
*
* See m_expr_deep for an example of usage.
*
* deep_fun: Matching function to use when matching sub-expressions
* first_fun: Matching function to use when matching the whole (top-level) expression
* sub_fun: Function to use to extract sub-expressions from b
* a: Pattern expression
* b: Target node
* 't: Type of the target node
*)
let m_deep (deep_fun : G.expr Matching_generic.matcher)
(first_fun : G.expr -> 't -> tin -> tout) (sub_fun : 't -> G.expr list)
(a : G.expr) (b : 't) =
if_config
(fun x -> not x.go_deeper_expr)
~then_:(first_fun a b)
~else_:
( first_fun a b >!> fun () ->
(* less: could use a fold *)
let rec aux xs =
match xs with
| [] -> fail ()
| x :: xs -> deep_fun a x >||> aux xs
in
b |> sub_fun |> aux )
(* start of recursive need *)
(* TODO: factorize with metavariable and aliasing logic in m_expr
* TODO: remove MV.Id and use always MV.N?
*)
let rec m_name a b =
match (a, b) with
(* equivalence: aliasing (name resolving) part 1 *)
| ( a,
B.Id
( idb,
{
B.id_resolved =
{
contents =
Some
( ( B.ImportedEntity dotted
| B.ImportedModule (B.DottedName dotted) ),
_sid );
};
_;
} ) ) ->
m_name a (B.Id (idb, B.empty_id_info ()))
>||> (* try this time a match with the resolved entity *)
m_name a (H.name_of_ids dotted)
| G.Id (a1, a2), B.Id (b1, b2) ->
(* this will handle metavariables in Id *)
m_ident_and_id_info (a1, a2) (b1, b2)
| G.Id ((str, tok), _info), G.IdQualified _ when MV.is_metavar_name str ->
envf (str, tok) (MV.N b)
(* equivalence: aliasing (name resolving) part 2 (mostly for OCaml) *)
| ( G.IdQualified _a1,
B.IdQualified
({
name_info =
{
B.id_resolved =
{ contents = Some (B.ImportedEntity dotted, _sid) };
_;
};
_;
} as nameinfo) ) ->
(* try without resolving anything *)
m_name a (B.IdQualified { nameinfo with name_info = B.empty_id_info () })
>||>
(* try this time by replacing the qualifier by the resolved one *)
let new_qualifier =
match List.rev dotted with
| [] -> raise Impossible
| _x :: xs -> List.rev xs |> List.map (fun id -> (id, None))
in
m_name a
(B.IdQualified
{
nameinfo with
name_middle = Some (B.QDots new_qualifier);
name_info = B.empty_id_info ();
})
(* semantic! try to handle open in OCaml by querying LSP! The
* target code is using an unqualified Id possibly because of some open!
*)
| G.IdQualified { name_last = ida, None; _ }, B.Id (idb, _infob)
when fst ida = fst idb -> (
match !Hooks.get_def idb with
| None -> fail ()
| Some file ->
let m = module_name_of_filename file in
let t = snd idb in
pr2_gen m;
let _n = H.name_of_ids [ (m, t); idb ] in
(* retry with qualified target *)
(* m_name a n *)
return ())
(* boilerplate *)
| G.IdQualified a1, B.IdQualified b1 -> m_name_info a1 b1
| G.Id _, _
| G.IdQualified _, _ ->
fail ()
and m_name_info a b =
match (a, b) with
| ( { G.name_last = a0; name_middle = a1; name_top = a2; name_info = a3 },
{ B.name_last = b0; name_middle = b1; name_top = b2; name_info = b3 } ) ->
let* () = m_ident_and_type_arguments a0 b0 in
let* () = (m_option m_qualifier) a1 b1 in
let* () = (m_option m_tok) a2 b2 in
let* () = m_id_info a3 b3 in
return ()
and m_ident_and_type_arguments (a1, a2) (b1, b2) =
let* () = m_ident a1 b1 in
let* () = m_option m_type_arguments a2 b2 in
return ()
and m_qualifier a b =
match (a, b) with
| G.QDots a, B.QDots b ->
(* TODO? like for m_dotted_name, [$X] should match anything? *)
m_list m_ident_and_type_arguments a b
| G.QExpr (a1, a2), B.QExpr (b1, b2) -> m_expr a1 b1 >>= fun () -> m_tok a2 b2
| G.QDots _, _
| G.QExpr _, _ ->
fail ()
(* semantic! try to handle typed metavariables by querying LSP
* to get inferred type info (only for OCaml for now) *)
and m_type_option_with_hook idb taopt tbopt =
match (taopt, tbopt) with
| Some ta, Some tb -> m_type_ ta tb
| Some ta, None -> (
match !Hooks.get_type idb with
| Some tb -> m_type_ ta tb
| None -> fail ())
(* less-is-ok:, like m_option_none_can_match_some *)
| None, _ -> return ()
(* This is similar to m_ident, but it will also add the id_info in
* the environment (via 'MV.Id (_, Some id_info)') when the pattern is
* a metavariable. This id_info is useful to make sure multiple
* occurences of the same metavariable binds to the same entity thanks
* to the sid stored in the id_info.
*)
and m_ident_and_id_info (a1, a2) (b1, b2) =
(* metavar: *)
match (a1, b1) with
| (str, tok), b when MV.is_metavar_name str ->
(* a bit OCaml specific, cos only ml_to_generic tags id_type in pattern *)
m_type_option_with_hook b1 !(a2.G.id_type) !(b2.B.id_type) >>= fun () ->
m_id_info a2 b2 >>= fun () -> envf (str, tok) (MV.Id (b, Some b2))
(* same code than for m_ident *)
(* in some languages such as Javascript certain entities like
* fields can use strings for identifiers (e.g., {"myfield": 1}),
* which gives the opportunity to use regexp string for fields
* (e.g., {"=~/.*field/": $X}).
*)
| (stra, _), (strb, _) when Pattern.is_regexp_string stra ->
let re_match = Matching_generic.regexp_matcher_of_regexp_string stra in
if re_match strb then return () else fail ()
(* general case *)
| _, _ ->
let* () = m_wrap m_string a1 b1 in
m_id_info a2 b2
and m_ident_and_empty_id_info a1 b1 =
let empty = G.empty_id_info () in
m_ident_and_id_info (a1, empty) (b1, empty)
(* Currently m_id_info is a Nop because the Semgrep pattern
* does not have correct name resolution (see the comment below).
* However, we do use id_info in equal_ast() to check
* whether two $X refers to the same code. In that case we are using
* the id_resolved tag and sid!
*)
and m_id_info a b =
match (a, b) with
| ( { G.id_resolved = _a1; id_type = _a2; id_constness = _a3; id_hidden = _a4 },
{
B.id_resolved = _b1;
id_type = _b2;
id_constness = _b3;
id_hidden = _b4;
} ) ->
(* old: (m_ref m_resolved_name) a3 b3 >>= (fun () ->
* but doing import flask in a source file means every reference
* to flask.xxx will be tagged with a ImportedEntity, but
* semgrep pattern will use flask.xxx directly, without the preceding
* import, without this tag, which would prevent
* matching. We need to correctly resolve names and always compare with
* the resolved_name instead of the name used in the code
* (which can be an alias)
*
* old: (m_ref (m_option m_type_)) a2 b2
* the same is true for types! Now we sometimes propagate type annotations
* in Naming_AST.ml, but we do that in the source file, not the pattern,
* which would prevent a match.
* More generally, id_info is something populated and used on the
* generic AST of the source, not on the pattern, hence we should
* not use it as a condition for matching here. Instead use
* the information in the caller.
*)
return ()
(*****************************************************************************)
(* Expression *)
(*****************************************************************************)
(* possibly go deeper when someone wants that a pattern like
* 'bar();'
* match also an expression statement like
* 'x = bar();'.
*
* This is very hacky.
*
* alternatives:
* - force the user to use 'if(... <expr> ...)' (isaac, jmelton)
* - do as in coccinelle and use 'if(<... <expr> ...>)'
* - CURRENT: impicitely go deep without requiring an extra syntax
*
* todo? we could restrict ourselves to only a few forms? see SubAST_generic.ml
* - x = <expr>,
* - <call>(<exprs).
*)
(* experimental! *)
and m_expr_deep a b =
m_deep m_expr_deep m_expr SubAST_generic.subexprs_of_expr a b
(* coupling: if you add special sgrep hooks here, you should probably
* also add them in m_pattern
*)
and m_expr a b =
Trace_matching.(if on then print_expr_pair a b);
match (a.G.e, b.G.e) with
(* the order of the matches matters! take care! *)
(* equivalence: user-defined equivalence! *)
| G.DisjExpr (a1, a2), _b -> m_expr a1 b >||> m_expr a2 b
(* equivalence: name resolving! *)
(* todo: it would be nice to factorize the aliasing code by just calling
* m_name, but below we use make_dotted, which is different from what
* we do in m_name.
*)
| ( _a,
B.N
(B.Id
( idb,
{
B.id_resolved =
{
contents =
Some
( ( B.ImportedEntity dotted
| B.ImportedModule (B.DottedName dotted) ),
_sid );
};
_;
} )) ) ->
(* We used to force to fully qualify entities in the pattern
* (e.g., with org.foo(...)) but this is confusing for users.
* We now allow an unqualified pattern like 'foo' to match resolved
* entities like import org.foo; foo(), just like for attributes.
*
* bugfix: important to call with empty_id_info() below to avoid
* infinite recursion.
*)
m_expr a (B.N (B.Id (idb, B.empty_id_info ())) |> G.e)
>||> (* try this time a match with the resolved entity *)
m_expr a (make_dotted dotted)
(* equivalence: name resolving on qualified ids (for OCaml) *)
(* Put this before the next case to prevent overly eager dealiasing *)
| G.N (G.IdQualified _ as na), B.N ((B.IdQualified _ | B.Id _) as nb) ->
m_name na nb
(* Matches pattern
* a.b.C.x
* to code
* import a.b.C
* C.x
*)
| ( G.N
(G.IdQualified
{
G.name_last = alabel, None;
name_middle = Some (G.QDots names);
name_top = None;
_;
}),
_b ) ->
(* TODO: double check names does not have any type_args *)
let full = (names |> List.map fst) @ [ alabel ] in
m_expr (make_dotted full) b
| G.DotAccess (_, _, _), B.N b1 -> (
(* Reinterprets a DotAccess expression such as a.b.c as a name, when
* a,b,c are all identifiers. Note that something like a.b.c could get
* parsed as either DotAccess or IdQualified depending on the context
* (e.g., in Python it is always a DotAccess *except* when it occurs
* in an attribute). *)
match H.name_of_dot_access a with
| None -> fail ()
| Some a1 -> m_name a1 b1)
(* $X should not match an IdSpecial in a call context,
* otherwise $X(...) would match a+b because this is transformed in a
* Call(IdSpecial Plus, ...).
* bugfix: note that we must forbid that only in a Call context; we want
* $THIS to match IdSpecial (This) for example.
*)
| ( G.Call ({ e = G.N (G.Id ((str, _tok), _id_info)); _ }, _argsa),
B.Call ({ e = B.IdSpecial _; _ }, _argsb) )
when MV.is_metavar_name str ->
fail ()
(* metavar: *)
(* Matching a generic Id metavariable to an IdSpecial will fail as it is missing the token
* info; instead the Id should match Call(IdSpecial _, _)
*)
| G.N (G.Id ((str, _), _)), B.IdSpecial (B.ConcatString _, _)
| G.N (G.Id ((str, _), _)), B.IdSpecial (B.Instanceof, _)
| G.N (G.Id ((str, _), _)), B.IdSpecial (B.New, _)
when MV.is_metavar_name str ->
fail ()
(* Important to bind to MV.Id when we can, so this must be before
* the next case where we bind to the more general MV.E.
* TODO: should be B.N (B.Id _ | B.IdQualified _)?
*)
| G.N (G.Id _ as na), B.N (B.Id _ as nb) -> m_name na nb
| G.N (G.Id ((str, tok), _id_info)), _b when MV.is_metavar_name str ->
envf (str, tok) (MV.E b)
(* metavar: typed! *)
| G.TypedMetavar ((str, tok), _, t), _b when MV.is_metavar_name str ->
m_compatible_type (str, tok) t b
(* dots: should be patterned-match before in arguments, or statements,
* but this is useful for keyword parameters, as in f(..., foo=..., ...)
*)
| G.Ellipsis _a1, _ -> return ()
| G.DeepEllipsis (_, a1, _), _b ->
m_expr_deep a1 b (*e: [[Generic_vs_generic.m_expr()]] ellipsis cases *)
(* must be before constant propagation case below *)
| G.L a1, B.L b1 -> m_literal a1 b1
(* equivalence: constant propagation and evaluation!
* TODO: too late, must do that before 'metavar:' so that
* const a = "foo"; ... a == "foo" would be catched by $X == $X.
*)
| G.L a1, _b ->
if_config
(fun x -> x.Config.constant_propagation)
~then_:
(match
Normalize_generic.constant_propagation_and_evaluate_literal b
with
| Some b1 -> m_literal_constness a1 b1
| None -> fail ())
~else_:(fail ())
| G.Container (G.Array, a2), B.Container (B.Array, b2) ->
(m_bracket m_container_ordered_elements) a2 b2
| G.Container (G.List, a2), B.Container (B.List, b2) ->
(m_bracket m_container_ordered_elements) a2 b2
| G.Container (G.Tuple, a1), B.Container (B.Tuple, b1) ->
(m_container_ordered_elements |> m_bracket) a1 b1
| ( G.Container (((G.Set | G.Dict) as a1), (_, a2, _)),
B.Container (((B.Set | B.Dict) as b1), (_, b2, _)) ) ->
m_container_set_or_dict_unordered_elements (a1, a2) (b1, b2)
(* dots: equivalence: Container and Comprehension *)
| ( G.Container (akind, (_, [ { e = G.Ellipsis _; _ } ], _)),
B.Comprehension (bkind, _) ) ->
m_container_operator akind bkind
(* dots '...' for string literal:
* Interpolated strings are transformed into Call(Special(Concat, ...),
* hence we want Python patterns like f"...{$X}...", which are expanded to
* Call(Special(Concat, [L"..."; Id "$X"; L"..."])) to
* match concrete code like f"foo{a}" such that "..." is seemingly
* matching 0 or more literal expressions.
* bugfix: note that we want to do that only when inside
* Call(Special(Concat(...))), hence the special m_arguments_concat below,
* otherwise regular call patterns like foo("...") would match code like
* foo().
*)
| ( G.Call ({ e = G.IdSpecial (G.ConcatString akind, _a1); _ }, a2),
B.Call ({ e = B.IdSpecial (B.ConcatString bkind, _b1); _ }, b2) ) ->
m_concat_string_kind akind bkind >>= fun () ->
m_bracket m_arguments_concat a2 b2
(* The pattern '$X = 1 + 2 + ...' is parsed as '$X = (1 + 2) + ...', but
* if the concrete code is 'foo = 1 + 2 + 3 + 4', this will naively not
* match because (1+2)+... will be matched against ((1+2)+3)+4 and fails.
* The ellipsis operator with binary operators should be more flexible
* and allows any number of additional arguments, which when translated
* in Call() means we need to go deeper.
*)
| ( G.Call
( { e = G.IdSpecial (G.Op aop, _toka); _ },
(_, [ G.Arg a1; G.Arg { e = G.Ellipsis _tdots; _ } ], _) ),
B.Call
( { e = B.IdSpecial (B.Op bop, _tokb); _ },
(_, [ B.Arg b1; B.Arg _b2 ], _) ) )
(* This applies to any binary operation! Associative operators (declared
* in AST_generic_helpers.is_associative_operator) are better handled by
* m_call_op below. *)
when (not (H.is_associative_operator aop)) || aop <> bop ->
m_arithmetic_operator aop bop >>= fun () ->
m_expr a1 b1 >!> fun () ->
(* try again deeper on b1 *)
m_expr a b1
| ( G.Call ({ e = G.IdSpecial (G.Op aop, toka); _ }, aargs),
B.Call ({ e = B.IdSpecial (B.Op bop, tokb); _ }, bargs) ) ->
m_call_op aop toka aargs bop tokb bargs
(* TODO? should probably do some equivalence like allowing extra
* paren in the pattern to still match code without parens
*)
| G.ParenExpr a1, B.ParenExpr b1 -> m_bracket m_expr a1 b1
(* boilerplate *)
| G.Call (a1, a2), B.Call (b1, b2) ->
m_expr a1 b1 >>= fun () -> m_arguments a2 b2
| G.Assign (a1, at, a2), B.Assign (b1, bt, b2) -> (
m_expr a1 b1
>>= (fun () -> m_tok at bt >>= fun () -> m_expr a2 b2)
(* If the code has tuples as b1 and b2 and the lengths of
* the tuples are equal, create a tuple of (variable, value)
* pairs and try to match the pattern with each entry in the tuple.
* This should enable multiple assignments if the number of
* variables and values are equal. *)
>||>
match (b1.e, b2.e) with
| B.Container (B.Tuple, (_, vars, _)), B.Container (B.Tuple, (_, vals, _))
when List.length vars = List.length vals ->
let create_assigns expr1 expr2 = B.Assign (expr1, bt, expr2) |> G.e in
let mult_assigns = List.map2 create_assigns vars vals in
let rec aux xs =
match xs with
| [] -> fail ()
| x :: xs -> m_expr a x >||> aux xs
in
aux mult_assigns
| _, _ -> fail ())
(* bugfix: we also want o. ... .foo() to match o.foo(), but
* o. ... would be matched against just 'o', so we need this
* extra case.
*)
| ( G.DotAccess (({ e = G.DotAccessEllipsis (a1_1, _a1_2); _ } as a1), at, a2),
B.DotAccess (b1, bt, b2) ) ->
let* () = m_expr a1 b1 >||> m_expr a1_1 b1 in
let* () = m_tok at bt in
m_field_name a2 b2
| G.DotAccess (a1, at, a2), B.DotAccess (b1, bt, b2) ->
m_expr a1 b1 >>= fun () ->
m_tok at bt >>= fun () -> m_field_name a2 b2
(* <a1> ... vs o.m1().m2().m3().
* Remember than o.m1().m2().m3() is parsed as (((o.m1()).m2()).m3())
*)
| ( G.DotAccessEllipsis (a1, _a2),
(B.DotAccess _ | B.Call ({ e = B.DotAccess _; _ }, _)) ) ->
(* => o, [m3();m2();m1() *)
let obj, ys = obj_and_method_calls_of_expr b in
(* the method chain ellipsis can match 0 or more of those method calls *)
let candidates = all_suffix_of_list ys in
let rec aux xxs =
match xxs with
| [] -> fail ()
| xs :: xxs ->
let b = expr_of_obj_and_method_calls (obj, xs) in
m_expr a1 b >||> aux xxs
in
aux candidates
| G.ArrayAccess (a1, a2), B.ArrayAccess (b1, b2) ->
m_expr a1 b1 >>= fun () -> m_bracket m_expr a2 b2
| G.Record a1, B.Record b1 -> (m_bracket m_fields) a1 b1
| G.Constructor (a1, a2), B.Constructor (b1, b2) ->
m_name a1 b1 >>= fun () -> m_bracket (m_list m_expr) a2 b2
| G.Comprehension (a1, a2), B.Comprehension (b1, b2) ->
let* () = m_container_operator a1 b1 in
m_bracket m_comprehension a2 b2
| G.Lambda a1, B.Lambda b1 ->
m_function_definition a1 b1 >>= fun () -> return ()
| G.AnonClass a1, B.AnonClass b1 -> m_class_definition a1 b1
| G.IdSpecial a1, B.IdSpecial b1 -> m_wrap m_special a1 b1
(* This is mainly for Go which generates an AssignOp (Eq)
* for the x := a short variable declaration.
* TODO: this should be a configurable equivalence: $X = $Y ==> $X := $Y.
* Some people want it, some people may not want it.
* At least we dont do the opposite (AssignOp matching Assign) so
* using := in a pattern will not match code using just =
* (but pattern using = will match both code using = or :=).
*)
| G.Assign (a1, a2, a3), B.AssignOp (b1, (B.Eq, b2), b3) ->
m_expr (G.Assign (a1, a2, a3) |> G.e) (B.Assign (b1, b2, b3) |> G.e)
| G.AssignOp (a1, a2, a3), B.AssignOp (b1, b2, b3) ->
m_expr a1 b1 >>= fun () ->
m_wrap m_arithmetic_operator a2 b2 >>= fun () -> m_expr a3 b3
| G.Xml a1, B.Xml b1 -> m_xml a1 b1
| G.LetPattern (a1, a2), B.LetPattern (b1, b2) ->
m_pattern a1 b1 >>= fun () -> m_expr a2 b2
| G.SliceAccess (a1, a2), B.SliceAccess (b1, b2) ->
let f = m_option m_expr in
m_expr a1 b1 >>= fun () -> m_bracket (m_tuple3 f f f) a2 b2
| G.Conditional (a1, a2, a3), B.Conditional (b1, b2, b3) ->
m_expr a1 b1 >>= fun () ->
m_expr a2 b2 >>= fun () -> m_expr a3 b3
| G.Yield (a0, a1, a2), B.Yield (b0, b1, b2) ->
m_tok a0 b0 >>= fun () ->
m_option m_expr a1 b1 >>= fun () -> m_bool a2 b2
| G.Await (a0, a1), B.Await (b0, b1) -> m_tok a0 b0 >>= fun () -> m_expr a1 b1
| G.Cast (a1, at, a2), B.Cast (b1, bt, b2) ->
m_type_ a1 b1 >>= fun () ->
m_tok at bt >>= fun () -> m_expr a2 b2
| G.Seq a1, B.Seq b1 -> (m_list m_expr) a1 b1
| G.Ref (a0, a1), B.Ref (b0, b1) -> m_tok a0 b0 >>= fun () -> m_expr a1 b1
| G.DeRef (a0, a1), B.DeRef (b0, b1) -> m_tok a0 b0 >>= fun () -> m_expr a1 b1
| G.StmtExpr a1, B.StmtExpr b1 -> m_stmt a1 b1
| G.OtherExpr (a1, a2), B.OtherExpr (b1, b2) ->
m_todo_kind a1 b1 >>= fun () -> (m_list m_any) a2 b2
| G.Container _, _
| G.Comprehension _, _
| G.Record _, _
| G.Constructor _, _
| G.Lambda _, _
| G.AnonClass _, _
| G.N _, _
| G.IdSpecial _, _
| G.Call _, _
| G.ParenExpr _, _
| G.Xml _, _
| G.Assign _, _
| G.AssignOp _, _
| G.LetPattern _, _
| G.DotAccess _, _
| G.ArrayAccess _, _
| G.SliceAccess _, _
| G.Conditional _, _
| G.Yield _, _
| G.Await _, _
| G.Cast _, _
| G.Seq _, _
| G.Ref _, _
| G.DeRef _, _
| G.StmtExpr _, _
| G.OtherExpr _, _
| G.TypedMetavar _, _
| G.DotAccessEllipsis _, _ ->
fail ()
and m_entity_name a b =
match (a, b) with
| G.EN a1, B.EN b1 -> m_name a1 b1
| G.EN (G.Id ((str, tok), _idinfoa)), B.EDynamic b1
when MV.is_metavar_name str ->
envf (str, tok) (MV.E b1)
(* boilerplate *)
| G.EDynamic a, B.EDynamic b -> m_expr a b
| G.EPattern a, B.EPattern b -> m_pattern a b
| G.OtherEntity (a1, a2), B.OtherEntity (b1, b2) ->
m_todo_kind a1 b1 >>= fun () -> (m_list m_any) a2 b2
| G.EN _, _
| G.EPattern _, _
| G.OtherEntity _, _
| G.EDynamic _, _ ->
fail ()
and m_field_name a b =
match (a, b) with
| G.FN a1, B.FN b1 -> m_name a1 b1
| G.FN (G.Id ((str, tok), _idinfoa)), B.FDynamic b1
when MV.is_metavar_name str ->
envf (str, tok) (MV.E b1)
(* boilerplate *)
| G.FDynamic a, B.FDynamic b -> m_expr a b
| G.FN _, _
| G.FDynamic _, _ ->
fail ()
and m_label_ident a b =
match (a, b) with
| G.LNone, B.LNone -> return ()
| G.LId a, B.LId b -> m_label a b
| G.LInt a, B.LInt b -> m_wrap m_int a b
| G.LDynamic a, B.LDynamic b -> m_expr a b
| G.LNone, _
| G.LId _, _
| G.LInt _, _
| G.LDynamic _, _ ->
fail ()
and m_comprehension (a1, a2) (b1, b2) =
let* () = m_expr a1 b1 in
m_list m_for_or_if_comp a2 b2
and m_for_or_if_comp a b =
match (a, b) with
| G.CompFor (a1, a2, a3, a4), B.CompFor (b1, b2, b3, b4) ->
let* () = m_tok a1 b1 in
let* () = m_pattern a2 b2 in
let* () = m_tok a3 b3 in
let* () = m_expr a4 b4 in
return ()
| G.CompIf (a1, a2), B.CompIf (b1, b2) ->
let* () = m_tok a1 b1 in
let* () = m_expr a2 b2 in
return ()
| G.CompFor _, _
| G.CompIf _, _ ->
fail ()
and m_literal a b =
Trace_matching.(if on then print_literal_pair a b);
match (a, b) with
(* dots: metavar: '...' and metavars on string/regexps/atoms *)
| G.String a, B.String b -> m_string_ellipsis_or_metavar_or_default a b
| G.Atom (_, a), B.Atom (_, b) -> m_ellipsis_or_metavar_or_string a b
| G.Regexp (a1, a2), B.Regexp (b1, b2) -> (
let* () = m_bracket m_ellipsis_or_metavar_or_string a1 b1 in
match (a2, b2) with
(* less_is_ok: *)
| None, _ -> return ()
| Some a, Some b -> m_ellipsis_or_metavar_or_string a b
| Some _, None -> fail ())
(* boilerplate *)
| G.Unit a1, B.Unit b1 -> m_tok a1 b1
| G.Bool a1, B.Bool b1 -> (m_wrap m_bool) a1 b1
| G.Int a1, B.Int b1 -> m_wrap_m_int_opt a1 b1
| G.Float a1, B.Float b1 -> m_wrap_m_float_opt a1 b1
| G.Imag a1, B.Imag b1 -> (m_wrap m_string) a1 b1
| G.Ratio a1, B.Ratio b1 -> (m_wrap m_string) a1 b1
| G.Char a1, B.Char b1 -> (m_wrap m_string) a1 b1
| G.Null a1, B.Null b1 -> m_tok a1 b1
| G.Undefined a1, B.Undefined b1 -> m_tok a1 b1
| G.Unit _, _
| G.Bool _, _
| G.Int _, _
| G.Float _, _
| G.Char _, _
| G.String _, _
| G.Regexp _, _
| G.Null _, _
| G.Undefined _, _
| G.Imag _, _
| G.Ratio _, _
| G.Atom _, _ ->
fail ()
and m_wrap_m_int_opt (a1, a2) (b1, b2) =
match (a1, b1) with
(* iso: semantic equivalence of value! 0x8 can match 8 *)
| Some i1, Some i2 -> if i1 =|= i2 then return () else fail ()
(* if the integers (or floats) were too large or were using
* a syntax OCaml int_of_string could not parse,
* we default to a string comparison *)
| _ ->
let a1 = Parse_info.str_of_info a2 in
(* bugfix: not that with constant propagation, some integers don't have
* a real token associated with them, so b2 may be a FakeTok, but
* Parse_info.str_of_info does not raise an exn anymore on a FakeTok
*)
let b1 = Parse_info.str_of_info b2 in
m_wrap m_string (a1, a2) (b1, b2)
and m_wrap_m_float_opt (a1, a2) (b1, b2) =
match (a1, b1) with
(* iso: semantic equivalence of value! 0x8 can match 8 *)
| Some i1, Some i2 when i1 = i2 -> return ()
| _ ->
let a1 = Parse_info.str_of_info a2 in
let b1 = Parse_info.str_of_info b2 in
m_wrap m_string (a1, a2) (b1, b2)
and m_literal_constness a b =
match b with
| B.Lit b1 -> m_literal a b1
| B.Cst B.Cstr -> (
match a with
| G.String ("...", _) -> return ()
| ___else___ -> fail ())
| B.Cst _
| B.NotCst ->
fail ()
and m_match_cases a b =
let has_ellipsis, a = has_match_case_ellipsis_and_filter_ellipsis a in
m_list_in_any_order ~less_is_ok:has_ellipsis m_action a b
and m_action (a : G.action) (b : G.action) =
match (a, b) with
| (a1, a2), (b1, b2) -> m_pattern a1 b1 >>= fun () -> m_expr a2 b2
and m_arithmetic_operator a b =