-
Notifications
You must be signed in to change notification settings - Fork 3
/
matchers.go
99 lines (85 loc) · 3.81 KB
/
matchers.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
package main
//
// Yapperbot-FRS, the Feedback Request Service bot for Wikipedia
// Copyright (C) 2020 Naypta
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
import (
"regexp"
"strings"
"yapperbot-frs/src/ga"
"yapperbot-frs/src/rfc"
)
var rfcMatcher *regexp.Regexp
var gaMatcher *regexp.Regexp
var namedParamMatcher *regexp.Regexp
const rfcIDParam string = "rfcid="
func init() {
// RfC matching regex.
// First capture group is all the params of the rfc template
// (all non-named params are to be treated as categories)
// second capture group is the actual content of the RfC
rfcMatcher = regexp.MustCompile(`(?i){{rfc\|(.*?)}}(.|\n)*?\(UTC\)`)
// GA nom matching regex.
// First capture group is the topic. Second capture group is the subtopic.
// IMPORTANT: If the subtopic is empty, the first capture group will be empty string, and likewise for the topic.
// This regex will always prefer a topic to a subtopic, but will settle for a subtopic if no topic is available.
// Great thanks go to Ouims from #regex on Freenode for the help with debugging and correcting this regex!
gaMatcher = regexp.MustCompile(`(?i){{GA nominee(?:\|(?:[^|}]*?\|)*(?:[\t\f\v ]*?(?:subtopic=([^|}]+).*?)|topic=([^|}]+))|.*?)*}}`)
// Matches against named parameters in the parameter list.
// Ensures the equals is after a named param specifically.
// The [\w\d\s] set means it won't trigger on {{=}} and the like.
namedParamMatcher = regexp.MustCompile(`(?m)^[\w\d\s]*?=`)
}
// extractRfcs takes a string of content containing rfcs, and the page title,
// and returns a slice of rfcs. It can optionally be passed excludeDone, which prevents
// already-done RfCs from being included in the generated list.
// extractRfcs output should be checked for RfCs with no ID string, as those haven't
// yet been assigned an ID by Legobot.
func extractRfcs(content string, title string, excludeDone bool) (rfcs []rfc.RfC, err error) {
matchedRfcTags := rfcMatcher.FindAllStringSubmatch(content, -1)
for _, tag := range matchedRfcTags {
// group 1 contains all the parameters; split on | to find the individual params
params := strings.Split(tag[1], "|")
rfcID, categories, feedbackDone := func(pr []string) (id string, cats map[string]bool, done bool) {
cats = make(map[string]bool)
for _, p := range pr {
if strings.Contains(p, rfcIDParam) {
id = strings.TrimPrefix(p, rfcIDParam)
} else if !namedParamMatcher.MatchString(p) {
// it's not a named param, we can assume it's a category
// for more on why this is like this, see frsRequesting
cats[p] = true
}
}
if id != "" && rfc.AlreadyDone(id) {
done = true
}
return
}(params)
if feedbackDone && excludeDone {
continue
} else {
rfcs = append(rfcs, rfc.RfC{ID: rfcID, Categories: categories, FeedbackDone: feedbackDone, PageHolding: title})
}
}
return
}
// extractGANom takes a page name and content that's been nominated for GA,
// and returns the GA nom object.
func extractGANom(content string, title string) (nom ga.Nom) {
matchedGaTag := gaMatcher.FindStringSubmatch(content)
// first capture group is name of topic, if applicable
// second capture group is name of subtopic
nom = ga.Nom{Topic: matchedGaTag[2], Subtopic: matchedGaTag[1], Article: title}
return
}