forked from npshub/mantid
-
Notifications
You must be signed in to change notification settings - Fork 0
/
RegexStrings.cpp
352 lines (319 loc) · 12 KB
/
RegexStrings.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
// Mantid Repository : https://github.com/mantidproject/mantid
//
// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
// NScD Oak Ridge National Laboratory, European Spallation Source,
// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
// SPDX - License - Identifier: GPL - 3.0 +
#include "MantidKernel/RegexStrings.h"
#include "MantidKernel/Logger.h"
#include "MantidKernel/Strings.h"
#include <algorithm>
#include <vector>
namespace Mantid {
namespace Kernel {
namespace Strings {
namespace {
/// File-local logger, registered under the name "Regex", used by the
/// functions in this translation unit for diagnostic output.
Logger logger("Regex");
} // namespace
/**
  Find the compNum-th match of a regular expression in Text and convert the
  whole matched text into Aout.
  @param Text :: string to search
  @param Re :: regular expression to use
  @param Aout :: Place to put Unit found
  @param compNum :: index of the match to extract [0:N-1]
  @return 0 on failure (no match, compNum negative or beyond the number of
  matches), otherwise the value returned by convert() for the matched text
 */
template <typename T> int StrComp(const std::string &Text, const boost::regex &Re, T &Aout, const int compNum) {
  boost::sregex_iterator m1(Text.begin(), Text.end(), Re);
  const boost::sregex_iterator empty;
  // Failed search. A negative index can never be reached by stepping forward
  // and would otherwise make the countdown below run past the last match.
  if (compNum < 0 || m1 == empty)
    return 0;
  // NOTE(review): this compares the number of sub-expressions of the first
  // match with the requested match index. It is kept so that inputs that were
  // previously rejected are still rejected - confirm whether it is intended.
  if (static_cast<int>(m1->size()) < compNum)
    return 0;
  // Step forward to the requested match, stopping if the matches run out so
  // that the end iterator is never dereferenced.
  for (int remaining = compNum; remaining != 0; --remaining) {
    ++m1;
    if (m1 == empty)
      return 0;
  }
  return convert((*m1)[0].str(), Aout);
}
/**
  C-string convenience overload: find the compNum-th match of a regular
  expression in Text and convert it into Aout. Forwards to the std::string
  overload.
  @param Text :: null-terminated string to search (a null pointer is treated
  as a failed search)
  @param Re :: regular expression to use
  @param Aout :: Place to put Unit found
  @param compNum :: item to extract [0:N-1]
  @return 0 on failure, otherwise the result of the std::string overload
 */
template <typename T> int StrComp(const char *Text, const boost::regex &Re, T &Aout, const int compNum) {
  // Constructing a std::string from a null pointer is undefined behaviour
  if (Text == nullptr)
    return 0;
  const std::string searchText(Text);
  return StrComp(searchText, Re, Aout, compNum);
}
/**
  Test whether a regular expression matches anywhere in a string.
  @param Text :: string to search
  @param Re :: regular expression to use
  @return 1 if at least one match is found, 0 otherwise
 */
int StrLook(const std::string &Text, const boost::regex &Re) {
  const boost::sregex_iterator noMatch;
  const boost::sregex_iterator firstMatch(Text.begin(), Text.end(), Re);
  // An iterator equal to the default-constructed one means nothing matched
  return (firstMatch == noMatch) ? 0 : 1;
}
/**
  Split a string into its components using a regular expression.
  Note it is complementary to support.h StrParts(Sdx)
  @param Sdx :: Input string (taken by value because the split alters it)
  @param Re :: Regular expression passed to boost::regex_split
  @return vector of string components
 */
std::vector<std::string> StrParts(std::string Sdx, const boost::regex &Re) {
  std::vector<std::string> pieces;
  auto sink = std::back_inserter(pieces);
  // regex_split modifies Sdx as it works, hence the local copy
  boost::regex_split(sink, Sdx, Re);
  return pieces;
}
/**
  Extract one component of the first match of a regular expression and, on
  success, remove the whole of that match from Text.
  The regular expression must have ( ) around the area to extract.
  @param Text :: string to search; on success the matched text is erased
  from it, otherwise it is left unchanged
  @param Re :: regular expression to use.
  @param Aout :: Value to extract
  @param compNum :: Index of the bracketed component [0->N-1]
  (-1 :: whole match)
  @retval 0 :: no match, component index too large, or conversion failed
  @retval 1 :: success
 */
template <typename T> int StrFullCut(std::string &Text, const boost::regex &Re, T &Aout, const int compNum) {
  const boost::sregex_iterator noMatch;
  const boost::sregex_iterator hit(Text.begin(), Text.end(), Re);
  // Sub-match 0 is the whole match, so component compNum lives at compNum + 1
  const int groupIndex = compNum + 1;
  // The checks short-circuit in order: match found, index in range, then
  // conversion to the required output form
  if (hit == noMatch || groupIndex >= static_cast<int>(hit->size()) ||
      !Mantid::Kernel::Strings::convert((*hit)[groupIndex].str(), Aout))
    return 0;

  // Remove the complete match (not just the extracted component)
  unsigned int zero = 0; // Needed for boost 1.40 (can't pass a literal 0 to position)
  const std::string wholeMatch = (*hit)[0].str();
  Text.erase(hit->position(zero), wholeMatch.length());
  return 1;
}
/**
  Extract every bracketed component of every match of a regular expression,
  then remove the text spanning from the start of the first match to the end
  of the last match.
  The regular expression must have ( ) around the area(s) to extract.
  @param Text :: string to search; on success the matched region is erased
  from it, otherwise it is left unchanged
  @param Re :: regular expression to use.
  @param Aout :: Values to extract. Cleared once a first match is found; if a
  conversion then fails it holds the values converted up to that point.
  @retval 0 :: failed to match the string or a component failed to convert
  @retval 1 :: success
 */
template <typename T> int StrFullCut(std::string &Text, const boost::regex &Re, std::vector<T> &Aout) {
  boost::sregex_iterator m1(Text.begin(), Text.end(), Re);
  const boost::sregex_iterator empty;
  if (m1 == empty)
    return 0;

  logger.information() << "SFC :: \n";
  Aout.clear();
  unsigned int zero = 0; // Needed for boost 1.40
  const size_t M0 = m1->position(zero); // start of the first match
  size_t ML = M0;                       // one past the end of the last match
  for (; m1 != empty; ++m1) {
    // Index 0 is the whole match; the bracketed components start at 1
    for (unsigned int index = 1; index < m1->size(); index++) {
      T tmp;
      if (!Mantid::Kernel::Strings::convert((*m1)[index].str(), tmp))
        return 0;
      Aout.emplace_back(tmp);
    }
    ML = m1->position(zero) + (*m1)[0].str().length();
  }
  logger.information() << "SFC :: " << M0 << " " << ML << '\n';
  // Found object. std::string::erase takes (position, count), so the count is
  // the distance between the two positions rather than the end position.
  Text.erase(M0, ML - M0);
  return 1;
}
/**
  Extract every bracketed component of every match of a regular expression,
  then remove the text spanning from the start of the first match to the end
  of the last match.
  The regular expression must have ( ) around the area(s) to extract.
  This is specialised for string and thus does not need a convert.
  @param Text :: string to search; on success the matched region is erased
  from it, otherwise it is left unchanged
  @param Re :: regular expression to use.
  @param Aout :: Values to extract. Components are appended; existing
  contents are not cleared by this specialisation.
  @retval 0 :: failed to match the string
  @retval 1 :: success
 */
template <> int StrFullCut(std::string &Text, const boost::regex &Re, std::vector<std::string> &Aout) {
  boost::sregex_iterator m1(Text.begin(), Text.end(), Re);
  const boost::sregex_iterator empty;
  if (m1 == empty)
    return 0;

  unsigned int zero = 0; // Needed for boost 1.40
  const size_t M0 = m1->position(zero); // start of the first match
  size_t ML = M0;                       // one past the end of the last match
  for (; m1 != empty; ++m1) {
    ML = m1->position(zero) + (*m1)[0].str().length();
    // Index 0 is the whole match; the bracketed components start at 1
    for (unsigned int index = 1; index < m1->size(); index++)
      Aout.emplace_back((*m1)[index].str());
  }
  logger.information() << "SFC :: " << M0 << " " << ML << '\n';
  // Found object. std::string::erase takes (position, count), so the count is
  // the distance between the two positions rather than the end position.
  Text.erase(M0, ML - M0);
  return 1;
}
/**
  Collect the bracketed components of every match of a regular expression.
  The regular expression must have ( ) around the area to extract.
  The function appends the results onto Aout and stops at the first
  component that fails to convert.
  @param text :: string to search (not modified)
  @param Re :: regular expression to use.
  @param Aout :: vector to add components to.
  @return size of Aout after appending; this counts any elements Aout
  already held, and is 0 only if Aout ends up empty
 */
template <typename T> int StrFullSplit(const std::string &text, const boost::regex &Re, std::vector<T> &Aout) {
  const boost::sregex_iterator finished;
  boost::sregex_iterator match(text.begin(), text.end(), Re);
  while (match != finished) {
    // Sub-match 0 is the whole match, so the components start at 1
    for (unsigned int group = 1; group < match->size(); ++group) {
      T value;
      if (!Mantid::Kernel::Strings::convert((*match)[group].str(), value))
        return static_cast<int>(Aout.size());
      Aout.emplace_back(value);
    }
    ++match;
  }
  return static_cast<int>(Aout.size());
}
/**
  Collect the bracketed components of the first match of a regular
  expression.
  The regular expression must have ( ) around the area to extract.
  The function appends the results onto Aout and stops at the first
  component that fails to convert.
  @param text :: string to search (not modified)
  @param Re :: regular expression to use.
  @param Aout :: vector to add components to.
  @return size of Aout after appending; this counts any elements Aout
  already held, and is 0 only if Aout ends up empty
 */
template <typename T> int StrSingleSplit(const std::string &text, const boost::regex &Re, std::vector<T> &Aout) {
  const boost::sregex_iterator none;
  const boost::sregex_iterator first(text.begin(), text.end(), Re);
  // Nothing matched: report the size of Aout untouched
  if (first == none)
    return static_cast<int>(Aout.size());

  const auto &match = *first;
  // Sub-match 0 is the whole match, so the components start at 1
  for (unsigned int group = 1; group < match.size(); ++group) {
    T value;
    if (!Mantid::Kernel::Strings::convert(match[group].str(), value))
      break;
    Aout.emplace_back(value);
  }
  return static_cast<int>(Aout.size());
}
/**
  Collect the bracketed components of the first match of a regular
  expression.
  The regular expression must have ( ) around the area to extract.
  The function appends the results onto Aout.
  - Specialised to avoid convert for std::string
  @param text :: string to search (not modified)
  @param Re :: regular expression to use.
  @param Aout :: vector to add components to.
  @retval 0 :: failed to match the string
  @retval 1 :: a match was found and its components were appended to Aout
 */
template <> int StrSingleSplit(const std::string &text, const boost::regex &Re, std::vector<std::string> &Aout) {
  const boost::sregex_iterator none;
  const boost::sregex_iterator first(text.begin(), text.end(), Re);
  if (first == none)
    return 0;

  const auto &match = *first;
  // Sub-match 0 is the whole match, so the components start at 1
  for (unsigned int group = 1; group < match.size(); ++group)
    Aout.emplace_back(match[group].str());
  return 1;
}
/**
  Read lines from a stream until one matches the regular expression.
  Lines of any length are handled.
  @param fh :: open input stream; read up to and including the matching line
  @param Re :: regular expression to match
  @param Out :: set to the matching line (without its newline); left
  unchanged if no line matches
  @return 1-based count of the line that matched (or zero on failure)
 */
DLLExport int findPattern(std::istream &fh, const boost::regex &Re, std::string &Out) {
  std::string line;
  int cnt = 0;
  // std::getline grows the string as needed, so an over-long line no longer
  // puts the stream into a failed state and aborts the search.
  while (std::getline(fh, line)) {
    ++cnt;
    if (boost::regex_search(line, Re)) {
      Out = line;
      return cnt;
    }
  }
  // Ran out of input (or the stream failed) before any line matched
  return 0;
}
/**
  Read lines from a stream until one matches the regular expression, then
  convert the first bracketed component of that match into Out.
  Lines of any length are handled.
  @param fh :: open input stream; read up to and including the matching line
  @param Re :: regular expression to match
  @param Out :: component in ( ) expression must be first.
  @return 1-based count of the line that matched, or zero if no line matched
  or the component could not be converted
 */
template <typename T> int findComp(std::istream &fh, const boost::regex &Re, T &Out) {
  std::string line;
  boost::smatch ans;
  int cnt = 0;
  // std::getline grows the string as needed, so an over-long line no longer
  // puts the stream into a failed state and aborts the search.
  while (std::getline(fh, line)) {
    ++cnt;
    // The match object is only inspected after a successful search, so it is
    // never queried when regex_search failed or was never run (empty stream).
    if (boost::regex_search(line, ans, Re)) {
      if (Mantid::Kernel::Strings::convert(ans[1].str(), Out))
        return cnt;
      return 0;
    }
  }
  return 0;
}
/**
  Read lines from a stream until one matches the regular expression, then
  copy the first bracketed component of that match into Out.
  Specialised for std::string so that no conversion is needed.
  Lines of any length are handled.
  @param fh :: open input stream; read up to and including the matching line
  @param Re :: regular expression to match
  @param Out :: component in ( ) expression must be first.
  @return 1-based count of the line that matched (or zero on failure)
 */
template <> DLLExport int findComp(std::istream &fh, const boost::regex &Re, std::string &Out) {
  std::string line;
  boost::smatch ans;
  int cnt = 0;
  // std::getline grows the string as needed, so an over-long line no longer
  // puts the stream into a failed state and aborts the search.
  while (std::getline(fh, line)) {
    ++cnt;
    // The match object is only inspected after a successful search, so it is
    // never queried when regex_search failed or was never run (empty stream).
    if (boost::regex_search(line, ans, Re)) {
      Out = ans[1].str();
      return cnt;
    }
  }
  return 0;
}
/// \cond TEMPLATE
// Explicit instantiations of the function templates defined in this file for
// the argument types listed below.

// StrFullCut (single value): std::string, int and double outputs
template DLLExport int StrFullCut(std::string &, const boost::regex &, std::string &, const int);
template DLLExport int StrFullCut(std::string &, const boost::regex &, int &, const int);
template DLLExport int StrFullCut(std::string &, const boost::regex &, double &, const int);
// StrFullSplit: int, double and std::string vectors
template DLLExport int StrFullSplit(const std::string &, const boost::regex &, std::vector<int> &);
template DLLExport int StrFullSplit(const std::string &, const boost::regex &, std::vector<double> &);
template DLLExport int StrFullSplit(const std::string &, const boost::regex &, std::vector<std::string> &);
// StrSingleSplit: int and double vectors
template DLLExport int StrSingleSplit(const std::string &, const boost::regex &, std::vector<int> &);
template DLLExport int StrSingleSplit(const std::string &, const boost::regex &, std::vector<double> &);
// StrComp: C-string and std::string inputs, double and int outputs
template DLLExport int StrComp(const char *, const boost::regex &, double &, const int);
template DLLExport int StrComp(const char *, const boost::regex &, int &, const int);
template DLLExport int StrComp(const std::string &, const boost::regex &, double &, const int);
template DLLExport int StrComp(const std::string &, const boost::regex &, int &, const int);
// findComp: int output
template DLLExport int findComp(std::istream &, const boost::regex &, int &);
/// \endcond TEMPLATE
} // NAMESPACE Strings
} // NAMESPACE Kernel
} // NAMESPACE Mantid