Skip to content

Commit

Permalink
chore: temporary template for new sorting
Browse files Browse the repository at this point in the history
  • Loading branch information
m62624 committed Aug 31, 2023
1 parent ae6561b commit 5a51a7f
Show file tree
Hide file tree
Showing 12 changed files with 88 additions and 746 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/ci_cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ jobs:
docker run ${{ env.IMAGE_DOCKER }}:latest \
cargo test --manifest-path ./flexible_inspect_py/Cargo.toml
packages-js:
needs: [core-rust-tests, wasm-lib-tests]
runs-on: ubuntu-latest
Expand Down Expand Up @@ -239,7 +238,7 @@ jobs:
name: javascirpt-npm-${{ github.run_id }}
path: pkg
- name: Compress pkg folder (wasm->tgz)
run: |
run: |
tar czf npm-wasm-for-web.tgz -C ./pkg/pkg-web .; \
tar czf npm-wasm-for-node.tgz -C ./pkg/pkg-nodejs .;
- name: Extract changelog content
Expand Down
8 changes: 5 additions & 3 deletions flexible_inspect_rs/src/rules/common_elements/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ pub mod range;
pub const DEFAULT_CAPTURE: &str = "main_capture";
// =======================================================

/// The struct for sorting all nested rules
/// The struct for sorting all nested rules
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SlisedRules {
/// The rules are in the `IndexSet` collection to preserve
/// the order of the rules during index retrieval from the `RegexSet` and to avoid duplicate rules
pub simple_rules: IndexSet<Rule>,
pub complex_rules: IndexSet<Rule>,
pub smr_must_be_found: IndexSet<Rule>,
pub smr_must_not_be_found_with_subrules: IndexSet<Rule>,
pub smr_must_not_be_found_without_subrules: IndexSet<Rule>,
pub cmr: IndexSet<Rule>,
}

/// A Structure for common `Rule` modifiers
Expand Down
43 changes: 34 additions & 9 deletions flexible_inspect_rs/src/rules/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,52 @@ impl GeneralModifiers {
impl SlisedRules {
/// The method for sorting all nested rules
pub fn new<T: IntoIterator<Item = Rule>>(all_rules: T) -> SlisedRules {
let mut o_simple_rules = IndexSet::new();
let mut o_complex_rules = IndexSet::new();
// smr - simple rules
// cmr - complex rules

// These rules are validated through `RegexSet`:
// if fewer rules are found than there are in this collection, the validation has failed
let mut smr_must_be_found = IndexSet::new();
// Based on this, we simply check with `RegexSet`.
let mut smr_must_not_be_found_with_subrules = IndexSet::new();
// These rules are validated through `RegexSet`: if even one rule from this collection is found, the validation has failed
let mut smr_must_not_be_found_without_subrules = IndexSet::new();
let mut cmr = IndexSet::new();
all_rules
.into_iter()
.for_each(|rule| match rule.0.str_with_type {
RegexRaw::DefaultRegex(_) => {
o_simple_rules.insert(rule);
}
RegexRaw::DefaultRegex(_) => match rule.0.general_modifiers.requirement {
MatchRequirement::MustBeFound => {
smr_must_be_found.insert(rule);
}
MatchRequirement::MustNotBeFound => match rule.0.subrules {
Some(subrules) => {
smr_must_not_be_found_with_subrules.insert(rule);
}
None => {
smr_must_not_be_found_without_subrules.insert(rule);
}
},
},
RegexRaw::FancyRegex(_) => {
o_complex_rules.insert(rule);
cmr.insert(rule);
}
});

SlisedRules {
simple_rules: o_simple_rules,
complex_rules: o_complex_rules,
smr_must_be_found,
smr_must_not_be_found_with_subrules,
smr_must_not_be_found_without_subrules,
cmr,
}
}

/// A method for checking if there are any rules
pub fn is_some(&self) -> bool {
!self.simple_rules.is_empty() || !self.complex_rules.is_empty()
!self.smr_must_be_found.is_empty()
|| !self.smr_must_not_be_found_with_subrules.is_empty()
|| !self.smr_must_not_be_found_without_subrules.is_empty()
|| !self.cmr.is_empty()
}
}

Expand Down
5 changes: 1 addition & 4 deletions flexible_inspect_rs/src/rules/rule_str/another_traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,7 @@ mod hash_trait {

impl Hash for Subrules {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.simple_rules.hash(state);
if let Some(value) = &self.simple_rules {
value.hash(state);
}

}
}
}
Expand Down
9 changes: 1 addition & 8 deletions flexible_inspect_rs/src/rules/rule_str/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,7 @@ impl RuleBase for Rule {
self.0.subrules.as_ref()
}

fn get_simple_rules(&self) -> Option<(&IndexSet<Self::RuleType>, &Self::RegexSet)> {
if let Some(subrules) = self.get_subrules() {
if let Some(simple_rules) = &subrules.simple_rules {
return Some((&simple_rules.all_rules, &simple_rules.regex_set.regex_set));
}
}
None
}


fn get_complex_rules(&self) -> Option<&IndexSet<Self::RuleType>> {
if let Some(subrules) = self.get_subrules() {
Expand Down
4 changes: 3 additions & 1 deletion flexible_inspect_rs/src/rules/rule_str/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ pub struct Subrules {
pub struct SimpleRules {
/// The rules are in the `IndexSet` collection to preserve
/// the order of the rules during index retrieval from the `RegexSet` and to avoid duplicate rules
pub all_rules: IndexSet<Rule>,
pub smr_must_be_found: IndexSet<Rule>,
pub smr_must_not_be_found_with_subrules: IndexSet<Rule>,
pub smr_must_not_be_found_without_subrules: IndexSet<Rule>,
/// `RegexSet` Match multiple, possibly overlapping, regexes in a single search.
pub regex_set: RegexSetContainer,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,218 +11,11 @@ where
{
let mut temp_stack: VecDeque<(&R::RuleType, CaptureData<C>)> = VecDeque::new();
if let Some(mut frame) = stack.pop_front() {
trace!(
"deleted rule from unique stack: ({}, {})",
frame.0.get_str().yellow(),
format!("{:#?}", frame.0.get_requirement()).yellow()
);
match NextStep::next_or_finish_or_error(frame.0, &mut frame.1) {
NextStep::Go => {
// ============================= LOG =============================
debug!(
"run subrules from the root rule `({}, {})`",
frame.0.get_str().yellow(),
format!("{:#?}", frame.0.get_requirement()).yellow()
);
// ===============================================================
if let Some(simple_rules) = &frame.0.get_simple_rules() {
// count of how many times one rule has worked for different matches
let mut counter_of_each_rule = HashMap::new();
// which matches have already been processed in the rule
// is necessary so you don't have to go through them again in the second cycle.
let mut selected_text = HashMap::new();
// rules that have passed the selections for all matches
let mut selected_rules = HashSet::new();
/*
The first step is to get a RegexSet for each match, based on it,
we get those rules that will definitely work, then check their modifiers
*/
for data in frame.1.text_for_capture.iter() {
// we get the indexes of the rules that are in the RegexSet
for index in R::get_selected_rules(simple_rules.1, &data) {
let rule_from_regexset = simple_rules.0.get_index(index).unwrap();
// ============================= LOG =============================
debug!(
"found the rule `({}, {})` (root rule `({}, {})`) from the `RegexSet` category\nfor data `{:#?}`",
rule_from_regexset.get_str().yellow(),
format!("{:#?}", rule_from_regexset.get_requirement()).yellow(),
frame.0.get_str().yellow(),
format!("{:#?}", frame.0.get_requirement()).yellow(),
data
);
// ===============================================================
let mut captures = R::find_captures(rule_from_regexset, &data);
if let NextStep::Error(error) =
NextStep::next_or_finish_or_error(rule_from_regexset, &mut captures)
{
// ============================= LOG =============================
error!(
"the rule `({}, {})` (root rule `({}, {})`)\nfailed condition\nfor data `{:#?}`",
rule_from_regexset.get_str().yellow(),
format!("{:#?}", rule_from_regexset.get_requirement()).yellow(),
frame.0.get_str().yellow(),
format!("{:#?}", frame.0.get_requirement()).yellow(),
data
);
// ===============================================================
return NextStep::Error(error);
}
/*
For each rule, let's mark the data that has already been checked,
so that we can exclude it in the second cycle
*/
selected_text
.entry(rule_from_regexset)
.or_insert_with(HashSet::new)
.insert(data);
/*
Since in this mode `rule` * `data`, where each rule should work for every match,
we check how many times one rule from regexset was passed for matches,
if a rule worked for all matches, we write it to an exception.
*/
*counter_of_each_rule.entry(index).or_insert(0) += 1;
if counter_of_each_rule[&index] == frame.1.text_for_capture.len() {
selected_rules.insert(rule_from_regexset);
temp_stack.push_back((rule_from_regexset, captures));
}
}
}
// The second step, in this stage we go through those rules and matches that are not in `RegexSet`.
for data in frame.1.text_for_capture.iter() {
// we go through all the simple rules
for rule in simple_rules.0 {
// So the first condition is that we exclude those rules
// that have already been processed in RegexSet.
// (only the rules that succeeded for every match are excluded)
if !selected_rules.contains(rule) {
// If this rule worked for several matches,
// but not for all of them, then we get those values that have already been processed
// and exclude them
if let Some(value) = selected_text.get(rule) {
if !value.contains(&data) {
let mut captures = R::find_captures(rule, &data);
if let NextStep::Error(err) = not_in_regexset::<R, C>(
frame.0,
rule,
&data,
&mut captures,
) {
return NextStep::Error(err);
}
temp_stack.push_back((rule, captures));
}
} else {
let mut captures = R::find_captures(rule, &data);
// If there were no successful matches in this rule,
// it means that this is the first time
// this rule has been run for validation
if let NextStep::Error(err) =
not_in_regexset::<R, C>(frame.0, rule, &data, &mut captures)
{
return NextStep::Error(err);
}
temp_stack.push_back((rule, captures));
}
}
}
}
}
// The third step: go through the rules that use Lookahead and Lookbehind regexes.
if let Some(complex_rules) = frame.0.get_complex_rules() {
for data in frame.1.text_for_capture.iter() {
for cmplx_rule in complex_rules {
let mut captures = R::find_captures(cmplx_rule, &data);
if let NextStep::Error(err) =
NextStep::next_or_finish_or_error(cmplx_rule, &mut captures)
{
// ============================= LOG =============================
error!(
"the rule `({}, {})` (root rule `({}, {})`) \nfailed condition\nfor data `{:#?}`",
cmplx_rule.get_str().yellow(),
format!("{:#?}", cmplx_rule.get_requirement()).yellow(),
frame.0.get_str().yellow(),
format!("{:#?}", frame.0.get_requirement()).yellow(),
data
);
// ===============================================================
return NextStep::Error(err);
}
// ============================= LOG =============================
debug!(
"found the rule `({}, {})` (root rule `({}, {})`) from the `Complex Rule` category\nfor data `{:#?}`",
cmplx_rule.get_str().yellow(),
format!("{:#?}", cmplx_rule.get_requirement()).yellow(),
frame.0.get_str().yellow(),
format!("{:#?}", frame.0.get_requirement()).yellow(),
data);
// ===============================================================
temp_stack.push_back((cmplx_rule, captures));
}
}
}
}
NextStep::Finish => {
// ============================= LOG =============================
debug!(
"the rule `({}, {})` is finished, the result is `Ok`",
frame.0.get_str().yellow(),
format!("{:#?}", frame.0.get_requirement()).yellow(),
);
// ===============================================================
}
NextStep::Error(err) => {
// ================= (LOG) =================
error!(
"the rule `({}, {})` didn't work",
frame.0.get_str().yellow(),
format!("{:#?}", frame.0.get_requirement()).yellow(),
);
// =========================================
return NextStep::Error(err);
}
}


}
stack.extend(temp_stack.drain(..));
// ================= (LOG) =================
info!("for all matches all rules worked successfully");
// =========================================
NextStep::Finish
}

// Function for checking rules not included in `RegexSet`.
fn not_in_regexset<'a, R, C>(
root_rule: &R::RuleType,
rule: &R::RuleType,
data: &C,
captures: &mut CaptureData<'a, C>,
) -> NextStep
where
R: CalculateValueRules<'a, C> + Debug,
C: IntoSpecificCaptureType<'a>,
{
// ============================= LOG =============================
debug!(
"the rule `({}, {})` (root rule `({},{})`) isn't in the `RegexSet` category\nfor data `{:#?}`",
rule.get_str().yellow(),
format!("{:#?}", rule.get_requirement()).yellow(),
root_rule.get_str().yellow(),
format!("{:#?}", root_rule.get_requirement()).yellow(),
data
);
// ===============================================================

if let NextStep::Error(error) = NextStep::next_or_finish_or_error(rule, captures) {
// ============================= LOG =============================
error!(
"the rule `({}, {})` (root rule `({},{})`)\nfailed condition\nfor data `{:#?}`",
rule.get_str().yellow(),
format!("{:#?}", rule.get_requirement()).yellow(),
root_rule.get_str().yellow(),
format!("{:#?}", root_rule.get_requirement()).yellow(),
data
);
return NextStep::Error(error);
// ===============================================================
}
NextStep::Finish
}

Loading

0 comments on commit 5a51a7f

Please sign in to comment.