chore: temporary template for new sorting

m62624 · Aug 31, 2023 · 5a51a7f · 5a51a7f
1 parent ae6561b
commit 5a51a7f
Show file tree

Hide file tree

Showing 12 changed files with 88 additions and 746 deletions.
diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml
@@ -142,7 +142,6 @@ jobs:
           docker run ${{ env.IMAGE_DOCKER }}:latest \
           cargo test --manifest-path ./flexible_inspect_py/Cargo.toml
 
-
   packages-js:
     needs: [core-rust-tests, wasm-lib-tests]
     runs-on: ubuntu-latest
@@ -239,7 +238,7 @@ jobs:
           name: javascirpt-npm-${{ github.run_id }}
           path: pkg
       - name: Compress pkg folder (wasm->tgz)
-        run: | 
+        run: |
           tar czf npm-wasm-for-web.tgz -C ./pkg/pkg-web .; \
           tar czf npm-wasm-for-node.tgz -C ./pkg/pkg-nodejs .;
       - name: Extract changelog content

diff --git a/flexible_inspect_rs/src/rules/common_elements/mod.rs b/flexible_inspect_rs/src/rules/common_elements/mod.rs
@@ -8,13 +8,15 @@ pub mod range;
 pub const DEFAULT_CAPTURE: &str = "main_capture";
 // =======================================================
 
-/// The struct for sorting all nested rules
+/// The struct for sorting all nested rules
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct SlisedRules {
     /// The rules are in the `IndexSet` collection to preserve
     /// the order of the rules during index retrieval from the `RegexSet` and to avoid duplicate rules
-    pub simple_rules: IndexSet<Rule>,
-    pub complex_rules: IndexSet<Rule>,
+    pub smr_must_be_found: IndexSet<Rule>,
+    pub smr_must_not_be_found_with_subrules: IndexSet<Rule>,
+    pub smr_must_not_be_found_without_subrules: IndexSet<Rule>,
+    pub cmr: IndexSet<Rule>,
 }
 
 /// A Structure for common `Rule` modifiers

diff --git a/flexible_inspect_rs/src/rules/init.rs b/flexible_inspect_rs/src/rules/init.rs
@@ -16,27 +16,52 @@ impl GeneralModifiers {
 impl SlisedRules {
     /// The method for sorting all nested rules
     pub fn new<T: IntoIterator<Item = Rule>>(all_rules: T) -> SlisedRules {
-        let mut o_simple_rules = IndexSet::new();
-        let mut o_complex_rules = IndexSet::new();
+        // smr - simple rules
+        // cmr - complex rules
+
+        // Based on this, we validate through `RegexSet` items:
+        // if fewer items are found than there are in the collection, the validation has failed
+        let mut smr_must_be_found = IndexSet::new();
+        // Based on this, we simply check with `RegexSet`.
+        let mut smr_must_not_be_found_with_subrules = IndexSet::new();
+        // Based on this, we validate through `RegexSet` items: if even one item from this collection is found, the validation has failed
+        let mut smr_must_not_be_found_without_subrules = IndexSet::new();
+        let mut cmr = IndexSet::new();
         all_rules
             .into_iter()
             .for_each(|rule| match rule.0.str_with_type {
-                RegexRaw::DefaultRegex(_) => {
-                    o_simple_rules.insert(rule);
-                }
+                RegexRaw::DefaultRegex(_) => match rule.0.general_modifiers.requirement {
+                    MatchRequirement::MustBeFound => {
+                        smr_must_be_found.insert(rule);
+                    }
+                    MatchRequirement::MustNotBeFound => match rule.0.subrules {
+                        Some(subrules) => {
+                            smr_must_not_be_found_with_subrules.insert(rule);
+                        }
+                        None => {
+                            smr_must_not_be_found_without_subrules.insert(rule);
+                        }
+                    },
+                },
                 RegexRaw::FancyRegex(_) => {
-                    o_complex_rules.insert(rule);
+                    cmr.insert(rule);
                 }
             });
+
         SlisedRules {
-            simple_rules: o_simple_rules,
-            complex_rules: o_complex_rules,
+            smr_must_be_found,
+            smr_must_not_be_found_with_subrules,
+            smr_must_not_be_found_without_subrules,
+            cmr,
         }
     }
 
     /// A method for checking if there are any rules
     pub fn is_some(&self) -> bool {
-        !self.simple_rules.is_empty() || !self.complex_rules.is_empty()
+        !self.smr_must_be_found.is_empty()
+            || !self.smr_must_not_be_found_with_subrules.is_empty()
+            || !self.smr_must_not_be_found_without_subrules.is_empty()
+            || !self.cmr.is_empty()
     }
 }
 

diff --git a/flexible_inspect_rs/src/rules/rule_str/another_traits.rs b/flexible_inspect_rs/src/rules/rule_str/another_traits.rs
@@ -62,10 +62,7 @@ mod hash_trait {
 
     impl Hash for Subrules {
         fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-            self.simple_rules.hash(state);
-            if let Some(value) = &self.simple_rules {
-                value.hash(state);
-            }
+
         }
     }
 }

diff --git a/flexible_inspect_rs/src/rules/rule_str/base.rs b/flexible_inspect_rs/src/rules/rule_str/base.rs
@@ -30,14 +30,7 @@ impl RuleBase for Rule {
         self.0.subrules.as_ref()
     }
 
-    fn get_simple_rules(&self) -> Option<(&IndexSet<Self::RuleType>, &Self::RegexSet)> {
-        if let Some(subrules) = self.get_subrules() {
-            if let Some(simple_rules) = &subrules.simple_rules {
-                return Some((&simple_rules.all_rules, &simple_rules.regex_set.regex_set));
-            }
-        }
-        None
-    }
+
 
     fn get_complex_rules(&self) -> Option<&IndexSet<Self::RuleType>> {
         if let Some(subrules) = self.get_subrules() {

diff --git a/flexible_inspect_rs/src/rules/rule_str/mod.rs b/flexible_inspect_rs/src/rules/rule_str/mod.rs
@@ -73,7 +73,9 @@ pub struct Subrules {
 pub struct SimpleRules {
     /// The rules are in the `IndexSet` collection to preserve
     /// the order of the rules during index retrieval from the `RegexSet` and to avoid duplicate rules
-    pub all_rules: IndexSet<Rule>,
+    pub smr_must_be_found: IndexSet<Rule>,
+    pub smr_must_not_be_found_with_subrules: IndexSet<Rule>,
+    pub smr_must_not_be_found_without_subrules: IndexSet<Rule>,
     /// `RegexSet` Match multiple, possibly overlapping, regexes in a single search.
     pub regex_set: RegexSetContainer,
 }

diff --git a/flexible_inspect_rs/src/rules/runner/context_match/all_rules_for_all_matches.rs b/flexible_inspect_rs/src/rules/runner/context_match/all_rules_for_all_matches.rs
@@ -11,218 +11,11 @@ where
 {
     let mut temp_stack: VecDeque<(&R::RuleType, CaptureData<C>)> = VecDeque::new();
     if let Some(mut frame) = stack.pop_front() {
-        trace!(
-            "deleted rule from unique stack: ({}, {})",
-            frame.0.get_str().yellow(),
-            format!("{:#?}", frame.0.get_requirement()).yellow()
-        );
-        match NextStep::next_or_finish_or_error(frame.0, &mut frame.1) {
-            NextStep::Go => {
-                // ============================= LOG =============================
-                debug!(
-                    "run subrules from the root rule `({}, {})`",
-                    frame.0.get_str().yellow(),
-                    format!("{:#?}", frame.0.get_requirement()).yellow()
-                );
-                // ===============================================================
-                if let Some(simple_rules) = &frame.0.get_simple_rules() {
-                    // count of how many times one rule has worked for different matches
-                    let mut counter_of_each_rule = HashMap::new();
-                    // which matches have already been processed in the rule
-                    // is necessary so you don't have to go through them again in the second cycle.
-                    let mut selected_text = HashMap::new();
-                    // rules that have passed the selections for all matches
-                    let mut selected_rules = HashSet::new();
-                    /*
-                    The first step is to get a RegexSet for each match, based on it,
-                    we get those rules that will definitely work, then check their modifiers
-                     */
-                    for data in frame.1.text_for_capture.iter() {
-                        // we get the indexes of the rules that are in the RegexSet
-                        for index in R::get_selected_rules(simple_rules.1, &data) {
-                            let rule_from_regexset = simple_rules.0.get_index(index).unwrap();
-                            // ============================= LOG =============================
-                            debug!(
-                                "found the rule `({}, {})` (root rule `({}, {})`) from the `RegexSet` category\nfor data `{:#?}`",
-                                rule_from_regexset.get_str().yellow(),
-                                format!("{:#?}", rule_from_regexset.get_requirement()).yellow(),
-                                frame.0.get_str().yellow(),
-                                format!("{:#?}", frame.0.get_requirement()).yellow(),
-                                data
-                            );
-                            // ===============================================================
-                            let mut captures = R::find_captures(rule_from_regexset, &data);
-                            if let NextStep::Error(error) =
-                                NextStep::next_or_finish_or_error(rule_from_regexset, &mut captures)
-                            {
-                                // ============================= LOG =============================
-                                error!(
-                                    "the rule `({}, {})` (root rule `({}, {})`)\nfailed condition\nfor data `{:#?}`",
-                                    rule_from_regexset.get_str().yellow(),
-                                    format!("{:#?}", rule_from_regexset.get_requirement()).yellow(),
-                                    frame.0.get_str().yellow(),
-                                    format!("{:#?}", frame.0.get_requirement()).yellow(),
-                                    data
-                                );
-                                // ===============================================================
-                                return NextStep::Error(error);
-                            }
-                            /*
-                            For each rule, let's mark the data that has already been checked,
-                            so that we can exclude it in the second cycle
-                             */
-                            selected_text
-                                .entry(rule_from_regexset)
-                                .or_insert_with(HashSet::new)
-                                .insert(data);
-                            /*
-                            Since in this mode `rule` * `data`, where each rule should work for every match,
-                            we check how many times one rule from regexset was passed for matches,
-                            if a rule worked for all matches, we write it to an exception.
-                             */
-                            *counter_of_each_rule.entry(index).or_insert(0) += 1;
-                            if counter_of_each_rule[&index] == frame.1.text_for_capture.len() {
-                                selected_rules.insert(rule_from_regexset);
-                                temp_stack.push_back((rule_from_regexset, captures));
-                            }
-                        }
-                    }
-                    // The second step, in this stage we go through those rules and matches that are not in `RegexSet`.
-                    for data in frame.1.text_for_capture.iter() {
-                        // we go through all the simple rules
-                        for rule in simple_rules.0 {
-                            // So the first condition is that we exclude those rules
-                            // that have already been processed in RegexSet.
-                            // ( excluded only those who, for all the coincidence, have been successful )
-                            if !selected_rules.contains(rule) {
-                                // If this rule worked for several matches,
-                                // but not for all of them, then we get those values that have already been processed
-                                // and exclude them
-                                if let Some(value) = selected_text.get(rule) {
-                                    if !value.contains(&data) {
-                                        let mut captures = R::find_captures(rule, &data);
-                                        if let NextStep::Error(err) = not_in_regexset::<R, C>(
-                                            frame.0,
-                                            rule,
-                                            &data,
-                                            &mut captures,
-                                        ) {
-                                            return NextStep::Error(err);
-                                        }
-                                        temp_stack.push_back((rule, captures));
-                                    }
-                                } else {
-                                    let mut captures = R::find_captures(rule, &data);
-                                    // If there were no successful matches in this rule,
-                                    // it means that this is the first time
-                                    // this rule has been run for validation
-                                    if let NextStep::Error(err) =
-                                        not_in_regexset::<R, C>(frame.0, rule, &data, &mut captures)
-                                    {
-                                        return NextStep::Error(err);
-                                    }
-                                    temp_stack.push_back((rule, captures));
-                                }
-                            }
-                        }
-                    }
-                }
-                // The hird step, bypass the rules with the Lookahead and Lookbehind regex.
-                if let Some(complex_rules) = frame.0.get_complex_rules() {
-                    for data in frame.1.text_for_capture.iter() {
-                        for cmplx_rule in complex_rules {
-                            let mut captures = R::find_captures(cmplx_rule, &data);
-                            if let NextStep::Error(err) =
-                                NextStep::next_or_finish_or_error(cmplx_rule, &mut captures)
-                            {
-                                // ============================= LOG =============================
-                                error!(
-                                    "the rule `({}, {})` (root rule `({}, {})`) \nfailed condition\nfor data `{:#?}`",
-                                    cmplx_rule.get_str().yellow(),
-                                    format!("{:#?}", cmplx_rule.get_requirement()).yellow(),
-                                    frame.0.get_str().yellow(),
-                                    format!("{:#?}", frame.0.get_requirement()).yellow(),
-                                    data
-                                );
-                                // ===============================================================
-                                return NextStep::Error(err);
-                            }
-                            // ============================= LOG =============================
-                            debug!(
-                                "found the rule `({}, {})` (root rule `({}, {})`) from the `Complex Rule` category\nfor data `{:#?}`",
-                                cmplx_rule.get_str().yellow(),
-                                format!("{:#?}", cmplx_rule.get_requirement()).yellow(),
-                                frame.0.get_str().yellow(),
-                                format!("{:#?}", frame.0.get_requirement()).yellow(),
-                                data);
-                            // ===============================================================
-                            temp_stack.push_back((cmplx_rule, captures));
-                        }
-                    }
-                }
-            }
-            NextStep::Finish => {
-                // ============================= LOG =============================
-                debug!(
-                    "the rule `({}, {})` is finished, the result is `Ok`",
-                    frame.0.get_str().yellow(),
-                    format!("{:#?}", frame.0.get_requirement()).yellow(),
-                );
-                // ===============================================================
-            }
-            NextStep::Error(err) => {
-                // ================= (LOG) =================
-                error!(
-                    "the rule `({}, {})` didn't work",
-                    frame.0.get_str().yellow(),
-                    format!("{:#?}", frame.0.get_requirement()).yellow(),
-                );
-                // =========================================
-                return NextStep::Error(err);
-            }
-        }
+
+
     }
-    stack.extend(temp_stack.drain(..));
-    // ================= (LOG) =================
-    info!("for all matches all rules worked successfully");
-    // =========================================
     NextStep::Finish
 }
 
-// Function for checking rules not included in `RegexSet`.
-fn not_in_regexset<'a, R, C>(
-    root_rule: &R::RuleType,
-    rule: &R::RuleType,
-    data: &C,
-    captures: &mut CaptureData<'a, C>,
-) -> NextStep
-where
-    R: CalculateValueRules<'a, C> + Debug,
-    C: IntoSpecificCaptureType<'a>,
-{
-    // ============================= LOG =============================
-    debug!(
-        "the rule `({}, {})` (root rule `({},{})`) isn't in the `RegexSet` category\nfor data `{:#?}`",
-        rule.get_str().yellow(),
-        format!("{:#?}", rule.get_requirement()).yellow(),
-        root_rule.get_str().yellow(),
-        format!("{:#?}", root_rule.get_requirement()).yellow(),
-        data
-    );
-    // ===============================================================
 
-    if let NextStep::Error(error) = NextStep::next_or_finish_or_error(rule, captures) {
-        // ============================= LOG =============================
-        error!(
-            "the rule `({}, {})` (root rule `({},{})`)\nfailed condition\nfor data `{:#?}`",
-            rule.get_str().yellow(),
-            format!("{:#?}", rule.get_requirement()).yellow(),
-            root_rule.get_str().yellow(),
-            format!("{:#?}", root_rule.get_requirement()).yellow(),
-            data
-        );
-        return NextStep::Error(error);
-        // ===============================================================
-    }
-    NextStep::Finish
-}
+