In [None]:
# Hedging lexicon: maps a category label to the raw (uncompiled) regex source
# that recognises it. Entries keep their original order.
# NOTE(review): several patterns contain a capital "I"; whether matching is
# case-sensitive depends on the flags the caller compiles with - confirm.
HEDGES = dict(
    # --- single-word / short-phrase hedges -------------------------------
    epistemic_verbs=r"\b(think|believe|suppose|assume|suggest|seem|appear|tend|guess|reckon|figure|imagine|suspect)\b",
    epistemic_adverbs=r"\b(maybe|perhaps|possibly|probably|presumably|apparently|seemingly|arguably)\b",
    approximators=r"\b(sort of|kind of|sorta|kinda|like|somewhat|quite|rather|fairly|pretty)\b",
    shields=r"\b(according to|as far as|to my knowledge|if I recall|if I remember correctly)\b",
    downgraders=r"\b(just|only|merely|simply|basically|actually|really)\b",
    # --- multi-word hedges about memory, perception and evidence ---------
    memory_hedges=r"\b(to the best of my (?:knowledge|recollection)|as I (?:recall|remember)|if (?:my )?memory serves)\b",
    perception_hedges=r"\b(it (?:seemed|looked|appeared|sounded) (?:like|as (?:if|though))|from what I could (?:see|tell|hear))\b",
    estimation=r"\b(approximately|roughly|about|around|or so|give or take|more or less|ish)\b",
    plausibility_shields=r"\b(I (?:would|wouldn\'t) say|I\'d (?:imagine|think|guess)|it\'s (?:possible|plausible) that)\b",
    evidential_hedges=r"\b(as far as I (?:know|can tell)|from what I (?:understand|gathered|heard))\b",
    conditional_knowledge=r"\b(if I\'m not mistaken|unless I\'m (?:wrong|mistaken)|correct me if I\'m wrong)\b",
    scope_limiters=r"\b(in (?:some|many|most|certain) (?:cases|ways|respects)|to (?:some|a certain) (?:extent|degree))\b",
    attribution_hedges=r"\b(I was told|someone (?:said|mentioned)|supposedly|allegedly|reportedly)\b",
    # Unlike its neighbours, this pattern has no capturing group.
    numerical_hedges=r"\b(?:a (?:few|couple|handful|bunch)|several|various|numerous|many)\b",
    # --- stacked hedges ("I think maybe", "possibly around") --------------
    compound_hedges=r"\b(I think (?:maybe|perhaps)|possibly (?:around|about)|might (?:possibly|perhaps))\b",
    # --- statements about the speaker's own knowledge state ---------------
    metacognitive=r"\b(I\'m not (?:sure|certain|positive)|I (?:don\'t|can\'t) (?:know|remember|recall) (?:for sure|exactly))\b",
)

#################################################################################

MODALS = {
    # Basic epistemic (possibility/probability)
    'epistemic_possibility': r'\b(might|may|could)\b(?!\s+(?:I|you|we|they|he|she|it)\s+(?:ask|tell|go|do|have\s+to))',
    'epistemic_probability': r'\b(would|should|must)\b(?=\s+(?:be|have\s+been|probably))',

    # Deontic (obligation/permission) - less relevant for uncertainty but important to distinguish
    'deontic_obligation': r'\b(must|should|ought to|have to|need to|supposed to|required to)\b',
    'deontic_permission': r'\b(may|can|could|allowed to|permitted to)\b(?=\s+(?:go|leave|enter|ask))',

    # Dynamic (ability/willingness)
    'dynamic_ability': r'\b(can|could|(?:be\s+)?able to|capable of)\b',
    'dynamic_volition': r'\b(will|would|shall|won\'t|wouldn\'t)\b',

    # Past modals
    'past_possibility': r'\b(might have|may have|could have)\b(?:\s+been)?',
    'past_probability': r'\b(would have|should have|must have)\b(?:\s+been)?',
    'past_ability': r'\b(could have|couldn\'t have)\b(?:\s+been)?',

    # Tentative/conditional modals
    'conditional': r'\b(would|could|might|should)\b(?=\s+if)|(?:if\s+.{1,30})\b(would|could|might)\b',
    'tentative_future': r'\b(might|may|could)\b(?=\s+(?:be|go|come|happen|occur))',

    # Modal adverbs
    'modal_adverbs': r'\b(possibly|probably|certainly|definitely|surely|likely|unlikely)\b',

    # Semi-modals
    'semi_modals': r'\b((?:be\s+)?going to|gonna|gotta|hafta|wanna|got to|have got to|had better|better)\b',

    # Negative modals (express impossibility/inability)
    'negative_modals': r'\b(can\'t|cannot|couldn\'t|wouldn\'t|shouldn\'t|mustn\'t|won\'t|may not|might not)\b',

    # Modal clusters (double modality)
    'double_modals': r'\b(might could|might would|might should|may could|must could)\b',
    'modal_hedge_combo': r'\b((?:I think|maybe|perhaps)\s+(?:I\s+)?(?:might|could|would|should))\b',

    'modal_with_perception': r'\b(might|could|may)\s+have\s+(?:seen|heard|noticed|observed)\b',
    'modal_with_memory': r'\b(might|could|may)\s+(?:remember|recall|forget)\b',
    'modal_with_time': r'\b(would|could|might)\s+have\s+been\s+(?:around|about|approximately)\b',

    # Inferential modals (drawing conclusions)
    'inferential': r'\b(must|should|would)\s+(?:mean|suggest|indicate|imply)\b',

    # Counterfactual modals (alternative scenarios)
    'counterfactual': r'\bif\s+.{1,20}\s+(?:had|hadn\'t).{1,20}(?:would|could|might)\s+have\b'

}

#################################################################################

# Disfluency lexicon: maps a category label to the raw (uncompiled) regex
# source that recognises it. Several categories target patterns the original
# comments associate with second-language (L2) speech.
DISFLUENCIES = {
    # Standard filled pauses
    'filled_pauses': r'\b(u+h+|u+m+|e+r+|a+h+|hmm+|mm+)\b',

    # L2-specific filled pauses
    'l2_filled_pauses': r'\b(eh+|ah+|euh+|eto|ano|emmm)\b',

    # Word-search indicators
    'word_search': r'\b(how (?:do you|to) say|what\'s the word|how you call it|you know what I mean)\b',

    # Explicit retrieval markers
    'lexical_retrieval': r'\b(I mean|I want to say|trying to say|what I mean is|I don\'t know the word)\b',

    # Grammatical repair sequences (L2 speakers often self-correct grammar).
    # Optional ellipsis/comma after the first word and an optional comma after
    # the repair marker let transcribed forms such as "is... no, was" match as
    # well as the bare "is no was".
    'grammar_repairs': r'\b(\w+)(?:\s*\.{2,3}|,)?\s+(?:no|sorry|I mean),?\s+\w+\b',
    # Whitespace before the ellipsis is optional so both "a... the" and
    # "a ... the" are recognised.
    'article_repairs': r'\b(a|an|the)\s*\.{2,3}\s+(?:a|an|the)?\b',

    # False starts with reformulation
    'false_starts': r'\b(?:I|we|he|she|they)\s+\w+\s*[-–—]\s*(?:I mean|sorry|no)',

    # Repetition for processing time (more extensive in L2)
    'processing_repetition': r'\b(\w+\s+\w+)\s+\1\b',  # phrase repetition
    # Two or more stalling tokens in a row ("yes yes yes", "no no."). The last
    # token only needs a word boundary after it, so runs that end the
    # utterance or precede punctuation are caught too.
    'stalling_repetition': r'\b((?:yes|no|okay|right|so)\s+)+(?:yes|no|okay|right|so)\b',

    # Circumlocution markers (explaining around missing vocabulary)
    'circumlocution': r'\b(it\'s (?:like|kind of)|something (?:like|that)|the thing (?:that|which))\b',

    # Confirmation checks (L2 speakers often verify understanding).
    # No trailing \b: a boundary after "?" would demand a word character right
    # after the question mark, which ordinary text never has.
    'confirmation_seeking': r'\b((?:you know|right|yes|no)\?|is that correct\?|you understand\?)',

    # Lengthening (written representation of sound stretching): a vowel
    # repeated three or more times, e.g. "sooo", "weeell". Stretched
    # consonants are not detected.
    'sound_lengthening': r'\b\w*([aeiou])\1{2,}\w*\b',

    # Incomplete utterances (more frequent in L2)
    'trailing_off': r'\.{3}(?:\s|$)|—$|–$',
}

#################################################################################


# Conditional-construction lexicon: maps a category label to the raw
# (uncompiled) regex source that recognises it. None of these patterns has a
# capturing group; the whole match is the construction.
CONDITIONALS = {
    # Basic if-then structures
    'if_then_explicit': r'\bif\b[^.!?]{1,50}\bthen\b',
    # "if X, Y" structure. The lookahead rejects a comma that is followed by a
    # coordinator (and/but/or); the \b keeps it from also rejecting words that
    # merely start with those letters ("orders", "another", "button").
    'if_comma': r'\bif\b[^,]{5,50},(?!\s*(?:and|but|or)\b)',

    # Unless/except conditionals
    'unless_clause': r'\bunless\b[^.!?]{5,}',
    'except_clause': r'\bexcept (?:if|when|where|that)\b',

    # Provided/providing/given that
    'provision_conditional': r'\b(?:provided|providing|given) (?:that|if)\b',

    # Supposing/assuming hypotheticals
    'hypothetical': r'\b(?:suppose|supposing|assuming|say) (?:that|we|you|I|he|she|they)\b',

    # When/whenever/where temporospatial conditionals
    'temporal_conditional': r'\b(?:when|whenever|where|wherever)\b[^.!?]{5,}',

    # As long as/so long as
    'duration_conditional': r'\b(?:as|so) long as\b',

    # In case/in the event
    'eventuality': r'\bin (?:case|the event) (?:of|that)\b',

    # Whether constructions
    'whether_conditional': r'\bwhether (?:or not )?.{5,}',

    # Conditional perfect (would have/could have scenarios)
    'conditional_perfect': r'\b(?:would|could|might|should) have (?:been|done|gone|seen|heard)\b',

    # But for constructions (legal counterfactuals)
    'but_for': r'\bbut for\b[^.!?]{5,}',

    # Only if/even if qualifiers
    'qualified_conditional': r'\b(?:only|even) if\b',

    # Implicit conditionals (no explicit "if")
    'implicit_conditional': r'\b(?:had (?:I|we|he|she|they|you)|were (?:I|we|he|she|they|you) to)\b',

    # Otherwise/else alternatives
    'alternative': r'\b(?:otherwise|or else|alternatively)\b',

    # Contingency markers
    'contingent': r'\b(?:depend(?:s|ing)? on|contingent (?:on|upon)|subject to)\b',

    # Comparative conditionals (as if/as though)
    'comparative': r'\bas (?:if|though)\b[^.!?]{5,}',
}
#################################################################################
# Temporal-expression lexicon: category label -> raw (uncompiled) regex source.
# NOTE(review): weekday and month names are written capitalised; whether they
# match lowercase text depends on the flags the caller compiles with - confirm.
TEMPORALS = {
    # -- ordering of events ------------------------------------------------
    "sequence": r"\b(before|after|during|while|when|as|until|since|following|prior to)\b",
    "order_markers": r"\b(first|then|next|subsequently|afterwards|finally|lastly|meanwhile)\b",

    # -- how long something lasted -----------------------------------------
    "duration": r"\bfor (?:about|around|approximately)?\s*(?:a few|several|\d+)\s*(?:seconds?|minutes?|hours?|days?|weeks?|months?|years?)\b",
    "time_span": r"\b(?:from|between)\s+.{1,20}\s+(?:to|until|and|through)\s+.{1,20}",

    # -- imprecise times (crucial for testimony) ---------------------------
    "time_approximation": r"\b(?:around|about|approximately|roughly|nearly|almost|close to)\s+(?:\d{1,2}(?::\d{2})?|noon|midnight|morning|afternoon|evening)\b",
    "vague_time": r"\b(?:early|late)\s+(?:morning|afternoon|evening|night|January|February|March|April|May|June|July|August|September|October|November|December)\b",

    # -- times relative to another reference point -------------------------
    "relative_past": r"\b(?:earlier|previously|before that|prior to that|the day before|last (?:week|month|year))\b",
    "relative_future": r"\b(?:later|afterwards|after that|the next day|following (?:week|month|year))\b",
    "relative_same": r"\b(?:at the same time|simultaneously|meanwhile|during this time|that day)\b",

    # -- how often ---------------------------------------------------------
    "frequency": r"\b(?:always|usually|often|sometimes|occasionally|rarely|never|every|daily|weekly|monthly)\b",
    "iteration": r"\b(?:again|once more|repeatedly|multiple times|several times|twice|three times)\b",

    # -- explicit clock times, dates and seasons ---------------------------
    "clock_time": r"\b(?:at )?\d{1,2}(?::\d{2})?\s*(?:a\.?m\.?|p\.?m\.?|o\'clock)\b",
    "date_reference": r"\b(?:on |)(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|January|February|March|April|May|June|July|August|September|October|November|December)\b",
    "seasonal": r"\b(?:spring|summer|fall|autumn|winter)\s*(?:of |)\d{4}\b",

    # -- start/end boundaries ----------------------------------------------
    "temporal_boundary": r"\b(?:by the time|up until|as of|starting from|beginning|ending)\b",

    # -- aspect: still in progress vs. already finished --------------------
    "ongoing": r"\b(?:was|were) (?:still|already|just)\b",
    "completion": r"\b(?:had already|had just|finished|completed|done)\b",

    # -- hedged or remembered times ----------------------------------------
    "temporal_hedge": r"\b(?:sometime|somewhere around|I think it was|maybe around|possibly)\s+(?:in |on |at |during |)\b",
    "memory_temporal": r"\b(?:I remember it being|if I recall correctly, it was|as I remember)\b",
}

#################################################################################

# Certainty/uncertainty lexicon: maps a category label to the raw (uncompiled)
# regex source that recognises it. Categories overlap (e.g. "I clearly
# remember" appears under both 'strong_certainty' and 'memory_confidence').
CERTAINTY_MARKERS = {
    # High certainty expressions
    'absolute_certainty': r'\b(definitely|certainly|absolutely|clearly|obviously|undoubtedly|surely|for sure|without a doubt)\b',
    'strong_certainty': r'\b(I\'m (?:positive|certain|sure)|I know for a fact|I clearly remember|I distinctly recall)\b',

    # Low certainty expressions
    'uncertainty_explicit': r'\b(not (?:sure|certain|positive)|uncertain|unsure|doubtful)\b',
    'uncertainty_implicit': r'\b(I (?:don\'t|can\'t) (?:know|remember|recall|say)|hard to say|difficult to tell)\b',

    # Memory qualifiers (crucial for testimony)
    'memory_confidence': r'\b(I (?:clearly|vividly|distinctly) (?:remember|recall))\b',
    'memory_doubt': r'\b(I (?:don\'t|can\'t|barely) (?:remember|recall)|my memory is (?:fuzzy|hazy|unclear|vague))\b',
    'memory_partial': r'\b(I (?:vaguely|partly|somewhat) (?:remember|recall)|rings a bell)\b',

    # Perceptual certainty
    'perceptual_confidence': r'\b(I (?:clearly|definitely) (?:saw|heard|noticed|observed))\b',
    'perceptual_doubt': r'\b(I (?:think I|might have|possibly) (?:saw|heard|noticed)|couldn\'t (?:see|hear|tell) clearly)\b',

    # Inferential markers (conclusions vs observations)
    'inference': r'\b(I (?:assume|presume|suppose|infer|deduce|conclude)|it seems that|appears that)\b',
    'speculation': r'\b(I (?:would guess|imagine|suspect|speculate)|my guess is|if I had to guess)\b',

    # Source certainty (hearsay vs direct knowledge)
    'direct_knowledge': r'\b(I (?:personally|myself) (?:saw|heard|witnessed)|first-hand|with my own eyes)\b',
    'hearsay': r'\b(I (?:was told|heard|understood) that|someone (?:said|told me)|apparently|supposedly|allegedly)\b',

    # Commitment markers
    'hedge_commitment': r'\b((?:as far as|to the best of) (?:I know|my knowledge)|if I\'m not mistaken)\b',
    'qualified_assertion': r'\b(I\'m (?:fairly|pretty|reasonably) (?:sure|certain|confident))\b',

    # Emphatic certainty (sometimes overcompensation)
    'emphatic': r'\b(I\'m (?:100|one hundred) percent (?:sure|certain)|absolutely (?:positive|certain)|no doubt whatsoever)\b',

    # Gradable certainty
    'percentage_certainty': r'\b\d{1,3}\s*(?:%|percent)\s+(?:sure|certain|confident)\b',
    'scalar_certainty': r'\b(very|quite|somewhat|slightly|barely)\s+(?:sure|certain|confident)\b',

    # Witness-specific certainty patterns
    'refreshed_recollection': r'\b(now that (?:I think about it|you mention it)|that reminds me|oh (?:yes|right))\b',
    'certainty_change': r'\b((?:wait|actually),?\s+I\'m (?:not |more |less )(?:sure|certain))\b',
    'conditional_certainty': r'\bif .{1,30}, (?:then |)I\'m (?:sure|certain|confident)\b',

    # Double-checking patterns (witness self-monitoring).
    # No trailing \b after the "?": a boundary there would require a word
    # character immediately after the question mark, so "am I sure?" followed
    # by a space or end of text could never match.
    'self_questioning': r'\b(?:am I |was it |is that |do I )(?:sure|right|correct)\?',
    # Only matches at the end of the searched text (optional trailing whitespace).
    'confirmation_seeking': r'\b(?:right|correct|yes)\?\s*$',

    # Certainty about absence (important for alibi/denial)
    'negative_certainty': r'\b(I\'m (?:certain|sure) (?:it wasn\'t|I didn\'t|that didn\'t))\b',
    'absence_uncertainty': r'\b(I don\'t think (?:I|it|they)|not that I (?:know|remember|recall))\b',
}
