From d7d8de2c4d9678f41045d714c9aef1acf195134c Mon Sep 17 00:00:00 2001 From: Aiden Mitchell Date: Wed, 9 Jul 2025 09:52:19 -0700 Subject: [PATCH 1/4] Update link_credential_phishing_intent_and_other_indicators.yml --- ...dential_phishing_intent_and_other_indicators.yml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/detection-rules/link_credential_phishing_intent_and_other_indicators.yml b/detection-rules/link_credential_phishing_intent_and_other_indicators.yml index 1be6230b9a5..fe18fc12589 100644 --- a/detection-rules/link_credential_phishing_intent_and_other_indicators.yml +++ b/detection-rules/link_credential_phishing_intent_and_other_indicators.yml @@ -407,19 +407,6 @@ source: | ) ) ) - ), - // common greetings via email.local_part - any(recipients.to, - length(.email.local_part) > 2 - and - // use count to ensure the email address is not part of a disclaimer - strings.icount(body.current_thread.text, .email.local_part) > - // sum allows us to add more logic as needed - strings.icount(body.current_thread.text, - strings.concat('was sent to ', .email.email) - ) + strings.icount(body.current_thread.text, - strings.concat('intended for ', .email.email) - ) ) ) or ( From f90bbf0cd5ddae8d0ac65bdf6a2b7f4ee9d9abd8 Mon Sep 17 00:00:00 2001 From: Aiden Mitchell Date: Wed, 9 Jul 2025 10:03:26 -0700 Subject: [PATCH 2/4] Update link_credential_phishing_intent_and_other_indicators.yml --- ...l_phishing_intent_and_other_indicators.yml | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/detection-rules/link_credential_phishing_intent_and_other_indicators.yml b/detection-rules/link_credential_phishing_intent_and_other_indicators.yml index fe18fc12589..307372fd6ee 100644 --- a/detection-rules/link_credential_phishing_intent_and_other_indicators.yml +++ b/detection-rules/link_credential_phishing_intent_and_other_indicators.yml @@ -296,13 +296,6 @@ source: | ) and ( 4 of ( - any(recipients.to, - .email.domain.valid - and ( - 
strings.icontains(body.current_thread.text, .email.email) - or strings.icontains(body.current_thread.text, .email.local_part) - ) - ), any(ml.nlu_classifier(body.current_thread.text).intents, .name == "cred_theft" and .confidence in ("medium", "high") ), @@ -407,6 +400,19 @@ source: | ) ) ) + ), + // common greetings via email.local_part + any(recipients.to, + length(.email.local_part) > 2 + and + // use count to ensure the email address is not part of a disclaimer + strings.icount(body.current_thread.text, .email.local_part) > + // sum allows us to add more logic as needed + strings.icount(body.current_thread.text, + strings.concat('was sent to ', .email.email) + ) + strings.icount(body.current_thread.text, + strings.concat('intended for ', .email.email) + ) ) ) or ( From 86086eaf99a52d5679dfa4317c5f28de9f742729 Mon Sep 17 00:00:00 2001 From: Aiden Mitchell Date: Wed, 9 Jul 2025 11:54:59 -0700 Subject: [PATCH 3/4] Update link_credential_phishing_intent_and_other_indicators.yml --- ...l_phishing_intent_and_other_indicators.yml | 531 +----------------- 1 file changed, 1 insertion(+), 530 deletions(-) diff --git a/detection-rules/link_credential_phishing_intent_and_other_indicators.yml b/detection-rules/link_credential_phishing_intent_and_other_indicators.yml index 307372fd6ee..e86164b9aae 100644 --- a/detection-rules/link_credential_phishing_intent_and_other_indicators.yml +++ b/detection-rules/link_credential_phishing_intent_and_other_indicators.yml @@ -4,536 +4,7 @@ description: | type: "rule" severity: "medium" source: | - type.inbound - and ( - regex.icontains(subject.subject, - "termination.*notice", - "38417", - ":completed", - "[il1]{2}mit.*ma[il1]{2} ?bo?x", - "[il][il][il]egai[ -]", - "[li][li][li]ega[li] attempt", - "[ng]-?[io]n .*block", - "[ng]-?[io]n .*cancel", - "[ng]-?[io]n .*deactiv", - "[ng]-?[io]n .*disabl", - "action.*required", - "abandon.*package", - "about.your.account", - "acc(ou)?n?t (is )?on ho[li]d", - "acc(ou)?n?t.*terminat", - 
"acc(oun)?t.*[il1]{2}mitation", - "access.*limitation", - "account (will be )?block", - "account.*de-?activat", - "account.*locked", - "account.*re-verification", - "account.*security", - "account.*suspension", - "account.has.expired", - "account.will.be.blocked", - "account v[il]o[li]at", - "activity.*acc(oun)?t", - "almost.full", - "app[li]e.[il]d", - "authenticate.*account", - "been.*suspend", - "crediential.*notif", - "clos.*of.*account.*processed", - "confirm.your.account", - "courier.*able", - "crediential.*notif", - "deactivation.*in.*progress", - "delivery.*attempt.*failed", - "disconnection.*notice", - "document.received", - "documented.*shared.*with.*you", - "dropbox.*document", - "e-?ma[il1]+ .{010}suspen", - "e-?ma[il1]{1} user", - "e-?ma[il1]{2} acc", - "e-?ma[il1]{2} preview", - "e-?ma[il1]{2}.*up.?grade", - "e.?ma[il1]{2}.*server", - "e.?ma[il1]{2}.*suspend", - "email.update", - "faxed you", - "fraud(ulent)?.*charge", - "from.helpdesk", - "fu[il1]{2}.*ma[il1]+[ -]?box", - "has.been.*suspended", - "has.been.limited", - "have.locked", - "he[li]p ?desk upgrade", - "heipdesk", - "i[il]iega[il]", - "ii[il]ega[il]", - "incoming e?mail", - "incoming.*fax", - "lock.*security", - "ma[il1]{1}[ -]?box.*quo", - "ma[il1]{2}[ -]?box.*fu[il1]", - "ma[il1]{2}box.*[il1]{2}mit", - "ma[il1]{2}box stor", - "mail on.?hold", - "mail.*box.*migration", - "mail.*de-?activat", - "mail.update.required", - "mails.*pending", - "messages.*pending", - "missed.*shipping.*notification", - "missed.shipment.notification", - "must.update.your.account", - "new [sl][io]g?[nig][ -]?in from", - "new voice ?-?mail", - "notifications.*pending", - "office.*3.*6.*5.*suspend", - "office365", - "on google docs with you", - "online doc", - "password.*compromised", - "(?:payroll|salary|bonus).*Distribution", - "periodic maintenance", - "potential(ly)? 
unauthorized", - "refund not approved", - "report", - "revised.*policy", - "scam", - "scanned.?invoice", - "secured?.update", - "security breach", - "securlty", - "signed.*delivery", - "status of your .{314}? ?delivery", - "susp[il1]+c[il1]+ous.*act[il1]+v[il1]+ty", - "suspicious.*sign.*[io]n", - "suspicious.activit", - "temporar(il)?y deactivate", - "temporar[il1]{2}y disab[li]ed", - "temporarily.*lock", - "un-?usua[li].activity", - "unable.*deliver", - "unauthorized.*activit", - "unauthorized.device", - "undelivered message", - "unread.*doc", - "unusual.activity", - "(?:unrecognized|Unusual|suspicious|unknown) (?:log|sign).?[io]n attempt", - "upgrade.*account", - "upgrade.notice", - "urgent message", - "urgent.verification", - "v[il1]o[li1]at[il1]on security", - "va[il1]{1}date.*ma[il1]{2}[ -]?box", - "verification ?-?require", - "verification( )?-?need", - "verify.your?.account", - "web ?-?ma[il1]{2}", - "web[ -]?ma[il1]{2}", - "will.be.suspended", - "your (customer )?account .as", - "your.office.365", - "your.online.access", - "de.activation", - // https://github.com/sublime-security/static-files/blob/master/suspicious_subjects.txt - "account has been limited", - "action required", - "almost full", - "apd notifi cation", - "are you at your desk", - "are you available", - "attached file to docusign", - "banking is temporarily unavailable", - "bankofamerica", - "closing statement invoice", - "completed: docusign", - "de-activation of", - "delivery attempt", - "delivery stopped for shipment", - "detected suspicious", - "detected suspicious actvity", - "docu sign", - "document for you", - "document has been sent to you via docusign", - "document is ready for signature", - "docusign", - "encrypted message", - "failed delivery", - "fedex tracking", - "file was shared", - "freefax", - "fwd: due invoice paid", - "has shared", - "inbox is full", - "invitation to comment", - "invitation to edit", - "invoice due", - "left you a message", - "message from", - "new message", 
- "new voicemail", - "on desk", - "out of space", - "password reset", - "payment status", - "pay notification", - "quick reply", - "re: w-2", - "required", - "required: completed docusign", - "remittance", - "ringcentral", - "scanned image", - "secured files", - "secured pdf", - "security alert", - "new sign-in", - "new sign in", - "sign-in attempt", - "sign in attempt", - "staff review", - "suspicious activity", - "unrecognized login attempt", - "unusual signin", - "upgrade immediately", - "urgent", - "wants to share", - "w2", - "you have notifications pending", - "your account", - "your amazon order", - "your document settlement", - "your order with amazon", - "your password has been compromised", - ) - or ( - regex.icontains(subject.subject, 'account.has.been') - and not regex.icontains(subject.subject, 'account.has.been.*created') - ) - or ( - regex.icontains(sender.display_name, - "Admin", - "Administrator", - "Alert", - "Assistant", - "Authenticat(or|ion)", - "Billing", - "Benefits", - "Bonus", - "CEO", - "CFO", - "CIO", - "CTO", - "Chairman", - "Claim", - "Confirm", - "Cpanel Mail", - "Critical", - "Customer Service", - "Deal", - "Discount", - "Director", - "Exclusive", - "Executive", - "Fax", - "Free", - "Gift", - '\bHR\b', - "Helpdesk", - "Human Resources", - "Immediate", - "Important", - "Info", - "Information", - "Invoice", - '\bIT\b', - '\bLegal\b', - "Lottery", - "Management", - "Manager", - "Member Services", - "Notification", - "Offer", - "Official Communication", - "Operations", - "Order", - "Partner", - "Payment", - "Payroll", - "Postmaster", - "President", - "Premium", - "Prize", - "Receipt", - "Refund", - "Registrar", - "Required", - "Reward", - "Sales", - "Secretary", - "Security", - "Server", - "Service", - "Storage", - "Support", - "Sweepstakes", - "System", - "Tax", - "Tech Support", - "Update", - "Upgrade", - "Urgent", - "Validate", - "Verify", - "VIP", - "Webmaster", - "Winner", - ) - // add negation for common FPs in the sender 
display_name - and not strings.icontains(sender.display_name, "service bulletin") - and not strings.icontains(sender.display_name, "automotive service") - ) - ) - and ( - 4 of ( - any(ml.nlu_classifier(body.current_thread.text).intents, - .name == "cred_theft" and .confidence in ("medium", "high") - ), - any(ml.nlu_classifier(body.current_thread.text).entities, - .name == "request" - ), - // recipient email address base64 encoded in link - any(body.links, - any(recipients.to, - any(beta.scan_base64(..href_url.url, - ignore_padding=true, - format="url" - ), - strings.icontains(., ..email.email) - ) - ) - ), - ( - // freemail providers should never be sending this type of email - sender.email.domain.domain in $free_email_providers - - // if not freemail, it's suspicious if the sender's root domain - // doesn't match any links in the body - or all(body.links, - .href_url.domain.root_domain != sender.email.domain.root_domain - and ( - .href_url.domain.root_domain not in $org_domains - // ignore recipient email addresses in the body in relation to this check - or ( - .href_url.domain.root_domain in $org_domains - and any(recipients.to, - strings.icount(body.current_thread.text, .email.email) == strings.icount(body.current_thread.text, - .email.domain.domain - ) - ) - ) - ) - ) - - // bulk mailers should also never be sending this type of email - or all(filter(body.links, - .href_url.domain.domain not in ( - "aka.ms", - "mimecast.com", - "mimecastprotect.com", - "cisco.com" - ) - ), - .href_url.domain.root_domain in $bulk_mailer_url_root_domains - ) - ), - // in case it's embedded in an image attachment - // note: don't use message_screenshot() because it's not limited to current_thread - // and may FP - any(attachments, - .file_type in $file_types_images - and any(file.explode(.), - any(ml.nlu_classifier(.scan.ocr.raw).intents, - .name == "cred_theft" and .confidence == "high" - ) - ) - ), - strings.contains(body.current_thread.text, - "Your mailbox can no longer send 
or receive messages." - ), - any(body.links, - strings.icontains(.href_url.query_params, 'redirect') - or any(.href_url.rewrite.encoders, - strings.icontains(., "open_redirect") - ) - ), - // multiple entities displaying urgency - length(filter(ml.nlu_classifier(body.current_thread.text).entities, - .name == "urgency" - ) - ) >= 2 - // and any body links - and any(body.links, - // display text contains a request - any(ml.nlu_classifier(.display_text).entities, .name == "request") - ), - any(body.links, - // display text contains a request - ( - any(ml.nlu_classifier(.display_text).entities, .name == "request") - or regex.match(.display_text, '^[^a-z]+$') - ) - and ( - .href_url.domain.domain in $url_shorteners - or .href_url.domain.root_domain in $url_shorteners - or .href_url.domain.domain in $free_file_hosts - or ( - .href_url.domain.root_domain in ( - "mimecast.com", - "mimecastprotect.com" - ) - and any(.href_url.query_params_decoded['domain'], - strings.parse_url(strings.concat("https://", .)).domain.domain in $url_shorteners - or strings.parse_url(strings.concat("https://", .)).domain.root_domain in $url_shorteners - or strings.parse_url(strings.concat("https://", .)).domain.domain in $free_file_hosts - or strings.parse_url(strings.concat("https://", .)).domain.root_domain in $free_subdomain_hosts - ) - ) - ) - ), - // common greetings via email.local_part - any(recipients.to, - length(.email.local_part) > 2 - and - // use count to ensure the email address is not part of a disclaimer - strings.icount(body.current_thread.text, .email.local_part) > - // sum allows us to add more logic as needed - strings.icount(body.current_thread.text, - strings.concat('was sent to ', .email.email) - ) + strings.icount(body.current_thread.text, - strings.concat('intended for ', .email.email) - ) - ) - ) - or ( - ( - // recipient's email address is in the body - any(recipients.to, - // use count to ensure the email address is not part of a disclaimer - 
strings.icount(body.current_thread.text, .email.email) > - // sum allows us to add more logic as needed - sum([ - strings.icount(body.current_thread.text, - strings.concat('was sent to ', .email.email) - ), - strings.icount(body.current_thread.text, - strings.concat('intended for ', .email.email) - ) - ] - ) - ) - // suspicious display text - or ( - length(body.links) == 1 - and all(body.links, - strings.ilike(.display_text, "*click here*", "*password*") - ) - ) - ) - // link leads to a suspicious TLD or contains an IP address or contains multiple redirects - and any(body.links, - ( - ml.link_analysis(., mode="aggressive").effective_url.domain.tld in $suspicious_tlds - or length(distinct(map(ml.link_analysis(., mode="aggressive").redirect_history, - .domain.root_domain - ) - ) - ) >= 4 - or ( - any(body.ips, - any(body.links, strings.icontains(.href_url.url, ..ip)) - ) - ) - ) - ) - ) - ) - // exclude Google shared calendar messages - // Subject: " has shared a calendar with you" - and headers.return_path.domain.domain != "calendar-server.bounces.google.com" - // negate calendar invites - and not ( - 0 < length(attachments) < 3 - and all(attachments, .content_type in ("text/calendar", "application/ics")) - ) - // negate replies - and ( - ( - ( - length(headers.references) > 0 - or not any(headers.hops, - any(.fields, strings.ilike(.name, "In-Reply-To")) - ) - ) - and not ( - ( - strings.istarts_with(subject.subject, "RE:") - or strings.istarts_with(subject.subject, "R:") - or strings.istarts_with(subject.subject, "ODG:") - or strings.istarts_with(subject.subject, "答复:") - or strings.istarts_with(subject.subject, "AW:") - or strings.istarts_with(subject.subject, "TR:") - or strings.istarts_with(subject.subject, "FWD:") - or regex.icontains(subject.subject, - '^(\[[^\]]+\]\s?){0,3}(re|fwd?)\s?:' - ) - ) - ) - ) - or length(headers.references) == 0 - ) - // bounce-back and DMARC report negations - and not ( - strings.like(sender.email.local_part, - "*postmaster*", - 
"*mailer-daemon*", - "*administrator*" - ) - and ( - any(attachments, - .content_type in ( - "message/rfc822", - "message/delivery-status", - "text/calendar" - ) - ) - or ( - length(attachments) == 1 - and all(attachments, .content_type in ("application/gzip")) - and regex.icontains(subject.subject, - '(?:(Report\sDomain).*(Submitter).*(Report-ID))' - ) - ) - ) - ) - and ( - ( - profile.by_sender().prevalence != "common" - and not profile.by_sender_email().solicited - ) - or ( - profile.by_sender().any_messages_malicious_or_spam - and not profile.by_sender().any_false_positives - ) - ) - // negate highly trusted sender domains unless they fail DMARC authentication - and ( - ( - sender.email.domain.root_domain in $high_trust_sender_root_domains - and not headers.auth_summary.dmarc.pass - ) - or sender.email.domain.root_domain not in $high_trust_sender_root_domains - ) + false attack_types: - "Credential Phishing" tactics_and_techniques: From dc43877650644186a004d2e75611fa087465d1a6 Mon Sep 17 00:00:00 2001 From: Alex Herold Date: Tue, 28 Oct 2025 07:58:25 -0600 Subject: [PATCH 4/4] Sync .github directory from main branch - Applied .github directory from main to aiden.fp.credphish - Ensures workflows and GitHub configurations are up to date - Automated sync via script --- .github/workflows/clear-old-test-rules.yml | 6 +- .github/workflows/pr-auto-tag.yml | 39 +++++ .github/workflows/rule-validate.yml | 189 ++++++++++++++------- .github/workflows/update-test-rules.yml | 49 +++++- 4 files changed, 209 insertions(+), 74 deletions(-) create mode 100644 .github/workflows/pr-auto-tag.yml diff --git a/.github/workflows/clear-old-test-rules.yml b/.github/workflows/clear-old-test-rules.yml index 2758c19199d..3ee9ce96d45 100644 --- a/.github/workflows/clear-old-test-rules.yml +++ b/.github/workflows/clear-old-test-rules.yml @@ -55,10 +55,10 @@ jobs: echo "" >> message.txt cd destination - files=$(ls **/*.yml) || true + files=$(ls -- **/*.yml) || true for file in $files; do - 
file_pr_num=$(yq '.testing_pr' $file) + file_pr_num=$(yq '.testing_pr' "$file") in_open_pr=false IFS=',' read -ra PR_ARRAY <<< "$OPEN_PRS" @@ -70,7 +70,7 @@ jobs: echo "$file is in open PR: $in_open_pr. File PR num: $file_pr_num" if [[ "$in_open_pr" = "false" ]]; then - rm $file + rm "$file" echo "Removed $file_pr_num" >> ../message.txt fi done diff --git a/.github/workflows/pr-auto-tag.yml b/.github/workflows/pr-auto-tag.yml new file mode 100644 index 00000000000..6a4976e85b3 --- /dev/null +++ b/.github/workflows/pr-auto-tag.yml @@ -0,0 +1,39 @@ +name: Auto-tag External PRs + +on: + pull_request_target: + types: [opened, ready_for_review] + +jobs: + auto-tag: + runs-on: ubuntu-latest + permissions: + pull-requests: write + steps: + - name: Check if PR author is external + uses: actions/github-script@v7 + with: + script: | + const pr = context.payload.pull_request; + const username = pr.user.login; + const authorAssociation = pr.author_association; + + console.log(`PR author: ${username}`); + console.log(`Author association: ${authorAssociation}`); + + // MEMBER, OWNER, and COLLABORATOR are considered internal + const internalAssociations = ['MEMBER', 'OWNER', 'COLLABORATOR']; + const isInternal = internalAssociations.includes(authorAssociation); + + if (!isInternal) { + console.log('User is external, adding review-needed label'); + await github.rest.issues.addLabels({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: pr.number, + labels: ['review-needed'] + }); + console.log('Added review-needed label to external PR'); + } else { + console.log(`User is internal (${authorAssociation}), no label added`); + } \ No newline at end of file diff --git a/.github/workflows/rule-validate.yml b/.github/workflows/rule-validate.yml index a2569e29f61..428e1c62017 100644 --- a/.github/workflows/rule-validate.yml +++ b/.github/workflows/rule-validate.yml @@ -4,7 +4,9 @@ on: push: branches: [ "main", "test-rules" ] pull_request_target: - branches: [ "**" ] + 
branches: + - "main" + - 'ci-testing**' workflow_dispatch: {} issue_comment: types: [ created ] @@ -38,18 +40,23 @@ jobs: - name: Get Refs id: get_head_ref + env: + GITHUB_EVENT_PULL_REQUEST_HEAD_REPO_FULL_NAME: ${{ github.event.pull_request.head.repo.full_name }} + STEPS_COMMENT_BRANCH_OUTPUTS_HEAD_REF: ${{ steps.comment_branch.outputs.head_ref }} + STEPS_COMMENT_BRANCH_OUTPUTS_HEAD_OWNER: ${{ steps.comment_branch.outputs.head_owner }} + STEPS_COMMENT_BRANCH_OUTPUTS_HEAD_REPO: ${{ steps.comment_branch.outputs.head_repo }} run: | # Accurate for push events, merge queues, and workflow dispatch. - head_ref="${{ github.ref }}" - repo="${{ github.repository }}" + head_ref="${GITHUB_REF}" + repo="${GITHUB_REPOSITORY}" - if [[ "${{ github.event_name }}" == 'pull_request_target' ]]; then - head_ref="${{ github.head_ref }}" - repo="${{ github.event.pull_request.head.repo.full_name }}" - elif [[ "${{ github.event_name }}" == 'issue_comment' ]]; then + if [[ "${GITHUB_EVENT_NAME}" == 'pull_request_target' ]]; then + head_ref="${GITHUB_HEAD_REF}" + repo="${GITHUB_EVENT_PULL_REQUEST_HEAD_REPO_FULL_NAME}" + elif [[ "${GITHUB_EVENT_NAME}" == 'issue_comment' ]]; then # Rely on comment_branch to figure out the head and base - head_ref="${{ steps.comment_branch.outputs.head_ref }}" - repo="${{ steps.comment_branch.outputs.head_owner }}/${{ steps.comment_branch.outputs.head_repo }}" + head_ref="${STEPS_COMMENT_BRANCH_OUTPUTS_HEAD_REF}" + repo="${STEPS_COMMENT_BRANCH_OUTPUTS_HEAD_OWNER}/${STEPS_COMMENT_BRANCH_OUTPUTS_HEAD_REPO}" fi echo "##[set-output name=head_ref;]$head_ref" @@ -63,11 +70,13 @@ jobs: fetch-depth: 0 - name: Validate Branch vs. Trigerring SHA + env: + GITHUB_EVENT_PULL_REQUEST_HEAD_SHA: ${{ github.event.pull_request.head.sha }} run: | # If this is from a pull request validate that what we checked out is the same as the PR head. # If not we'll just fail -- the workflow will be cancelled momentarily. 
- if [[ "${{ github.event_name }}" == 'pull_request_target' ]]; then - if [[ "${{ github.event.pull_request.head.sha }}" != "$(git rev-parse HEAD)" ]]; then + if [[ "${GITHUB_EVENT_NAME}" == 'pull_request_target' ]]; then + if [[ "${GITHUB_EVENT_PULL_REQUEST_HEAD_SHA}" != "$(git rev-parse HEAD)" ]]; then echo "Workflow is out of date with branch, cancelling" exit 1 fi @@ -75,22 +84,27 @@ jobs: - name: Get Refs id: get_base_ref + env: + STEPS_COMMENT_BRANCH_OUTPUTS_BASE_REF: ${{ steps.comment_branch.outputs.base_ref }} run: | run_all="" base_ref="" - if [[ "${{ github.event_name }}" == 'pull_request_target' ]]; then - # Detect changes based on whatever we're merging into. - base_ref="${{ github.base_ref }}" - elif [[ "${{ github.event_name }}" == 'push' || "${{ github.event_name }}" == 'merge_group' ]]; then + if [[ "${GITHUB_EVENT_NAME}" == 'pull_request_target' ]]; then + # Ensure we have the latest base branch ref for accurate merge-base calculation + git fetch origin "${GITHUB_BASE_REF}:refs/remotes/origin/${GITHUB_BASE_REF}" + # Use the merge base to avoid including changes from target branch + # that happened after this PR branch was created. + base_ref=$(git merge-base HEAD "origin/${GITHUB_BASE_REF}") + elif [[ "${GITHUB_EVENT_NAME}" == 'push' || "${GITHUB_EVENT_NAME}" == 'merge_group' ]]; then # Detect changes based on the previous commit base_ref="$(git rev-parse HEAD^)" - elif [[ "${{ github.event_name }}" == 'workflow_dispatch' ]]; then + elif [[ "${GITHUB_EVENT_NAME}" == 'workflow_dispatch' ]]; then # Run on a target, so run for all rules. 
run_all="true" - elif [[ "${{ github.event_name }}" == 'issue_comment' ]]; then + elif [[ "${GITHUB_EVENT_NAME}" == 'issue_comment' ]]; then # Rely on comment_branch to figure out base - base_ref="${{ steps.comment_branch.outputs.base_ref }}" + base_ref="${STEPS_COMMENT_BRANCH_OUTPUTS_BASE_REF}" fi echo "##[set-output name=run_all;]$run_all" @@ -100,48 +114,87 @@ jobs: with: python-version: '3.10' + - name: Checkout script from Sublime fork main + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: sublime-security/sublime-rules + ref: main + path: sublime-rules-main + - name: Add Rule IDs as Needed & Check for Duplicates if: github.event_name != 'issue_comment' # Run before testing, just in case this could invalidate the rule itself run: | - pip install -r scripts/generate-rule-ids/requirements.txt - python scripts/generate-rule-ids/main.py + pip install -r sublime-rules-main/scripts/generate-rule-ids/requirements.txt + python sublime-rules-main/scripts/generate-rule-ids/main.py + + # Delete path to prevent interference with later steps (such as git add and commit) + rm -r sublime-rules-main - name: Validate Rules if: github.event_name != 'issue_comment' run: | - echo '{"rules_or_queries": [' > bulk_validate_request.json - - file_count=$(ls -1 {*-rules/*.yml,insights/**/*.yml} | wc -l) + BATCH_SIZE=100 counter=0 - - for f in *-rules/*.yml - do - counter=$((counter + 1)) - yq -o=json eval 'del(.type)' "$f" >> bulk_validate_request.json - if [[ $counter -ne $file_count ]]; then - echo "," >> bulk_validate_request.json + batch_num=0 + + # Collect all files + all_files=($(ls -1 *-rules/*.yml insights/**/*.yml)) + total_files=${#all_files[@]} + + echo "Total files to validate: $total_files" + + # Start first batch + echo '{"rules_or_queries": [' > bulk_validate_request.json + + for i in "${!all_files[@]}"; do + f="${all_files[$i]}" + + # Determine if this is a rule or insight + if [[ "$f" == insights/* ]]; then + yq -o=json 
eval 'del(.type) | .source = "length([\n\n" + .source + "\n]) >= 0"' "$f" >> bulk_validate_request.json + else + yq -o=json eval 'del(.type)' "$f" >> bulk_validate_request.json fi - done - for f in insights/**/*.yml - do counter=$((counter + 1)) - yq -o=json eval 'del(.type) | .source = "length([\n\n" + .source + "\n]) >= 0"' "$f" >> bulk_validate_request.json - - if [[ $counter -ne $file_count ]]; then + + # Check if we need to submit this batch + should_submit=false + if [[ $counter -eq $BATCH_SIZE ]]; then + should_submit=true + elif [[ $((i + 1)) -eq $total_files ]]; then + # Last file + should_submit=true + else + # Not submitting yet, add comma echo "," >> bulk_validate_request.json fi + + if [[ "$should_submit" == "true" ]]; then + # Close JSON and submit + echo "]}" >> bulk_validate_request.json + + batch_num=$((batch_num + 1)) + echo "Submitting batch $batch_num with $counter files..." + + http_code=$(curl -H "Content-Type: application/json" -X POST -d @bulk_validate_request.json -o response.txt -w "%{http_code}" --silent https://play.sublime.security/v1/rules/bulk_validate) + echo '' >> response.txt + cat response.txt + if [[ "$http_code" != "200" ]]; then + echo "Unexpected response $http_code for batch $batch_num" + exit 1 + fi + + # Reset for next batch if there are more files + if [[ $((i + 1)) -lt $total_files ]]; then + counter=0 + echo '{"rules_or_queries": [' > bulk_validate_request.json + fi + fi done - echo "]}" >> bulk_validate_request.json - http_code=$(curl -H "Content-Type: application/json" -X POST -d @bulk_validate_request.json -o response.txt -w "%{http_code}" --silent https://play.sublime.security/v1/rules/bulk_validate) - echo '' >> response.txt - cat response.txt - if [[ "$http_code" != "200" ]]; then - echo "Unexpected response $http_code" - exit 1 - fi + echo "All batches submitted successfully!" 
- name: Verify no .yaml files exist if: github.event_name != 'issue_comment' @@ -157,6 +210,8 @@ jobs: - name: Commit & Push Results, if needed if: github.event_name != 'issue_comment' id: final_basic_validation + env: + STEPS_GET_HEAD_REF_OUTPUTS_HEAD_REF: ${{ steps.get_head_ref.outputs.head_ref }} run: | rm response.txt rm bulk_validate_request.json @@ -168,12 +223,11 @@ jobs: git config user.name 'ID Generator' git config user.email 'hello@sublimesecurity.com' - git add **/*.yml + git add -- **/*.yml git commit -m "Auto add rule ID" # This will only work when running for a pull_request_target, but rather than filter we'll let this expose # any issues. - git push origin ${{ steps.get_head_ref.outputs.head_ref }} - + git push origin "${STEPS_GET_HEAD_REF_OUTPUTS_HEAD_REF}" - name: Get the head SHA id: get_head if: ${{ always() }} @@ -191,6 +245,7 @@ jobs: env: run_url: "${{ format('https://github.com/{0}/actions/runs/{1}', steps.get_head_ref.outputs.repo, github.run_id) }}" conclusion: "${{ steps.final_basic_validation.outcome == 'success' && 'success' || 'failure' }}" + HEAD_SHA: ${{ steps.get_head.outputs.HEAD }} with: debug: ${{ secrets.ACTIONS_STEP_DEBUG || false }} retries: 3 @@ -205,7 +260,7 @@ jobs: await github.rest.checks.create({ owner: context.repo.owner, repo: context.repo.repo, - head_sha: "${{ steps.get_head.outputs.HEAD }}", + head_sha: process.env.HEAD_SHA, name: "Rule Tests and ID Updated", status: "completed", conclusion: process.env.conclusion, @@ -247,12 +302,15 @@ jobs: - name: "Find updated rule IDs" id: find_ids + env: + STEPS_GET_BASE_REF_OUTPUTS_RUN_ALL: ${{ steps.get_base_ref.outputs.run_all }} + STEPS_CHANGED_FILES_OUTPUTS_DELETED_FILES: ${{ steps.changed-files.outputs.deleted_files }} run: | for file in detection-rules/*.yml; do - rule_id=$(yq '.id' $file) + rule_id=$(yq '.id' "$file") - if [[ "${{ steps.get_base_ref.outputs.run_all }}" == "true" ]]; then - altered_rule_ids=$(echo "$rule_id"" ""$altered_rule_ids") + if [[ 
"${STEPS_GET_BASE_REF_OUTPUTS_RUN_ALL}" == "true" ]]; then + altered_rule_ids="${rule_id} ${altered_rule_ids}" continue fi @@ -262,18 +320,18 @@ jobs: # We only need to care when rule source is changed. This will handle renames, tag changes, etc. if [[ "$new_source" != "$old_source" ]]; then echo "$file ($rule_id) has altered source" - altered_rule_ids=$(echo "$rule_id"" ""$altered_rule_ids") + altered_rule_ids="${rule_id} ${altered_rule_ids}" fi done - for file in ${{ steps.changed-files.outputs.deleted_files }}; do - rule_id=$(yq '.id' $file) + for file in ${STEPS_CHANGED_FILES_OUTPUTS_DELETED_FILES}; do + rule_id=$(yq '.id' "$file") echo "$file ($rule_id) was deleted" - altered_rule_ids=$(echo "$rule_id"" ""$altered_rule_ids") + altered_rule_ids="${rule_id} ${altered_rule_ids}" done echo "Altered Ruled IDs: [$altered_rule_ids]" - echo "##[set-output name=rule_ids;]$(echo $altered_rule_ids)" + echo "##[set-output name=rule_ids;]${altered_rule_ids}" # TODO: This doesn't solve for a modified rule_id. We could merge with any files known on 'main', but changing # a rule ID is a separate problem. 
@@ -297,6 +355,8 @@ jobs: uses: actions/github-script@v6 id: find_emls_to_skip if: steps.find_pr_number.outputs.result != '' + env: + ISSUE_NUMBER: ${{ steps.find_pr_number.outputs.result }} with: debug: ${{ secrets.ACTIONS_STEP_DEBUG || false }} result-encoding: string @@ -304,7 +364,7 @@ jobs: const opts = github.rest.issues.listComments.endpoint.merge({ owner: context.repo.owner, repo: context.repo.repo, - issue_number: "${{ steps.find_pr_number.outputs.result }}", + issue_number: process.env.ISSUE_NUMBER, }) const comments = await github.paginate(opts) @@ -376,10 +436,14 @@ jobs: only_rule_ids: '${{ steps.find_ids.outputs.rule_ids }}' skip_eml_ids: '${{ steps.find_emls_to_skip.outputs.result }}' run: | - body='{"branch":"'$branch'","repo":"'$repo'","token":"'$token'","sha":"'$sha'","only_rule_ids":"'$only_rule_ids'","skip_eml_ids":"'$skip_eml_ids'"}' - echo $body - - curl -X POST $trigger_url \ + body=$(cat <<__EOF__ + {"branch":"${branch}","repo":"${repo}","token":"${token}","sha":"${sha}","only_rule_ids":"${only_rule_ids}","skip_eml_ids":"${skip_eml_ids}"} + __EOF__ + ) + + echo "$body" + + curl -X POST "$trigger_url" \ -H 'Content-Type: application/json' \ -d "$body" @@ -402,6 +466,7 @@ jobs: id: create_check env: run_url: "${{ format('https://github.com/{0}/actions/runs/{1}', steps.get_head_ref.outputs.repo, github.run_id) }}" + HEAD_SHA: ${{ steps.get_head.outputs.HEAD }} with: debug: ${{ secrets.ACTIONS_STEP_DEBUG || false }} retries: 3 @@ -416,7 +481,7 @@ jobs: const response = await github.rest.checks.create({ owner: context.repo.owner, repo: context.repo.repo, - head_sha: "${{ steps.get_head.outputs.HEAD }}", + head_sha: process.env.HEAD_SHA, name: "MQL Mimic Tests", status: "completed", conclusion: "success", diff --git a/.github/workflows/update-test-rules.yml b/.github/workflows/update-test-rules.yml index ec2fa97f04e..e2c4861cb2e 100644 --- a/.github/workflows/update-test-rules.yml +++ b/.github/workflows/update-test-rules.yml @@ -53,8 +53,12 @@ 
jobs: ORG_NAME: 'sublime-security' INCLUDE_PRS_WITH_COMMENT: 'true' COMMENT_TRIGGER: '/update-test-rules' + # SKIP_FILES pattern and labels are managed within the script SKIP_FILES_WITH_TEXT: 'true' - SKIP_TEXT: 'ml.link_analysis' + # Skip PRs with too many rules + SKIP_BULK_PRS: 'true' + MAX_RULES_PER_PR: '10' + BULK_PR_LABEL: 'test-rules:excluded:bulk_rules' # Disable adding tags that aren't useful ADD_RULE_STATUS_TAG: 'false' ADD_PR_REFERENCE: 'false' @@ -63,8 +67,7 @@ jobs: CREATE_OPEN_PR_TAG: 'false' ADD_TEST_RULES_LABEL: 'true' IN_TEST_RULES_LABEL: 'in-test-rules' - ADD_SKIP_TEXT_LABEL: 'true' - SKIP_TEXT_LABEL: 'hunting-required' + AUTHOR_MEMBERSHIP_EXCLUSION_LABEL: 'test-rules:excluded:author_membership' run: python scripts/sync_detection_rules.py @@ -99,6 +102,7 @@ jobs: # Process each file individually for FILE in $FILES; do + echo "Processing $FILE" # Skip non-rule files if [[ ! "$FILE" =~ .*\.yml$ ]]; then continue @@ -110,30 +114,57 @@ jobs: PR_NUMBER=$(echo "$BASENAME" | grep -o "^[0-9]*") UPDATED_PRS+=("$PR_NUMBER") + echo "Processing $BASENAME from $PR_NUMBER for inclusion" + # Handle removed files if [[ ! -f "$FILE" ]]; then + echo "$BASENAME was deleted, commiting deletion."
+ DELETED_COUNT=$((DELETED_COUNT+1)) git add "$FILE" git commit -m "[PR #${PR_NUMBER}] Delete detection rule" - DELETED_COUNT=$((DELETED_COUNT+1)) continue fi - # Extract rule name - RULE_NAME=$(grep -m 1 "name:" "$FILE" | sed 's/name: //' | sed 's/^"//' | sed 's/"$//' | sed "s/^'//" | sed "s/'$//") + # Ensure that new files are tracked (otherwise they won't show up in diff) + git add -N "$FILE" + + # Check if only testing_sha field changed + DIFF_OUTPUT=$(git diff HEAD -- "$FILE") + + # Skip files with no changes at all + if [[ -z "$DIFF_OUTPUT" ]]; then + echo "\tSkipping $FILE: no changes" + continue + fi + + # Check if the diff only contains testing_sha changes + # Look for lines that are only additions/deletions of testing_sha with hash values + # Handle files that may not have trailing newlines + # Filter out hunk headers & file paths so we just have +- lines of changed content. + LINES_CHANGES_ONLY=$(echo "$DIFF_OUTPUT" | grep -E '^[+-]' | grep -v -E '^[+-]{3}' | grep -v '^\\ No newline at end of file') + NON_TESTING_SHA_CHANGES=$(echo "$LINES_CHANGES_ONLY" | grep -v -E '^[+-]testing_sha: [0-9a-f]+' || true) + if [[ -z "$NON_TESTING_SHA_CHANGES" ]]; then + echo "\tSkipping $FILE: only testing_sha field changed. Lines changed:" + echo "$LINES_CHANGES_ONLY" + continue + fi # Determine status directly from git GIT_STATUS=$(git status --porcelain "$FILE" | cut -c1-2 | tr -d ' ') if [[ "$GIT_STATUS" == "A" ]]; then - STATUS="added" ADDED_COUNT=$((ADDED_COUNT+1)) + STATUS="added" elif [[ "$GIT_STATUS" == "M" ]]; then - STATUS="modified" MODIFIED_COUNT=$((MODIFIED_COUNT+1)) + STATUS="modified" else - STATUS="changed" MODIFIED_COUNT=$((MODIFIED_COUNT+1)) + STATUS="changed" fi + # Extract rule name and commit + RULE_NAME=$(grep -m 1 "name:" "$FILE" | sed 's/name: //' | sed 's/^"//' | sed 's/"$//' | sed "s/^'//" | sed "s/'$//") + # Build commit message COMMIT_MSG="[PR #${PR_NUMBER}] ${STATUS} rule: ${RULE_NAME}"