Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 1 addition & 21 deletions petprep/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,9 @@ def main():
setup_exceptionhook()
config.nipype.plugin = 'Linear'

sentry_sdk = None
if not config.execution.notrack and not config.execution.debug:
import sentry_sdk
from ..utils.telemetry import setup_migas

from ..utils.telemetry import sentry_setup, setup_migas

sentry_setup()
setup_migas(init_ping=True)

# CRITICAL Save the config to a file. This is necessary because the execution graph
Expand Down Expand Up @@ -124,14 +120,6 @@ def main():
# Clean up master process before running workflow, which may create forks
gc.collect()

# Sentry tracking
if sentry_sdk is not None:
with sentry_sdk.configure_scope() as scope:
scope.set_tag('run_uuid', config.execution.run_uuid)
scope.set_tag('npart', len(config.execution.participant_label))
sentry_sdk.add_breadcrumb(message='PETPrep started', level='info')
sentry_sdk.capture_message('PETPrep started', level='info')

config.loggers.workflow.log(
15,
'\n'.join(['PETPrep config:'] + [f'\t\t{s}' for s in config.dumps().splitlines()]),
Expand All @@ -152,16 +140,10 @@ def main():
for crashfile in crashfolder.glob('crash*.*'):
process_crashfile(crashfile)

if sentry_sdk is not None and 'Workflow did not execute cleanly' not in str(e):
sentry_sdk.capture_exception(e)
config.loggers.workflow.critical('PETPrep failed: %s', e)
raise
else:
config.loggers.workflow.log(25, 'PETPrep finished successfully!')
if sentry_sdk is not None:
success_message = 'PETPrep finished without errors'
sentry_sdk.add_breadcrumb(message=success_message, level='info')
sentry_sdk.capture_message(success_message, level='info')

# Bother users with the boilerplate only iff the workflow went okay.
boiler_file = config.execution.petprep_dir / 'logs' / 'CITATION.md'
Expand Down Expand Up @@ -231,7 +213,5 @@ def main():
f': {", ".join(failed_reports)}.'
)
config.loggers.cli.error(msg)
if sentry_sdk is not None:
sentry_sdk.capture_message(msg, level='error')

sys.exit(int((errno + len(failed_reports)) > 0))
157 changes: 0 additions & 157 deletions petprep/utils/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,169 +22,12 @@
#
"""Stripped out routines for telemetry"""

import os
import re

from nibabel.optpkg import optional_package
from niworkflows.utils.misc import read_crashfile

from .. import __version__, config

# Optional telemetry backends, resolved through nibabel's ``optional_package``.
# NOTE(review): element [0] is presumably the imported module, or a stand-in
# object when the package is not installed — confirm against nibabel's docs.
sentry_sdk = optional_package('sentry_sdk')[0]
migas = optional_package('migas')[0]

# Default chunk length (in characters) used by ``_chunks`` when a long
# crashfile value is split into several numbered Sentry extras.
CHUNK_SIZE = 16384
# Group common events with pre specified fingerprints
# Each key is a fingerprint name; each value lists the text snippets that
# ``process_crashfile`` searches for in the crash traceback. The first key
# (in insertion order) with a matching snippet becomes the fingerprint.
KNOWN_ERRORS = {
'permission-denied': ['PermissionError: [Errno 13] Permission denied'],
'memory-error': [
'MemoryError',
'Cannot allocate memory',
'Return code: 134',
],
'reconall-already-running': ['ERROR: it appears that recon-all is already running'],
'no-disk-space': ['[Errno 28] No space left on device', '[Errno 122] Disk quota exceeded'],
'segfault': [
'Segmentation Fault',
'Segfault',
'Return code: 139',
],
'potential-race-condition': [
'[Errno 39] Directory not empty',
'_unfinished.json',
],
'keyboard-interrupt': [
'KeyboardInterrupt',
],
}


def sentry_setup():
    """Initialize the Sentry SDK and tag the scope with the flattened config.

    The release is ``config.environment.version`` (``'dev'`` when that is
    empty). The Sentry environment is ``'dev'`` when the ``PETPREP_DEV``
    environment variable holds a truthy value (``1``, ``on``, ``yes``, ``y``
    or ``true``, case-insensitive) or when the release string contains a
    ``+``; otherwise it is ``'prod'``.
    """
    release = config.environment.version or 'dev'
    # ``.lower()`` must be *called*: the bound method object itself is never a
    # member of the tuple, which silently disabled the PETPREP_DEV switch.
    dev_requested = os.getenv('PETPREP_DEV', '').lower() in ('1', 'on', 'yes', 'y', 'true')
    environment = 'dev' if (dev_requested or '+' in release) else 'prod'

    sentry_sdk.init(
        'https://d5a16b0c38d84d1584dfc93b9fb1ade6@sentry.io/1137693',
        release=release,
        environment=environment,
        before_send=before_send,
    )
    # Attach every flattened configuration entry as a tag on the scope.
    with sentry_sdk.configure_scope() as scope:
        for k, v in config.get(flat=True).items():
            scope.set_tag(k, v)


def process_crashfile(crashfile):
    """Parse the contents of a crashfile and submit it to Sentry as a fatal message.

    Parameters
    ----------
    crashfile
        Location of the crashfile; it is converted with ``str`` and handed to
        ``read_crashfile``.

    Notes
    -----
    The ``node`` and ``traceback`` entries are popped from the crash record,
    ``inputs`` is attached as an extra when present and non-empty, and every
    remaining entry is attached as an extra (split into numbered chunks when
    its string form yields more than one chunk). The event fingerprint is the
    first matching ``KNOWN_ERRORS`` key, or else a sanitized copy of the
    message with file paths and number-bearing words removed.
    """
    crash_info = read_crashfile(str(crashfile))
    with sentry_sdk.push_scope() as scope:
        scope.level = 'fatal'

        # Extract node name (last component of the dotted node path)
        node_name = crash_info.pop('node').split('.')[-1]
        scope.set_tag('node_name', node_name)

        # Massage the traceback, extract the gist
        traceback = crash_info.pop('traceback')
        tb_lines = traceback.splitlines()
        # last line is probably most informative summary; an empty traceback
        # yields an empty gist instead of raising IndexError
        gist = tb_lines[-1] if tb_lines else ''
        # Skip the first line and the whitespace-led lines that follow it;
        # the exception text starts at the first line that is not indented.
        exception_text_start = 1
        for line in tb_lines[1:]:
            # Blank lines are skipped rather than indexed (``''[0]`` raises).
            if line and not line[0].isspace():
                break
            exception_text_start += 1

        exception_text = '\n'.join(tb_lines[exception_text_start:])

        # Extract inputs, if present
        inputs = crash_info.pop('inputs', None)
        if inputs:
            scope.set_extra('inputs', dict(inputs))

        # Extract any other possible metadata in the crash file
        for k, v in crash_info.items():
            strv = _chunks(str(v))
            if len(strv) == 1:
                scope.set_extra(k, strv[0])
            else:
                for i, chunk in enumerate(strv):
                    scope.set_extra(f'{k}_{i:02d}', chunk)

        # First known error (in KNOWN_ERRORS order) whose snippet occurs in
        # the traceback; empty string when nothing matches.
        fingerprint = next(
            (
                known
                for known, error_snippets in KNOWN_ERRORS.items()
                if any(snippet in traceback for snippet in error_snippets)
            ),
            '',
        )
        issue_title = fingerprint or f'{node_name}: {gist}'

        # Only the tail of the exception text is sent.
        message = issue_title + '\n\n'
        message += exception_text[-8192:]
        if fingerprint:
            sentry_sdk.add_breadcrumb(message=fingerprint, level='fatal')
        else:
            # remove file paths
            fingerprint = re.sub(r'(/[^/ ]*)+/?', '', message)
            # remove words containing numbers
            fingerprint = re.sub(r'([a-zA-Z]*[0-9]+[a-zA-Z]*)+', '', fingerprint)
            # adding the return code if it exists
            fingerprint += next(
                (line for line in message.splitlines() if line.startswith('Return code')),
                '',
            )

        scope.fingerprint = [fingerprint]
        sentry_sdk.capture_message(message, 'fatal')


def before_send(event, hints):
    """Drop noisy crashed-node log events and propagate known fingerprints.

    Returns ``None`` when the event's log message is one of the crashed-node
    notices; otherwise returns ``event`` itself. When ``event['breadcrumbs']``
    is a list, the first breadcrumb whose message is one of the propagated
    fingerprints is written to ``event['fingerprint']``.
    """
    if 'logentry' in event:
        logentry = event['logentry']
        if 'message' in logentry:
            text = logentry['message']
            # Either prefix marks a log line about a crashed node.
            if text.startswith(('could not run node:', 'Saving crash info to ')):
                return None
            if re.match('Node .+ failed to run on host .+', text) is not None:
                return None

    crumbs = event['breadcrumbs'] if 'breadcrumbs' in event else None
    if isinstance(crumbs, list):
        propagated = (
            'no-disk-space',
            'memory-error',
            'permission-denied',
            'keyboard-interrupt',
        )
        for crumb in crumbs:
            label = crumb.get('message', 'empty-msg')
            if label in propagated:
                # Only the first matching breadcrumb decides the fingerprint.
                event['fingerprint'] = [label]
                break

    return event


def _chunks(string, length=CHUNK_SIZE):
    """
    Cut *string* into consecutive pieces of at most *length* characters.

    The pieces are returned as a list, in order; the last one may be shorter.

    >>> _chunks('some longer string.', length=3)
    ['som', 'e l', 'ong', 'er ', 'str', 'ing', '.']

    """
    starts = range(0, len(string), length)
    return [string[begin : begin + length] for begin in starts]


def setup_migas(init_ping: bool = True, exit_ping: bool = True) -> None:
"""
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ container = [
]
telemetry = [
"migas >= 0.4.0",
"sentry-sdk >= 1.3",
]
test = [
"coverage[toml] >= 5.2.1",
Expand Down
12 changes: 1 addition & 11 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ certifi==2025.1.31
# httpcore
# httpx
# requests
# sentry-sdk
cffi==1.17.1
# via cryptography
chardet==5.2.0
Expand All @@ -65,9 +64,7 @@ contourpy==1.3.1
# bokeh
# matplotlib
cryptography==44.0.2
# via
# jwcrypto
# secretstorage
# via jwcrypto
cycler==0.12.1
# via matplotlib
datalad==1.1.5
Expand Down Expand Up @@ -99,8 +96,6 @@ frozendict==2.4.6
# via pybids
fsspec==2025.2.0
# via universal-pathlib
greenlet==3.1.1
# via sqlalchemy
h11==0.14.0
# via httpcore
h5py==3.13.0
Expand Down Expand Up @@ -138,10 +133,6 @@ jaraco-context==6.0.1
# keyrings-alt
jaraco-functools==4.1.0
# via keyring
jeepney==0.9.0
# via
# keyring
# secretstorage
jinja2==3.1.6
# via
# bokeh
Expand Down Expand Up @@ -508,7 +499,6 @@ urllib3==2.3.0
# via
# botocore
# requests
# sentry-sdk
wcwidth==0.2.13
# via prompt-toolkit
wrapt==1.17.2
Expand Down
Loading