Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions app/measurement/migrations/0057_monitor_do_daily_digest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.1.13 on 2023-02-22 23:06

from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated migration: adds Monitor.do_daily_digest, the opt-in
    # flag that switches a monitor from per-alert emails to one daily
    # digest email. Default False so existing monitors keep their
    # current alerting behavior.

    dependencies = [
        ('measurement', '0056_auto_20230201_0013'),
    ]

    operations = [
        migrations.AddField(
            model_name='monitor',
            name='do_daily_digest',
            field=models.BooleanField(default=False),
        ),
    ]
165 changes: 163 additions & 2 deletions app/measurement/models.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from django.db import models
from django.db.models import (Avg, Count, Max, Min, Sum, F, Value,
IntegerField, FloatField)
from django.db.models.functions import Abs
from django.conf import settings
from django.utils.translation import gettext_lazy as _
from django.core.exceptions import ValidationError
from django.core.mail import send_mail

from measurement.aggregates.percentile import Percentile
from .validators import validate_email_list
from nslc.models import Channel, Group

Expand Down Expand Up @@ -106,6 +108,12 @@ class Stat(models.TextChoices):
AVERAGE = 'avg', _('Avg')
MINIMUM = 'min', _('Min')
MAXIMUM = 'max', _('Max')
MINABS = 'minabs', _('MinAbs')
MAXABS = 'maxabs', _('MaxAbs')
MEDIAN = 'median', _('Median')
P90 = 'p90', _('P90')
P95 = 'p95', _('P95')

channel_group = models.ForeignKey(
Group,
on_delete=models.CASCADE,
Expand All @@ -128,6 +136,7 @@ class Stat(models.TextChoices):
default=Stat.SUM
)
name = models.CharField(max_length=255, default='')
do_daily_digest = models.BooleanField(default=False)

def calc_interval_seconds(self):
'''Return the number of seconds in the alarm interval'''
Expand Down Expand Up @@ -184,7 +193,12 @@ def agg_measurements(self, endtime=datetime.now(tz=pytz.UTC)):
sum=Sum('value'),
avg=Avg('value'),
max=Max('value'),
min=Min('value')
min=Min('value'),
minabs=Min(Abs('value')),
maxabs=Max(Abs('value')),
median=Percentile('value', percentile=0.5),
p90=Percentile('value', percentile=0.90),
p95=Percentile('value', percentile=0.95)
)

# Get default values if there are no measurements
Expand All @@ -193,7 +207,12 @@ def agg_measurements(self, endtime=datetime.now(tz=pytz.UTC)):
sum=Value(None, output_field=FloatField()),
avg=Value(None, output_field=FloatField()),
max=Value(None, output_field=FloatField()),
min=Value(None, output_field=FloatField())
min=Value(None, output_field=FloatField()),
maxabs=Value(None, output_field=FloatField()),
minabs=Value(None, output_field=FloatField()),
median=Value(None, output_field=FloatField()),
p90=Value(None, output_field=FloatField()),
p95=Value(None, output_field=FloatField())
)

# Combine querysets in case of zero measurements. Kludgy but
Expand Down Expand Up @@ -226,6 +245,73 @@ def evaluate_alarm(self, endtime=datetime.now(tz=pytz.UTC) - relativedelta(
in_alarm = trigger.in_alarm_state(breaching_channels, endtime)
trigger.evaluate_alert(in_alarm, breaching_channels, endtime)

# Set the digest to be evaluated at this time. Should it be a field?
digesttime = datetime.now(tz=pytz.UTC) - relativedelta(
hour=0, minute=0, second=0, microsecond=0)
endtimecheck = endtime - relativedelta(
minute=0, second=0, microsecond=0)
if self.do_daily_digest and endtimecheck == digesttime:
self.check_daily_digest(digesttime)

def check_daily_digest(self, digesttime=None):
    """Compose and email a daily digest of this monitor's trigger
    activity for the day preceding ``digesttime``.

    Args:
        digesttime: aware datetime the digest is evaluated at; the
            digest covers the day before it. Defaults to the current
            UTC time, resolved at call time.

    Sends nothing when no trigger that was in alert has any email
    recipients configured.
    """
    # Fix: the previous default (digesttime=datetime.now(...)) was
    # evaluated once at import time, freezing the digest date for the
    # lifetime of the process. Resolve the default per call instead.
    if digesttime is None:
        digesttime = datetime.now(tz=pytz.UTC)

    # Get the date string for yesterday
    yesterday_str = (
        digesttime - relativedelta(days=1)).strftime("%Y-%m-%d")

    triggers = self.triggers.all()
    # Each trigger result is a tuple:
    # (in/out of alarm, text description)
    trigger_results = [
        trigger.get_daily_trigger_digest(digesttime)
        for trigger in triggers
    ]

    n_in_alert = sum(1 for res in trigger_results if res[0])
    n_triggers = len(triggers)

    message = ''
    message += f'Daily digest for {yesterday_str} prepared at '
    message += f'{datetime.now(tz=pytz.UTC).strftime("%Y-%m-%dT%H:%M %Z")}'

    message += f'\n\nMonitor: {self.name}'
    message += f'\nChannel group: {self.channel_group}'
    message += f'\nMetric: {self.metric.name}'
    message += f'\nChecking {self.stat} '
    add_s = 's' if self.interval_count > 1 else ''
    message += f'over {self.interval_count}'
    message += f' {self.interval_type}{add_s}'

    message += f'\n\n{n_in_alert} of {n_triggers} '
    message += 'triggers were in alert over the last day'
    line_break = '________________________________________________________'
    for i, trigger_result in enumerate(trigger_results):
        message += f'\n\n{line_break}\n\n'
        message += f'TRIGGER {i} WAS {"" if trigger_result[0] else "NOT "}'
        message += f'in alert during {yesterday_str}'
        message += f'\n{trigger_result[1]}'

    message += '\n\n\n'
    # message += "Unsubscribe from this monitor's alert emails"

    # Collect recipients from triggers that were in alarm, de-duplicated
    # while preserving order (dict keys keep insertion order) so nobody
    # is listed twice on the same digest email.
    email_list = []
    for trigger, result in zip(triggers, trigger_results):
        if result[0]:
            email_list.extend(trigger.email_list)
    email_list = list(dict.fromkeys(email_list))

    if not email_list:
        # There is no one specified to send to
        return

    subject = f"SQUAC daily digest for '{self}'"
    subject += f", {yesterday_str}"
    send_mail(subject,
              message,
              settings.EMAIL_NO_REPLY,
              email_list,
              fail_silently=False,
              )

def __str__(self):
if not self.name:
return (f"{str(self.channel_group)}, "
Expand Down Expand Up @@ -455,6 +541,11 @@ def evaluate_alert(self,
elif removed:
send_new = self.alert_on_out_of_alarm

# Make sure to not send individual alerts if daily digest is on.
# They can still be created
if self.monitor.do_daily_digest:
send_new = False

if create_new:
alert = self.create_alert(in_alarm, breaching_channels, reftime)
if send_new:
Expand Down Expand Up @@ -489,6 +580,76 @@ def get_text_description(self):
desc += f' {self.monitor.interval_type}{add_s}'
return desc

def get_daily_trigger_digest(self, digesttime):
    """Summarize this trigger's alert activity for the day before
    ``digesttime``.

    Returns a 2-tuple ``(was_in_alert, text)``:
    - ``was_in_alert``: whether any alert fired during that day
    - ``text``: a brief trigger description, followed (when alerts
      exist) by a summary of in/out-of-alert times and the most recent
      breaching time per channel.
    """
    # Brief description of the trigger condition.
    # Could use self.get_text_description(), but need to make it
    # consistent for digest and "normal" alerts first.
    threshold = self.val1 if self.val2 is None else (self.val1, self.val2)
    parts = [
        f'Description: In alert if {self.monitor.stat} of',
        f' "{self.monitor.metric.name}" measurements was ',
        f'{self.value_operator} {threshold} ',
    ]
    op = self.num_channels_operator
    if op == self.NumChannelsOperator.ANY:
        parts.append('for ANY channel')
    elif op == self.NumChannelsOperator.ALL:
        parts.append('for ALL channels')
    else:
        plural = 's' if self.num_channels > 1 else ''
        parts.append(f'for {op}')
        parts.append(f' {self.num_channels} channel{plural}')
    desc = ''.join(parts)

    # Normalize to midnight so the window evaluated is the full day
    # before digesttime.
    day_end = digesttime - relativedelta(
        hour=0, minute=0, second=0, microsecond=0)
    day_alerts = self.alerts.filter(
        timestamp__gte=day_end - relativedelta(days=1),
        timestamp__lte=day_end).order_by('timestamp')

    # No alerts that day: description alone is the digest.
    if not day_alerts:
        return False, desc

    # Partition alert timestamps by in/out of alarm for the summary.
    in_times = []
    out_times = []
    for alert in day_alerts:
        bucket = in_times if alert.in_alarm else out_times
        bucket.append(alert.timestamp.strftime('%H:%M'))

    desc += "\n\nSummary of alerts:"
    desc += f"\nIn alert: {', '.join(in_times)}"
    desc += f"\nOut of alert: {', '.join(out_times)}"

    # Most recent breaching time per channel: later alerts overwrite
    # earlier entries since day_alerts is ordered by timestamp.
    last_breach = {}
    for alert in day_alerts:
        for channel in alert.breaching_channels:
            last_breach[channel['channel']] = alert.timestamp

    # List channels in sorted name order.
    for name in sorted(last_breach):
        stamp = last_breach[name].strftime('%Y-%m-%dT%H:%M %Z')
        desc += f"\n{name:16s} - last breaching time: "
        desc += f"{stamp}"

    return True, desc

def __str__(self):
return (f"Monitor: {str(self.monitor)}, "
f"Val1: {self.val1}, "
Expand Down
74 changes: 73 additions & 1 deletion app/measurement/tests/test_alarms_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from rest_framework import status

from datetime import datetime
from dateutil.relativedelta import relativedelta
import pytz
from squac.test_mixins import sample_user

Expand Down Expand Up @@ -297,6 +298,53 @@ def test_agg_measurements(self):
self.assertEqual(res['count'], expected[res['channel']]['count'])
self.assertEqual(res['sum'], expected[res['channel']]['sum'])

def test_agg_measurements_min_max_abs(self):
    """minabs/maxabs should apply abs() per value, then aggregate."""
    # Create fake data to test minabs, maxabs
    endtime = datetime(2020, 1, 2, 3, 0, 0, 0, tzinfo=pytz.UTC)
    vals = [-20, -1, 2, 5, 12]
    for val in vals:
        Measurement.objects.create(
            metric=self.metric,
            channel=self.chan1,
            value=val,
            starttime=endtime - relativedelta(hours=5),
            endtime=endtime - relativedelta(hours=4),
            user=self.user
        )

    q_list = self.monitor.agg_measurements(endtime=endtime)

    # This monitor had 2 channels, though only one has data
    self.assertEqual(2, len(q_list))
    # Index results by channel so a missing chan1 fails loudly. The
    # previous loop-with-if passed vacuously when chan1 was absent
    # from the results.
    by_channel = {q_dict['channel']: q_dict for q_dict in q_list}
    self.assertIn(self.chan1.id, by_channel)
    self.assertEqual(1, by_channel[self.chan1.id]['minabs'])
    self.assertEqual(20, by_channel[self.chan1.id]['maxabs'])

def test_agg_measurements_percentile(self):
    """median/p90/p95 aggregates over 0..100 land on 50/90/95."""
    # Create fake data to test median, p90, p95
    endtime = datetime(2020, 1, 2, 3, 0, 0, 0, tzinfo=pytz.UTC)

    for val in range(101):
        Measurement.objects.create(
            metric=self.metric,
            channel=self.chan1,
            value=val,
            starttime=endtime - relativedelta(hours=5),
            endtime=endtime - relativedelta(hours=4),
            user=self.user
        )

    q_list = self.monitor.agg_measurements(endtime=endtime)

    # This monitor had 2 channels, though only one has data
    self.assertEqual(2, len(q_list))
    # Index results by channel so a missing chan1 fails loudly. The
    # previous loop-with-if passed vacuously when chan1 was absent
    # from the results.
    by_channel = {q_dict['channel']: q_dict for q_dict in q_list}
    self.assertIn(self.chan1.id, by_channel)
    self.assertEqual(50, by_channel[self.chan1.id]['median'])
    self.assertEqual(90, by_channel[self.chan1.id]['p90'])
    self.assertEqual(95, by_channel[self.chan1.id]['p95'])

def test_agg_measurements_missing_channel(self):
monitor = Monitor.objects.get(pk=2)
endtime = datetime(2018, 2, 1, 4, 30, 0, 0, tzinfo=pytz.UTC)
Expand Down Expand Up @@ -522,7 +570,6 @@ def test_evaluate_alert_true_alert_in_alarm(self, send_alert):

alert = trigger.evaluate_alert(True)

self.assertEqual(81, alert.id)
self.assertTrue(alert.in_alarm)
self.assertFalse(send_alert.called)

Expand Down Expand Up @@ -973,3 +1020,28 @@ def test_last_n_monitor(self):
# Compare results
for q_item in q_list:
self.assertEqual(q_item['sum'], chan_vals[q_item['channel']])

def test_check_daily_digest_no_alerts(self):
    """A digest run with no alerts must not send any email."""
    self.monitor.check_daily_digest()

    # No alerts -> no digest email in the outbox
    self.assertEqual(0, len(mail.outbox))

def test_check_daily_digest_with_alerts(self):
    """A day containing alerts produces exactly one digest email
    addressed to the in-alert trigger's recipients."""
    # Create a couple alerts for this monitor
    reftime = datetime(2020, 1, 2, 3, 0, 0, 0, tzinfo=pytz.UTC)

    self.trigger.create_alert(True, [], timestamp=reftime + relativedelta(
        hours=1))
    self.trigger.create_alert(False, [], timestamp=reftime + relativedelta(
        hours=4))
    self.trigger.create_alert(True, [], timestamp=reftime + relativedelta(
        hours=6))

    self.monitor.check_daily_digest(digesttime=reftime + relativedelta(
        days=1))

    # Exactly one digest email should have been sent
    self.assertEqual(1, len(mail.outbox))
    # Hoist recipients() out of the loop; assertIn gives a useful
    # failure message (shows the missing email and the recipient list)
    # where assertTrue(x in y) reports only "False is not true".
    recipients = mail.outbox[0].recipients()
    for email in self.trigger.email_list:
        self.assertIn(email, recipients)
6 changes: 3 additions & 3 deletions app/measurement/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from squac.filters import CharInFilter, NumberInFilter
from measurement.aggregates.percentile import Percentile
from django.db.models import Avg, StdDev, Min, Max, Sum, Count, FloatField
from django.db.models.functions import (Coalesce, Abs, Least, Greatest)
from django.db.models.functions import Coalesce, Abs
from squac.mixins import (SetUserMixin, DefaultPermissionsMixin,
OverrideParamsMixin, OverrideReadParamsMixin,
AdminOrOwnerPermissionMixin)
Expand Down Expand Up @@ -294,8 +294,8 @@ def list(self, request):
min=Min('value'),
max=Max('value'),
sum=Sum('value'),
minabs=Least(Abs(Min('value')), Abs(Max('value'))),
maxabs=Greatest(Abs(Min('value')), Abs(Max('value'))),
minabs=Min(Abs('value')),
maxabs=Max(Abs('value')),
stdev=Coalesce(StdDev('value', sample=True), 0,
output_field=FloatField()),
p05=Percentile('value', percentile=0.05),
Expand Down