-
Notifications
You must be signed in to change notification settings - Fork 245
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Expand Oban usage for logging, error reporting, and periodic jobs #378
Changes from all commits
b4579c3
3a5e17b
a642c3f
9c615fa
3a5c58a
9b57862
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,5 @@ config :changelog, Changelog.Repo, | |
username: System.get_env("DB_USER", "postgres") | ||
|
||
config :changelog, Oban, | ||
crontab: false, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This hasn't been necessary for a while now. The top-level |
||
queues: false, | ||
plugins: false |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,16 +24,19 @@ defmodule Changelog.Application do | |
global_ttl: :timer.minutes(5), | ||
touch_on_read: false | ||
), | ||
Changelog.Scheduler, | ||
Changelog.EpisodeTracker, | ||
Changelog.Metacasts.Filterer.Cache, | ||
{Oban, oban_config()} | ||
] | ||
|
||
# See https://hexdocs.pm/elixir/Supervisor.html | ||
# for other strategies and supported options | ||
opts = [strategy: :one_for_one, name: Changelog.Supervisor] | ||
Supervisor.start_link(children, opts) | ||
# Only attach the telemetry logger when we aren't in an IEx shell | ||
unless Code.ensure_loaded?(IEx) && IEx.started?() do | ||
Oban.Telemetry.attach_default_logger(:info) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This gets start/stop/error type span logging for all jobs. Rather than sprinkling custom logging into the workers you can debug and get timing automatically. |
||
|
||
Changelog.ObanReporter.attach() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This gets native sentry errors when a job encounters an error. Without this, the errors are silent and only visible in the |
||
end | ||
|
||
Supervisor.start_link(children, strategy: :one_for_one, name: Changelog.Supervisor) | ||
end | ||
|
||
defp oban_config do | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
defmodule Changelog.ObanReporter do | ||
@moduledoc false | ||
|
||
def attach do | ||
:telemetry.attach( | ||
"oban-errors", | ||
[:oban, :job, :exception], | ||
&handle_event/4, | ||
[] | ||
) | ||
end | ||
|
||
def handle_event([:oban, :job, _], measure, meta, _) do | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The bulk of this was pulled directly from the Oban README. |
||
extra = | ||
meta.job | ||
|> Map.take([:id, :args, :meta, :queue, :worker, :attempt, :max_attempts]) | ||
|> Map.merge(measure) | ||
|
||
Sentry.capture_exception(meta.error, stacktrace: meta.stacktrace, extra: extra) | ||
end | ||
|
||
def handle_event(_event, _measure, _meta, _opts), do: :ok | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,8 +7,6 @@ defmodule Changelog.ObanWorkers.CommentNotifier do | |
|
||
alias Changelog.{NewsItemComment, Notifier, Repo} | ||
|
||
@five_mins 60 * 5 | ||
|
||
@impl Oban.Worker | ||
def perform(%Oban.Job{args: %{"comment_id" => comment_id}}) do | ||
comment = Repo.get(NewsItemComment, comment_id) | ||
|
@@ -22,7 +20,7 @@ defmodule Changelog.ObanWorkers.CommentNotifier do | |
""" | ||
def schedule_notification(%NewsItemComment{id: id}) do | ||
%{comment_id: id} | ||
|> __MODULE__.new(schedule_in: @five_mins) | ||
|> new(schedule_in: {5, :minutes}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here I've switched to the brand new time unit syntax. |
||
|> Oban.insert() | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
defmodule Changelog.ObanWorkers.NewsPublisher do | ||
use Oban.Worker, queue: :scheduled | ||
|
||
alias Changelog.NewsQueue | ||
|
||
@impl Oban.Worker | ||
def perform(_job) do | ||
{:ok, NewsQueue.publish()} | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
defmodule Changelog.ObanWorkers.SlackImporter do | ||
use Oban.Worker, queue: :scheduled | ||
|
||
alias Changelog.{Person, Repo} | ||
alias Changelog.Slack.Client | ||
|
||
import Ecto.Query | ||
|
||
@impl Oban.Worker | ||
def perform(_job) do | ||
%{"members" => members} = Client.list() | ||
|
||
for %{"id" => id, "profile" => profile} <- members do | ||
email = Map.get(profile, "email", "") | ||
|
||
import_member_id(id, email) | ||
end | ||
|
||
:ok | ||
end | ||
|
||
def import_member_id(id, email) do | ||
Person | ||
|> where([p], p.email == ^email) | ||
|> where([p], is_nil(p.slack_id) or p.slack_id == "pending") | ||
|> Repo.update_all(set: [slack_id: id]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The use of |
||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,59 +1,57 @@ | ||
defmodule Changelog.Stats do | ||
import Ecto | ||
import Ecto.Changeset | ||
defmodule Changelog.ObanWorkers.StatsProcessor do | ||
use Oban.Worker, queue: :scheduled, unique: [period: 600] | ||
|
||
import Ecto.Query, only: [select: 3] | ||
|
||
alias Changelog.{Cache, Podcast, Repo, Episode, EpisodeStat} | ||
alias Changelog.Stats.{Analyzer, Parser, S3} | ||
alias Ecto.Changeset | ||
|
||
require Logger | ||
|
||
def process do | ||
end_date = Timex.today() | ||
start_date = Timex.shift(end_date, days: -2) | ||
@impl Oban.Worker | ||
def perform(%Job{args: %{"date" => date, "podcast_id" => podcast_id}}) do | ||
date = Date.from_iso8601!(date) | ||
podcast = Repo.get!(Podcast, podcast_id) | ||
|
||
for time <- Timex.Interval.new(from: start_date, until: end_date) do | ||
time |> Timex.to_date() |> process() | ||
end | ||
end | ||
|
||
def process(date) do | ||
Logger.info("Stats: Start processing for #{date}") | ||
podcasts = Repo.all(Podcast.public()) | ||
process(date, podcasts) | ||
Cache.delete_prefix("stats-") | ||
Logger.info("Stats: Finished processing for #{date}") | ||
end | ||
processed = | ||
date | ||
|> S3.get_logs(podcast.slug) | ||
|> Parser.parse() | ||
|> Enum.group_by(& &1.episode) | ||
|> Enum.map(fn {slug, entries} -> process_episode(date, podcast, slug, entries) end) | ||
|
||
def process(date, podcast) when not is_list(podcast), do: process(date, [podcast]) | ||
Podcast.update_stat_counts(podcast) | ||
|
||
def process(date, podcasts) do | ||
podcasts | ||
|> Enum.map(&Task.async(fn -> process_podcast(date, &1) end)) | ||
# 10 minutes | ||
|> Enum.flat_map(&Task.await(&1, 600_000)) | ||
{:ok, processed} | ||
end | ||
|
||
defp process_podcast(date, podcast) do | ||
Logger.info("Stats: Processing #{podcast.name}") | ||
def perform(_job) do | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This clause is the "fall-through" that's triggered by cron. In this case, we don't process anything, we only generate one job per date/podcast pair. Those jobs then run independently and in isolation. |
||
today = Date.utc_today() | ||
range = Date.range(Date.add(today, -2), Date.add(today, -1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Newer versions of Elixir have native Date range iteration, which allows us to skip the Timex interval with date conversion step. Fun fact, the use of |
||
|
||
processed = | ||
S3.get_logs(date, podcast.slug) | ||
|> Parser.parse() | ||
|> Enum.group_by(& &1.episode) | ||
|> Enum.map(fn {slug, entries} -> | ||
process_episode(date, podcast, slug, entries) | ||
end) | ||
podcast_ids = | ||
Podcast.public() | ||
|> select([p], p.id) | ||
|> Repo.all() | ||
|
||
Podcast.update_stat_counts(podcast) | ||
Logger.info("Stats: Finished Processing #{podcast.name}") | ||
jobs = | ||
for(date <- range, pid <- podcast_ids, do: %{date: date, podcast_id: pid}) | ||
|> Enum.map(&new/1) | ||
|> Oban.insert_all() | ||
|
||
Cache.delete_prefix("stats-") | ||
|
||
processed | ||
{:ok, jobs} | ||
end | ||
|
||
@impl Oban.Worker | ||
def timeout(_job), do: 600_000 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
|
||
defp process_episode(date, podcast, slug, entries) do | ||
if episode = Repo.get_by(assoc(podcast, :episodes), slug: slug) do | ||
if episode = Repo.get_by(Ecto.assoc(podcast, :episodes), slug: slug) do | ||
stat = | ||
case Repo.get_by(assoc(episode, :episode_stats), date: date) do | ||
case Repo.get_by(Ecto.assoc(episode, :episode_stats), date: date) do | ||
nil -> | ||
%EpisodeStat{ | ||
episode_id: episode.id, | ||
|
@@ -67,7 +65,7 @@ defmodule Changelog.Stats do | |
end | ||
|
||
stat = | ||
change(stat, %{ | ||
Changeset.change(stat, %{ | ||
total_bytes: Analyzer.bytes(entries), | ||
downloads: Analyzer.downloads(entries, stat.episode_bytes), | ||
uniques: Analyzer.uniques_count(entries), | ||
|
This file was deleted.
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,6 @@ defmodule Mix.Tasks.Changelog.Slack do | |
|
||
def run(_) do | ||
Mix.Task.run("app.start") | ||
Changelog.Slack.Tasks.import_member_ids() | ||
Changelog.ObanWorkers.SlackImporter.perform(%Oban.Job{}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Synchronous invocation using an empty job struct. |
||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
defmodule Mix.Tasks.Changelog.Stats.Process do | ||
use Mix.Task | ||
|
||
alias Changelog.{Podcast, Repo} | ||
alias Changelog.ObanWorkers.StatsProcessor | ||
|
||
@shortdoc "Processes stats for given date, or all missing dates" | ||
|
||
def run(args) when is_nil(args), do: run([]) | ||
|
@@ -9,8 +12,22 @@ defmodule Mix.Tasks.Changelog.Stats.Process do | |
Mix.Task.run("app.start") | ||
|
||
case Timex.parse(List.first(args), "{YYYY}-{0M}-{D}") do | ||
{:ok, time} -> Changelog.Stats.process(Timex.to_date(time)) | ||
{:error, _message} -> Changelog.Stats.process() | ||
{:ok, time} -> | ||
date = Timex.to_date(time) | ||
|
||
Podcast.public() | ||
|> Repo.all() | ||
|> Enum.map(&StatsProcessor.new(%{date: date, podcast_id: &1.id})) | ||
|> Oban.insert_all() | ||
|
||
{:error, _message} -> | ||
%{} | ||
|> StatsProcessor.new() | ||
|> Oban.insert!() | ||
end | ||
|
||
results = Oban.drain_queue(queue: :scheduled, with_recursion: true, with_safety: false) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
|
||
Mix.shell().info("Stats processed for #{results.success - 1} dates/podcasts") | ||
end | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typically these would go in the primary config, but to mimic the "prod only tasks" that were already set up I've only defined them in `prod.exs`.