Skip to content

Commit

Permalink
Revert to Cowboy as the webserver
Browse files Browse the repository at this point in the history
While using Bandit, it was discovered that a herd of _disconnects_
could cause catastrophic failures leading to node crashes (in our
testing, these were decently sized 4-core, 8GB instances with 5-6k
devices connected). When testing again with Cowboy, the problem
was resolved.

This change reverts to use cowboy to allow progression of other
fixes and features while Bandit/Thousand Island is investigated to
find the root cause
  • Loading branch information
jjcarstens committed Feb 11, 2024
1 parent ed1e23f commit 6daaf0e
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 169 deletions.
2 changes: 0 additions & 2 deletions config/config.exs
Expand Up @@ -24,15 +24,13 @@ config :nerves_hub,
# NervesHub Device
#
config :nerves_hub, NervesHubWeb.DeviceEndpoint,
adapter: Bandit.PhoenixAdapter,
render_errors: [view: NervesHubWeb.ErrorView, accepts: ~w(html json)],
pubsub_server: NervesHub.PubSub

##
# NervesHub Web
#
config :nerves_hub, NervesHubWeb.Endpoint,
adapter: Bandit.PhoenixAdapter,
secret_key_base: "ZH9GG2S5CwIMWXBg92wUuoyKFrjgqaAybHLTLuUk1xZO0HeidcJbnMBSTHDcyhSn",
live_view: [
signing_salt: "Kct3W8U7uQ6KAczYjzNbiYS6A8Pbtk3f"
Expand Down
37 changes: 16 additions & 21 deletions config/dev.exs
Expand Up @@ -43,27 +43,22 @@ config :nerves_hub, NervesHubWeb.DeviceEndpoint,
ip: {0, 0, 0, 0},
port: 4001,
otp_app: :nerves_hub,
thousand_island_options: [
transport_module: NervesHub.DeviceSSLTransport,
transport_options: [
# Enable client SSL
# Older versions of OTP 25 may break using using devices
# that support TLS 1.3 or 1.2 negotiation. To mitigate that
# potential error, we enforce TLS 1.2. If you're using OTP >= 25.1
# on all devices, then it is safe to allow TLS 1.3 by removing
# the versions constraint and setting `certificate_authorities: false`
# See https://github.com/erlang/otp/issues/6492#issuecomment-1323874205
#
# certificate_authorities: false,
versions: [:"tlsv1.2"],
verify: :verify_peer,
verify_fun: {&NervesHub.SSL.verify_fun/3, nil},
fail_if_no_peer_cert: true,
keyfile: Path.join(ssl_dir, "device.nerves-hub.org-key.pem"),
certfile: Path.join(ssl_dir, "device.nerves-hub.org.pem"),
cacertfile: Path.join(ssl_dir, "ca.pem")
]
]
# Enable client SSL
# Older versions of OTP 25 may break using using devices
# that support TLS 1.3 or 1.2 negotiation. To mitigate that
# potential error, we enforce TLS 1.2. If you're using OTP >= 25.1
# on all devices, then it is safe to allow TLS 1.3 by removing
# the versions constraint and setting `certificate_authorities: false`
# See https://github.com/erlang/otp/issues/6492#issuecomment-1323874205
#
# certificate_authorities: false,
versions: [:"tlsv1.2"],
verify: :verify_peer,
verify_fun: {&NervesHub.SSL.verify_fun/3, nil},
fail_if_no_peer_cert: true,
keyfile: Path.join(ssl_dir, "device.nerves-hub.org-key.pem"),
certfile: Path.join(ssl_dir, "device.nerves-hub.org.pem"),
cacertfile: Path.join(ssl_dir, "ca.pem")
]

##
Expand Down
41 changes: 18 additions & 23 deletions config/runtime.exs
Expand Up @@ -105,29 +105,24 @@ if config_env() == :prod do
https: [
port: https_port,
otp_app: :nerves_hub,
thousand_island_options: [
transport_module: NervesHub.DeviceSSLTransport,
transport_options: [
# Enable client SSL
# Older versions of OTP 25 may break using using devices
# that support TLS 1.3 or 1.2 negotiation. To mitigate that
# potential error, we enforce TLS 1.2. If you're using OTP >= 25.1
# on all devices, then it is safe to allow TLS 1.3 by removing
# the versions constraint and setting `certificate_authorities: false`
# since we don't expect devices to send full chains to the server
# See https://github.com/erlang/otp/issues/6492#issuecomment-1323874205
#
# certificate_authorities: false,
versions: [:"tlsv1.2"],
verify: :verify_peer,
verify_fun: {&NervesHub.SSL.verify_fun/3, nil},
fail_if_no_peer_cert: true,
keyfile: keyfile,
certfile: certfile,
cacertfile: CAStore.file_path(),
hibernate_after: 15_000
]
]
# Enable client SSL
# Older versions of OTP 25 may break using using devices
# that support TLS 1.3 or 1.2 negotiation. To mitigate that
# potential error, we enforce TLS 1.2. If you're using OTP >= 25.1
# on all devices, then it is safe to allow TLS 1.3 by removing
# the versions constraint and setting `certificate_authorities: false`
# since we don't expect devices to send full chains to the server
# See https://github.com/erlang/otp/issues/6492#issuecomment-1323874205
#
# certificate_authorities: false,
versions: [:"tlsv1.2"],
verify: :verify_peer,
verify_fun: {&NervesHub.SSL.verify_fun/3, nil},
fail_if_no_peer_cert: true,
keyfile: keyfile,
certfile: certfile,
cacertfile: CAStore.file_path(),
hibernate_after: 15_000
]
end
end
Expand Down
18 changes: 7 additions & 11 deletions config/test.exs
Expand Up @@ -25,17 +25,13 @@ config :nerves_hub, NervesHubWeb.DeviceEndpoint,
https: [
port: 4101,
otp_app: :nerves_hub,
thousand_island_options: [
transport_options: [
# Enable client SSL
verify: :verify_peer,
verify_fun: {&NervesHub.SSL.verify_fun/3, nil},
fail_if_no_peer_cert: true,
keyfile: Path.join([__DIR__, "../test/fixtures/ssl/device.nerves-hub.org-key.pem"]),
certfile: Path.join([__DIR__, "../test/fixtures/ssl/device.nerves-hub.org.pem"]),
cacertfile: Path.join([__DIR__, "../test/fixtures/ssl/ca.pem"])
]
]
# Enable client SSL
verify: :verify_peer,
verify_fun: {&NervesHub.SSL.verify_fun/3, nil},
fail_if_no_peer_cert: true,
keyfile: Path.join([__DIR__, "../test/fixtures/ssl/device.nerves-hub.org-key.pem"]),
certfile: Path.join([__DIR__, "../test/fixtures/ssl/device.nerves-hub.org.pem"]),
cacertfile: Path.join([__DIR__, "../test/fixtures/ssl/ca.pem"])
]

##
Expand Down
79 changes: 0 additions & 79 deletions lib/nerves_hub/device_ssl_transport.ex

This file was deleted.

68 changes: 39 additions & 29 deletions lib/nerves_hub/ssl.ex
@@ -1,6 +1,7 @@
defmodule NervesHub.SSL do
alias NervesHub.Devices
alias NervesHub.Certificate
alias NervesHub.RateLimit

@type pkix_path_validation_reason ::
:cert_expired
Expand Down Expand Up @@ -33,42 +34,51 @@ defmodule NervesHub.SSL do
# or the signer cert was included by the client and is valid
# for the peer (device) cert
def verify_fun(otp_cert, :valid_peer, state) do
:telemetry.execute([:nerves_hub, :rate_limit, :accepted], %{count: 1})

do_verify(otp_cert, state)
if RateLimit.increment() do
:telemetry.execute([:nerves_hub, :rate_limit, :accepted], %{count: 1})
do_verify(otp_cert, state)
else
:telemetry.execute([:nerves_hub, :rate_limit, :rejected], %{count: 1})
{:fail, :rate_limit}
end
end

def verify_fun(_certificate, :valid, state) do
{:valid, state}
end

def verify_fun(otp_cert, {:bad_cert, err}, state) when err in [:unknown_ca, :cert_expired] do
:telemetry.execute([:nerves_hub, :rate_limit, :accepted], %{count: 1})

aki = Certificate.get_aki(otp_cert)
ski = Certificate.get_ski(otp_cert)

cond do
aki == ski ->
# If the signer CA is also the root, then AKI == SKI. We can skip
# checking as it will be validated later on if the device needs
# registration
{:valid, state}

is_binary(ski) and match?({:ok, _db_ca}, Devices.get_ca_certificate_by_ski(ski)) ->
# Signer CA sent with the device certificate, but is an intermediary
# so the chain is incomplete labeling it as unknown_ca.
#
# Since we have this CA registered, validate so we can move on to the device
# cert next and expiration will be checked later if registration of a new
# device cert needs to happen.
{:valid, state}

true ->
# The signer CA was not included in the request, so this is most
# likely a device cert that needs verification. If it isn't, then
# this is some other unknown CA that will fail
do_verify(otp_cert, state)
if RateLimit.increment() do
:telemetry.execute([:nerves_hub, :rate_limit, :accepted], %{count: 1})

aki = Certificate.get_aki(otp_cert)
ski = Certificate.get_ski(otp_cert)

cond do
aki == ski ->
# If the signer CA is also the root, then AKI == SKI. We can skip
# checking as it will be validated later on if the device needs
# registration
{:valid, state}

is_binary(ski) and match?({:ok, _db_ca}, Devices.get_ca_certificate_by_ski(ski)) ->
# Signer CA sent with the device certificate, but is an intermediary
# so the chain is incomplete labeling it as unknown_ca.
#
# Since we have this CA registered, validate so we can move on to the device
# cert next and expiration will be checked later if registration of a new
# device cert needs to happen.
{:valid, state}

true ->
# The signer CA was not included in the request, so this is most
# likely a device cert that needs verification. If it isn't, then
# this is some other unknown CA that will fail
do_verify(otp_cert, state)
end
else
:telemetry.execute([:nerves_hub, :rate_limit, :rejected], %{count: 1})
{:fail, :rate_limit}
end
end

Expand Down
3 changes: 2 additions & 1 deletion mix.exs
Expand Up @@ -51,12 +51,12 @@ defmodule NervesHub.MixProject do
[
{:mix_test_watch, "~> 1.0", only: :test, runtime: false},
{:recon, "~> 2.5"},
{:bandit, "~> 1.0"},
{:base62, "~> 1.2"},
{:bcrypt_elixir, "~> 3.0"},
{:castore, "~> 1.0"},
{:circular_buffer, "~> 0.4.1"},
{:comeonin, "~> 5.3"},
{:cowboy, "~> 2.0"},
{:crontab, "~> 1.1"},
{:decorator, "~> 1.2"},
{:ecto, "~> 3.8", override: true},
Expand Down Expand Up @@ -85,6 +85,7 @@ defmodule NervesHub.MixProject do
{:phoenix_swoosh, "~> 1.0"},
{:phoenix_view, "~> 2.0"},
{:plug, "~> 1.7"},
{:plug_cowboy, "~> 2.1"},
{:postgrex, "~> 0.14"},
{:scrivener_ecto, "~> 2.7"},
{:scrivener_html, git: "https://github.com/nerves-hub/scrivener_html", branch: "phx-1.5"},
Expand Down

0 comments on commit 6daaf0e

Please sign in to comment.