Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ACM MVP Behavior changes #2738

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
10 changes: 6 additions & 4 deletions system/inc/system_cloud.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ const CloudVariableTypeDouble DOUBLE;
*/
enum CloudDisconnectFlag {
CLOUD_DISCONNECT_GRACEFULLY = 0x01, ///< Disconnect gracefully.
CLOUD_DISCONNECT_DONT_CLOSE = 0x02 ///< Do not close the socket.
CLOUD_DISCONNECT_DONT_CLOSE = 0x02, ///< Do not close the socket.
CLOUD_DISCONNECT_RECONNECT_IMMEDIATELY = 0x03 ///< Close the socket, but leave the cloud connect flag set
};

/**
Expand Down Expand Up @@ -313,7 +314,8 @@ bool spark_cloud_flag_auto_connect(void);
typedef enum spark_cloud_disconnect_option_flag {
SPARK_CLOUD_DISCONNECT_OPTION_GRACEFUL = 0x01, ///< The `graceful` option is set.
SPARK_CLOUD_DISCONNECT_OPTION_TIMEOUT = 0x02, ///< The `timeout` option is set.
SPARK_CLOUD_DISCONNECT_OPTION_CLEAR_SESSION = 0x04 ///< The `clear_session` option is set.
SPARK_CLOUD_DISCONNECT_OPTION_CLEAR_SESSION = 0x04, ///< The `clear_session` option is set.
SPARK_CLOUD_DISCONNECT_OPTION_RECONNECT_IMMEDIATELY = 0x08 ///< The `reconnect_immediately` option is set.
} spark_cloud_disconnect_option_flag;

/**
Expand All @@ -325,6 +327,7 @@ typedef struct spark_cloud_disconnect_options {
uint8_t graceful; ///< Enables graceful disconnection if set to a non-zero value.
uint32_t timeout; ///< Maximum time in milliseconds to wait for message acknowledgements.
uint8_t clear_session; ///< Clears the session after disconnecting if set to a non-zero value.
uint8_t reconnect_immediately;
} spark_cloud_disconnect_options;

/**
Expand All @@ -351,8 +354,7 @@ typedef enum spark_connection_property {
SPARK_CLOUD_MAX_EVENT_DATA_SIZE = 3, ///< Maximum size of event data (get).
SPARK_CLOUD_MAX_VARIABLE_VALUE_SIZE = 4, ///< Maximum size of a variable value (get).
SPARK_CLOUD_MAX_FUNCTION_ARGUMENT_SIZE = 5, ///< Maximum size of a function call argument (get).
SPARK_CLOUD_BIND_NETWORK_INTERFACE = 6, ///< The cloud connection should only use a specified network interface
SPARK_CLOUD_GET_NETWORK_INTERFACE = 7 ///< Which interface is being used for the current cloud connection
SPARK_CLOUD_GET_NETWORK_INTERFACE = 6 ///< Which interface is being used for the current cloud connection
} spark_connection_property;

int spark_set_connection_property(unsigned property, unsigned value, const void* data, void* reserved);
Expand Down
2 changes: 0 additions & 2 deletions system/inc/system_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ inline void Spark_Idle() { Spark_Idle_Events(false); }
void SPARK_WLAN_Loop(void) __attribute__ ((deprecated("Please use Particle.process() instead.")));
inline void SPARK_WLAN_Loop(void) { spark_process(); }

void disconnect_cloud();

extern volatile uint8_t SPARK_WLAN_RESET;
extern volatile uint8_t SPARK_WLAN_SLEEP;
extern volatile uint8_t SPARK_WLAN_CONNECT_RESTORE;
Expand Down
13 changes: 1 addition & 12 deletions system/src/system_cloud.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,6 @@ int spark_set_connection_property(unsigned property, unsigned value, const void*
const auto r = spark_protocol_set_connection_property(sp, property, value, d, reserved);
return spark_protocol_to_system_error(r);
}
case SPARK_CLOUD_BIND_NETWORK_INTERFACE: {
CloudConnectionSettings::instance()->setBoundInterface((network_interface_t)value);
return 0;
}

default:
return SYSTEM_ERROR_INVALID_ARGUMENT;
Expand All @@ -331,16 +327,9 @@ int spark_get_connection_property(unsigned property, void* data, size_t* size, v
return SYSTEM_ERROR_INVALID_STATE;
}
return getConnectionProperty(protocol::Connection::MAX_FUNCTION_ARGUMENT_SIZE, data, size);
case SPARK_CLOUD_BIND_NETWORK_INTERFACE: {
if (*size >= sizeof(network_interface_t)) {
*((network_interface_t*)data) = CloudConnectionSettings::instance()->getBoundInterface();
return 0;
}
return SYSTEM_ERROR_INVALID_ARGUMENT;
}
case SPARK_CLOUD_GET_NETWORK_INTERFACE: {
if (*size >= sizeof(network_interface_t)) {
#if HAL_PLATFORM_AUTOMATIC_CONNECTION_MANAGEMENT
#if HAL_PLATFORM_IFAPI
*((network_interface_t*)data) = ConnectionManager::instance()->getCloudConnectionNetwork();
#else
*((network_interface_t*)data) = NETWORK_INTERFACE_ALL;
Expand Down
32 changes: 28 additions & 4 deletions system/src/system_cloud_connection_posix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ int system_cloud_connect(int protocol, const ServerAddress* address, sockaddr* s
break;
}
}
LOG(INFO, "Cloud socket=%d, connecting to %s#%u", s, serverHost, serverPort);

/* We are using fixed source port only for IPv6 connections */
if (protocol == IPPROTO_UDP && a->ai_family == AF_INET6) {
Expand Down Expand Up @@ -203,13 +202,19 @@ int system_cloud_connect(int protocol, const ServerAddress* address, sockaddr* s

network_interface_t cloudInterface = particle::system::ConnectionManager::instance()->selectCloudConnectionNetwork();

LOG(INFO, "Cloud socket=%d, connecting to %s#%u using if %d", s, serverHost, serverPort, cloudInterface);

if (cloudInterface != NETWORK_INTERFACE_ALL) {
// Bind to specific netif
struct ifreq ifr = {};
if_index_to_name(cloudInterface, ifr.ifr_name);

auto sockOptRet = sock_setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr));
LOG(TRACE, "%d Bound cloud socket to lwip interface %s", sockOptRet, ifr.ifr_name);
if (sockOptRet) {
LOG(WARN, "Failed to bind cloud socket to interface %u error: %d", cloudInterface, sockOptRet);
} else {
LOG(INFO, "Bound cloud socket to lwip if %u (\"%s\")", cloudInterface, ifr.ifr_name);
}
}

/* FIXME: timeout for TCP */
Expand Down Expand Up @@ -314,8 +319,27 @@ int system_cloud_disconnect(int flags)
int system_cloud_send(const uint8_t* buf, size_t buflen, int flags)
{
(void)flags;
LOG_DEBUG(TRACE, "TX s_state.socket %d buflen %d", s_state.socket, buflen);
int r = sock_send(s_state.socket, buf, buflen, 0);
int r = 0;

auto netIfIndex = particle::system::ConnectionManager::instance()->getCloudConnectionNetwork();
if (netIfIndex) {
if_t ifHandle = 0;
if_get_by_index(netIfIndex, &ifHandle);
unsigned int ifFlags = 0;
if_get_flags(ifHandle, &ifFlags);
LOG_DEBUG(TRACE, "Bound cloud netif idx %d flags 0x%0x (IFF_RUNNING %d)", netIfIndex, ifFlags, (ifFlags & IFF_RUNNING)!=0);

if (!(ifFlags & IFF_RUNNING)) {
LOG(WARN, "Netif %d not running. if flags %0x", netIfIndex, ifFlags);
r = ERR_CONN;
}
}

if (!r) {
LOG_DEBUG(TRACE, "TX s_state.socket %d buflen %d", s_state.socket, buflen);
r = sock_send(s_state.socket, buf, buflen, 0);
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unclear if this approach detects underlying network state faster, the netif flags did not seem to change before the wifi client realized the AP had been disconnected:

0000050622 [comm.coap] TRACE: Received CoAP message
0000050650 [comm.coap] TRACE: CON POST /E/particle/device/updates/pending size=47 token=02 id=33128
0000050706 [comm.coap] TRACE: Sending CoAP message
0000050736 [comm.coap] TRACE: ACK 0.00  size=4 token= id=33128
0000050770 [system] TRACE: Bound cloud netif idx 5 flags 0x18043 (IFF_RUNNING 1).    // <--- flags = 0x18043 when IF is connected
0000050817 [system] TRACE: TX s_state.socket 0 buflen 53
0000050949 [system] TRACE: RX s_state.socket 0 buflen 1485 recvd 33
0000050987 [comm.coap] TRACE: Received CoAP message
0000051021 [comm.coap] TRACE: ACK 0.00  size=4 token= id=10
0000051052 [system] INFO: All handshake messages have been processed
0000051194 [system] INFO: Cloud connected
0000051219 [system.ledger] TRACE: Connected

// < WIFI AP POWERED OFF >

0000076144 [comm.coap] TRACE: Sending CoAP message
0000076172 [comm.coap] TRACE: CON 0.00  size=4 token= id=11
0000076200 [system] TRACE: Bound cloud netif idx 5 flags 0x18043 (IFF_RUNNING 1) // <---- netif flags remain unchanged
0000076239 [system] TRACE: TX s_state.socket 0 buflen 53
0000079294 [ncp.rltk.client] INFO: disconnected       // <---- rtlk NCP client detects AP is gone, notifies network manager
0000079296 [ncp.rltk.client] TRACE: NCP connection state changed: 0
0000079297 [net.rltkncp] TRACE: NCP event 2
0000079298 [net.rltkncp] TRACE: State changed event: 0
0000079299 [net.ifapi] INFO: Netif wl4 link DOWN, profile=NONE
0000079301 [net.rltkncp] ERROR: linkOutput up=1 link_up=0
0000079302 [net.ifapi] TRACE: Netif wl4 ipv4 configuration changed
0000079305 [system.nm] INFO: State changed: IP_CONFIGURED -> IFACE_LINK_UP
0000079306 [system.nm] WARN: Cloud connection interface 5 link state down, disconnecting


if (r < 0) {
if (errno == ENOMEM) {
/* Not an error */
Expand Down
21 changes: 11 additions & 10 deletions system/src/system_cloud_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,13 @@ class CloudConnectionSettings {
static const bool DEFAULT_DISCONNECT_GRACEFULLY = false;
static const unsigned DEFAULT_DISCONNECT_TIMEOUT = 30000;
static const bool DEFAULT_DISCONNECT_CLEAR_SESSION = false;
static const bool DEFAULT_RECONNECT = false;

CloudConnectionSettings() :
defaultDisconnectTimeout_(DEFAULT_DISCONNECT_TIMEOUT),
defaultDisconnectGracefully_(DEFAULT_DISCONNECT_GRACEFULLY),
defaultDisconnectClearSession_(DEFAULT_DISCONNECT_CLEAR_SESSION),
boundInterface_(NETWORK_INTERFACE_ALL) {
defaultDisconnectReconnect_(DEFAULT_RECONNECT) {
}

void setDefaultDisconnectOptions(const CloudDisconnectOptions& options) {
Expand All @@ -156,6 +157,9 @@ class CloudConnectionSettings {
if (options.isClearSessionSet()) {
defaultDisconnectClearSession_ = options.clearSession();
}
if (options.isReconnectSet()) {
defaultDisconnectReconnect_ = options.reconnect();
}
}

bool defaultDisconnectGracefully() const {
Expand Down Expand Up @@ -190,19 +194,16 @@ class CloudConnectionSettings {
} else {
result.clearSession(defaultDisconnectClearSession_);
}
if (pending.isReconnectSet()) {
result.reconnect(pending.reconnect());
} else {
result.reconnect(defaultDisconnectReconnect_);
}
return result;
}

static CloudConnectionSettings* instance();

void setBoundInterface(network_interface_t network) {
boundInterface_ = network;
}

network_interface_t getBoundInterface() {
return boundInterface_;
}

private:
SimpleAtomicFlagMutex mutex_;
// Default disconnection options can only be set in the context of the system thread.
Expand All @@ -212,7 +213,7 @@ class CloudConnectionSettings {
bool defaultDisconnectClearSession_;
// Pending disconnection options are set atomically and guarded by a spinlock
CloudDisconnectOptions pendingDisconnectOptions_;
network_interface_t boundInterface_;
bool defaultDisconnectReconnect_;
};

// Use this function instead of Particle.publish() in the system code
Expand Down
48 changes: 21 additions & 27 deletions system/src/system_connection_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,13 @@ ConnectionManager* ConnectionManager::instance() {
void ConnectionManager::setPreferredNetwork(network_handle_t network, bool preferred) {
if (preferred) {
if (network != NETWORK_INTERFACE_ALL) {
preferredNetwork_ = network;
preferredNetwork_ = network;

// If cloud is already connected, and a preferred network is set, and it is up, move cloud connection to it immediately
if (spark_cloud_flag_connected() && network_ready(spark::Network.from(preferredNetwork_), 0, nullptr)) {
auto options = CloudDisconnectOptions().graceful(true).reconnect(true).toSystemOptions();
eberseth marked this conversation as resolved.
Show resolved Hide resolved
spark_cloud_disconnect(&options, nullptr);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same concern here about executing this on the system task as from network manager context

}
}
} else {
if (network == preferredNetwork_ || network == NETWORK_INTERFACE_ALL) {
Expand Down Expand Up @@ -124,25 +130,14 @@ network_handle_t ConnectionManager::getCloudConnectionNetwork() {
network_handle_t ConnectionManager::selectCloudConnectionNetwork() {
network_handle_t bestNetwork = NETWORK_INTERFACE_ALL;

// 1: If there is a bound network connection. Do not use anything else, regardless of network state
network_handle_t boundNetwork = NETWORK_INTERFACE_ALL;
size_t n = sizeof(boundNetwork);
spark_get_connection_property(SPARK_CLOUD_BIND_NETWORK_INTERFACE, &boundNetwork, &n, nullptr);

if (boundNetwork != NETWORK_INTERFACE_ALL) {
LOG_DEBUG(TRACE, "Using bound network: %lu", boundNetwork);
return boundNetwork;
}

// 2: If no bound network, use preferred network
if (preferredNetwork_ != NETWORK_INTERFACE_ALL && network_ready(spark::Network.from(preferredNetwork_), 0, nullptr)) {
LOG_DEBUG(TRACE, "Using preferred network: %lu", preferredNetwork_);
return preferredNetwork_;
}

// 3: If no preferred network, use the 'best' network based on criteria
// 3.1: Network is ready: ie configured + connected (see ipv4 routable hook)
// 3.2: Network has best criteria based on network tester results
// If no preferred network, use the 'best' network based on criteria
// Network is ready: ie configured + connected (see ipv4 routable hook)
// Network has best criteria based on network tester results
for (auto& i: bestNetworks_) {
if (network_ready(spark::Network.from(i), 0, nullptr)) {
LOG_DEBUG(TRACE, "Using best network: %lu", i);
Expand Down Expand Up @@ -236,7 +231,7 @@ int ConnectionTester::sendTestPacket(ConnectionMetrics* metrics) {
if (metrics->txPacketCount == metrics->rxPacketCount ||
(millis() > metrics->txPacketStartMillis + REACHABILITY_TEST_PACKET_TIMEOUT_MS)) {

CHECK(generateTestPacket(metrics));
generateTestPacket(metrics);

int r = sock_send(metrics->socketDescriptor, metrics->txBuffer, metrics->testPacketSize, 0);
if (r > 0) {
Expand Down Expand Up @@ -278,12 +273,6 @@ int ConnectionTester::receiveTestPacket(ConnectionMetrics* metrics) {
};

int ConnectionTester::generateTestPacket(ConnectionMetrics* metrics) {
auto network = spark::Network.from(metrics->interface);
if (!network) {
LOG(ERROR, "No Network associated with interface %d", metrics->interface);
return SYSTEM_ERROR_BAD_DATA;
}

unsigned packetDataLength = random(1, REACHABILITY_MAX_PAYLOAD_SIZE);

DTLSPlaintext_t msg = {
Expand Down Expand Up @@ -331,7 +320,7 @@ int ConnectionTester::pollSockets(struct pollfd* pfds, int socketCount) {
receiveTestPacket(connection);
}
if (pfds[i].revents & POLLOUT) {
sendTestPacket(connection);
CHECK(sendTestPacket(connection));
}
}
return 0;
Expand Down Expand Up @@ -380,7 +369,12 @@ int ConnectionTester::testConnections() {
// For each network interface to test, create + open a socket with the retrieved server address
// If any of the sockets fail to be created + opened with this server address, return an error
for (auto& connectionMetrics: metrics_) {
if (!network_ready(spark::Network.from(connectionMetrics.interface),0,nullptr)) {
auto network = spark::Network.from(connectionMetrics.interface);
if (network == spark::Network) {
LOG(ERROR, "No Network associated with interface %d", connectionMetrics.interface);
return SYSTEM_ERROR_BAD_DATA;
}
if (!network_ready(network, 0, nullptr)) {
LOG_DEBUG(TRACE,"%s not ready, skipping test", netifToName(connectionMetrics.interface));
continue;
}
Expand Down Expand Up @@ -487,13 +481,13 @@ const Vector<ConnectionMetrics> ConnectionTester::getConnectionMetrics(){
const Vector<network_interface_t> ConnectionTester::getSupportedInterfaces() {
const Vector<network_interface_t> interfaceList = {
#if HAL_PLATFORM_ETHERNET
static_cast<network_interface_t>(NetworkDiagnosticsInterface::ETHERNET),
NETWORK_INTERFACE_ETHERNET,
#endif
#if HAL_PLATFORM_WIFI
static_cast<network_interface_t>(NetworkDiagnosticsInterface::WIFI_STA),
NETWORK_INTERFACE_WIFI_STA,
#endif
#if HAL_PLATFORM_CELLULAR
static_cast<network_interface_t>(NetworkDiagnosticsInterface::CELLULAR)
NETWORK_INTERFACE_CELLULAR
#endif
};

Expand Down
27 changes: 27 additions & 0 deletions system/src/system_network_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ LOG_SOURCE_CATEGORY("system.nm")
#include "control/common.h"
#include "control/network.h"
#include <unistd.h>
#include "system_connection_manager.h"
#include "spark_wiring_cloud.h"

#define CHECKV(_expr) \
({ \
Expand Down Expand Up @@ -536,6 +538,11 @@ void NetworkManager::handleIfState(if_t iface, const struct if_event* ev) {
}

void NetworkManager::handleIfLink(if_t iface, const struct if_event* ev) {
uint8_t netIfIndex = 0;
if_get_index(iface, &netIfIndex);
bool disconnectCloud = false;
auto options = CloudDisconnectOptions().reconnect(true);

if (ev->ev_if_link->state) {
/* Interface link state changed to UP */
NetworkInterfaceConfig conf;
Expand Down Expand Up @@ -609,11 +616,25 @@ void NetworkManager::handleIfLink(if_t iface, const struct if_event* ev) {
} else if (state_ == State::IP_CONFIGURED || state_ == State::IFACE_LINK_UP) {
refreshIpState();
}

// If the preferred network becomes available, close the cloud connection to force migration to this network
if (ConnectionManager::instance()->getPreferredNetwork() == netIfIndex) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This works, but technically is not the correct spot to check if a network is ready (ie IP configured).
This state is just link up. The implicit assumption here is that the interface will be assigned an IP by the time we try to reopen the cloud socket, which usually is true.

I think to be strictly correct, the transition from link up -> ip configured is detected in refreshIpState(), so perhaps we should check for connection migration there?

LOG(INFO, "Preferred network %u available, moving cloud connection", netIfIndex);
options.graceful(true);
disconnectCloud = true;
}

} else {
// Disable by default
if_clear_xflags(iface, IFXF_DHCP);
resetInterfaceProtocolState(iface);

// If this is the current cloud connection, disconnect the cloud, but reconnect immediately on any other available interface
if (ConnectionManager::instance()->getCloudConnectionNetwork() == netIfIndex) {
LOG(WARN, "Cloud connection interface %d link state down, disconnecting", netIfIndex);
disconnectCloud = true;
}

clearDnsConfiguration(iface);
/* Interface link state changed to DOWN */
if (state_ == State::IP_CONFIGURED || state_ == State::IFACE_LINK_UP) {
Expand All @@ -624,6 +645,12 @@ void NetworkManager::handleIfLink(if_t iface, const struct if_event* ev) {
}
}
}

if (spark_cloud_flag_connected() && disconnectCloud) {
auto systemOptions = options.toSystemOptions();
spark_cloud_disconnect(&systemOptions, nullptr);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we defer the actual disabling of the SPARK_CLOUD_AUTO_CONNECT flag to the system task queue? I'm worried we would get into a situation where the cloud reconnect options would be consumed/overridden before cloud_disconnect() runs and re-sets the cloud auto connect flag...

}

forceCloudPingIfConnected();
}

Expand Down
4 changes: 4 additions & 0 deletions system/src/system_task.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,10 @@ void cloud_disconnect(unsigned flags, cloud_disconnect_reason cloudReason, netwo
spark_cloud_socket_disconnect(graceful);
}

if (opts.reconnect()) {
spark_cloud_flag_connect();
}

// Note: Invoking the protocol's DISCONNECT or TERMINATE command cancels the ongoing firmware
// if the device is being updated OTA, so there's no need to explicitly cancel the update here
SPARK_CLOUD_CONNECTED = 0;
Expand Down