@@ -204,16 +204,16 @@ static int ssl_error(SSL *ssl, ssize_t ret)
204204 switch (e )
205205 {
206206 case SSL_ERROR_WANT_READ :
207- upslogx ( LOG_ERR , "ssl_error() ret=%" PRIiSIZE " SSL_ERROR_WANT_READ" , ret );
208- break ;
207+ upsdebugx ( 4 , "ssl_error() ret=%" PRIiSIZE " SSL_ERROR_WANT_READ" , ret );
208+ return 0 ;
209209
210210 case SSL_ERROR_WANT_WRITE :
211- upslogx ( LOG_ERR , "ssl_error() ret=%" PRIiSIZE " SSL_ERROR_WANT_WRITE" , ret );
212- break ;
211+ upsdebugx ( 4 , "ssl_error() ret=%" PRIiSIZE " SSL_ERROR_WANT_WRITE" , ret );
212+ return 0 ;
213213
214214 case SSL_ERROR_SYSCALL :
215215 if (ret == 0 && ERR_peek_error () == 0 ) {
216- upslogx (LOG_ERR , "ssl_error() EOF from client " );
216+ upslogx (LOG_ERR , "ssl_error() EOF from server " );
217217 } else {
218218 upslogx (LOG_ERR , "ssl_error() ret=%" PRIiSIZE " SSL_ERROR_SYSCALL" , ret );
219219 }
@@ -709,16 +709,68 @@ static ssize_t net_read(UPSCONN_t *ups, char *buf, size_t buflen, const time_t t
709709#ifdef WITH_SSL
710710 if (ups -> ssl ) {
711711# ifdef WITH_OPENSSL
712+ int iret , ssl_err , ssl_retries = 0 ;
713+ /* Cap retries to avoid spinning forever on a broken socket.
714+ * 250 * 20 ms = 5 s maximum wait for SSL_read() to stop
715+ * reporting WANT_READ/WANT_WRITE, while being safe for CI timeouts.
716+ */
717+ const int SSL_IO_MAX_RETRIES = 250 ;
718+ fd_set fds ;
719+ struct timeval tv ;
720+
712721 /* SSL_* routines deal with int type for return and buflen
713722 * We might need to window our I/O if we exceed 2GB (in
714723 * 32-bit builds)... Not likely to exceed in 64-bit builds,
715724 * but smaller systems with 16-bits might be endangered :)
716725 */
717- int iret ;
718726 assert (buflen <= INT_MAX );
719- iret = SSL_read (ups -> ssl , buf , (int )buflen );
720- assert (iret <= SSIZE_MAX );
721- ret = (ssize_t )iret ;
727+
728+ while (ssl_retries < SSL_IO_MAX_RETRIES ) {
729+ iret = SSL_read (ups -> ssl , buf , (int )buflen );
730+
731+ assert (iret <= SSIZE_MAX );
732+ if (iret > 0 ) {
733+ ret = (ssize_t )iret ;
734+ break ;
735+ }
736+
737+ if (iret == 0 ) {
738+ /* Orderly shutdown or actual EOF */
739+ ret = 0 ;
740+ break ;
741+ }
742+
743+ ssl_err = SSL_get_error (ups -> ssl , iret );
744+ if (ssl_err == SSL_ERROR_WANT_READ
745+ || ssl_err == SSL_ERROR_WANT_WRITE
746+ ) {
747+ FD_ZERO (& fds );
748+ FD_SET (ups -> fd , & fds );
749+ tv .tv_sec = 0 ;
750+ tv .tv_usec = 20000 ; /* 20 ms */
751+
752+ if (select (ups -> fd + 1 ,
753+ (ssl_err == SSL_ERROR_WANT_READ ) ? & fds : NULL ,
754+ (ssl_err == SSL_ERROR_WANT_WRITE ) ? & fds : NULL ,
755+ NULL , & tv ) < 0
756+ ) {
757+ /* select failure is fatal enough to stop retrying */
758+ ssl_error (ups -> ssl , (ssize_t )iret );
759+ return -1 ;
760+ }
761+ ssl_retries ++ ;
762+ continue ;
763+ }
764+
765+ /* Other errors are fatal */
766+ ssl_error (ups -> ssl , (ssize_t )iret );
767+ return -1 ;
768+ }
769+
770+ if (ssl_retries >= SSL_IO_MAX_RETRIES ) {
771+ upslogx (LOG_ERR , "%s: SSL_read timed out after %d retries" , __func__ , ssl_retries );
772+ return -1 ;
773+ }
722774# elif defined(WITH_NSS ) /* WITH_OPENSSL */
723775 /* PR_* routines deal in PRInt32 type
724776 * We might need to window our I/O if we exceed 2GB :) */
@@ -794,16 +846,62 @@ static ssize_t net_write(UPSCONN_t *ups, const char *buf, size_t buflen, const t
794846#ifdef WITH_SSL
795847 if (ups -> ssl ) {
796848# ifdef WITH_OPENSSL
849+ int iret , ssl_err , ssl_retries = 0 ;
850+ /* Cap retries to avoid spinning forever on a broken socket.
851+ * 250 * 20 ms = 5 s maximum wait for SSL_write() to stop
852+ * reporting WANT_READ/WANT_WRITE, while being safe for CI timeouts.
853+ */
854+ const int SSL_IO_MAX_RETRIES = 250 ;
855+ fd_set fds ;
856+ struct timeval tv ;
857+
797858 /* SSL_* routines deal with int type for return and buflen
798859 * We might need to window our I/O if we exceed 2GB (in
799860 * 32-bit builds)... Not likely to exceed in 64-bit builds,
800861 * but smaller systems with 16-bits might be endangered :)
801862 */
802- int iret ;
803863 assert (buflen <= INT_MAX );
804- iret = SSL_write (ups -> ssl , buf , (int )buflen );
805- assert (iret <= SSIZE_MAX );
806- ret = (ssize_t )iret ;
864+
865+ while (ssl_retries < SSL_IO_MAX_RETRIES ) {
866+ iret = SSL_write (ups -> ssl , buf , (int )buflen );
867+
868+ assert (iret <= SSIZE_MAX );
869+ if (iret > 0 ) {
870+ ret = (ssize_t )iret ;
871+ break ;
872+ }
873+
874+ ssl_err = SSL_get_error (ups -> ssl , iret );
875+ if (ssl_err == SSL_ERROR_WANT_READ
876+ || ssl_err == SSL_ERROR_WANT_WRITE
877+ ) {
878+ FD_ZERO (& fds );
879+ FD_SET (ups -> fd , & fds );
880+ tv .tv_sec = 0 ;
881+ tv .tv_usec = 20000 ; /* 20 ms */
882+
883+ if (select (ups -> fd + 1 ,
884+ (ssl_err == SSL_ERROR_WANT_READ ) ? & fds : NULL ,
885+ (ssl_err == SSL_ERROR_WANT_WRITE ) ? & fds : NULL ,
886+ NULL , & tv ) < 0
887+ ) {
888+ /* select failure is fatal enough to stop retrying */
889+ ssl_error (ups -> ssl , (ssize_t )iret );
890+ return -1 ;
891+ }
892+ ssl_retries ++ ;
893+ continue ;
894+ }
895+
896+ /* Other errors (including iret=0) are fatal */
897+ ssl_error (ups -> ssl , (ssize_t )iret );
898+ return -1 ;
899+ }
900+
901+ if (ssl_retries >= SSL_IO_MAX_RETRIES ) {
902+ upslogx (LOG_ERR , "%s: SSL_write timed out after %d retries" , __func__ , ssl_retries );
903+ return -1 ;
904+ }
807905# elif defined(WITH_NSS ) /* WITH_OPENSSL */
808906 /* PR_* routines deal in PRInt32 type
809907 * We might need to window our I/O if we exceed 2GB :) */
@@ -938,13 +1036,14 @@ static int upscli_sslinit(UPSCONN_t *ups, int verifycert)
9381036 int ssl_retries = 0 ;
9391037 /* Cap retries to avoid spinning forever on a broken socket.
9401038 * 250 * 20 ms = 5 s maximum wait, which is generous for a
941- * local handshake while being safe for CI timeouts. */
942- const int SSL_CONNECT_MAX_RETRIES = 250 ;
1039+ * local handshake while being safe for CI timeouts.
1040+ */
1041+ const int SSL_IO_MAX_RETRIES = 250 ;
9431042 fd_set fds ;
9441043 struct timeval tv ;
9451044
9461045 res = -1 ;
947- while (ssl_retries < SSL_CONNECT_MAX_RETRIES ) {
1046+ while (ssl_retries < SSL_IO_MAX_RETRIES ) {
9481047 res = SSL_connect (ups -> ssl );
9491048
9501049 if (res == 1 ) {
@@ -972,7 +1071,7 @@ static int upscli_sslinit(UPSCONN_t *ups, int verifycert)
9721071 (ssl_err == SSL_ERROR_WANT_READ )
9731072 ? "READ" : "WRITE" ,
9741073 ssl_retries + 1 ,
975- SSL_CONNECT_MAX_RETRIES );
1074+ SSL_IO_MAX_RETRIES );
9761075
9771076 if (select (ups -> fd + 1 ,
9781077 (ssl_err == SSL_ERROR_WANT_READ ) ? & fds : NULL ,
@@ -982,6 +1081,9 @@ static int upscli_sslinit(UPSCONN_t *ups, int verifycert)
9821081 upsdebug_with_errno (1 ,
9831082 "%s: select() failed during SSL_connect" ,
9841083 __func__ );
1084+ /* ssl_error() returns 0 for the non-fatal
1085+ * WANT_READ/WANT_WRITE cases; we stop retrying
1086+ * anyway because select() itself failed. */
9851087 ssl_error (ups -> ssl , res );
9861088 return -1 ;
9871089 }
@@ -1005,7 +1107,7 @@ static int upscli_sslinit(UPSCONN_t *ups, int verifycert)
10051107 return -1 ;
10061108 }
10071109
1008- if (ssl_retries >= SSL_CONNECT_MAX_RETRIES ) {
1110+ if (ssl_retries >= SSL_IO_MAX_RETRIES ) {
10091111 upslogx (LOG_ERR ,
10101112 "%s: SSL_connect timed out after %d retries"
10111113 " (non-blocking handshake never completed)" ,
0 commit comments