From daa888a6634ad5445946939507629e2429250ce0 Mon Sep 17 00:00:00 2001 From: Nic Pottier Date: Fri, 25 Aug 2017 12:52:54 -0500 Subject: [PATCH] strip invalid utf8 before writing to db --- backends/rapidpro/backend_test.go | 2 +- backends/rapidpro/log.go | 6 +++--- sender.go | 2 +- utils/misc.go | 19 ++++++++++++++++++- utils/misc_test.go | 2 ++ 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/backends/rapidpro/backend_test.go b/backends/rapidpro/backend_test.go index 11182c37b..23d6a87e9 100644 --- a/backends/rapidpro/backend_test.go +++ b/backends/rapidpro/backend_test.go @@ -429,7 +429,7 @@ func (ts *BackendTestSuite) TestChanneLog() { knChannel := ts.getChannel("KN", "dbc126ed-66bc-4e28-b67b-81dc3327c95d") log := courier.NewChannelLog("Message Send Error", knChannel, courier.NilMsgID, "POST", "/null/value", 400, - "request with null \x00", "response with null \x00", time.Millisecond, nil) + "request with null \x00 content", "response with null \x00 content", time.Millisecond, nil) err := writeChannelLog(ts.b, log) ts.NoError(err) diff --git a/backends/rapidpro/log.go b/backends/rapidpro/log.go index 27bc5fe14..fbcdb66a3 100644 --- a/backends/rapidpro/log.go +++ b/backends/rapidpro/log.go @@ -2,11 +2,11 @@ package rapidpro import ( "fmt" - "strings" "time" "github.com/nyaruka/courier" + "github.com/nyaruka/courier/utils" ) const insertLogSQL = ` @@ -28,8 +28,8 @@ func writeChannelLog(b *backend, log *courier.ChannelLog) error { } // strip null chars from request and response, postgres doesn't like that - log.Request = strings.Trim(log.Request, "\x00") - log.Response = strings.Trim(log.Response, "\x00") + log.Request = utils.CleanString(log.Request) + log.Response = utils.CleanString(log.Response) _, err := b.db.Exec(insertLogSQL, dbChan.ID(), log.MsgID, log.Description, log.Error != "", log.Method, log.URL, log.Request, log.Response, log.StatusCode, log.CreatedOn, log.Elapsed/time.Millisecond) diff --git a/sender.go b/sender.go index 
5027f04b0..446302eaf 100644 --- a/sender.go +++ b/sender.go @@ -88,7 +88,7 @@ func (f *Foreman) Assign() { // add our sender back to our queue and sleep a bit if !lastSleep { - log.Info("sleeping, no messages") + log.Debug("sleeping, no messages") lastSleep = true } f.availableSenders <- sender diff --git a/utils/misc.go b/utils/misc.go index 8886aa7a6..41420f684 100644 --- a/utils/misc.go +++ b/utils/misc.go @@ -63,5 +63,22 @@ var invalidChars = regexp.MustCompile("([\u0000-\u0008]|[\u000B-\u000C]|[\u000E- // CleanString removes any control characters from the passed in string func CleanString(s string) string { - return invalidChars.ReplaceAllString(s, "") + cleaned := invalidChars.ReplaceAllString(s, "") + + // drop invalid UTF8 bytes from cleaned (not s, which still holds the control chars) + if !utf8.ValidString(cleaned) { + v := make([]rune, 0, len(cleaned)) + for i, r := range cleaned { + if r == utf8.RuneError { + _, size := utf8.DecodeRuneInString(cleaned[i:]) + if size == 1 { + continue + } + } + v = append(v, r) + } + cleaned = string(v) + } + + return cleaned } diff --git a/utils/misc_test.go b/utils/misc_test.go index 10f59f686..9bb6ceeea 100644 --- a/utils/misc_test.go +++ b/utils/misc_test.go @@ -25,4 +25,6 @@ func TestStringArrayContains(t *testing.T) { func TestCleanString(t *testing.T) { assert.Equal(t, "\x41hello", CleanString("\x02\x41hello")) assert.Equal(t, "😅 happy!", CleanString("😅 happy!")) + assert.Equal(t, "Hello  There", CleanString("Hello \x00 There")) + assert.Equal(t, "Hello z There", CleanString("Hello \xc5z There")) }