Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

*: basic support for builtin function STR_TO_DATE #2078

Merged
merged 12 commits into from Nov 28, 2016
1 change: 1 addition & 0 deletions ast/functions.go
Expand Up @@ -103,6 +103,7 @@ const (
MonthName = "monthname"
Now = "now"
Second = "second"
StrToDate = "str_to_date"
Sysdate = "sysdate"
Time = "time"
UTCDate = "utc_date"
Expand Down
1 change: 1 addition & 0 deletions evaluator/builtin.go
Expand Up @@ -77,6 +77,7 @@ var Funcs = map[string]Func{
ast.MonthName: {builtinMonthName, 1, 1},
ast.Now: {builtinNow, 0, 1},
ast.Second: {builtinSecond, 1, 1},
ast.StrToDate: {builtinStrToDate, 2, 2},
ast.Sysdate: {builtinSysDate, 0, 1},
ast.Time: {builtinTime, 1, 1},
ast.UTCDate: {builtinUTCDate, 0, 0},
Expand Down
322 changes: 322 additions & 0 deletions evaluator/builtin_time.go
Expand Up @@ -21,8 +21,10 @@ import (
"fmt"
"math"
"regexp"
"strconv"
"strings"
"time"
"unicode"

"github.com/juju/errors"
"github.com/pingcap/tidb/ast"
Expand Down Expand Up @@ -673,6 +675,326 @@ func builtinFromUnixTime(args []types.Datum, _ context.Context) (d types.Datum,
return builtinDateFormat([]types.Datum{d, args[1]}, nil)
}

// strToDate converts date string according to format, returns true on success,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/return/returns

// the value will be stored in argument t.
func strToDate(t *time.Time, date string, format string) bool {
	// Consume one format token per iteration until the format is exhausted
	// or a token fails to match the date.
	for {
		date, format = skipWhiteSpace(date), skipWhiteSpace(format)

		token, restOfFormat, ok := getFormatToken(format)
		if !ok {
			return false
		}
		if token == "" {
			// Nothing left in the format: succeed only if the date is
			// fully consumed as well.
			return date == ""
		}

		restOfDate, ok := matchDateWithToken(t, date, token)
		if !ok {
			return false
		}
		date, format = restOfDate, restOfFormat
	}
}

// getFormatToken splits the next token off the front of format.
// A token is either a two-byte specifier introduced by '%' (for example "%Y")
// or a single literal byte; remain is the part of format after the token.
// An empty format yields an empty token with succ == true. A format that
// consists of a lone '%' is malformed and yields succ == false.
func getFormatToken(format string) (token string, remain string, succ bool) {
	switch {
	case format == "":
		return "", "", true
	case format[0] != '%':
		// Literal byte that must appear verbatim in the date.
		return format[:1], format[1:], true
	case len(format) == 1:
		// A trailing '%' has no specifier character after it.
		return "", "", false
	default:
		return format[:2], format[2:], true
	}
}

// skipWhiteSpace returns input with every leading white-space character (as
// defined by unicode.IsSpace) removed. Trailing white space is left untouched;
// an input made up only of white space yields the empty string.
func skipWhiteSpace(input string) string {
	return strings.TrimLeftFunc(input, unicode.IsSpace)
}

// weekdayAbbrev maps three-letter English weekday abbreviations to the
// corresponding time.Weekday. Keys are matched exactly (case-sensitive).
var weekdayAbbrev = map[string]time.Weekday{
	"Sun": time.Sunday,
	"Mon": time.Monday,
	"Tue": time.Tuesday,
	"Wed": time.Wednesday,
	"Thu": time.Thursday,
	"Fri": time.Friday,
	"Sat": time.Saturday,
}

// monthAbbrev maps three-letter English month abbreviations ("Jan" .. "Dec")
// to the corresponding time.Month. It is consulted by abbreviatedMonth, which
// looks up the first three bytes of its input verbatim (case-sensitive).
var monthAbbrev = map[string]time.Month{
"Jan": time.January,
"Feb": time.February,
"Mar": time.March,
"Apr": time.April,
"May": time.May,
"Jun": time.June,
"Jul": time.July,
"Aug": time.August,
"Sep": time.September,
"Oct": time.October,
"Nov": time.November,
"Dec": time.December,
}

// dateFormatParser parses one format specifier from the front of date and
// stores the parsed field into t. It returns the unconsumed remainder of date
// and whether parsing succeeded.
type dateFormatParser func(t *time.Time, date string) (remain string, succ bool)

// dateFormatParserTable maps each supported two-byte format specifier to the
// parser that handles it. Tokens missing from this table are matched as
// literal text by matchDateWithToken.
var dateFormatParserTable = map[string]dateFormatParser{
"%a": abbreviatedWeekday,
"%b": abbreviatedMonth,
"%c": monthNumeric,
"%D": dayOfMonthWithSuffix,
"%Y": yearNumericFourDigits,
"%m": monthNumericTwoDigits,
"%d": dayOfMonthNumericTwoDigits,
"%H": hour24TwoDigits,
"%i": minutesNumeric,
"%s": secondsNumeric,
}

// matchDateWithToken consumes token from the front of date. A token found in
// dateFormatParserTable is delegated to its parser, which may update t; any
// other token must appear literally at the start of date. It returns the
// unconsumed remainder and whether the match succeeded; on a literal mismatch
// date is returned unchanged.
func matchDateWithToken(t *time.Time, date string, token string) (remain string, succ bool) {
	parser, isSpecifier := dateFormatParserTable[token]
	switch {
	case isSpecifier:
		return parser(t, date)
	case strings.HasPrefix(date, token):
		return date[len(token):], true
	default:
		return date, false
	}
}

// parseTwoDigits interprets the first two bytes of input as an unsigned
// decimal number. It returns (0, false) when input is shorter than two bytes
// or those bytes are not both decimal digits.
func parseTwoDigits(input string) (int, bool) {
	const width = 2
	if len(input) < width {
		return 0, false
	}

	n, err := strconv.ParseUint(input[:width], 10, 64)
	return int(n), err == nil
}

// hour24TwoDigits parses the %H specifier: a two-digit hour in the range
// 00..23. On success the hour is stored in t and the input after the two
// digits is returned; otherwise input is returned unchanged with false.
func hour24TwoDigits(t *time.Time, input string) (string, bool) {
	hour, ok := parseTwoDigits(input)
	if ok && hour < 24 {
		timeSetHour(t, hour)
		return input[2:], true
	}
	return input, false
}

// secondsNumeric parses the %s specifier: a two-digit second in the range
// 00..59. On success the second is stored in t and the input after the two
// digits is returned; otherwise input is returned unchanged with false.
func secondsNumeric(t *time.Time, input string) (string, bool) {
	second, ok := parseTwoDigits(input)
	if ok && second < 60 {
		timeSetSecond(t, second)
		return input[2:], true
	}
	return input, false
}

// minutesNumeric parses the %i specifier: a two-digit minute in the range
// 00..59. On success the minute is stored in t and the input after the two
// digits is returned; otherwise input is returned unchanged with false.
func minutesNumeric(t *time.Time, input string) (string, bool) {
	minute, ok := parseTwoDigits(input)
	if ok && minute < 60 {
		timeSetMinute(t, minute)
		return input[2:], true
	}
	return input, false
}

func dayOfMonthNumericTwoDigits(t *time.Time, input string) (string, bool) {
v, succ := parseTwoDigits(input)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is 2.31 valid ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mysql behavior:

mysql> select str_to_date('2016 02 31', '%Y %m %D');
+---------------------------------------+
| str_to_date('2016 02 31', '%Y %m %D') |
+---------------------------------------+
| 2016-02-31                            |
+---------------------------------------+
1 row in set (0.00 sec)

mysql permit invalid date, while Go can't...and that why we need refact.
Of course, I'll be glad if you also review this PR #2098

if !succ || v >= 32 {
return input, false
}
timeSetDay(t, v)
return input[2:], true
}

// yearNumericFourDigits parses the %Y specifier: exactly four decimal digits.
// On success the year is stored in t and the input after the four digits is
// returned; otherwise input is returned unchanged with false.
func yearNumericFourDigits(t *time.Time, input string) (string, bool) {
	const width = 4
	if len(input) >= width {
		if year, err := strconv.ParseUint(input[:width], 10, 64); err == nil {
			timeSetYear(t, int(year))
			return input[width:], true
		}
	}
	return input, false
}

// monthNumericTwoDigits parses the %m specifier: a two-digit month in the
// range 00..12. On success the month is stored in t and the input after the
// two digits is returned; otherwise input is returned unchanged with false.
func monthNumericTwoDigits(t *time.Time, input string) (string, bool) {
	month, ok := parseTwoDigits(input)
	if ok && month <= 12 {
		timeSetMonth(t, time.Month(month))
		return input[2:], true
	}
	return input, false
}

func abbreviatedWeekday(t *time.Time, input string) (string, bool) {
if len(input) >= 3 {
dayName := input[:3]
if _, ok := weekdayAbbrev[dayName]; ok {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this case sensitive?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll confirm the behavior of MySQL, in the next PRs
Currently this function is not really supported, as our time representation is not compatible with mysql.

// TODO: we need refact mysql time to support this
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/we/We

return input, false
}
}
return input, false
}

// abbreviatedMonth parses the %b specifier: a three-letter month abbreviation
// listed in monthAbbrev. On success the month is stored in t and the input
// after the abbreviation is returned; otherwise input is returned unchanged
// with false.
func abbreviatedMonth(t *time.Time, input string) (string, bool) {
	const abbrevLen = 3
	if len(input) < abbrevLen {
		return input, false
	}
	month, known := monthAbbrev[input[:abbrevLen]]
	if !known {
		return input, false
	}
	timeSetMonth(t, month)
	return input[abbrevLen:], true
}

func monthNumeric(t *time.Time, input string) (string, bool) {
// TODO: this code is ugly!
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/this/This

for i := 12; i >= 0; i-- {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why i >= 0, month 0 is valid?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

str := strconv.FormatInt(int64(i), 10)
if strings.HasPrefix(input, str) {
timeSetMonth(t, time.Month(i))
return input[len(str):], true
}
}

return input, false
}

// dayOfMonthWithSuffix parses the %D specifier: the day of the month written
// with an English ordinal suffix (0th, 1st, 2nd, 3rd, ...).
// On success the day is stored in t and the input after the suffix is
// returned; otherwise input is returned unchanged with false.
func dayOfMonthWithSuffix(t *time.Time, input string) (string, bool) {
	day, remain := parseOrdinalNumbers(input)
	// parseOrdinalNumbers signals failure with a negative value; days above
	// 31 are rejected to match dayOfMonthNumericTwoDigits.
	if day < 0 || day > 31 {
		return input, false
	}
	timeSetDay(t, day)
	return remain, true
}

// parseOrdinalNumbers parses an English ordinal number such as "1st", "2nd",
// "3rd" or "11th" from the front of input.
//
// On success it returns the numeric value and the input after the two-letter
// suffix. On failure it returns -1 and input unchanged. Failure means: no
// leading ASCII digits, a number too large for int, a missing suffix, or a
// suffix that does not fit the number.
//
// "st", "nd" and "rd" are accepted only where English uses them (1st, 21st,
// 2nd, 22nd, 3rd, 23rd, but not 11st, 12nd or 13rd). "th" is accepted after
// any number.
func parseOrdinalNumbers(input string) (value int, remain string) {
	// Measure the run of leading ASCII digits.
	digits := 0
	for digits < len(input) && input[digits] >= '0' && input[digits] <= '9' {
		digits++
	}
	if digits == 0 {
		return -1, input
	}

	n, err := strconv.Atoi(input[:digits])
	if err != nil {
		// Only reachable on overflow, since the slice holds digits only.
		return -1, input
	}

	suffix := input[digits:]
	var ok bool
	switch {
	case strings.HasPrefix(suffix, "st"):
		ok = n%10 == 1 && n%100 != 11
	case strings.HasPrefix(suffix, "nd"):
		ok = n%10 == 2 && n%100 != 12
	case strings.HasPrefix(suffix, "rd"):
		ok = n%10 == 3 && n%100 != 13
	case strings.HasPrefix(suffix, "th"):
		ok = true
	}
	if !ok {
		return -1, input
	}
	return n, suffix[2:]
}

// timeSetYear replaces *t with a time that has the given year and keeps every
// other field (month, day, clock, nanosecond, location) of the old value.
// The result is built with time.Date, so out-of-range combinations are
// normalized by that function.
func timeSetYear(t *time.Time, year int) {
	*t = time.Date(year, t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), t.Location())
}

// timeSetMonth replaces *t with a time that has the given month and keeps
// every other field (year, day, clock, nanosecond, location) of the old value.
// The result is built with time.Date, so out-of-range combinations are
// normalized by that function.
func timeSetMonth(t *time.Time, month time.Month) {
	*t = time.Date(t.Year(), month, t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), t.Location())
}

// timeSetDay replaces *t with a time that has the given day of month and keeps
// every other field (year, month, clock, nanosecond, location) of the old
// value. The result is built with time.Date, so out-of-range combinations are
// normalized by that function.
func timeSetDay(t *time.Time, day int) {
	*t = time.Date(t.Year(), t.Month(), day, t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), t.Location())
}

// timeSetHour replaces *t with a time that has the given hour and keeps every
// other field (date, minute, second, nanosecond, location) of the old value.
// The result is built with time.Date, so out-of-range combinations are
// normalized by that function.
func timeSetHour(t *time.Time, hour int) {
	*t = time.Date(t.Year(), t.Month(), t.Day(), hour, t.Minute(), t.Second(), t.Nanosecond(), t.Location())
}

// timeSetMinute replaces *t with a time that has the given minute and keeps
// every other field (date, hour, second, nanosecond, location) of the old
// value. The result is built with time.Date, so out-of-range combinations are
// normalized by that function.
func timeSetMinute(t *time.Time, min int) {
	*t = time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), min, t.Second(), t.Nanosecond(), t.Location())
}

// timeSetSecond replaces *t with a time that has the given second and keeps
// every other field (date, hour, minute, nanosecond, location) of the old
// value. The result is built with time.Date, so out-of-range combinations are
// normalized by that function.
func timeSetSecond(t *time.Time, sec int) {
	*t = time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), sec, t.Nanosecond(), t.Location())
}

// See https://dev.mysql.com/doc/refman/5.5/en/date-and-time-functions.html#function_str-to-date
func builtinStrToDate(args []types.Datum, _ context.Context) (types.Datum, error) {
date := args[0].GetString()
format := args[1].GetString()
var (
d types.Datum
goTime time.Time
)
goTime = types.ZeroTime
if !strToDate(&goTime, date, format) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we define a struct to call strToData, I think it's tooooo ugly to pass a pointer to get result

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ugly? it's very common in C, and C-family languages. and Go is one of C-family

struct timeData {...} 
var d timeData 
d.strToDate() ??

I think it's ... Poor man's object ...

d.SetNull()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line may be useless, KindNull equals to 0 which is a default value.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer d.SetNull() here explicitly, it can remind us this function return NULL if strToDate fail...
depend on KindNull == 0 is not that reliable on some scenes

return d, nil
}

t := types.Time{
Time: goTime,
Type: mysql.TypeDatetime,
Fsp: types.UnspecifiedFsp,
}
d.SetMysqlTime(t)
return d, nil
}

func builtinSysDate(args []types.Datum, ctx context.Context) (types.Datum, error) {
// SYSDATE is not the same as NOW if NOW is used in a stored function or trigger.
// But here we can just think they are the same because we don't support stored function
Expand Down
28 changes: 28 additions & 0 deletions evaluator/builtin_time_test.go
Expand Up @@ -579,3 +579,31 @@ func (s *testEvaluatorSuite) TestDateArith(c *C) {
}
}
}

func (s *testEvaluatorSuite) TestStrToDate(c *C) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a test case for %a %b %c %D.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it would be done in next PR

tests := []struct {
Date string
Format string
Success bool
Expect time.Time
}{
{"20161122165022", "%Y%m%d%H%i%s", true, time.Date(2016, 11, 22, 16, 50, 22, 0, time.Local)},
{"2016 11 22 16 50 22", "%Y%m%d%H%i%s", true, time.Date(2016, 11, 22, 16, 50, 22, 0, time.Local)},
{"16-50-22 2016 11 22", "%H-%i-%s%Y%m%d", true, time.Date(2016, 11, 22, 16, 50, 22, 0, time.Local)},
{"16-50 2016 11 22", "%H-%i-%s%Y%m%d", false, time.Time{}},
}

for _, test := range tests {
date := types.NewStringDatum(test.Date)
format := types.NewStringDatum(test.Format)
result, err := builtinStrToDate([]types.Datum{date, format}, nil)
if !test.Success {
c.Assert(err, IsNil)
c.Assert(result.IsNull(), IsTrue)
continue
}
c.Assert(result.Kind(), Equals, types.KindMysqlTime)
value := result.GetMysqlTime()
c.Assert(value.Time, Equals, test.Expect)
}
}