Browse files

Add locale argument support to collation

  • Loading branch information...
1 parent 2faa3f9 commit 36fa52a85ed573abac9efc61dc1a395fd5f4a9e3 Nikolai Weibull committed Apr 8, 2013
Showing with 70 additions and 20 deletions.
  1. +2 −2 ext/u/rb_u_string.c
  2. +2 −2 ext/u/rb_u_string.h
  3. +24 −7 ext/u/rb_u_string_casecmp.c
  4. +17 −5 ext/u/rb_u_string_collate.c
  5. +25 −4 ext/u/u_collate.c
View
4 ext/u/rb_u_string.c
@@ -362,8 +362,8 @@ Init_u_string(VALUE mU)
rb_define_method(rb_cUString, "rindex", rb_u_string_rindex_m, -1); /* in ext/u/rb_u_string_rindex.c */
rb_define_method(rb_cUString, "start_with?", rb_u_string_start_with, -1); /* in ext/u/rb_u_string_start_with.c */
- rb_define_method(rb_cUString, "<=>", rb_u_string_collate, 1); /* in ext/u/rb_u_string_collate.c */
- rb_define_method(rb_cUString, "casecmp", rb_u_string_casecmp, 1); /* in ext/u/rb_u_string_casecmp.c */
+ rb_define_method(rb_cUString, "<=>", rb_u_string_collate, -1); /* in ext/u/rb_u_string_collate.c */
+ rb_define_method(rb_cUString, "casecmp", rb_u_string_casecmp, -1); /* in ext/u/rb_u_string_casecmp.c */
rb_define_method(rb_cUString, "collation_key", rb_u_string_collation_key, 0); /* in ext/u/rb_u_string_collation_key.c */
rb_define_method(rb_cUString, "combining_class", rb_u_string_combining_class, 0); /* in ext/u/rb_u_string_combining_class.c */
View
4 ext/u/rb_u_string.h
@@ -72,14 +72,14 @@ VALUE rb_u_string_b(VALUE self);
VALUE rb_u_string_bytesize(VALUE self);
VALUE rb_u_string_byteslice_m(int argc, VALUE *argv, VALUE self);
VALUE rb_u_string_center(int argc, VALUE *argv, VALUE self);
-VALUE rb_u_string_casecmp(VALUE self, VALUE other);
+VALUE rb_u_string_casecmp(int argc, VALUE *argv, VALUE self);
VALUE rb_u_string_cased(VALUE self);
VALUE rb_u_string_case_ignorable(VALUE self);
VALUE rb_u_string_chomp(int argc, VALUE *argv, VALUE self);
VALUE rb_u_string_chop(VALUE self);
VALUE rb_u_string_chr(VALUE self);
VALUE rb_u_string_cntrl(VALUE self);
-VALUE rb_u_string_collate(VALUE self, VALUE other);
+VALUE rb_u_string_collate(int argc, VALUE *argv, VALUE self);
VALUE rb_u_string_collation_key(VALUE self);
VALUE rb_u_string_combining_class(VALUE self);
VALUE rb_u_string_count(int argc, VALUE *argv, VALUE self);
View
31 ext/u/rb_u_string_casecmp.c
@@ -1,23 +1,35 @@
+#include <errno.h>
#include "rb_includes.h"
-/* @overload casecmp(other)
+/* @overload casecmp(other, locale = ENV['LC_COLLATE'])
*
* Returns the comparison of {#foldcase} to _other_{#foldcase} using the
- * linguistically correct rules of the current locale. This is, however,
- * only an approximation of a case-insensitive comparison.
+ * linguistically correct rules of LOCALE. This is, however, only an
+ * approximation of a case-insensitive comparison. The LOCALE must be given
+ * as a language, region, and encoding, for example, “en_US.UTF-8”.
*
* This operation is known as “collation” and you can find more information
* about the collation algorithm employed in the
* Unicode Technical Standard #10, see http://unicode.org/reports/tr10/.
*
* @param [U::String, #to_str] other
+ * @param [#to_str] locale
* @return [Fixnum] */
VALUE
-rb_u_string_casecmp(VALUE self, VALUE rbother)
+rb_u_string_casecmp(int argc, VALUE *argv, VALUE self)
{
+ const char *locale = NULL;
+
+ VALUE rbother, rblocale;
+ if (rb_scan_args(argc, argv, "11", &rbother, &rblocale) == 2)
+ locale = StringValuePtr(rblocale);
+
const struct rb_u_string *string = RVAL2USTRING(self);
const struct rb_u_string *other = RVAL2USTRING_ANY(rbother);
+ rb_u_validate(USTRING_STR(string), USTRING_LENGTH(string));
+ rb_u_validate(USTRING_STR(other), USTRING_LENGTH(other));
+
size_t folded_length;
char *folded = u_foldcase_n(USTRING_STR(string),
USTRING_LENGTH(string),
@@ -28,11 +40,16 @@ rb_u_string_casecmp(VALUE self, VALUE rbother)
USTRING_LENGTH(other),
&folded_other_length);
- int result = u_collate_n(folded, folded_length,
- folded_other, folded_other_length);
+ errno = 0;
+ int r = u_collate_in_locale_n(folded, folded_length,
+ folded_other, folded_other_length,
+ locale);
free(folded_other);
free(folded);
- return INT2FIX(result);
+ if (errno != 0)
+ rb_u_raise_errno(rb_eSystemCallError, errno, "can’t collate strings");
+
+ return INT2FIX(r);
}
View
22 ext/u/rb_u_string_collate.c
@@ -1,28 +1,40 @@
#include "rb_includes.h"
#include <errno.h>
-/* @overload <=>(other)
+/* @overload <=>(other, locale = ENV['LC_COLLATE'])
*
* Returns the comparison of the receiver and OTHER using the linguistically
- * correct rules of the current locale.
+ * correct rules of LOCALE. The LOCALE must be given as a language, region,
+ * and encoding, for example, “en_US.UTF-8”.
*
* This operation is known as “collation” and you can find more information
* about the collation algorithm employed in the
* Unicode Technical Standard #10, see http://unicode.org/reports/tr10/.
*
* @param [U::String, #to_str] other
+ * @param [#to_str] locale
* @return [Fixnum]
* @see #==
* @see #eql? */
VALUE
-rb_u_string_collate(VALUE self, VALUE rbother)
+rb_u_string_collate(int argc, VALUE *argv, VALUE self)
{
+ const char *locale = NULL;
+
+ VALUE rbother, rblocale;
+ if (rb_scan_args(argc, argv, "11", &rbother, &rblocale) == 2)
+ locale = StringValuePtr(rblocale);
+
const struct rb_u_string *string = RVAL2USTRING(self);
const struct rb_u_string *other = RVAL2USTRING_ANY(rbother);
+ rb_u_validate(USTRING_STR(string), USTRING_LENGTH(string));
+ rb_u_validate(USTRING_STR(other), USTRING_LENGTH(other));
+
errno = 0;
- int r = u_collate_n(USTRING_STR(string), USTRING_LENGTH(string),
- USTRING_STR(other), USTRING_LENGTH(other));
+ int r = u_collate_in_locale_n(USTRING_STR(string), USTRING_LENGTH(string),
+ USTRING_STR(other), USTRING_LENGTH(other),
+ locale);
if (errno != 0)
rb_u_raise_errno(rb_eSystemCallError, errno, "can’t collate strings");
return INT2FIX(r);
View
29 ext/u/u_collate.c
@@ -5,11 +5,20 @@
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
+#include <xlocale.h>
#include "u.h"
#include "utf8.h"
#include "private.h"
+static int /* file-local helper: collate two wide strings, optionally under an explicit locale */
+collate(uint32_t *a, uint32_t *b, locale_t locale) /* NOTE(review): the casts below assume wchar_t is as wide as uint32_t - confirm per platform */
+{
+ return locale != NULL ? /* a NULL locale selects the plain wcscoll() path */
+ wcscoll_l((wchar_t *)a, (wchar_t *)b, locale) : /* explicit locale object supplied by the caller */
+ wcscoll((wchar_t *)a, (wchar_t *)b); /* no locale object: wcscoll() uses the program's current locale */
+}
+
/* {{{1
* Compare two strings for ordering using the linguistically correct rules of
* the current locale.
@@ -23,17 +32,17 @@ u_collate(const char *a, const char *b)
uint32_t *a_norm = _u_normalize_wc(a, 0, false, U_NORMALIZE_ALL_COMPOSE, NULL);
uint32_t *b_norm = _u_normalize_wc(b, 0, false, U_NORMALIZE_ALL_COMPOSE, NULL);
- int result = wcscoll((wchar_t *)a_norm, (wchar_t *)b_norm);
+ int result = collate(a_norm, b_norm, NULL);
free(a_norm);
free(b_norm);
return result;
}
-
int
-u_collate_n(const char *a, size_t a_n, const char *b, size_t b_n)
+u_collate_in_locale_n(const char *a, size_t a_n, const char *b, size_t b_n,
+ const char *locale)
{
size_t a_norm_n;
uint32_t * const a_norm = _u_normalize_wc(a, a_n, true,
@@ -45,22 +54,34 @@ u_collate_n(const char *a, size_t a_n, const char *b, size_t b_n)
U_NORMALIZE_ALL_COMPOSE,
&b_norm_n);
+ locale_t l = NULL;
+ if (locale != NULL)
+ l = newlocale(LC_COLLATE_MASK, locale, NULL);
+
int result = 0;
uint32_t *a_p = a_norm;
uint32_t *a_end = a_norm + a_norm_n;
uint32_t *b_p = b_norm;
uint32_t *b_end = b_norm + b_norm_n;
while (a_p <= a_end && b_p <= b_end) {
- result = wcscoll((wchar_t *)a_p, (wchar_t *)b_p);
+ result = collate(a_p, b_p, l);
if (result != 0)
break;
a_p += wcslen((wchar_t *)a_p) + 1;
b_p += wcslen((wchar_t *)b_p) + 1;
}
+ if (l != NULL)
+ freelocale(l);
free(a_norm);
free(b_norm);
return result;
}
+
+int /* result is whatever u_collate_in_locale_n() returns for the same inputs */
+u_collate_n(const char *a, size_t a_n, const char *b, size_t b_n)
+{
+ return u_collate_in_locale_n(a, a_n, b, b_n, NULL); /* NULL locale: no newlocale() call is made, so comparison goes through wcscoll() */
+}

0 comments on commit 36fa52a

Please sign in to comment.