Permalink
Browse files

Bug 6554 - make Koha internally utf-8 clean

The current implementation (mostly commented out in this patch)
uses a heuristic to guess which strings need decoding from utf-8
to binary representation; it doesn't support utf-8 characters
in templates and has problems with utf-8 data from the database.

With these changes, Koha perl code always uses utf-8 encoding
correctly. All incoming data from the database is already
correctly marked as utf-8, and decoding of utf8 is required
only for data from Zebra and XSLT transfers, which don't set
the utf-8 flag correctly.

For output, the standard perl :encoding(utf8) layer is used,
so it also removes various "wide character" warnings as a side effect.

Test scenario:
1. make sure that you have utf-8 characters in your biblio
   records, patrons, categories etc.
2. try to search records on intranet and opac which contain
   utf-8 characters
3. install language which has utf-8 characters, e.g. uk-UA
   dpavlin@koha-dev:/srv/koha/misc/translator(bug_6554) $
   PERL5LIB=/srv/koha/ perl translate install uk-UA
4. switch language to uk-UA and verify that templates
   display correctly
5. test search and Z39.50 search and verify that characters
   are correct

Signed-off-by: Owen Leonard <oleonard@myacpl.org>

I followed the test plan, adding utf-8 characters to library names,
patron categories, titles, and authorized values. I tried the uk-UA
translation and everything looked good.

When performing Z39.50 searches for titles containing utf-8 characters I
got results which were still occasionally contaminated with dummy
characters [?] but I assume this is Z39.50's fault not the patch's.

Signed-off-by: Marcel de Rooy <m.de.rooy@rijksmuseum.nl>
Signed-off-by: Bernardo Gonzalez Kriegel <bgkriegel@gmail.com>
Already signed, add mine.
Signed-off-by: Jared Camins-Esakov <jcamins@cpbibliography.com>
  • Loading branch information...
1 parent 65ea963 commit d542740ab8877234d043c8a11cb19a6004e72656 @dpavlin dpavlin committed with jcamins Jun 10, 2012
View
@@ -1104,7 +1104,9 @@ set_userenv is called in Auth.pm
#'
sub set_userenv {
- my ($usernum, $userid, $usercnum, $userfirstname, $usersurname, $userbranch, $branchname, $userflags, $emailaddress, $branchprinter, $persona)= @_;
+ my ($usernum, $userid, $usercnum, $userfirstname, $usersurname, $userbranch, $branchname, $userflags, $emailaddress, $branchprinter, $persona)=
+ map { utf8::decode($_); $_ } # CGI::Session doesn't handle utf-8, so we decode it here
+ @_;
my $var=$context->{"activeuser"} || '';
my $cell = {
"number" => $usernum,
View
@@ -81,7 +81,7 @@ sub all {
for ( @{$dbh->selectall_arrayref(
"SELECT * FROM itemtypes ORDER BY description", { Slice => {} })} )
{
- utf8::encode($_->{description});
+# utf8::encode($_->{description});
push @itypes, $class->new($_);
}
return @itypes;
View
@@ -308,6 +308,7 @@ sub output_with_http_headers {
}
sub output_html_with_http_headers {
+ binmode( STDOUT, ":encoding(utf8)" );
my ( $query, $cookie, $data, $status ) = @_;
output_with_http_headers( $query, $cookie, $data, 'html', $status );
}
View
@@ -484,6 +484,7 @@ sub getRecords {
# not an index scan
else {
$record = $results[ $i - 1 ]->record($j)->raw();
+ utf8::decode( $record );
# warn "RECORD $j:".$record;
$results_hash->{'RECORDS'}[$j] = $record;
@@ -501,6 +502,7 @@ sub getRecords {
for ( my $j = 0 ; $j < $jmax ; $j++ ) {
my $render_record =
$results[ $i - 1 ]->record($j)->render();
+ utf8::decode($render_record);
my @used_datas = ();
foreach my $tag ( @{ $facet->{tags} } ) {
@@ -714,6 +716,7 @@ sub pazGetRecords {
for (my $i = 0; $i < $count; $i++) {
# FIXME -- may need to worry about diacritics here
my $rec = $paz->record($recid, $i);
+ utf8::decode( $rec );
push @{ $result_group->{'RECORDS'} }, $rec;
}
@@ -1290,17 +1293,17 @@ sub buildQuery {
if ( @limits ) {
$q .= ' and '.join(' and ', @limits);
}
- return ( undef, $q, $q, "q=ccl=".uri_escape($q), $q, '', '', '', '', 'ccl' );
+ return ( undef, $q, $q, "q=ccl=".uri_escape_utf8($q), $q, '', '', '', '', 'ccl' );
}
if ( $query =~ /^cql=/ ) {
- return ( undef, $', $', "q=cql=".uri_escape($'), $', '', '', '', '', 'cql' );
+ return ( undef, $', $', "q=cql=".uri_escape_utf8($'), $', '', '', '', '', 'cql' );
}
if ( $query =~ /^pqf=/ ) {
if ($query_desc) {
- $query_cgi = "q=".uri_escape($query_desc);
+ $query_cgi = "q=".uri_escape_utf8($query_desc);
} else {
$query_desc = $';
- $query_cgi = "q=pqf=".uri_escape($');
+ $query_cgi = "q=pqf=".uri_escape_utf8($');
}
return ( undef, $', $', $query_cgi, $query_desc, '', '', '', '', 'pqf' );
}
@@ -1472,9 +1475,9 @@ sub buildQuery {
$query .= " $operators[$i-1] ";
$query .= " $index_plus " unless $indexes_set;
$query .= " $operand";
- $query_cgi .= "&op=".uri_escape($operators[$i-1]);
- $query_cgi .= "&idx=".uri_escape($index) if $index;
- $query_cgi .= "&q=".uri_escape($operands[$i]) if $operands[$i];
+ $query_cgi .= "&op=".uri_escape_utf8($operators[$i-1]);
+ $query_cgi .= "&idx=".uri_escape_utf8($index) if $index;
+ $query_cgi .= "&q=".uri_escape_utf8($operands[$i]) if $operands[$i];
$query_desc .=
" $operators[$i-1] $index_plus $operands[$i]";
}
@@ -1484,8 +1487,8 @@ sub buildQuery {
$query .= " and ";
$query .= "$index_plus " unless $indexes_set;
$query .= "$operand";
- $query_cgi .= "&op=and&idx=".uri_escape($index) if $index;
- $query_cgi .= "&q=".uri_escape($operands[$i]) if $operands[$i];
+ $query_cgi .= "&op=and&idx=".uri_escape_utf8($index) if $index;
+ $query_cgi .= "&q=".uri_escape_utf8($operands[$i]) if $operands[$i];
$query_desc .= " and $index_plus $operands[$i]";
}
}
@@ -1497,8 +1500,8 @@ sub buildQuery {
$query .= " $index_plus " unless $indexes_set;
$query .= $operand;
$query_desc .= " $index_plus $operands[$i]";
- $query_cgi .= "&idx=".uri_escape($index) if $index;
- $query_cgi .= "&q=".uri_escape($operands[$i]) if $operands[$i];
+ $query_cgi .= "&idx=".uri_escape_utf8($index) if $index;
+ $query_cgi .= "&q=".uri_escape_utf8($operands[$i]) if $operands[$i];
$previous_operand = 1;
}
} #/if $operands
View
@@ -67,6 +67,7 @@ sub new {
COMPILE_DIR => C4::Context->config('template_cache_dir')?C4::Context->config('template_cache_dir'):'',,
INCLUDE_PATH => \@includes,
FILTERS => {},
+ ENCODING => 'utf8', # templates don't have BOM, see Template::FAQ
}
) or die Template->error();
my $self = {
@@ -114,57 +115,16 @@ sub output {
C4::Context->preference('opaclayoutstylesheet');
# add variables set via param to $vars for processing
- # and clean any utf8 mess
for my $k ( keys %{ $self->{VARS} } ) {
$vars->{$k} = $self->{VARS}->{$k};
- if (ref($vars->{$k}) eq 'ARRAY'){
- utf8_arrayref($vars->{$k});
- }
- elsif (ref($vars->{$k}) eq 'HASH'){
- utf8_hashref($vars->{$k});
- }
- else {
- utf8::encode($vars->{$k}) if utf8::is_utf8($vars->{$k});
- }
}
my $data;
-# binmode( STDOUT, ":utf8" );
$template->process( $self->filename, $vars, \$data )
|| die "Template process failed: ", $template->error();
return $data;
}
-sub utf8_arrayref {
- my $arrayref = shift;
- foreach my $element (@$arrayref){
- if (ref($element) eq 'ARRAY'){
- utf8_arrayref($element);
- next;
- }
- if (ref($element) eq 'HASH'){
- utf8_hashref($element);
- next;
- }
- utf8::encode($element) if utf8::is_utf8($element);
- }
-}
-
-sub utf8_hashref {
- my $hashref = shift;
- for my $key (keys %{$hashref}){
- if (ref($hashref->{$key}) eq 'ARRAY'){
- utf8_arrayref($hashref->{$key});
- next;
- }
- if (ref($hashref->{$key}) eq 'HASH'){
- utf8_hashref($hashref->{$key});
- next;
- }
- utf8::encode($hashref->{$key}) if utf8::is_utf8($hashref->{$key});
- }
-}
-
-
+
# FIXME - this is a horrible hack to cache
# the current known-good language, temporarily
# put in place to resolve bug 4403. It is
@@ -34,6 +34,7 @@
use IO::File;
use YAML::Syck qw();
$YAML::Syck::ImplicitTyping = 1;
+$YAML::Syck::ImplicitUnicode = 1; # force utf-8 for preference encoding
our $lang;
# use Smart::Comments;
@@ -21,7 +21,7 @@
use strict;
use warnings;
-use CGI;
+use CGI qw( -utf8 );
use C4::Context;
use C4::Auth;
use C4::Output;
@@ -125,7 +125,7 @@
# next/previous would not work anymore
# construction of the url of each page
- my $value_url = uri_escape($value);
+ my $value_url = uri_escape_utf8($value);
my $base_url = "authorities-home.pl?"
."marclist=$marclist"
."&amp;and_or=$and_or"
View
@@ -154,7 +154,7 @@ =head3 Additional Notes
my $DisplayMultiPlaceHold = C4::Context->preference("DisplayMultiPlaceHold");
# create a new CGI object
# FIXME: no_undef_params needs to be tested
-use CGI qw('-no_undef_params');
+use CGI qw( -no_undef_params -utf8 );
my $cgi = new CGI;
my ($template,$borrowernumber,$cookie);
@@ -880,7 +880,7 @@ sub build_tabs {
.'&frameworkcode='.$frameworkcode
.'&circborrowernumber='.$fa_circborrowernumber
.'&branch='.$fa_branch
- .'&barcode='.uri_escape($fa_barcode)
+ .'&barcode='.uri_escape_utf8($fa_barcode)
.'&stickyduedate='.$fa_stickyduedate
.'&duedatespec='.$fa_duedatespec
);
@@ -517,7 +517,7 @@ sub removeFieldsForPrefill {
print $input->redirect(
'/cgi-bin/koha/circ/circulation.pl?'
.'borrowernumber='.$fa_circborrowernumber
- .'&barcode='.uri_escape($fa_barcode)
+ .'&barcode='.uri_escape_utf8($fa_barcode)
.'&duedatespec='.$fa_duedatespec
.'&stickyduedate=1'
);
@@ -20,7 +20,7 @@
use strict;
use warnings;
-use CGI;
+use CGI qw( -utf8 );
use C4::Auth;
use C4::Output;
View
@@ -27,7 +27,7 @@
#use warnings; FIXME - Bug 2505
use C4::Auth;
use C4::Output;
-use CGI;
+use CGI qw( -utf8 );
use C4::Members;
use C4::Branch;
use C4::Category;
View
@@ -60,7 +60,7 @@
my $DisplayMultiPlaceHold = C4::Context->preference("DisplayMultiPlaceHold");
# create a new CGI object
# FIXME: no_undef_params needs to be tested
-use CGI qw('-no_undef_params');
+use CGI qw( -no_undef_params -utf8 );
my $cgi = new CGI;
my $branch_group_limit = $cgi->param("branch_group_limit");
@@ -652,7 +652,7 @@ sub _input_cgi_parse {
$newsearchcookie = $cgi->cookie(
-name => 'KohaOpacRecentSearches',
# We uri_escape the whole freezed structure so we're sure we won't have any encoding problems
- -value => uri_escape(freeze(\@recentSearches)),
+ -value => uri_escape_utf8(freeze(\@recentSearches)),
-expires => ''
);
$cookie = [$cookie, $newsearchcookie];
@@ -665,7 +665,7 @@ =head1 DESCRIPTION
my $totpages = int($total/$limit) + (($total % $limit) > 0 ? 1 : 0);
my $url = "/cgi-bin/koha/reports/guided_reports.pl?reports=$report_id&amp;phase=Run%20this%20report&amp;limit=$limit";
if (@sql_params) {
- $url = join('&amp;sql_params=', $url, map { URI::Escape::uri_escape($_) } @sql_params);
+ $url = join('&amp;sql_params=', $url, map { URI::Escape::uri_escape_utf8($_) } @sql_params);
}
$template->param(
'results' => \@rows,
@@ -127,7 +127,7 @@
# redirection to the referrer page
#
if ( $input->param('destination') eq "circ" ) {
- $cardnumber = uri_escape($cardnumber);
+ $cardnumber = uri_escape_utf8($cardnumber);
print $input->redirect( '/cgi-bin/koha/circ/circulation.pl?findborrower='
. $cardnumber
. $failedrenews
@@ -127,7 +127,7 @@
$template->param(
title => $subs->{'bibliotitle'},
issue => $issue,
- issue_escaped => URI::Escape::uri_escape($issue),
+ issue_escaped => URI::Escape::uri_escape_utf8($issue),
subscriptionid => $subscriptionid,
memberloop => $memberloop,
routingnotes => $routingnotes,
View
@@ -62,7 +62,7 @@ =head1 Routing.pl
if($op eq 'save'){
my $sth = $dbh->prepare('UPDATE serial SET routingnotes = ? WHERE subscriptionid = ?');
$sth->execute($notes,$subscriptionid);
- my $urldate = URI::Escape::uri_escape($date_selected);
+ my $urldate = URI::Escape::uri_escape_utf8($date_selected);
print $query->redirect("routing-preview.pl?subscriptionid=$subscriptionid&issue=$urldate");
}

0 comments on commit d542740

Please sign in to comment.