Skip to content

Commit

Permalink
Implement Unicode normalization Form D in libarchive itself in order …
Browse files Browse the repository at this point in the history
…not to rely on CoreServices framework on Mac OS.

It passes all of the tests in NormalizationTest.txt released by the Unicode Consortium (http://www.unicode.org).
  • Loading branch information
ggcueroad committed Mar 15, 2012
1 parent 34eb338 commit fd7d009
Show file tree
Hide file tree
Showing 7 changed files with 1,328 additions and 222 deletions.
5 changes: 0 additions & 5 deletions CMakeLists.txt
Expand Up @@ -1369,11 +1369,6 @@ IF(MSVC)
ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE)
ENDIF(MSVC)

# We need CoreServices on Mac OS.
IF(APPLE)
LIST(APPEND ADDITIONAL_LIBS "-framework CoreServices")
ENDIF(APPLE)

# Define the run_all_tests custom target only when ENABLE_TEST is set.
if(ENABLE_TEST)
  add_custom_target(run_all_tests)
endif()
Expand Down
47 changes: 42 additions & 5 deletions build/utils/gen_archive_string_composition_h.sh
@@ -1,10 +1,13 @@
#!/bin/sh
#
# This needs http://unicode.org/Public/UNIDATA/UnicodeData.txt
# This needs http://unicode.org/Public/6.0.0/ucd/UnicodeData.txt
#
inputfile="$1" # Expect UnicodeData.txt
outfile=archive_string_composition.h
pickout=/tmp/mk_unicode_composition_tbl$$.awk
pickout2=/tmp/mk_unicode_composition_tbl2$$.awk
#nfdtmp=/tmp/mk_unicode_decomposition_tmp$$.txt
nfdtmp="nfdtmpx"
#################################################################################
#
# Append the file header of "archive_string_composition.h"
Expand All @@ -14,7 +17,7 @@ append_copyright()
{
cat > ${outfile} <<CR_END
/*-
* Copyright (c) 2011 libarchive Project
* Copyright (c) 2011-2012 libarchive Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -44,7 +47,7 @@ cat > ${outfile} <<CR_END
/*
* ATTENTION!
* This file is generated by build/utils/gen_archive_string_composition_h.sh
* from http://unicode.org/Public/UNIDATA/UnicodeData.txt
* from http://unicode.org/Public/6.0.0/ucd/UnicodeData.txt
*
* See also http://unicode.org/report/tr15/
*/
Expand Down Expand Up @@ -76,6 +79,7 @@ BEGIN {
min = "";
max = "";
cmd="sort | awk -F ' ' '{printf \"\\\\t{ 0x%s , 0x%s , 0x%s },\\\\n\",\$1,\$2,\$3}'"
nfdtbl="${nfdtmp}"
print "static const struct unicode_composition_table u_composition_table[] = {"
}
END {
Expand Down Expand Up @@ -178,7 +182,6 @@ END {
}
print "};"
print ""
print "#endif /* ARCHIVE_STRING_COMPOSITION_H_INCLUDED */"
}
#
#
Expand Down Expand Up @@ -241,7 +244,7 @@ function hextoi(hex)
#}
#
# Exclusion code points specified by
# http://unicode.org/Public/UNIDATA/CompositionExclusions.txt
# http://unicode.org/Public/6.0.0/ucd/CompositionExclusions.txt
##
# 1. Script Specifics
##
Expand Down Expand Up @@ -404,6 +407,35 @@ function hextoi(hex)
print "0"cp[1], "0"cp[2], "0"\$1 | cmd
else
print cp[1], cp[2], \$1 | cmd
# NFC ==> NFD table.
if (length(\$1) == 4)
print "0"\$1, "0"cp[1], "0"cp[2] >>nfdtbl
else
print \$1, cp[1], cp[2] >>nfdtbl
}
AWK_END
#################################################################################
# awk script
#
# Write a second awk program to ${pickout2}.  When run, that program:
#   - BEGIN: prints the declaration of "struct unicode_decomposition_table"
#     (three uint32_t members: nfc, cp1, cp2) and opens the initializer of
#     the static array u_decomposition_table[];
#   - per input line: prints one "{ 0x<f1> , 0x<f2> , 0x<f3> }," row built
#     from the first three fields of the line;
#   - END: closes the initializer and prints a blank line.
#
# The here-document delimiter is unquoted, so the shell still processes the
# text: "\$1" is written to the file as "$1" (an awk field reference), while
# "\t" and "\n" are passed through unchanged for awk to interpret.
#################################################################################
cat > ${pickout2} <<AWK_END
#
BEGIN {
FS = " "
print "struct unicode_decomposition_table {"
print "\tuint32_t nfc;"
print "\tuint32_t cp1;"
print "\tuint32_t cp2;"
print "};"
print ""
print "static const struct unicode_decomposition_table u_decomposition_table[] = {"
}
END {
print "};"
print ""
}
{
printf "\t{ 0x%s , 0x%s , 0x%s },\n", \$1, \$2, \$3;
}
AWK_END
#################################################################################
Expand All @@ -413,6 +445,11 @@ AWK_END
#################################################################################
# Build ${outfile} in four steps: the copyright/file header, the composition
# table, the decomposition (NFC ==> NFD) table, and the closing include guard.
#
# The first awk script appends its NFC ==> NFD rows to ${nfdtmp} with ">>".
# Remove any stale copy left behind by an interrupted run first; otherwise the
# old rows would be duplicated in the generated decomposition table.
rm -f "${nfdtmp}"
append_copyright
# Pass 1: emit the composition table and collect decomposition rows in ${nfdtmp}.
awk -f "${pickout}" "${inputfile}" >> "${outfile}"
# Pass 2: turn the collected rows into the decomposition table.
awk -f "${pickout2}" "${nfdtmp}" >> "${outfile}"
echo "#endif /* ARCHIVE_STRING_COMPOSITION_H_INCLUDED */" >> "${outfile}"
echo "" >> "${outfile}"
#
# Remove the temporary awk scripts and the intermediate decomposition data.
# -f keeps the cleanup quiet if one of the files was never created.
rm -f "${pickout}" "${pickout2}" "${nfdtmp}"
7 changes: 0 additions & 7 deletions configure.ac
Expand Up @@ -197,13 +197,6 @@ case $host in
;;
esac

# We need CoreServices on Mac OS.
case $host in
*darwin* )
LIBS="${LIBS} -framework CoreServices"
;;
esac

# Checks for header files.
AC_HEADER_DIRENT
AC_HEADER_SYS_WAIT
Expand Down

0 comments on commit fd7d009

Please sign in to comment.