/
utf8func.h
76 lines (66 loc) · 3.16 KB
/
utf8func.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/*
LETTERFUNC - Unicode functions: I/O, string comparison, change casing
Copyright (C) 2012 Center for Sprogteknologi, University of Copenhagen
This file is part of CST's Language Technology Tools.
LETTERFUNC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
LETTERFUNC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with LETTERFUNC; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef UTF8_H
#define UTF8_H
#include "letterfunc.h"
#include <stddef.h>
#if UNICODE_CAPABLE
bool isAllUpper(const char * s,size_t len);
// NOTE(review): not documented in this header. Judging by name and signature,
// presumably reports whether the 'len' bytes starting at s consist only of
// upper case characters -- confirm against the implementation.
bool isUpperUTF8(const char * s);
// NOTE(review): not documented in this header. Presumably an upper case test
// on UTF-8 encoded text at s; whether it inspects only the first character or
// the whole string cannot be told from here -- confirm against the
// implementation.
// Former signature, kept for reference (len was an int):
//int UnicodeToUtf8(int w,char * s,int len);
int UnicodeToUtf8(int w,char * s,size_t len);
// NOTE(review): not documented in this header. Presumably encodes the Unicode
// character w as UTF-8 into the buffer s, with 'len' being the size of that
// buffer. The meaning of the return value is not visible here -- confirm
// against the implementation.
int Utf8ToUnicode(int * w,const char * s,size_t len);
// NOTE(review): not documented in this header. Presumably decodes UTF-8 text
// from s ('len' bytes) into Unicode character values written through w. The
// required size of the w buffer and the meaning of the return value are not
// visible here -- confirm against the implementation.
// Disabled declaration, kept for reference:
//void AllToUpper(char * s);
const char * allToLowerUTF8(const char * s);
// Simple case folding of s, used for simple "case insensitive" comparison.
// The function allocates enough memory to hold the returned value and keeps
// ownership of it: the memory is deallocated the next time allToLowerUTF8 is
// called. The caller must therefore NOT delete the returned pointer, and must
// copy the result if it is still needed after the next call.
//
// The two in-place variants below are disabled and kept for reference only.
//void AllToLowerUTF8(char * s);
// Simple case folding, used for simple "case insensitive" comparison.
// Overwrites s!
//void NToLowerUTF8(char * s,const char * stop);
// Like AllToLowerUTF8. The last converted character is the last character
// that starts before 'stop'. A zero byte is written after the last converted
// character.
// Overwrites s!
const char * changeCase(const char * s,bool low,size_t & length);
// Returns a case-converted version of s.
// The returned value must be copied to its destination before the next call
// to changeCase.
// 'length' is both input and output:
//   - On entry, if length is set to a positive number, case is only converted
//     for the first 'length' bytes; the remaining bytes are skipped.
//   - On return, length is the number of bytes before the final '\0' in the
//     returned value.
// NOTE(review): 'low' presumably selects conversion to lower case (true)
// versus upper case (false) -- confirm against the implementation.
int strCaseCmp(const char *s, const char *p);
// Case-insensitive comparison of s and p (strCmpN is described as the
// case-sensitive counterpart).
// NOTE(review): the return value presumably follows the strcmp convention
// (negative, zero, positive) -- confirm against the implementation.
int strCaseCmpN(const char *s, const char *p,ptrdiff_t & is,ptrdiff_t & ip);
// Like strCaseCmp. Additionally reports, through 'is' and 'ip', the indices
// into s and p respectively where s and/or p ended or where the two strings
// became dissimilar.
int strCmpN(const char *s, const char *p,ptrdiff_t & is,ptrdiff_t & ip);
// Like strCaseCmp, but case-sensitive. Reports, through 'is' and 'ip', the
// indices into s and p respectively where s and/or p ended or where the two
// strings became dissimilar.
int UTF8char(const char * s,bool & UTF8);
// Returns the character that starts at s.
// Sets the UTF8 argument to false if s is not valid UTF-8.
// Note: the parameter named UTF8 is the caller's variable passed by
// reference; inside this prototype it is distinct from the global 'UTF8'
// declared at the end of this header.
int getUTF8char(const char *& s,bool & UTF8);
// Same as UTF8char, but also post-increments s! Because s is passed by
// reference, the caller's pointer is modified.
//
// Disabled declarations, kept for reference:
//const char * Inc(const char *& s);
//size_t Inc(const char * s);
int copyUTF8char(const char * source,char * dest);
// Copies one character from source to dest.
// Returns the number of bytes copied (at most 6).
// Performs no check of the validity of the UTF-8 input!
size_t skipUTF8char(const char * s);
// Performs no check of the validity of the UTF-8 input!
// NOTE(review): the return value is presumably the number of bytes occupied
// by the character starting at s (i.e. how far to advance to skip it) --
// confirm against the implementation.
extern bool UTF8;
// Global flag; only declared here, defined in another translation unit.
// NOTE(review): presumably records whether text is to be treated as UTF-8 --
// its exact meaning and who sets it are not visible in this header; confirm
// against the implementation.
#endif
#endif