-
Notifications
You must be signed in to change notification settings - Fork 746
/
SC_TextUtils.cpp
128 lines (117 loc) · 3.33 KB
/
SC_TextUtils.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/*
SuperCollider real time audio synthesis system
Copyright (c) 2002 James McCartney. All rights reserved.
http://www.audiosynth.com
Copyright (c) 2012 Tim Blechmann. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <cstring>
// Brace characters spelled as macros so that literal '{' / '}' do not appear
// inside the function body below.
#define OPENCURLY '{'
#define CLOSCURLY '}'

/*
 * Strips RTF markup from a NUL-terminated buffer, in place.
 *
 * txt:     buffer holding the document; overwritten with the plain text.
 * returns: length of the resulting text (excluding the NUL terminator), or 0
 *          when txt is NULL or does not start with "{\rtf" (in which case the
 *          buffer is left untouched).
 *
 * Conversion rules:
 *   - unescaped braces and newlines are dropped;
 *   - the \fonttbl, \filetbl, \colortbl and \stylesheet groups are skipped
 *     up to their matching closing brace;
 *   - \'a0 / \'A0 (non-breaking space) becomes a plain space;
 *   - a backslash followed by a brace, backslash, tab or newline yields that character;
 *   - control words starting with "tab" / "par" yield '\t' / '\n';
 *   - every other control word is dropped, up to and including the next
 *     space, or up to (not including) the next backslash.
 *
 * NOTE: "tab"/"par" are prefix matches, so e.g. \pard also emits a newline.
 *
 * The output is never longer than the input, and the write position always
 * trails the read position, so the in-place rewrite never clobbers unread data.
 */
int rtf2txt(char* txt)
{
    int rdpos = 0, wrpos = 0;
    char c;

    if (!txt)
        return 0;
    if (strncmp(txt, "{\\rtf", 5) != 0)
        return 0; // OK, not an RTF file

    for (;;) {
        // Tentatively copy the next character; wrpos only advances when the
        // character is kept, so dropped characters are simply overwritten.
        switch (txt[wrpos] = txt[rdpos++]) {
        case 0:
            // The terminator has just been written at txt[wrpos].
            return wrpos;

        case OPENCURLY:
        case CLOSCURLY:
        case '\n':
            break;

        case '\\':
            if (strncmp(txt + rdpos, "fonttbl", 7) == 0
                || strncmp(txt + rdpos, "filetbl", 7) == 0
                || strncmp(txt + rdpos, "colortbl", 8) == 0
                || strncmp(txt + rdpos, "stylesheet", 10) == 0) {
                // Skip the rest of the enclosing group. Stop ON the NUL (do
                // not step over it) so a truncated document cannot make the
                // outer loop read past the end of the buffer.
                int level = 1;
                while (level && (c = txt[rdpos]) != 0) {
                    rdpos++;
                    if (c == OPENCURLY)
                        level++;
                    else if (c == CLOSCURLY)
                        level--;
                }
            } else if (strncmp(txt + rdpos, "\'a0", 3) == 0
                       || strncmp(txt + rdpos, "\'A0", 3) == 0) {
                // Non-breaking space.
                txt[wrpos++] = ' ';
                rdpos += 3;
            } else if (txt[rdpos] == CLOSCURLY || txt[rdpos] == OPENCURLY
                       || txt[rdpos] == '\\' || txt[rdpos] == '\t' || txt[rdpos] == '\n') {
                // Escaped literal character.
                txt[wrpos++] = txt[rdpos++];
            } else {
                if (strncmp(txt + rdpos, "tab", 3) == 0)
                    txt[wrpos++] = '\t';
                if (strncmp(txt + rdpos, "par", 3) == 0)
                    txt[wrpos++] = '\n';
                // Discard the control word: consume a terminating space, but
                // leave a following backslash or the NUL terminator for the
                // outer loop to handle.
                while ((c = txt[rdpos]) != 0 && c != '\\') {
                    rdpos++;
                    if (c == ' ')
                        break;
                }
            }
            break;

        default:
            wrpos++;
            break;
        }
    }
}
// Returns true when s begins with "<body", compared ASCII case-insensitively.
// Hand-rolled rather than using tolower() so the match does not depend on the
// current locale. Never reads past the NUL terminator of s.
static bool html_at_body_tag(const char* s)
{
    static const char tag[] = "<body";
    for (int i = 0; tag[i] != 0; ++i) {
        char ch = s[i];
        if (ch >= 'A' && ch <= 'Z')
            ch = (char)(ch - 'A' + 'a');
        if (ch != tag[i])
            return false;
    }
    return true;
}

/*
 * Strips HTML down to plain text, in place, in a fairly simple-minded way.
 *
 * txt:     NUL-terminated buffer holding the document; overwritten with the
 *          plain text and NUL-terminated again.
 * returns: length of the resulting text (excluding the NUL terminator), or 0
 *          when txt is NULL.
 *
 * If the document contains a "<body" tag (in any letter case), processing
 * starts there so that everything before it (e.g. the head) is discarded;
 * otherwise it starts at the beginning. Everything from a '<' up to and
 * including the next '>' is dropped; all other characters are kept as they are.
 * Character entities such as "&amp;" are NOT decoded; they are copied through
 * verbatim.
 */
int html2txt(char* txt)
{
    if (!txt)
        return 0;

    int rdpos = 0, wrpos = 0;
    bool intag = false;

    // First check if we can find a BODY tag to start at.
    for (int i = 0; txt[i] != 0; ++i) {
        if (html_at_body_tag(txt + i)) {
            rdpos = i;
            break;
        }
    }

    // Now copy the non-tag text towards the front of the buffer. wrpos never
    // overtakes rdpos, so unread input is never overwritten.
    while (txt[rdpos] != 0) {
        const char ch = txt[rdpos++];
        if (intag) {
            if (ch == '>')
                intag = false;
        } else if (ch == '<') {
            intag = true;
        } else {
            txt[wrpos++] = ch;
        }
    }

    txt[wrpos] = 0;
    return wrpos;
}