Skip to content

Commit c52c7f3

Browse files
committed
Do not expose encodings that do not need to be exposed
1 parent 98e218d commit c52c7f3

File tree

4 files changed

+312
-376
lines changed

4 files changed

+312
-376
lines changed

docs/encoding.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ The key of the comment can be either "encoding" or "coding". The value of the co
102102
* `Windows-31J`
103103
* `Windows-874`
104104

105-
For each of these encodings, prism provides a function for checking if the subsequent bytes form an alphabetic or alphanumeric character.
105+
For each of these encodings, prism provides functions for checking whether the subsequent bytes can be interpreted as a character and, if so, whether that character is alphabetic, alphanumeric, or uppercase.
106106

107107
## Getting notified when the encoding changes
108108

include/prism/encoding.h

Lines changed: 16 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#define PRISM_ENCODING_H
88

99
#include "prism/defines.h"
10+
#include "prism/util/pm_strncasecmp.h"
1011

1112
#include <assert.h>
1213
#include <stdbool.h>
@@ -119,98 +120,21 @@ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
119120
*/
120121
extern const uint8_t pm_encoding_unicode_table[256];
121122

122-
// Below are the encodings that are supported by the parser. They are defined in
123-
// their own files in the src/enc directory.
124-
125-
extern pm_encoding_t pm_encoding_ascii;
126-
extern pm_encoding_t pm_encoding_ascii_8bit;
127-
extern pm_encoding_t pm_encoding_big5;
128-
extern pm_encoding_t pm_encoding_big5_hkscs;
129-
extern pm_encoding_t pm_encoding_big5_uao;
130-
extern pm_encoding_t pm_encoding_cesu_8;
131-
extern pm_encoding_t pm_encoding_cp51932;
132-
extern pm_encoding_t pm_encoding_cp850;
133-
extern pm_encoding_t pm_encoding_cp852;
134-
extern pm_encoding_t pm_encoding_cp855;
135-
extern pm_encoding_t pm_encoding_cp949;
136-
extern pm_encoding_t pm_encoding_cp950;
137-
extern pm_encoding_t pm_encoding_cp951;
138-
extern pm_encoding_t pm_encoding_emacs_mule;
139-
extern pm_encoding_t pm_encoding_euc_jp;
140-
extern pm_encoding_t pm_encoding_euc_jp_ms;
141-
extern pm_encoding_t pm_encoding_euc_jis_2004;
142-
extern pm_encoding_t pm_encoding_euc_kr;
143-
extern pm_encoding_t pm_encoding_euc_tw;
144-
extern pm_encoding_t pm_encoding_gb12345;
145-
extern pm_encoding_t pm_encoding_gb18030;
146-
extern pm_encoding_t pm_encoding_gb1988;
147-
extern pm_encoding_t pm_encoding_gb2312;
148-
extern pm_encoding_t pm_encoding_gbk;
149-
extern pm_encoding_t pm_encoding_ibm437;
150-
extern pm_encoding_t pm_encoding_ibm720;
151-
extern pm_encoding_t pm_encoding_ibm737;
152-
extern pm_encoding_t pm_encoding_ibm775;
153-
extern pm_encoding_t pm_encoding_ibm852;
154-
extern pm_encoding_t pm_encoding_ibm855;
155-
extern pm_encoding_t pm_encoding_ibm857;
156-
extern pm_encoding_t pm_encoding_ibm860;
157-
extern pm_encoding_t pm_encoding_ibm861;
158-
extern pm_encoding_t pm_encoding_ibm862;
159-
extern pm_encoding_t pm_encoding_ibm863;
160-
extern pm_encoding_t pm_encoding_ibm864;
161-
extern pm_encoding_t pm_encoding_ibm865;
162-
extern pm_encoding_t pm_encoding_ibm866;
163-
extern pm_encoding_t pm_encoding_ibm869;
164-
extern pm_encoding_t pm_encoding_iso_8859_1;
165-
extern pm_encoding_t pm_encoding_iso_8859_2;
166-
extern pm_encoding_t pm_encoding_iso_8859_3;
167-
extern pm_encoding_t pm_encoding_iso_8859_4;
168-
extern pm_encoding_t pm_encoding_iso_8859_5;
169-
extern pm_encoding_t pm_encoding_iso_8859_6;
170-
extern pm_encoding_t pm_encoding_iso_8859_7;
171-
extern pm_encoding_t pm_encoding_iso_8859_8;
172-
extern pm_encoding_t pm_encoding_iso_8859_9;
173-
extern pm_encoding_t pm_encoding_iso_8859_10;
174-
extern pm_encoding_t pm_encoding_iso_8859_11;
175-
extern pm_encoding_t pm_encoding_iso_8859_13;
176-
extern pm_encoding_t pm_encoding_iso_8859_14;
177-
extern pm_encoding_t pm_encoding_iso_8859_15;
178-
extern pm_encoding_t pm_encoding_iso_8859_16;
179-
extern pm_encoding_t pm_encoding_koi8_r;
180-
extern pm_encoding_t pm_encoding_koi8_u;
181-
extern pm_encoding_t pm_encoding_mac_cent_euro;
182-
extern pm_encoding_t pm_encoding_mac_croatian;
183-
extern pm_encoding_t pm_encoding_mac_cyrillic;
184-
extern pm_encoding_t pm_encoding_mac_greek;
185-
extern pm_encoding_t pm_encoding_mac_iceland;
186-
extern pm_encoding_t pm_encoding_mac_japanese;
187-
extern pm_encoding_t pm_encoding_mac_roman;
188-
extern pm_encoding_t pm_encoding_mac_romania;
189-
extern pm_encoding_t pm_encoding_mac_thai;
190-
extern pm_encoding_t pm_encoding_mac_turkish;
191-
extern pm_encoding_t pm_encoding_mac_ukraine;
192-
extern pm_encoding_t pm_encoding_shift_jis;
193-
extern pm_encoding_t pm_encoding_sjis_docomo;
194-
extern pm_encoding_t pm_encoding_sjis_kddi;
195-
extern pm_encoding_t pm_encoding_sjis_softbank;
196-
extern pm_encoding_t pm_encoding_stateless_iso_2022_jp;
197-
extern pm_encoding_t pm_encoding_stateless_iso_2022_jp_kddi;
198-
extern pm_encoding_t pm_encoding_tis_620;
123+
/**
124+
* This is the default encoding for Ruby source files. We keep a specific
125+
* visible pointer around to it so that prism.c can compare an encoding against the
126+
* default.
127+
*/
199128
extern pm_encoding_t pm_encoding_utf_8;
200-
extern pm_encoding_t pm_encoding_utf8_mac;
201-
extern pm_encoding_t pm_encoding_utf8_docomo;
202-
extern pm_encoding_t pm_encoding_utf8_kddi;
203-
extern pm_encoding_t pm_encoding_utf8_softbank;
204-
extern pm_encoding_t pm_encoding_windows_1250;
205-
extern pm_encoding_t pm_encoding_windows_1251;
206-
extern pm_encoding_t pm_encoding_windows_1252;
207-
extern pm_encoding_t pm_encoding_windows_1253;
208-
extern pm_encoding_t pm_encoding_windows_1254;
209-
extern pm_encoding_t pm_encoding_windows_1255;
210-
extern pm_encoding_t pm_encoding_windows_1256;
211-
extern pm_encoding_t pm_encoding_windows_1257;
212-
extern pm_encoding_t pm_encoding_windows_1258;
213-
extern pm_encoding_t pm_encoding_windows_31j;
214-
extern pm_encoding_t pm_encoding_windows_874;
129+
130+
/**
131+
* Parse the given name of an encoding and return a pointer to the corresponding
132+
* encoding struct if one can be found, otherwise return NULL.
133+
*
134+
* @param start A pointer to the first byte of the name.
135+
* @param end A pointer to the last byte of the name.
136+
* @returns A pointer to the encoding it finds, otherwise NULL.
137+
*/
138+
pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
215139

216140
#endif

0 commit comments

Comments
 (0)