-
Notifications
You must be signed in to change notification settings - Fork 1
/
Words.java
executable file
·324 lines (276 loc) · 9.69 KB
/
Words.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
/*
* Words.java
* Adventure Game Interpreter Word Package
*
* Created by Dr. Z
* Copyright (c) 2001 Dr. Z. All rights reserved.
*/
package com.sierra.agi.word;
import java.io.*;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.Vector;
import com.sierra.agi.io.ByteCasterStream;
import com.sierra.agi.io.IOUtils;
/**
* Stores Words of the game.
* <P>
* <B>Word File Format</B><BR>
* The words.tok file is used to store the games vocabulary, i.e. the dictionary
* of words that the interpreter understands. These words are stored along with
* a word number which is used by the said test commands as argument values for
* that command. Many words can have the same word number which basically means
* that these words are synonyms for each other as far as the game is concerned.
* </P><P>
* The file itself is both packed and encrypted. Words are stored in alphabetic
* order which is required for the compression method to work.
* </P><P>
* <B>The first section</B><BR>
* At the start of the file is a section that is always 26x2 bytes long. This
* section contains a two byte entry for every letter of the alphabet. It is
* essentially an index which gives the starting location of the words beginning
* with the corresponding letter.
* </P><P>
* <TABLE BORDER=1>
* <THEAD><TR><TD>Byte</TD><TD>Meaning</TD></TR></THEAD>
* <TBODY>
* <TR><TD>0-1</TD><TD>Hi and then Lo byte for 'A' offset</TD></TR>
* <TR><TD COLSPAN=2>...</TD></TR>
* <TR><TD>50-51</TD><TD>Hi and then Lo byte for 'Z' offset</TD></TR>
* <TR><TD>52</TD><TD>Words section</TD></TR>
* </TBODY></TABLE>
* </P><P>
* The important thing to note from the above is that the 16 bit words are
* big-endian (HI-LO). The little endian (LO-HI) byte order convention used
* everywhere else in the AGI system is not used here. For example, 0x00 and
* 0x24 means 0x0024, not 0x2400. Big endian words are used later on for word
* numbers as well.
* </P><P>
* All offsets are taken from the beginning of the file. If no words start with
* a particular letter, then the offset in that field will be 0x0000.
* </P><P>
* <B>The words section</B><BR>
* Words are stored in a compressed way in which each word will use part of the
* previous word as a starting point for itself. For example, "forearm" and
* "forest" both have the prefix "fore". If "forest" comes immediately after
* "forearm", then the data for "forest" will specify that it will start with
* the first four characters of the previous word. Whether this method is used
* for further confusion for would be cheaters or whether it is to help in the
* searching process, I don't yet know, but it most certainly isn't purely for
* compression since the words.tok file is usually quite small and no attempt is
* made to compress any of the larger files (before AGI version 3 that is).
* </P><P>
* <TABLE BORDER=1>
* <THEAD><TR><TD>Byte</TD><TD>Meaning</TD></TR></THEAD>
* <TBODY>
* <TR><TD>0</TD><TD>Number of characters to include from start of previous word</TD></TR>
* <TR><TD>1</TD><TD>Char 1 (xor 0x7F gives the ASCII code for the character)</TD></TR>
* <TR><TD>2</TD><TD>Char 2</TD></TR>
* <TR><TD COLSPAN=2>...</TD></TR>
* <TR><TD>n</TD><TD>Last char</TD></TR>
* <TR><TD>n + 1</TD><TD>Wordnum (HI-LO) -- see below</TD></TR>
* </TBODY></TABLE>
* </P><P>
* If a word does not use any part of the previous word, then the prefix field
* is equal to zero. This will always be the case for the first word starting
* with a new letter. There is nothing to indicate where the words starting with
* one letter finish and the next set starts, in fact the words section is just
* one continuous chain of words conforming to the above format. The index
* section mentioned earlier is not needed to read the words in which suggests
* that the whole words.tok format is organised to find words quickly.
* </P><P>
* <B>A note about word numbers</B><BR>
* Some word numbers have special meaning. They are listed below:
* </P><P>
* <TABLE BORDER=1>
* <THEAD><TR><TD>Word #</TD><TD>Meaning</TD></TR></THEAD>
* <TBODY>
* <TR><TD>0</TD><TD>Words are ignored (e.g. the, at)</TD></TR>
* <TR><TD>1</TD><TD>Anyword</TD></TR>
* <TR><TD>9999</TD><TD>ROL (Rest Of Line) -- it does not matter what the rest of the input list is</TD></TR>
* </TBODY></TABLE>
* </P>
* @author Dr. Z, Lance Ewing (Documentation)
* @version 0.00.00.01
*/
public class Words extends Object implements WordsProvider
{
    /** Size in bytes of the A-Z offset index at the start of the word file (26 entries of 2 bytes). */
    private static final int INDEX_SIZE = 52;

    /** Word number of vocabulary entries that {@link #parse(String)} drops from its result. */
    private static final int WORD_IGNORED = 0;

    /** Word number that makes {@link #parse(String)} stop and discard the rest of the input. */
    private static final int WORD_REST_OF_LINE = 9999;

    /** Word number assigned by {@link #parse(String)} to tokens that are not in the vocabulary. */
    private static final int WORD_UNKNOWN = -1;

    /** Lookup from word text to its {@link Word}. */
    protected Hashtable<String, Word> wordHash = new Hashtable<String, Word>(800);

    /** Lookup from word number to a {@link Word} carrying that number. */
    protected HashMap<Integer, Word> wordNumToWordMap = new HashMap<Integer, Word>();

    /** Creates a new, empty Word container. */
    public Words()
    {
    }

    /**
     * Loads the vocabulary from an AGI word table and returns this container.
     *
     * @param stream Stream from where to read the words.
     * @return This instance, to allow call chaining.
     * @throws IOException If reading fails or the table is malformed.
     */
    public Words loadWords(InputStream stream) throws IOException
    {
        loadWordTable(stream);
        return this;
    }

    /**
     * Reads an AGI word table.
     * <P>
     * The letter index at the start of the stream is skipped; the words are
     * then decoded one after another until the stream ends or a zero byte is
     * found where a character was expected. Every decoded word is passed to
     * {@link #addWord(int, String)}.
     * </P>
     *
     * @param stream Stream from where to read the words.
     * @return Returns the number of words read (words rejected by
     *         {@link #addWord(int, String)} are counted as well).
     * @throws IOException If reading fails, or if a word asks to reuse more
     *         characters than the previous word contains.
     */
    protected int loadWordTable(InputStream stream) throws IOException
    {
        // Both objects read from the same underlying stream: characters are
        // read byte by byte, word numbers through the caster.
        ByteCasterStream bstream = new ByteCasterStream(stream);
        String previous = null;
        int wordCount = 0;

        IOUtils.skip(stream, INDEX_SIZE);

        while (true)
        {
            int prefixLength = stream.read();

            if (prefixLength < 0)
            {
                break; // End of stream.
            }

            StringBuilder text = new StringBuilder();

            if (prefixLength > 0)
            {
                int available = (previous == null) ? 0 : previous.length();

                // A malformed table would otherwise surface as a
                // NullPointerException or StringIndexOutOfBoundsException.
                if (prefixLength > available)
                {
                    throw new IOException("Corrupt word table: word " + wordCount +
                                          " reuses " + prefixLength +
                                          " characters but the previous word has only " + available);
                }

                text.append(previous, 0, prefixLength);
            }

            int b;

            while ((b = stream.read()) > 0)
            {
                // Characters are stored xor 0x7F; only the low 7 bits are kept.
                text.append((char)((b ^ 0x7F) & 0x7F));

                // A byte of 0x7F or above is the last character of the word.
                if (b >= 0x7F)
                {
                    break;
                }
            }

            if (b <= 0)
            {
                // End of stream or zero byte inside a word: stop without
                // storing the partially decoded word.
                break;
            }

            int wordNum = bstream.hiloReadUnsignedShort();

            previous = text.toString();
            addWord(wordNum, previous);
            wordCount++;
        }

        return wordCount;
    }

    /**
     * Registers a word in both lookup tables.
     * <P>
     * When several words share the same number, the number lookup keeps the
     * word that was added last.
     * </P>
     *
     * @param wordNum Number of the word.
     * @param word    Text of the word.
     * @return <CODE>true</CODE> if the word was added, <CODE>false</CODE> if a
     *         word with the same text was already present.
     */
    protected boolean addWord(int wordNum, String word)
    {
        if (wordHash.get(word) != null)
        {
            return false;
        }

        Word w = new Word();

        w.number = wordNum;
        w.text   = word;

        // Map of word text to the Word object.
        wordHash.put(word, w);

        // Map of word number to the Word object.
        wordNumToWordMap.put(wordNum, w);
        return true;
    }

    /**
     * Finds a word by its number.
     *
     * @param wordNum Number of the word.
     * @return The word registered for that number, or <CODE>null</CODE> if none.
     */
    public Word getWordByNumber(int wordNum)
    {
        return wordNumToWordMap.get(wordNum);
    }

    /**
     * Finds a word by its text.
     *
     * @param word Text to look up (exact match).
     * @return The matching word, or <CODE>null</CODE> if none.
     */
    public Word findWord(String word)
    {
        return wordHash.get(word);
    }

    /**
     * Returns the number of distinct word texts stored.
     *
     * @return Number of words.
     */
    public int getWordCount()
    {
        return wordHash.size();
    }

    /**
     * Enumerates every stored {@link Word}.
     *
     * @return Enumeration of the words.
     */
    public Enumeration words()
    {
        return wordHash.elements();
    }

    /**
     * Trims the string and collapses every run of space characters into a
     * single space.
     *
     * @param inputString String to normalize.
     * @return The normalized string.
     */
    protected static String removeSpaces(String inputString)
    {
        StringTokenizer tokens = new StringTokenizer(inputString.trim(), " ");
        StringBuilder   buff   = new StringBuilder(inputString.length());

        while (tokens.hasMoreTokens())
        {
            buff.append(tokens.nextToken());

            if (tokens.hasMoreTokens())
            {
                buff.append(' ');
            }
        }

        return buff.toString();
    }

    /**
     * Finds the next space at or after the given position.
     *
     * @param str   String to search.
     * @param begin Index where the search starts.
     * @return Index of the space, or the length of the string if there is none.
     */
    protected static int findChar(String str, int begin)
    {
        int ch = str.indexOf(' ', begin);

        return (ch < 0) ? str.length() : ch;
    }

    /**
     * Returns what follows the separator located at <CODE>end</CODE>, or an
     * empty string when <CODE>end</CODE> is the end of the string.
     */
    private static String skipPast(String str, int end)
    {
        return (end + 1 <= str.length()) ? str.substring(end + 1) : "";
    }

    /**
     * Splits a line of player input into vocabulary words.
     * <P>
     * The input is lower-cased and its spaces are normalized. Starting from
     * the head of the remaining input, the first token is looked up, then the
     * first two tokens, and so on, until a vocabulary entry matches. The match
     * is consumed and, unless its number is 0, added to the result. If no
     * entry matches, the first token alone is added as a word with number -1
     * and consumed. A word with number 9999 ends the parsing immediately and
     * is not added to the result.
     * </P>
     *
     * @param inputString Line typed by the player; <CODE>null</CODE> is
     *                    treated as an empty line.
     * @return Vector of {@link Word} objects, in input order.
     */
    public Vector parse(String inputString)
    {
        Vector<Word> result = new Vector<Word>(5, 2);

        if (inputString == null)
        {
            return result;
        }

        // Fixed locale: with the default locale, lower-casing depends on the
        // user's settings (e.g. 'I' under a Turkish locale) and lookups of
        // the vocabulary words would fail.
        String remaining = removeSpaces(inputString.toLowerCase(Locale.ENGLISH));
        int    begin     = 0;

        while (remaining.length() > 0)
        {
            int  end  = findChar(remaining, begin);
            Word word = findWord(remaining.substring(0, end));

            if (word != null)
            {
                remaining = skipPast(remaining, end);
                begin     = 0;

                if (word.number == WORD_REST_OF_LINE)
                {
                    return result;
                }

                if (word.number != WORD_IGNORED)
                {
                    result.add(word);
                }

                continue;
            }

            if (end >= remaining.length())
            {
                // No phrase starting at the head of the input is known:
                // report the first token as unknown and move past it.
                int  firstEnd = findChar(remaining, 0);
                Word unknown  = new Word();

                unknown.number = WORD_UNKNOWN;
                unknown.text   = remaining.substring(0, firstEnd);
                result.add(unknown);

                remaining = skipPast(remaining, firstEnd);
                begin     = 0;
                continue;
            }

            // Extend the candidate phrase by one more token.
            begin = end + 1;
        }

        return result;
    }
}