-
Notifications
You must be signed in to change notification settings - Fork 2
/
scan.c
215 lines (204 loc) · 6.61 KB
/
scan.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
/****************************************************/
/* File: scan.c */
/* The scanner implementation for the TINY compiler */
/* Compiler Construction: Principles and Practice */
/* Kenneth C. Louden */
/****************************************************/
#include "globals.h"
#include "util.h"
#include "scan.h"
/* States of the scanner DFA, represented as an enumeration.
   START is the initial state of every token scan and DONE is the
   terminal state that ends the scanning loop. */
typedef enum
{
  START,
  INASSIGN,
  INCOMMENT,
  INNUM,
  INID,
  DONE
} StateType;
/* lexeme of the most recently scanned token (identifier, reserved
   word, number or symbol); filled in by getToken */
char tokenString[MAXTOKENLEN+1]; // +1 leaves room for the terminating '\0'
/* BUFLEN = length of the input buffer for
source code lines */
// buffer capacity: 256 bytes
#define BUFLEN 256
// file-scope statics: scanner state private to this translation unit
static char lineBuf[BUFLEN]; /* holds the current line */
static int linepos = 0; /* index of the next unread character in lineBuf */
static int bufsize = 0; /* number of characters currently held in lineBuf */
static int EOF_flag = FALSE; /* set once input is exhausted; makes ungetNextChar a no-op */
/* getNextChar returns the next character of the source text
   (blanks and newlines included), reading a new line into lineBuf
   when the current one is exhausted.

   Returns the character as an unsigned char converted to int, so a
   byte >= 0x80 can never be negative and can never be mistaken for
   EOF; this also keeps the value valid as an argument to the
   <ctype.h> classification functions.
   Returns EOF and sets EOF_flag when no more input is available.

   Side effects: advances linepos, may refill lineBuf/bufsize,
   increments lineno for each line read, and echoes the line to
   `listing` when EchoSource is set. */
static int getNextChar(void)
{
  /* Fast path: characters remain in the current line. */
  if (linepos < bufsize)
    return (unsigned char) lineBuf[linepos++];

  /* Buffer exhausted. Count the upcoming line before echoing it, but
     stop counting once end of input has already been reported so that
     repeated calls at EOF do not keep inflating lineno. */
  if (!EOF_flag)
    lineno++;

  /* fgets itself reserves one byte for the terminating '\0', so the
     full buffer size is passed. */
  if (fgets(lineBuf, BUFLEN, source) == NULL)
  {
    EOF_flag = TRUE;
    return EOF;
  }

  if (EchoSource)
    fprintf(listing,"%4d: %s",lineno,lineBuf);
  bufsize = (int) strlen(lineBuf);
  linepos = 0; /* restart at the beginning of the new line */
  return (unsigned char) lineBuf[linepos++];
}
/* ungetNextChar backtracks one character in lineBuf.

   The only condition checked is end of input: once EOF_flag is set
   the call does nothing. It does not check that linepos is greater
   than zero; callers must only push back a character they have just
   obtained from the current line. */
static void ungetNextChar(void)
{
  if (EOF_flag)
    return;
  linepos--;
}
/* Lookup table of reserved words.
   Maps each reserved-word spelling to its token type; the TokenType
   values themselves are declared outside this file. */
static struct
{
  char* str;
  TokenType tok;
} reservedWords[MAXRESERVED] =
{
  {"if",     IF},
  {"then",   THEN},
  {"else",   ELSE},
  {"end",    END},
  {"repeat", REPEAT},
  {"until",  UNTIL},
  {"read",   READ},
  {"write",  WRITE}
};
/* reservedLookup checks whether the string s is a reserved word.
   Performs a linear search over reservedWords and returns the
   matching entry's token type, or ID (plain identifier) when no
   entry matches. */
static TokenType reservedLookup (char * s)
{
  int idx = 0;
  while (idx < MAXRESERVED)
  {
    if (strcmp(s, reservedWords[idx].str) == 0)
      return reservedWords[idx].tok;
    idx++;
  }
  return ID; /* no reserved word matched */
}
/****************************************/
/* the primary function of the scanner */
/****************************************/
/* function getToken returns the
* next token in source file
*/
TokenType getToken(void)
{ /* index for storing into tokenString */
int tokenStringIndex = 0;
/* holds current token to be returned */
TokenType currentToken;
/* current state - always begins at START */
StateType state = START;
/* flag to indicate save to tokenString */
int save;
while (state != DONE)
{ int c = getNextChar();
save = TRUE;
switch (state)
{ case START:
if (isdigit(c))
state = INNUM;
else if (isalpha(c))
state = INID;
else if (c == ':')
state = INASSIGN;
else if ((c == ' ') || (c == '\t') || (c == '\n'))
save = FALSE; // 空格字符放弃,仍留在START状态
else if (c == '{')
{ save = FALSE; // 注释字符放弃
state = INCOMMENT; //但跳到INCOMMENT状态
}
else // 下面是可以直接到DONE的情况
{ state = DONE;
switch (c)
{ case EOF:
save = FALSE; // 文件结尾放弃
currentToken = ENDFILE;
break;
case '=':
currentToken = EQ;
break;
case '<':
currentToken = LT;
break;
case '+':
currentToken = PLUS;
break;
case '-':
currentToken = MINUS;
break;
case '*':
currentToken = TIMES;
break;
case '/':
currentToken = OVER;
break;
case '(':
currentToken = LPAREN;
break;
case ')':
currentToken = RPAREN;
break;
case ';':
currentToken = SEMI;
break;
default:
currentToken = ERROR; // 如果都不匹配,是错误
break;
}
}
break;
case INCOMMENT:
save = FALSE; // 注释括号中的字符放弃
if (c == EOF) // 注释了一半遇到文件结束符,可以正常结束
{ state = DONE;
currentToken = ENDFILE;
}
else if (c == '}') state = START; // 注释结束,重新开始
break;
case INASSIGN:
state = DONE;
if (c == '=')
currentToken = ASSIGN;
else
{ /* backup in the input */
ungetNextChar();
save = FALSE;
currentToken = ERROR; // 出现单独的冒号是错误
}
break;
case INNUM:
if (!isdigit(c))
{ /* backup in the input */
ungetNextChar();
save = FALSE;
state = DONE;
currentToken = NUM;
}
break;
case INID:
if (!isalpha(c))
{ /* backup in the input */
ungetNextChar();
save = FALSE;
state = DONE;
currentToken = ID;
}
break;
case DONE: // 错误处理,状态DONE/ERROR都会匹配到这里
// 出错处理步骤:(1)打印信息;(2)返回类型为ERROR
default: /* should never happen */
fprintf(listing,"Scanner Bug: state= %d\n",state);
state = DONE;
currentToken = ERROR;
break;
}
// 把字符保存起来
if ((save) && (tokenStringIndex <= MAXTOKENLEN))
tokenString[tokenStringIndex++] = (char) c;
// 如果是结束状态,加上结束符形成一个完整的字符串
if (state == DONE)
{ tokenString[tokenStringIndex] = '\0';
// 对保留字问题做处理
if (currentToken == ID)
currentToken = reservedLookup(tokenString);
}
}
// 打印调试信息
if (TraceScan) {
fprintf(listing,"\t%d: ",lineno);
printToken(currentToken,tokenString);
}
// 返回值是token的类型,但ID/NUM需要得到token的值,可以另外到全局变量tokenString[]处取
return currentToken;
} /* end getToken */