-
Notifications
You must be signed in to change notification settings - Fork 8
/
lex.c
166 lines (133 loc) · 4.78 KB
/
lex.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>
#include "lex.h"
#include "tokens.h"
#include "string.h"
// Characters that always terminate an identifier.
static const char LEX_DELIMITERS[] = " \n\r\t\f\v{}()\"=";
// Characters that are silently skipped between tokens.
static const char LEX_WHITESPACE[] = " \n\r\t\f\v";

// Return code[i], or '\0' when i lies outside [0, fileLength), so that
// look-ahead and scanning loops can never read past the buffer.
static char lex_charAt(const char *code, int fileLength, int i) {
    return (i >= 0 && i < fileLength) ? code[i] : '\0';
}

// Copy code[start, end) into a freshly allocated, NUL-terminated string.
// Terminates the process if the allocation fails.
static char *lex_copySpan(const char *code, int start, int end) {
    size_t length = (size_t)(end - start);
    char *val = malloc(length + 1);
    if (val == NULL) {
        fprintf(stderr, "Fatal Error: Out of memory while lexing.\n");
        exit(EXIT_FAILURE);
    }
    memcpy(val, &code[start], length);
    val[length] = '\0';
    return val;
}

// True when the char at index i may be part of an identifier: it is not a
// delimiter, not end of input, and does not start "->", "<-" or "//".
static bool lex_isIdentifierChar(const char *code, int fileLength, int i) {
    char c = lex_charAt(code, fileLength, i);
    char next = lex_charAt(code, fileLength, i + 1);
    // explicit '\0' test: strchr would otherwise match the terminator of the set
    if (c == '\0' || strchr(LEX_DELIMITERS, c) != NULL) return false;
    if (c == '-' && next == '>') return false;
    if (c == '<' && next == '-') return false;
    if (c == '/' && next == '/') return false;
    return true;
}

// Lex the first fileLength chars of code into tokens pushed onto p_headToken.
//
// Recognised tokens: ( ) = { } -> <- string literals, ints, floats and
// identifiers; "//" starts a comment that runs to the end of the line.
// A TOK_END token is always pushed last.
//
// Number and identifier text is handed to Token_push in a newly allocated
// buffer that is not freed here. For strings, the raw text between the quotes
// is passed through parseString and the raw copy is freed.
//
// On a syntax error a message is printed to stdout and the process exits
// with status 0 (NOTE(review): a non-zero status would be more conventional;
// kept as-is so existing callers and scripts are unaffected).
//
// Returns the token counter, which starts at 1 and is incremented once per
// pushed token, TOK_END included.
int lex(Token *p_headToken, char *code, int fileLength) {
    // token count
    int tokenCount = 1;
    // line number
    int lineNumber = 1;
    int i = 0;
    while (i < fileLength) {
        char c = code[i];
        // bounded one-char look-ahead ('\0' at end of input)
        char next = lex_charAt(code, fileLength, i + 1);
        if (c == '\n') lineNumber++;
        if (c == '/' && next == '/') {
            // comments: skip to the end of the line (bounds checked first)
            while (i < fileLength && code[i] != '\n') i++;
            // the trailing i++ steps over the newline, so count it here,
            // but only if the comment really ended with one
            if (i < fileLength) lineNumber++;
        } else if (c == '(') {
            Token_push(p_headToken, NULL, TOK_APPLYOPEN, lineNumber);
            tokenCount++;
        } else if (c == ')') {
            Token_push(p_headToken, NULL, TOK_APPLYCLOSE, lineNumber);
            tokenCount++;
        } else if (c == '=') {
            Token_push(p_headToken, NULL, TOK_ASSIGNMENT, lineNumber);
            tokenCount++;
        } else if (c == '{') {
            Token_push(p_headToken, NULL, TOK_FUNCOPEN, lineNumber);
            tokenCount++;
        } else if (c == '}') {
            Token_push(p_headToken, NULL, TOK_FUNCCLOSE, lineNumber);
            tokenCount++;
        } else if (c == '-' && next == '>') {
            Token_push(p_headToken, NULL, TOK_ARROW, lineNumber);
            // consume the second char of the two-char token
            i++;
            tokenCount++;
        } else if (c == '<' && next == '-') {
            Token_push(p_headToken, NULL, TOK_RETURN, lineNumber);
            i++;
            tokenCount++;
        } else if (c == '"') {
            // go to first char after the opening quote and remember it
            i++;
            int stringStart = i;
            // true when the previous char was an unescaped backslash, so
            // that \" stays inside the string while \\" closes it
            bool escaped = false;
            for (;;) {
                // every char, including the first, is checked before advancing
                char s = lex_charAt(code, fileLength, i);
                if (s == '\n') {
                    printf("Syntax Error @ Line %i: Unexpected new line before string closed.\n", lineNumber);
                    exit(0);
                }
                if (s == '\0') {
                    printf("Syntax Error @ Line %i: Unexpected end of file before string closed.\n", lineNumber);
                    exit(0);
                }
                if (s == '"' && !escaped) break;
                escaped = (s == '\\' && !escaped);
                i++;
            }
            // i now sits on the closing quote; get substring and add token
            char *val = lex_copySpan(code, stringStart, i);
            Token_push(p_headToken, parseString(val), TOK_STRING, lineNumber);
            free(val);
            tokenCount++;
        } else if (isdigit((unsigned char) c) || (c == '-' && isdigit((unsigned char) next))) {
            // record first char in number (digit or leading minus)
            int numStart = i;
            // float flag
            bool isFloat = false;
            i++;
            // advance until char is not a valid number char
            for (;;) {
                char d = lex_charAt(code, fileLength, i);
                if (d == '.') {
                    if (isFloat) {
                        // case where we saw a point before
                        printf("Syntax Error @ Line %i: Multiple decimal points in single number.\n", lineNumber);
                        exit(0);
                    }
                    isFloat = true;
                } else if (!isdigit((unsigned char) d)) {
                    break;
                }
                i++;
            }
            // get substring and add token
            char *val = lex_copySpan(code, numStart, i);
            if (isFloat) Token_push(p_headToken, val, TOK_FLOAT, lineNumber);
            else Token_push(p_headToken, val, TOK_INT, lineNumber);
            tokenCount++;
            // go back to last char of number; the trailing i++ moves past it
            i--;
        } else if (lex_isIdentifierChar(code, fileLength, i)) {
            // case of identifier
            int identifierStart = i;
            while (lex_isIdentifierChar(code, fileLength, i)) i++;
            // get substring and add token
            char *val = lex_copySpan(code, identifierStart, i);
            Token_push(p_headToken, val, TOK_IDENTIFIER, lineNumber);
            tokenCount++;
            // go back to last char of identifier; the trailing i++ moves past it
            i--;
        } else if (c != '\0' && strchr(LEX_WHITESPACE, c) == NULL) {
            // defensive: anything that is neither a token nor whitespace
            printf("Syntax Error @ Line %i: Unexpected char \"%c\".\n", lineNumber, c);
            exit(0);
        }
        i++;
    }
    Token_push(p_headToken, NULL, TOK_END, lineNumber);
    tokenCount++;
    return tokenCount;
}