Skip to content

Commit

Permalink
Handle Unicode line separators when parsing C# files
Browse files Browse the repository at this point in the history
  • Loading branch information
marek-safar committed Jun 28, 2013
1 parent 83c7a43 commit 0f25b95
Showing 1 changed file with 38 additions and 41 deletions.
79 changes: 38 additions & 41 deletions mcs/mcs/cs-tokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ public enum PreprocessorDirective
public const int EvalCompilationUnitParserCharacter = 0x100001;
public const int EvalUsingDeclarationsParserCharacter = 0x100002;
public const int DocumentationXref = 0x100003;

const int UnicodeLS = 0x2028;
const int UnicodePS = 0x2029;

//
// XML documentation buffer. The save point is used to divide
Expand Down Expand Up @@ -1813,7 +1816,7 @@ int get_char ()

x = '\n';
advance_line ();
} else if (x == '\n') {
} else if (x == '\n' || x == UnicodeLS || x == UnicodePS) {
advance_line ();
} else {
col++;
Expand Down Expand Up @@ -1849,7 +1852,7 @@ public void putback (int c)
throw new InternalErrorException (string.Format ("Secondary putback [{0}] putting back [{1}] is not allowed", (char)putback_char, (char) c), Location);
}

if (c == '\n' || col == 0) {
if (c == '\n' || c == UnicodeLS || c == UnicodePS || col == 0) {
// It won't happen though.
line--;
ref_line--;
Expand Down Expand Up @@ -1931,7 +1934,7 @@ PreprocessorDirective get_cmd_arg (out string arg)
int has_identifier_argument = (int)(cmd & PreprocessorDirective.RequiresArgument);
int pos = 0;

while (c != -1 && c != '\n') {
while (c != -1 && c != '\n' && c != UnicodeLS && c != UnicodePS) {
if (c == '\\' && has_identifier_argument >= 0) {
if (has_identifier_argument != 0) {
has_identifier_argument = 1;
Expand All @@ -1958,10 +1961,7 @@ PreprocessorDirective get_cmd_arg (out string arg)
// Eat single-line comments
//
get_char ();
do {
c = get_char ();
} while (c != -1 && c != '\n');

ReadToEndOfLine ();
break;
}

Expand Down Expand Up @@ -2023,10 +2023,7 @@ bool PreProcessLine ()
//
// Eat any remaining characters to continue parsing on next line
//
while (c != -1 && c != '\n') {
c = get_char ();
}

ReadToEndOfLine ();
return false;
}

Expand All @@ -2035,10 +2032,7 @@ bool PreProcessLine ()
//
// Eat any remaining characters to continue parsing on next line
//
while (c != -1 && c != '\n') {
c = get_char ();
}

ReadToEndOfLine ();
return new_line != 0;
}

Expand All @@ -2052,13 +2046,11 @@ bool PreProcessLine ()
c = 0;
}

if (c != '\n' && c != '/' && c != '"') {
if (c != '\n' && c != '/' && c != '"' && c != UnicodeLS && c != UnicodePS) {
//
// Eat any remaining characters to continue parsing on next line
//
while (c != -1 && c != '\n') {
c = get_char ();
}
ReadToEndOfLine ();

Report.Error (1578, loc, "Filename, single-line comment or end-of-line expected");
return true;
Expand All @@ -2074,16 +2066,15 @@ bool PreProcessLine ()
}
}

if (c == '\n') {
if (c == '\n' || c == UnicodeLS || c == UnicodePS) {

} else if (c == '/') {
ReadSingleLineComment ();
} else {
//
// Eat any remaining characters to continue parsing on next line
//
while (c != -1 && c != '\n') {
c = get_char ();
}
ReadToEndOfLine ();

Error_EndLineExpected ();
return true;
Expand Down Expand Up @@ -2318,7 +2309,7 @@ int TokenizeNumber (int value)
string TokenizeFileName (ref int c)
{
var string_builder = new StringBuilder ();
while (c != -1 && c != '\n') {
while (c != -1 && c != '\n' && c != UnicodeLS && c != UnicodePS) {
c = get_char ();
if (c == '"') {
c = get_char ();
Expand Down Expand Up @@ -2366,25 +2357,28 @@ int TokenizePragmaNumber (ref int c)
Report.Warning (1692, 1, Location, "Invalid number");

// Read everything till the end of the line or file
do {
c = get_char ();
} while (c != -1 && c != '\n');
ReadToEndOfLine ();
}
}

return number;
}

//
// Discards input through the next line terminator: keeps calling
// get_char () until it returns -1, '\n', UnicodeLS or UnicodePS.
// The terminating character itself is consumed and not returned.
//
// At least one character is always read before the first check.
// NOTE(review): a caller that already holds a line terminator in its
// own `c' would therefore skip the following line as well - confirm
// that every call site accounts for this.
//
void ReadToEndOfLine ()
{
	for (;;) {
		int ch = get_char ();

		if (ch == -1 || ch == '\n')
			return;

		if (ch == UnicodeLS || ch == UnicodePS)
			return;
	}
}

void ReadSingleLineComment ()
{
if (peek_char () != '/')
Report.Warning (1696, 1, Location, "Single-line comment or end-of-line expected");

// Read everything till the end of the line or file
int c;
do {
c = get_char ();
} while (c != -1 && c != '\n');
ReadToEndOfLine ();
}

/// <summary>
Expand All @@ -2410,7 +2404,7 @@ void ParsePragmaDirective (string arg)

var loc = Location;

if (c == '\n' || c == '/') {
if (c == '\n' || c == '/' || c == UnicodeLS || c == UnicodePS) {
if (c == '/')
ReadSingleLineComment ();

Expand All @@ -2436,7 +2430,7 @@ void ParsePragmaDirective (string arg)
Report.RegisterWarningRegion (loc).WarningEnable (loc, code, context);
}
}
} while (code >= 0 && c != '\n' && c != -1);
} while (code >= 0 && c != '\n' && c != -1 && c != UnicodeLS && c != UnicodePS);
}

return;
Expand All @@ -2446,8 +2440,7 @@ void ParsePragmaDirective (string arg)
Report.Warning (1634, 1, Location, "Expected disable or restore");

// Eat any remaining characters on the line
while (c != '\n' && c != -1)
c = get_char ();
ReadToEndOfLine ();

return;
}
Expand Down Expand Up @@ -2927,7 +2920,7 @@ private int consume_string (bool quoted)
return Token.LITERAL;
}

if (c == '\n') {
if (c == '\n' || c == UnicodeLS || c == UnicodePS) {
if (!quoted) {
Report.Error (1010, Location, "Newline in constant");

Expand Down Expand Up @@ -3150,6 +3143,8 @@ public int xtoken ()
case '\v':
case '\r':
case '\n':
case UnicodeLS:
case UnicodePS:
case '/':
next = peek_token ();
if (next == Token.COMMA || next == Token.CLOSE_BRACKET)
Expand Down Expand Up @@ -3368,7 +3363,7 @@ public int xtoken ()
}
}

while ((d = get_char ()) != -1 && d != '\n');
ReadToEndOfLine ();

any_token_seen |= tokens_seen;
tokens_seen = false;
Expand Down Expand Up @@ -3406,7 +3401,7 @@ public int xtoken ()
if (docAppend)
xml_comment_buffer.Append ((char) d);

if (d == '\n'){
if (d == '\n' || d == UnicodeLS || d == UnicodePS){
any_token_seen |= tokens_seen;
tokens_seen = false;
//
Expand Down Expand Up @@ -3456,6 +3451,8 @@ public int xtoken ()
return is_number (c, false);

case '\n': // white space
case UnicodeLS:
case UnicodePS:
any_token_seen |= tokens_seen;
tokens_seen = false;
comments_seen = false;
Expand Down Expand Up @@ -3492,7 +3489,7 @@ public int xtoken ()
continue;
}

if (c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\v' )
if (c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\v' || c == UnicodeLS || c == UnicodePS)
continue;

if (c == '#') {
Expand Down Expand Up @@ -3576,7 +3573,7 @@ int TokenizeBackslash ()
return Token.LITERAL;
}

if (c == '\n') {
if (c == '\n' || c == UnicodeLS || c == UnicodePS) {
Report.Error (1010, start_location, "Newline in constant");
return Token.ERROR;
}
Expand All @@ -3597,7 +3594,7 @@ int TokenizeBackslash ()

// Try to recover, read until newline or next "'"
while ((c = get_char ()) != -1) {
if (c == '\n' || c == '\'')
if (c == '\n' || c == '\'' || c == UnicodeLS || c == UnicodePS)
break;
}
}
Expand Down

0 comments on commit 0f25b95

Please sign in to comment.