Skip to content

Commit

Permalink
Introduce preprocessing step
Browse files Browse the repository at this point in the history
Decided to introduce a new class for better functional encapsulation and
clear distribution of duties. First step is to handle comments in the
preprocessor since that's where they are actually stripped out which
results in some slightly different behaviour of tokenisation (added
tests to capture this).

On the road to #13
  • Loading branch information
kymckay committed May 13, 2021
1 parent 59268e7 commit fba0d9a
Show file tree
Hide file tree
Showing 18 changed files with 282 additions and 93 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,17 @@ This is the first C++ I have ever written. My intentions for this project are:

If you'd like to contribute please read [CONTRIBUTING.md](CONTRIBUTING.md).

## Preprocessing

- A comment starts with a `//` or `/*` character pair that is not part of a string literal, and ends at the end of the physical line or with a `*/` character pair, respectively. Comments are stripped during preprocessing and thus do not act as token delimiters.
- SQF scripts support C-like preprocessor directives. If the `#` character appears at the start of a line (ignoring leading whitespace) the line is preprocessed.
- The directive must immediately follow the `#` character (no whitespace) and ends at the end of the logical line.
- The logical line can be extended by a `\` character immediately preceding the end of the physical line.
- Preprocessing will fail and the script cannot be tokenised if an unrecognised directive is present.

## Lexical Analysis

- In SQF a program consists of a sequence of statements. The end of each statement (except for the very last) is marked by a `,` or `;` character, unless the syntax allows that character in another role (e.g. commas within an array literal).
- A comment starts with a `//` or `/*` character pair that is not part of a string literal, and ends at the end of the physical line or with a `*/` character pair, respectively.
- Outside of string literals, whitespace characters are used to separate tokens (only necessary if their concatenation could otherwise be interpreted as a different token).

### Identifiers and Keywords
Expand Down
2 changes: 1 addition & 1 deletion src/lexer/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ cc_library(
name = "sqf-lexer",
srcs = ["lexer.cpp"],
hdrs = ["lexical_error.h", "lexer.h"],
deps = [":sqf-tokens", "//src/sqf:sqf-keywords"],
deps = [":sqf-tokens", "//src/sqf:sqf-keywords", "//src/preprocessor:sqf-preprocessor"],
visibility = ["//src/parser:__pkg__"],
)
59 changes: 5 additions & 54 deletions src/lexer/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <algorithm>
#include <cctype>

Lexer::Lexer(std::istream &to_read) : stream_(to_read)
Lexer::Lexer(Preprocessor &pre) : preproc_(pre)
{
// Immediately read in the first character
advance();
Expand All @@ -20,29 +20,12 @@ void Lexer::error(Token t, std::string msg)
// Preview the next character in order to differentiate tokens that start the same
// NOTE(review): this is a diff rendered without +/- markers - the stream_ return looks
// like the replaced line and the preproc_ return like its replacement; confirm against
// the committed file before relying on either.
char Lexer::peek()
{
return stream_.peek();
return preproc_.peek();
}

// Moves the lexer on to the next input character, storing it in current_char_.
// NOTE(review): this is a diff rendered without +/- markers - everything down to the
// closing brace of the else branch looks like the replaced stream-based body, and the
// final assignment from preproc_.get() like its replacement; confirm against the
// committed file.
void Lexer::advance()
{
// Increment the line whenever a newline is passed
if (current_char_ == '\n')
{
lineno_++;
// Zero here because the column is incremented again once the next character is read
column_ = 0;
}

stream_.get(current_char_);

// When end of stream is reached return EOF character
if (stream_.eof())
{
current_char_ = '\0';
}
else
{
column_++;
}
current_char_ = preproc_.get();
}

void Lexer::skipWhitespace()
Expand All @@ -53,31 +36,6 @@ void Lexer::skipWhitespace()
}
}

// Skips over a line comment (//) or block comment (/* */) beginning at the current
// character, deciding which by looking at the following character.
// NOTE(review): appears to be removed by this commit (lexer.h also drops the
// declaration and near-identical logic is added as Preprocessor::skipComment) - confirm.
void Lexer::skipComment()
{
if (peek() == '/')
{
// Line comment: run to the end of the line or the end of the input
while (current_char_ != '\0' && current_char_ != '\n')
{
advance();
}

// Skip past the EOL
advance();
}
else if (peek() == '*')
{
// Block comment: run until the current and following characters form "*/", or the input ends
while (current_char_ != '\0' && !(current_char_ == '*' && peek() == '/'))
{
advance();
}

// Skip past the block end: */
advance();
advance();
}
}

Token Lexer::_id()
{
// Token position at first character
Expand Down Expand Up @@ -232,8 +190,8 @@ Token Lexer::makeToken(TokenType type, std::string raw)
Token t;
t.type = type;
t.raw = raw;
t.line = lineno_;
t.column = column_;
t.line = current_char_.line;
t.column = current_char_.column;
return t;
}

Expand All @@ -248,13 +206,6 @@ Token Lexer::nextToken()
continue;
}

// Comments are irrelevant (block and line)
if (current_char_ == '/' && (peek() == '/' || peek() == '*'))
{
skipComment();
continue;
}

if (std::isalpha(current_char_) || current_char_ == '_')
{
return _id();
Expand Down
14 changes: 5 additions & 9 deletions src/lexer/lexer.h
Original file line number Diff line number Diff line change
@@ -1,31 +1,27 @@
#pragma once
#include "src/preprocessor/preprocessor.h"
#include "src/lexer/token.h"
#include "src/lexer/lexical_error.h"
#include <string>
#include <istream>

// Produces tokens from its character source, one per call to nextToken().
// NOTE(review): this is a diff rendered without +/- markers, so replaced and replacement
// declarations both appear (stream_ and preproc_, the char and PosChar versions of
// current_char_, both constructors) - confirm against the committed header.
class Lexer
{
// Reference member, stream has no copy constructor and lexer doesn't care what kind of stream it is (file or string)
std::istream &stream_;
// Reference member, no need to copy the supplied preprocessor
Preprocessor &preproc_;

char current_char_;
// Current position lexer has reached in the text
// Used to give a position to errors
int lineno_ = 1;
int column_ = 1;
// Character under consideration together with the line and column it carries
PosChar current_char_;

void error(Token, std::string);
char peek();
void advance();
void skipWhitespace();
void skipComment();
Token _id();
Token number();
Token string();
Token makeToken(TokenType, std::string);

public:
Lexer(std::istream&);
Lexer(Preprocessor&);
Token nextToken();
};
4 changes: 3 additions & 1 deletion src/main/sqwhiff.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "src/preprocessor/preprocessor.h"
#include "src/lexer/lexer.h"
#include "src/parser/parser.h"
#include "src/analyzer/analyzer.h"
Expand All @@ -10,7 +11,8 @@ int main()

if (file_in.is_open())
{
Lexer lex(file_in);
Preprocessor preproc(file_in);
Lexer lex(preproc);
Parser p(lex);
Analyzer a(p);
a.analyze(std::cout);
Expand Down
6 changes: 6 additions & 0 deletions src/preprocessor/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Preprocessor library: the sources and headers under src/preprocessor, usable by the lexer package only
cc_library(
    name = "sqf-preprocessor",
    srcs = [
        "preprocessor.cpp",
    ],
    hdrs = [
        "pos_char.h",
        "preprocessing_error.h",
        "preprocessor.h",
    ],
    visibility = [
        "//src/lexer:__pkg__",
    ],
)
15 changes: 15 additions & 0 deletions src/preprocessor/pos_char.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

// A single character bundled with the file position it is attributed to.
// The position may differ from where the character physically sits because of macros.
struct PosChar
{
    // The character itself; '\0' by default
    char c{'\0'};

    // Position attributed to the character; both default to 1
    int line{1};
    int column{1};

    // Lets a PosChar be used directly wherever a plain char is expected
    operator char() const
    {
        return c;
    }
};

// Re-declaring the name as a typedef guards against naming clashes in the global namespace
typedef PosChar PosChar;
9 changes: 9 additions & 0 deletions src/preprocessor/preprocessing_error.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#pragma once
#include <string>
#include <stdexcept>

// Error raised for a problem found while preprocessing.
// what() reads "<line>:<col> PreprocessingError: <msg>".
class PreprocessingError : public std::runtime_error
{
    // Assembles the text handed to std::runtime_error
    static std::string describe(int line, int col, const std::string &msg)
    {
        std::string text = std::to_string(line);
        text += ":";
        text += std::to_string(col);
        text += " PreprocessingError: ";
        text += msg;
        return text;
    }

public:
    // line / col: position the problem is reported at; msg: description of the problem
    PreprocessingError(int line, int col, std::string msg)
        : std::runtime_error(describe(line, col, msg))
    {
    }
};
91 changes: 91 additions & 0 deletions src/preprocessor/preprocessor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#include "src/preprocessor/preprocessor.h"
#include <string>
#include <istream>
#include <algorithm>
#include <cctype>

// Binds the preprocessor to the supplied stream and reads the first character straight
// away, so current_char_ is already populated when get() is first called.
// NOTE(review): column_ is initialised to 1 in the header and advance() increments it for
// this first read, whereas advance() restarts every later line from 0 - columns on the
// first line look one higher than on the rest; confirm whether that is intended.
Preprocessor::Preprocessor(std::istream &to_read)
    : stream_(to_read)
{
    advance();
}

// Raises a PreprocessingError positioned at the line and column carried by the given character.
// p: character whose position is reported; msg: description of the problem.
void Preprocessor::error(PosChar p, std::string msg)
{
    const int at_line = p.line;
    const int at_column = p.column;

    throw PreprocessingError(at_line, at_column, msg);
}

// Reads the next raw character from the stream into current_char_ and keeps the
// physical position (lineno_, column_) and the line_start_ flag up to date.
// Once the stream has nothing more to give, current_char_ becomes '\0'.
void Preprocessor::advance()
{
    // Bookkeeping is driven by the character being left behind
    if (current_char_ == '\n')
    {
        // Increment the line whenever a newline is passed
        lineno_++;
        column_ = 0;
        line_start_ = true;
    }
    else if (line_start_ && current_char_ != '\0' &&
             !std::isspace(static_cast<unsigned char>(current_char_)))
    {
        // Something other than whitespace has now been seen on this line.
        // The '\0' placeholder held before the first read is excluded so the very
        // first line still counts as being at its start. The cast is needed because
        // std::isspace is undefined for negative char values.
        line_start_ = false;
    }

    if (stream_.get(current_char_))
    {
        column_++;
    }
    else
    {
        // End of stream, or any other failed read: hand out the EOF character
        // rather than leaving the previous character in place
        current_char_ = '\0';
    }
}

// Discards the comment that begins at the current character.
// Expects current_char_ to be '/' with the next stream character being '/' or '*';
// does nothing if the next character is neither.
void Preprocessor::skipComment()
{
    const int opener = stream_.peek();

    if (opener == '/')
    {
        // Intentionally don't skip the newline at the end (acts as a delimiter)
        while (current_char_ != '\0' && current_char_ != '\n')
        {
            advance();
        }
    }
    else if (opener == '*')
    {
        // Step over the opening "/*" first, otherwise its '*' could pair with a
        // following '/' and "/*/" would be taken for a complete comment
        advance();
        advance();

        while (current_char_ != '\0' && !(current_char_ == '*' && stream_.peek() == '/'))
        {
            advance();
        }

        // Skip past the block end: */ (nothing left to skip if the input ended first)
        if (current_char_ != '\0')
        {
            advance();
            advance();
        }
    }
}

// Returns the next character of comment-free input together with the line and column
// it was read at, then moves on. Yields '\0' once the input is exhausted.
PosChar Preprocessor::get()
{
    PosChar c;

    // peek() strips any comments first, so the position read below belongs to the
    // character actually being returned
    c.c = peek();
    c.line = lineno_;
    c.column = column_;

    // Remember to actually progress through the input
    advance();

    return c;
}

// Preview the next character in order to differentiate tokens that start the same.
// Returns exactly the character the next call to get() will return, without consuming it.
char Preprocessor::peek()
{
    // Comments are irrelevant (block and line); loop because they can sit back to back
    // NOTE(review): string literals are not tracked here, so a "//" or "/*" inside a
    // string is stripped as well, which the README says should not happen.
    while (current_char_ == '/' && (stream_.peek() == '/' || stream_.peek() == '*'))
    {
        skipComment();
    }

    // current_char_ is the one-character lookahead already read from the stream;
    // stream_.peek() would be a character too far ahead
    return current_char_;
}
29 changes: 29 additions & 0 deletions src/preprocessor/preprocessor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#pragma once
#include "src/preprocessor/pos_char.h"
#include "src/preprocessor/preprocessing_error.h"
#include <string>
#include <istream>

class Preprocessor
{
// Reference member for polymorphism
std::istream &stream_;

char current_char_ = '\0';
// Current physical position preprocessor has reached
// Used to give a position to errors and macros
int lineno_ = 1;
int column_ = 1;

// Preprocessor directives must appear at a line start (ignoring whitespace)
bool line_start_ = true;

void error(PosChar, std::string);
void advance();
void skipComment();

public:
Preprocessor(std::istream&);
PosChar get();
char peek();
};
8 changes: 8 additions & 0 deletions test/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,17 @@ cc_test(
srcs = ["parser_private_test.cpp"],
deps = [":sqf-ast-tester", "@com_google_googletest//:gtest_main"],
)

# Builds and runs parser_program_test.cpp against the shared AST tester and gtest's main
cc_test(
    name = "parser-program",
    size = "small",
    srcs = [
        "parser_program_test.cpp",
    ],
    deps = [
        ":sqf-ast-tester",
        "@com_google_googletest//:gtest_main",
    ],
)

# Builds and runs preprocessor_comments_test.cpp against the shared AST tester and gtest's main
cc_test(
    name = "preprocessor-comments",
    size = "small",
    srcs = [
        "preprocessor_comments_test.cpp",
    ],
    deps = [
        ":sqf-ast-tester",
        "@com_google_googletest//:gtest_main",
    ],
)
Loading

0 comments on commit fba0d9a

Please sign in to comment.