Merge branch 'timrach-algebra-issue#7'

nicolewhite · Aug 19, 2015 · 57f9797 · 57f9797
2 parents 23e661b + 8353132
commit 57f9797
Show file tree

Hide file tree

Showing 4 changed files with 505 additions and 2 deletions.
diff --git a/algebra.js b/algebra.js
@@ -1,9 +1,19 @@
 var Fraction = require('./src/fractions');
 var Expression = require('./src/expressions').Expression;
 var Equation = require('./src/equations');
+var Parser = require('./src/parser');
+
+
+
+var parse = function(input){
+	var parser = new Parser();
+	var result = parser.parse(input);
+	return result;
+}
 
 module.exports = {
     Fraction: Fraction,
     Expression: Expression,
-    Equation: Equation
-};
+    Equation: Equation,
+    parse: parse
+};
diff --git a/src/lexer.js b/src/lexer.js
@@ -0,0 +1,146 @@
+'use strict';
+
+/*
+  The lexer module is a slightly modified version of the handwritten lexer by Eli Bendersky.
+  The parts not needed like comments and quotes were deleted and some things modified.
+  Comments are left unchanged, the original lexer can be found here:
+  http://eli.thegreenplace.net/2013/07/16/hand-written-lexer-in-javascript-compared-to-the-regex-based-ones
+*/
+
+var Lexer = function() {
+  this.pos = 0;
+  this.buf = null;
+  this.buflen = 0;
+
+  // Operator table, mapping operator -> token name
+  this.optable = {
+    '+':  'PLUS',
+    '-':  'MINUS',
+    '*':  'MULTIPLY',
+    '/':  'DIVIDE',
+    '^':  'POWER',
+    '(':  'L_PAREN',
+    ')':  'R_PAREN',
+    '=':  'EQUALS'
+  };
+};
+
+// Initialize the Lexer's buffer. This resets the lexer's internal
+// state and subsequent tokens will be returned starting with the
+// beginning of the new buffer.
+Lexer.prototype.input = function(buf) {
+  this.pos = 0;
+  this.buf = buf;
+  this.buflen = buf.length;
+};
+
+// Get the next token from the current buffer. A token is an object with
+// the following properties:
+// - type: name of the pattern that this token matched (taken from rules).
+// - value: actual string value of the token.
+// - pos: offset in the current buffer where the token starts.
+//
+// If there are no more tokens in the buffer, returns null. In case of
+// an error throws Error.
+Lexer.prototype.token = function() {
+  this._skipnontokens();
+  if (this.pos >= this.buflen) {
+    return null;
+  }
+
+  // The char at this.pos is part of a real token. Figure out which.
+  var c = this.buf.charAt(this.pos);
+   // Look it up in the table of operators
+  var op = this.optable[c];
+  if (op !== undefined) {
+    if(op === 'L_PAREN' || op === 'R_PAREN'){
+       return {type: 'PAREN', value: op, pos: this.pos++};  
+    }else{
+      return {type: 'OPERATOR', value: op, pos: this.pos++};  
+    }
+  } else {
+    // Not an operator - so it's the beginning of another token.
+    if (Lexer._isalpha(c)) {
+      return this._process_identifier();
+    } else if (Lexer._isdigit(c)) {
+      return this._process_number();
+    } else {
+      throw new Error('Token error at character ' + c + ' at position ' + this.pos);
+    }
+  }
+};
+
+Lexer._isdigit = function(c) {
+  return c >= '0' && c <= '9';
+};
+
+Lexer._isalpha = function(c) {
+  return (c >= 'a' && c <= 'z') ||
+         (c >= 'A' && c <= 'Z');
+};
+
+Lexer._isalphanum = function(c) {
+  return (c >= 'a' && c <= 'z') ||
+         (c >= 'A' && c <= 'Z') ||
+         (c >= '0' && c <= '9');
+};
+
+Lexer.prototype._process_digits = function(position){
+  var endpos = position;
+  while (endpos < this.buflen &&
+        (Lexer._isdigit(this.buf.charAt(endpos)))){
+    endpos++;
+  }
+  return endpos
+}
+
+Lexer.prototype._process_number = function() {
+  //Read characters until a non-digit character appears
+  var endpos = this._process_digits(this.pos);
+  //If it's a decimal point, continue to read digits
+  if(this.buf.charAt(endpos) === '.'){
+    endpos = this._process_digits(endpos + 1);
+  }
+  //Check if the last read character is a decimal point.
+  //If it is, ignore it and proceed
+  if(this.buf.charAt(endpos-1) === '.'){
+    throw new Error("Decimal point without decimal digits at position " + (endpos-1));
+  } 
+  //construct the NUMBER token
+  var tok = {
+    type: 'NUMBER',
+    value: this.buf.substring(this.pos, endpos),
+    pos: this.pos
+  };
+  this.pos = endpos;
+  return tok;
+};
+
+Lexer.prototype._process_identifier = function() {
+  var endpos = this.pos + 1;
+  while (endpos < this.buflen &&
+         Lexer._isalphanum(this.buf.charAt(endpos))) {
+    endpos++;
+  }
+
+  var tok = {
+    type: 'IDENTIFIER',
+    value: this.buf.substring(this.pos, endpos),
+    pos: this.pos
+  };
+  this.pos = endpos;
+  return tok;
+};
+
+Lexer.prototype._skipnontokens = function() {
+  while (this.pos < this.buflen) {
+    var c = this.buf.charAt(this.pos);
+    if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
+      this.pos++;
+    } else {
+      break;
+    }
+  }
+};
+
+module.exports = Lexer;
diff --git a/src/parser.js b/src/parser.js
@@ -0,0 +1,205 @@
+'use strict';
+
+var Lexer = require('./lexer'),
+    Expression = require('./expressions').Expression,
+    Equation = require('./equations');
+
+/*
+    This parser module uses the shunting yard algorithm to convert input strings
+    to algebraic expressions using the algebra.js module.
+*/
+var Parser = function() {
+    this.lexer = new Lexer();
+    this.current_token = null;
+    this.operator_stack = []; //The operator stack
+    this.output = []; //The output stack
+
+    //Operator precendence definitions
+    this.prec = {
+        'EQUALS' : 1,
+        'PLUS' : 2,
+        'MINUS' : 2,
+        'MULTIPLY': 3,
+        'DIVIDE':3,
+        'POWER': 4
+    };
+
+    //Operator associativity definitions
+    this.asso = {
+        'PLUS' : 'LEFT',
+        'MINUS' : 'LEFT',
+        'MULTIPLY': 'LEFT',
+        'DIVIDE':'LEFT',
+        'POWER': 'RIGHT',
+        'EQUALS' : 'RIGHT'
+    };
+};
+
+
+/*
+    Initializes the parser internals and the lexer.
+    The input is then parsed using the shunting yard algorithm
+    and the expression tree is constructed and returned as the result
+*/
+Parser.prototype.parse = function(input) {
+    this.operator_stack = []; // empty the operator stack
+    this.output = []; //empty the output stack
+    //pass the input to the lexer
+    this.lexer.input(input);
+    //perform shunting yard algorithm
+    this.shunting_yard();
+    //construct the expression tree
+    return this.construct_expression();
+};
+
+//Returns the stacks head
+Parser.prototype.stack_top = function() {
+    return this.operator_stack[this.operator_stack.length - 1];
+};
+
+//Moves the stacks head to the output
+Parser.prototype.stack_head_to_ouput = function() {
+    this.output.push(this.operator_stack.pop());
+};
+
+/*
+    The shunting yard algorithm according to the description on https://en.wikipedia.org/wiki/Shunting-yard_algorithm. Comments are taken from the description on the site.
+    This implementation ignores function and seperator tokens as they are not needed for the 
+    parser.
+*/
+Parser.prototype.shunting_yard = function() {
+    //Read the first token
+    this.current_token = this.lexer.token();
+    //While there are tokens to be read:
+    while(this.current_token !== null){
+        //If the token is a number, then add it to the output queue.
+        if(this.current_token.type === 'NUMBER' || this.current_token.type === 'IDENTIFIER'){
+            this.output.push(this.current_token);
+        //If the token is an operator, o1, then:
+        }else if (this.current_token.type ==='OPERATOR'){
+            var o1 = this.current_token;
+            //while there is an operator token, o2, at the top of the operator stack, and either
+            while(this.operator_stack.length > 0){
+                var o2 = this.stack_top();
+                //o1 is left-associative and its precedence is less than or equal to that of o2, or o1 is right associative, and has precedence less than that of o2,
+                if((this.asso[o1.value] === 'LEFT' && this.prec[o1.value] <= this.prec[o2.value])||
+                   (this.asso[o1.value] === 'RIGHT' && this.prec[o1.value] < this.prec[o2.value])){
+                    //then pop o2 off the operator stack, onto the output queue;
+                    this.stack_head_to_ouput();
+                }else{
+                    break;
+                }
+            }
+            //push o1 onto the operator stack.
+            this.operator_stack.push(o1);
+        }else {
+            //If the token is a left parenthesis (i.e. '('), then push it onto the stack.
+            if(this.current_token.value === 'L_PAREN'){
+                this.operator_stack.push(this.current_token);
+            //If the token is a right parenthesis (i.e. ')'):
+            }else{
+                //Until the token at the top of the stack is a left parenthesis, pop operators off the stack onto the output queue.
+                while(this.stack_top() !== undefined){
+                    if(this.stack_top().value === 'L_PAREN'){
+                        break;
+                    }else{
+                        this.stack_head_to_ouput();    
+                    }                    
+                }
+                //Pop the left parenthesis from the stack, but not onto the output queue.
+                var head = this.operator_stack.pop();
+                //If the stack runs out without finding a left parenthesis, then there are mismatched parentheses.
+                if(head === undefined){
+                    throw new Error('Unbalanced Parenthesis');
+                }
+            }
+        }
+        this.current_token = this.lexer.token();
+    }
+    //When there are no more tokens to read:
+    //While there are still operator tokens in the stack:
+    while(this.operator_stack.length > 0){
+        //If the operator token on the top of the stack is a parenthesis, then there are mismatched parentheses.
+        if(this.stack_top().type === 'PAREN'){
+            throw new Error('Unbalanced Parenthesis');
+        }else{
+            //Pop the operator onto the output queue.
+            this.stack_head_to_ouput();
+        }
+    }
+    //Exit.
+};
+
+//Converts the base types NUMBER and IDENTIFIER to an Expression.
+Parser.prototype.convert_for_application = function(operand) {
+    if(operand.type === 'NUMBER'){
+        //Integer conversion
+        if(parseInt(operand.value) == operand.value){
+            return new Expression(parseInt(operand.value));      
+        }else{
+            //Split the decimal number to integer and decimal parts
+            var splits = operand.value.split('.');
+            //count the digits of the decimal part
+            var decimals = splits[1].length;
+            //determine the multiplication factor
+            var factor = Math.pow(10,decimals);
+            var float_op = parseFloat(operand.value);
+            //multiply the float with the factor and divide it again afterwards 
+            //to create a valid expression object
+            return new Expression(parseInt(float_op * factor)).divide(factor);
+        }
+    } else {
+        return new Expression(operand.value);
+    }
+};
+
+/*  
+    Applies the specified operator to the specified operands.
+    op is always a token of type OPERATOR,
+    operands lhs and rhs can be tokens of type NUMBER or IDENTIFIER
+    or Expression objects 
+*/
+Parser.prototype.apply_operator = function(op, lhs, rhs) {
+    var result;  
+
+    //Apply the operator
+    switch(op.value){
+        case 'PLUS': result = lhs.add(rhs);break;
+        case 'MINUS': result = lhs.subtract(rhs);break; 
+        case 'MULTIPLY': result = lhs.multiply(rhs);break;
+        case 'DIVIDE': 
+            /*
+                Division is a bit special as the algebra.js module
+                only allows division by integers or Fractions, but not
+                Expressions. Therefore the rhs operand is always converted to an
+                integer.
+            */
+            result = lhs.divide(parseInt(rhs.toString()));
+            break;
+        // Power also doesn't accept expressions as rhs operand
+        case 'POWER': result = lhs.pow(parseInt(rhs.toString()));break;
+        case 'EQUALS' : result = new Equation(lhs,rhs);break;
+    }
+    return result;
+};
+
+
+/*
+    Recursively build the expression tree.
+*/
+Parser.prototype.construct_expression = function() {
+    //Read the stack head
+    var head = this.output.pop();
+    if(head === undefined) throw new Error("Missing operand")
+    //If its an operator, recursively construct the operands and apply the operator to construct the node
+    if(head.type === 'OPERATOR'){
+        var rhs = this.construct_expression();
+        var lhs = this.construct_expression();
+        return this.apply_operator(head, lhs,rhs);
+    }else{
+        //If it is not an operator, it can only be a number or a variable, which are leaves in the tree
+        return this.convert_for_application(head);
+    }
+};
+
+module.exports = Parser;