Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a lexer for ARM assembly files #1057

Merged
merged 21 commits into from Jul 30, 2019
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 12 additions & 0 deletions lib/rouge/demos/armasm
@@ -0,0 +1,12 @@
GET common.s

RetVal * 0x123 :SHL: 4

AREA |Area$$Name|, CODE, READONLY

MyFunction ROUT ; This is a comment
ASSERT RetVal <> 0
1 MOVW r0, #RetVal
BX lr

END
143 changes: 143 additions & 0 deletions lib/rouge/lexers/armasm.rb
@@ -0,0 +1,143 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
module Lexers
class ArmAsm < RegexLexer
title "ArmAsm"
desc "Arm assembly syntax"
tag 'armasm'
filenames '*.s'

# Keywords that the :root state accepts after a leading `#` when deciding
# whether a line is a Comment::Preproc line. The array is built on the first
# call and the same object is returned on every later call.
#
# @return [Array<String>]
def self.preproc_keyword
  return @preproc_keyword if @preproc_keyword

  @preproc_keyword = %w[
    define elif else endif error
    if ifdef ifndef include line
    pragma undef warning
  ]
end

# Directives that the :command state treats specially: after one of these the
# lexer switches to :filespec, so the rest of the line is lexed as a file
# specification instead of ordinary arguments. Memoized after the first call.
#
# @return [Array<String>]
def self.file_directive
  return @file_directive if @file_directive

  @file_directive = %w[BIN GET INCBIN INCLUDE LNK]
end

# Regex fragments for every directive that is highlighted as a Keyword in the
# :command state (file directives are listed separately in file_directive).
#
# Each entry is a *regex fragment*, not a plain string: the caller joins the
# list with `|` and interpolates it into a pattern. Entries containing regex
# metacharacters are therefore written escaped. In particular the dot in
# `DCI\.N` / `DCI\.W` is escaped so that it only matches a literal `.`
# (unescaped it would accept any character, e.g. `DCIxN`).
#
# The final three entries cover the punctuation directives: `[`, `]` and any
# single character from `| ! # * = % & ^`.
#
# NOTE: keep the `%w( )` delimiters. With `%w[ ]` the backslash in `\[` would
# be consumed as a delimiter escape and the joined regex would break.
#
# @return [Array<String>] memoized list of regex fragments
def self.general_directive
  @general_directive ||= %w(
    ALIAS ALIGN AOF AOUT AREA ARM ASSERT ATTR CN CODE16 CODE32 COMMON CP
    DATA DCB DCD DCDO DCDU DCFD DCFDU DCFH DCFHU DCFS DCFSU DCI DCI\.N DCI\.W
    DCQ DCQU DCW DCWU DN ELIF ELSE END ENDFUNC ENDIF ENDP ENTRY EQU EXPORT
    EXPORTAS EXTERN FIELD FILL FN FRAME FUNCTION GBLA GBLL GBLS GLOBAL IF
    IMPORT INFO KEEP LCLA LCLL LCLS LEADR LEAF LTORG MACRO MAP MEND MEXIT
    NOFP OPT ORG PRESERVE8 PROC QN RELOC REQUIRE REQUIRE8 RLIST RN ROUT
    SETA SETL SETS SN SPACE STRONG SUBT THUMB THUMBX TTL WEND WHILE
    \[ \] [|!#*=%&^]
  )
end

# Shift names and condition codes that the :args state highlights as
# Name::Builtin. Each mnemonic is listed in upper case followed by the same
# mnemonics in lower case; mixed case is deliberately not included.
#
# @return [Array<String>] memoized list, upper-case entries first
def self.shift_or_condition
  @shift_or_condition ||= begin
    upper = %w[
      ASR LSL LSR ROR RRX
      AL CC CS EQ GE GT HI HS LE LO LS LT MI NE PL VC VS
    ]
    upper + upper.map(&:downcase)
  end
end

# Names that the :args state recognises between braces (`{NAME}`) and emits
# as Name::Constant. Built once, then the cached array is returned.
#
# @return [Array<String>]
def self.builtin
  return @builtin if @builtin

  @builtin = %w[
    ARCHITECTURE AREANAME ARMASM_VERSION CODESIZE COMMANDLINE
    CONFIG CPU ENDIAN FALSE FPIC FPU INPUTFILE INTER
    LINENUM LINENUMUP LINENUMUPPER OBJASM_VERSION OPT
    PC PCSTOREOFFSET REENTRANT ROPI RWPI TRUE VAR
  ]
end

# Regex fragments for the names accepted between colons (`:NAME:`) by the
# Operator rule in the :args state.
#
# Entries are joined with `|` and interpolated into a regex, so the
# `TARGET_*` entries are patterns rather than literal names.
# `TARGET_FPU_` uses the same `[0-9A-Z_]+` suffix as `TARGET_ARCH_` and
# `TARGET_FEATURE_`; without the quantifier only a single-letter suffix could
# ever match. `TARGET_PROFILE_[ARM]` intentionally matches exactly one of
# the letters A, R or M.
#
# NOTE(review): FPU names such as VFPV3 are taken from the Arm toolchain
# documentation, not from this file — confirm against the assembler guide.
#
# @return [Array<String>] memoized list of regex fragments
def self.operator
  @operator ||= %w(
    AND BASE CC CC_ENCODING CHR DEF EOR FATTR FEXEC FLOAD FSIZE INDEX LAND
    LEFT LEN LEOR LNOT LOR LOWERCASE MOD NOT OR RCONST REVERSE_CC RIGHT ROL
    ROR SHL SHR STR TARGET_ARCH_[0-9A-Z_]+ TARGET_FEATURE_[0-9A-Z_]+
    TARGET_FPU_[0-9A-Z_]+ TARGET_PROFILE_[ARM] UAL UPPERCASE
  )
end

# Entry state. Anything matched here before the first run of blanks on a line
# is treated as a label; the first run of blanks pushes :command.
state :root do
# A newline on its own is plain text.
rule %r/\n/, Text
# A line whose first non-blank character is `#`, optionally followed by one of
# ArmAsm.preproc_keyword, a blank and arbitrary text, is consumed whole
# (including its newline) as Comment::Preproc.
rule %r/^[ \t]*#[ \t]*(?:(?:#{ArmAsm.preproc_keyword.join('|')})[ \t].*)?\n/, Comment::Preproc
# Blanks are emitted as Text and push the :command state.
rule %r/[ \t]+/, Text, :command
# `;` starts a comment that runs to the end of the line.
rule %r/;.*/, Comment
rule %r/\$[a-z_]\w*\.?/i, Name::Namespace # variable substitution or macro argument
# A run of word characters, or any non-empty `|...|` bar-quoted name, is a label.
rule %r/\w+|\|[^|\n]+\|/, Name::Label
end

# Lexes the word that follows the leading whitespace of a line: a directive,
# an opcode or a macro name. Pushed from :root by its whitespace rule.
#
# The whitespace rule and the file-directive rule use `goto` rather than a
# push, so that the `:pop!` on newline in :args / :filespec leaves the stack
# as it was before this state was entered.
state :command do
  rule %r/\n/, Text, :pop!

  # Blanks after the command word: what follows is the argument list.
  rule %r/[ \t]+/ do
    token Text
    goto :args
  end

  rule %r/;.*/, Comment, :pop!

  # GET/INCLUDE/... take a file specification, which has its own state.
  rule %r/(?:#{ArmAsm.file_directive.join('|')})\b/ do
    token Keyword
    goto :filespec
  end

  # A directive must be followed by a blank, a comment, a newline, or the end
  # of input. The `\z` alternative keeps a final directive (e.g. `END`)
  # highlighted as a Keyword when the text has no trailing newline; without
  # it the word would fall through to the opcode rule below.
  rule %r/(?:#{ArmAsm.general_directive.join('|')})(?=[; \t\n]|\z)/, Keyword

  rule %r/(?:[A-Z][\dA-Z]*|[a-z][\da-z]*)(?:\.[NWnw])?(?:\.[DFIPSUdfipsu]?(?:8|16|32|64)?){,3}\b/, Name::Builtin # rather than attempt to list all opcodes, rely on all-uppercase or all-lowercase rule
  rule %r/[a-z_]\w*|\|[^|\n]+\|/i, Name::Function # probably a macro name
  rule %r/\$[a-z]\w*\.?/i, Name::Namespace
end

# Lexes the argument list of an instruction, directive or macro call. Entered
# from :command via `goto`; a newline or a `;` comment pops the state.
#
# Rule order matters: for each pair of overlapping patterns the more specific
# one is listed first, and the integer/hex rules use a negative lookahead to
# decline input that the following float rule should take instead.
state :args do
  rule %r/\n/, Text, :pop!
  rule %r/[ \t]+/, Text
  rule %r/;.*/, Comment, :pop!
  rule %r/(?:#{ArmAsm.shift_or_condition.join('|')})\b/, Name::Builtin
  rule %r/[a-z_]\w*|\|[^|\n]+\|/i, Name::Variable # various types of symbol
  rule %r/%[bf]?[at]?\d+(?:[a-z_]\w*)?/i, Name::Label

  # Hex integer: refuse when a further hex digit, a `.` or a `p` exponent
  # follows, so that hex floats such as 0x1.8p3 reach the next rule intact.
  rule %r/(?:&|0x)\h+(?![.\hp])/i, Literal::Number::Hex
  rule %r/(?:&|0x)[.\h]+(?:p[-+]?\d+)?/i, Literal::Number::Float
  rule %r/0f_\h{8}|0d_\h{16}/i, Literal::Number::Float

  # Integer (optionally `base_digits`, base 2..9): refuse when a digit, a `.`
  # or an `e` exponent follows, so that 1.5 and 25E-1 are lexed as one float.
  rule %r/(?:2_[01]+|3_[0-2]+|4_[0-3]+|5_[0-4]+|6_[0-5]+|7_[0-6]+|8_[0-7]+|9_[0-8]+|\d+)(?![.\de])/i, Literal::Number::Integer

  # Float. The decimal alternative requires a digit in its first or second
  # position (`(?=\.?\d)`), so a bare `.` is left for the Name::Constant rule
  # below instead of being swallowed as a number.
  rule %r/(?:2_[.01]+|3_[.0-2]+|4_[.0-3]+|5_[.0-4]+|6_[.0-5]+|7_[.0-6]+|8_[.0-7]+|9_[.0-8]+|(?=\.?\d)[.\d]+)(?:e[-+]?\d+)?/i, Literal::Number::Float

  # `@` or `:` directly before one of 8/16/32/64/128/256 is an operator.
  rule %r/[@:](?=[ \t]*(?:8|16|32|64|128|256)[^\d])/, Operator
  rule %r/[.@]|\{(?:#{ArmAsm.builtin.join('|')})\}/, Name::Constant

  # Two-character operators are listed before the single-character class;
  # otherwise the class would always win and `<>`, `<<`, `!=`, ... would be
  # split into two one-character matches.
  rule %r/!=|&&|\/=|<<|<=|<>|==|><|>=|>>|\|\||[-!#%&()*+,\/<=>?^{|}]|\[|\]|:(?:#{ArmAsm.operator.join('|')}):/, Operator
  rule %r/\$[a-z]\w*\.?/i, Name::Namespace

  # Opening quotes switch to the matching string state, which switches back
  # here on the closing quote.
  rule %r/'/ do
    token Literal::String::Char
    goto :singlequoted
  end
  rule %r/"/ do
    token Literal::String::Double
    goto :doublequoted
  end
end

# Body of a single-quoted literal. Entered from :args on an opening `'`.
# The closing `'` switches back to :args; a newline pops the state outright
# (the literal does not continue onto the next line).
state :singlequoted do
  rule %r/\n/, Text, :pop!
  # `$$` stays part of the literal; `$name` is highlighted separately.
  rule %r/\$\$/, Literal::String::Char
  rule %r/\$[a-z]\w*\.?/i, Name::Namespace
  rule %r/'/ do
    token Literal::String::Char
    goto :args
  end
  rule %r/[^$'\n]+/, Literal::String::Char
end

# Body of a double-quoted string. Entered from :args on an opening `"`.
# The closing `"` switches back to :args; a newline pops the state outright
# (the string does not continue onto the next line).
state :doublequoted do
  rule %r/\n/, Text, :pop!
  # `$$` stays part of the string; `$name` is highlighted separately.
  rule %r/\$\$/, Literal::String::Double
  rule %r/\$[a-z]\w*\.?/i, Name::Namespace
  rule %r/"/ do
    token Literal::String::Double
    goto :args
  end
  rule %r/[^$"\n]+/, Literal::String::Double
end

# Remainder of a line after one of ArmAsm.file_directive (entered from
# :command). Everything up to the newline is Literal::String::Other, apart
# from `$name` references. There is no `;` comment rule in this state.
state :filespec do
# End of line: leave the state.
rule %r/\n/, Text, :pop!
# `$$` is kept inside the string token (presumably an escaped dollar sign).
rule %r/\$\$/, Literal::String::Other
# `$name`, with an optional trailing `.`, is highlighted as Name::Namespace.
rule %r/\$[a-z]\w*\.?/i, Name::Namespace
# Any run of characters other than `$` and newline.
rule %r/[^$\n]+/, Literal::String::Other
end
end
end
end
14 changes: 14 additions & 0 deletions spec/lexers/armasm_spec.rb
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

# Spec for the ArmAsm lexer: currently only checks lexer guessing.
describe Rouge::Lexers::ArmAsm do
  # A fresh lexer instance, exposed to the examples as `subject`.
  let(:subject) do
    Rouge::Lexers::ArmAsm.new
  end

  describe 'guessing' do
    include Support::Guessing

    # A `.s` file name should be attributed to this lexer.
    it 'guesses by filename' do
      assert_guess(filename: 'foo.s')
    end
  end
end
86 changes: 86 additions & 0 deletions spec/visual/samples/armasm
@@ -0,0 +1,86 @@
; it shouldn't have any problem with apostrophes in comments
; or "quotation marks" or more ;s

GET otherfile.s

^ 0,r12 ; typical data structure layout
offset1 # 4
# 4 ; sometimes no label assigned
offset2 # 0
size * :INDEX:@

IF ?offset1 = 4
! 1, "This is an assembly-time assert"
ELIF -1<>:NOT:0
! 1, "This is another assert"
ELSE
! 0, "But this is a warning", 1
ENDIF

my_r0 RN 0 ; non-standard register name symbols

sixteen * 4 * 4

GBLS VBar
VBar SETS "|"

MACRO
$label MyMacro$suffix $arg1 = default, $arg2
LCLS thing
[ "$arg2"=""
thing SETS "wibble ; this isn't a comment"
|
thing SETS $arg2
]
LCLL boolean
boolean SETL {TRUE}
EXPORT $arg1
$arg1
$thing MOV pc, #0
MEND


ORG 0

ARM

AREA |Area$$Name|, CODE, READONLY

MyFunc ROUT

just_a_label
label_and ; comment
LDR my_r0, =just_a_statement
label AND r0, r1, r2 ; and a comment
MOV r0, r1, r2, LSL r3
MOV.W r0, r1, r2, ROR #1
ADDS.N r0, r1
IT MI
MOVMI r0, r1, RRX
UND #0x10-&10
PUSH {r0,r2-r4,ip,lr}
LDF F0, =25E-1
VMOVEQ.F64 d0, #-.7e1

Symbols_may_contain_d161ts_and_underscores
but_must_start_with_a_letter
and_are_case_SenSitIve
01 ; this, by contrast, is a local label

LDR r0, here
B .+8
here DATA
DCD 1
BNE %BT01

DCB 1,2,3,'A',';','"' ; bytes
= "This is a string with embedded $$ dollar and "" double quote characters", 0
= "$VBar.not_part_of_variable_name", 0
DCD -1 ; words
& -2,:INDEX:offset1 ; more words
% 16
same_as SPACE 16
or FILL 16, 0
INCBIN include.bin

END