diff --git a/lib/rouge/lexers/nasm.rb b/lib/rouge/lexers/nasm.rb index 2ebd975d71..cbf491ec9b 100644 --- a/lib/rouge/lexers/nasm.rb +++ b/lib/rouge/lexers/nasm.rb @@ -1,197 +1,71 @@ # -*- coding: utf-8 -*- # # frozen_string_literal: true +# Based on Chroma's NASM lexer implementation +# https://github.com/alecthomas/chroma/blob/498eaa690f5ac6ab0e3d6f46237e547a8935cdc7/lexers/n/nasm.go module Rouge module Lexers class Nasm < RegexLexer - tag 'nasm' - filenames '*.asm' - #mimetypes 'text/x-chdr', 'text/x-csrc' - title "Nasm" desc "Netwide Assembler" - id = /[a-zA-Z_][a-zA-Z0-9_]*/ - - #todo: pull more instructions from: http://www.nasm.us/doc/nasmdocb.html - #so far, we have sections 1.1 and 1.2 - - def self.keywords - @keywords ||= Set.new %w( - aaa aad aam aas adc add and arpl bb0_reset bb1_reset bound bsf bsr bswap bt btc btr bts - call cbw cdq cdqe clc cld cli clts cmc cmp cmpsb cmpsd cmpsq cmpsw cmpxchg - cmpxchg16b cmpxchg486 cmpxchg8b cpuid cpu_read cpu_write cqo cwd cwde daa das dec div - dmint emms enter equ f2xm1 fabs fadd faddp fbld fbstp fchs fclex fcmovb fcmovbe fcmove - fcmovnb fcmovnbe fcmovne fcmovnu fcmovu fcom fcomi fcomip fcomp fcompp fcos fdecstp - fdisi fdiv fdivp fdivr fdivrp femms feni ffree ffreep fiadd ficom ficomp fidiv fidivr - fild fimul fincstp finit fist fistp fisttp fisub fisubr fld fld1 fldcw fldenv fldl2e - fldl2t fldlg2 fldln2 fldpi fldz fmul fmulp fnclex fndisi fneni fninit fnop fnsave fnstcw - fnstenv fnstsw fpatan fprem fprem1 fptan frndint frstor fsave fscale fsetpm fsin fsincos - fsqrt fst fstcw fstenv fstp fstsw fsub fsubp fsubr fsubrp ftst fucom fucomi fucomip - fucomp fucompp fwait fxam fxch fxtract fyl2x fyl2xp1 hlt ibts icebp idiv imul in inc - incbin insb insd insw int int01 int03 int1 int3 into invd invlpg invlpga invpcid iret - iretd iretq iretw jcxz jecxz jmp jmpe jrcxz lahf lar lds lea leave les lfence lfs - lgdt lgs lidt lldt lmsw loadall loadall286 lodsb lodsd lodsq lodsw loop loope loopne - loopnz loopz lsl lss ltr mfence monitor monitorx mov movd movq movsb movsd movsq movsw - movsx movsxd movzx mul mwait mwaitx neg nop not or out outsb outsd outsw packssdw - packsswb packuswb paddb paddd paddsb paddsiw paddsw paddusb paddusw paddw pand pandn - pause paveb pavgusb pcmpeqb pcmpeqd pcmpeqw pcmpgtb pcmpgtd pcmpgtw pdistib pf2id pfacc - pfadd pfcmpeq pfcmpge pfcmpgt pfmax pfmin pfmul pfrcp pfrcpit1 pfrcpit2 pfrsqit1 pfrsqrt - pfsub pfsubr pi2fd pmachriw pmaddwd pmagw pmulhriw pmulhrwa pmulhrwc pmulhw pmullw - pmvgezb pmvlzb pmvnzb pmvzb pop popa popad popaw popf popfd popfq popfw por prefetch - prefetchw pslld psllq psllw psrad psraw psrld psrlq psrlw psubb psubd psubsb psubsiw - psubsw psubusb psubusw psubw punpckhbw punpckhdq punpckhwd punpcklbw punpckldq punpcklwd - push pusha pushad pushaw pushf pushfd pushfq pushfw pxor rcl rcr rdm rdmsr rdpmc rdshr - rdtsc rdtscp ret retf retn rol ror rsdc rsldt rsm rsts sahf sal salc sar sbb scasb scasd - scasq scasw sfence sgdt shl shld shr shrd sidt skinit sldt smi smint smintold smsw - stc std sti stosb stosd stosq stosw str sub svdc svldt svts swapgs syscall sysenter - sysexit sysret test ud0 ud1 ud2 ud2a ud2b umov verr verw wbinvd wrmsr wrshr xadd xbts - xchg xlat xlatb xor - - cmova cmovae cmovb cmovbe cmovc cmove cmovg cmovge cmovl cmovle cmovna cmovnae cmovnb cmovnbe cmovnc cmovne cmovng cmovnge cmovnl cmovnle cmovno cmovnp cmovns cmovnz cmovo cmovp cmovpe cmovpo cmovs cmovz - - ja jae jb jbe jc jcxz jecxz je jg jge jl jle jna jnae jnb jnbe jnc jne jng jnge jnl jnle jno jnp jns jnz jo jp jpe jpo js jz - - seta setae setb setbe setc sete setg setge setl setle setna setnae setnb setnbe setnc setne setng setnge setnl setnle setno setnp setns setnz seto setp setpe setpo sets setz - - AAA AAD AAM AAS ADC ADD AND ARPL BB0_RESET BB1_RESET BOUND BSF BSR BSWAP BT BTC BTR BTS - CALL CBW CDQ CDQE CLC CLD CLI CLTS CMC CMP CMPSB CMPSD CMPSQ CMPSW CMPXCHG - CMPXCHG16B CMPXCHG486 CMPXCHG8B CPUID CPU_READ CPU_WRITE CQO CWD CWDE DAA DAS DEC DIV - DMINT EMMS ENTER EQU F2XM1 FABS FADD FADDP FBLD FBSTP FCHS FCLEX FCMOVB FCMOVBE FCMOVE - FCMOVNB FCMOVNBE FCMOVNE FCMOVNU FCMOVU FCOM FCOMI FCOMIP FCOMP FCOMPP FCOS FDECSTP - FDISI FDIV FDIVP FDIVR FDIVRP FEMMS FENI FFREE FFREEP FIADD FICOM FICOMP FIDIV FIDIVR - FILD FIMUL FINCSTP FINIT FIST FISTP FISTTP FISUB FISUBR FLD FLD1 FLDCW FLDENV FLDL2E - FLDL2T FLDLG2 FLDLN2 FLDPI FLDZ FMUL FMULP FNCLEX FNDISI FNENI FNINIT FNOP FNSAVE FNSTCW - FNSTENV FNSTSW FPATAN FPREM FPREM1 FPTAN FRNDINT FRSTOR FSAVE FSCALE FSETPM FSIN FSINCOS - FSQRT FST FSTCW FSTENV FSTP FSTSW FSUB FSUBP FSUBR FSUBRP FTST FUCOM FUCOMI FUCOMIP - FUCOMP FUCOMPP FWAIT FXAM FXCH FXTRACT FYL2X FYL2XP1 HLT IBTS ICEBP IDIV IMUL IN INC - INCBIN INSB INSD INSW INT INT01 INT03 INT1 INT3 INTO INVD INVLPG INVLPGA INVPCID IRET - IRETD IRETQ IRETW JCXZ JECXZ JMP JMPE JRCXZ LAHF LAR LDS LEA LEAVE LES LFENCE LFS - LGDT LGS LIDT LLDT LMSW LOADALL LOADALL286 LODSB LODSD LODSQ LODSW LOOP LOOPE LOOPNE - LOOPNZ LOOPZ LSL LSS LTR MFENCE MONITOR MONITORX MOV MOVD MOVQ MOVSB MOVSD MOVSQ MOVSW - MOVSX MOVSXD MOVZX MUL MWAIT MWAITX NEG NOP NOT OR OUT OUTSB OUTSD OUTSW PACKSSDW - PACKSSWB PACKUSWB PADDB PADDD PADDSB PADDSIW PADDSW PADDUSB PADDUSW PADDW PAND PANDN - PAUSE PAVEB PAVGUSB PCMPEQB PCMPEQD PCMPEQW PCMPGTB PCMPGTD PCMPGTW PDISTIB PF2ID PFACC - PFADD PFCMPEQ PFCMPGE PFCMPGT PFMAX PFMIN PFMUL PFRCP PFRCPIT1 PFRCPIT2 PFRSQIT1 PFRSQRT - PFSUB PFSUBR PI2FD PMACHRIW PMADDWD PMAGW PMULHRIW PMULHRWA PMULHRWC PMULHW PMULLW - PMVGEZB PMVLZB PMVNZB PMVZB POP POPA POPAD POPAW POPF POPFD POPFQ POPFW POR PREFETCH - PREFETCHW PSLLD PSLLQ PSLLW PSRAD PSRAW PSRLD PSRLQ PSRLW PSUBB PSUBD PSUBSB PSUBSIW - PSUBSW PSUBUSB PSUBUSW PSUBW PUNPCKHBW PUNPCKHDQ PUNPCKHWD PUNPCKLBW PUNPCKLDQ PUNPCKLWD - PUSH PUSHA PUSHAD PUSHAW PUSHF PUSHFD PUSHFQ PUSHFW PXOR RCL RCR RDM RDMSR RDPMC RDSHR - RDTSC RDTSCP RET RETF RETN ROL ROR RSDC RSLDT RSM RSTS SAHF SAL SALC SAR SBB SCASB SCASD - SCASQ SCASW SFENCE SGDT SHL SHLD SHR SHRD SIDT SKINIT SLDT SMI SMINT SMINTOLD SMSW - STC STD STI STOSB STOSD STOSQ STOSW STR SUB SVDC SVLDT SVTS SWAPGS SYSCALL SYSENTER - SYSEXIT SYSRET TEST UD0 UD1 UD2 UD2A UD2B UMOV VERR VERW WBINVD WRMSR WRSHR XADD XBTS - XCHG XLAT XLATB XOR - - CMOVA CMOVAE CMOVB CMOVBE CMOVC CMOVE CMOVG CMOVGE CMOVL CMOVLE CMOVNA CMOVNAE CMOVNB CMOVNBE CMOVNC CMOVNE CMOVNG CMOVNGE CMOVNL CMOVNLE CMOVNO CMOVNP CMOVNS CMOVNZ CMOVO CMOVP CMOVPE CMOVPO CMOVS CMOVZ + tag 'nasm' + filenames '*.asm' + mimetypes 'text/x-nasm' - JA JAE JB JBE JC JCXZ JECXZ JE JG JGE JL JLE JNA JNAE JNB JNBE JNC JNE JNG JNGE JNL JNLE JNO JNP JNS JNZ JO JP JPE JPO JS JZ + state :root do + rule %r/^\s*%/, Comment::Preproc, :preproc - SETA SETAE SETB SETBE SETC SETE SETG SETGE SETL SETLE SETNA SETNAE SETNB SETNBE SETNC SETNE SETNG SETNGE SETNL SETNLE SETNO SETNP SETNS SETNZ SETO SETP SETPE SETPO SETS SETZ - ) - end + mixin :whitespace - def self.keywords_type - @keywords_type ||= Set.new %w( - DB DW DD DQ DT DO DY DZ RESB RESW RESD RESQ REST RESO RESY RESZ - db dq dd dq dt do dy dz resb resw resd resq rest reso resy resz - ) - end + rule %r/[a-z$._?][\w$.?#@~]*:/i, Name::Label - def self.reserved - @reserved ||= Set.new %w( - global extern macro endmacro assign rep endrep section - GLOBAL EXTERN MACRO ENDMACRO ASSIGN REP ENDREP SECTION - ) - end + rule %r/([a-z$._?][\w$.?#@~]*)(\s+)(equ)/i do + groups Name::Constant, Keyword::Declaration, Keyword::Declaration + push :instruction_args + end + rule %r/BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|EXPORT|LIBRARY|MODULE/, Keyword, :instruction_args + rule %r/(?:res|d)[bwdqt]|times/i, Keyword::Declaration, :instruction_args + rule %r/[a-z$._?][\w$.?#@~]*/i, Name::Function, :instruction_args - def self.builtins - @builtins ||= [] + rule %r/[\r\n]+/, Text end - start { push :expr_bol } - - state :expr_bol do - mixin :inline_whitespace - rule(//) { pop! } - end + state :instruction_args do + rule %r/"(\\\\"|[^"\\n])*"|'(\\\\'|[^'\\n])*'|`(\\\\`|[^`\\n])*`/, Str + rule %r/(?:0x[\da-f]+|$0[\da-f]*|\d+[\da-f]*h)/i, Num::Hex + rule %r/[0-7]+q/i, Num::Oct + rule %r/[01]+b/i, Num::Bin + rule %r/\d+\.e?\d+/i, Num::Float + rule %r/\d+/, Num::Integer - state :inline_whitespace do - rule %r/[ \t\r]+/, Text - end + mixin :punctuation - state :whitespace do - rule %r/\n+/m, Text, :expr_bol - rule %r(//(\\.|.)*?\n), Comment::Single, :expr_bol - mixin :inline_whitespace - end + rule %r/r\d[0-5]?[bwd]|[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]/i, Name::Builtin + rule %r/[a-z$._?][\w$.?#@~]*/i, Name::Variable + rule %r/[\r\n]+/, Text, :pop! - state :expr_whitespace do - rule %r/\n+/m, Text, :expr_bol mixin :whitespace end - state :root do - mixin :expr_whitespace - rule(//) { push :statement } - rule %r/^%[a-zA-Z0-9]+/, Comment::Preproc, :statement - - rule( - %r(&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|<=|>=|<>|[-&*/\\^+=<>.]), - Operator - ) - rule %r/;.*/, Comment, :statement - rule %r/^[a-zA-Z]+[a-zA-Z0-9]*:/, Name::Function - rule %r/;.*/, Comment + state :preproc do + rule %r/[^;\n]+/, Comment::Preproc + rule %r/;.*?\n/, Comment::Single, :pop! + rule %r/\n/, Comment::Preproc, :pop! end - state :statement do - mixin :expr_whitespace - mixin :statements - rule %r/;.*/, Comment - rule %r/^%[a-zA-Z0-9]+/, Comment::Preproc - rule %r/[a-zA-Z]+%[0-9]+:/, Name::Function - end - - state :statements do - mixin :whitespace - rule %r/L?"/, Str, :string - rule %r/[a-zA-Z]+%[0-9]+:/, Name::Function #labels/subroutines/functions - rule %r(L?'(\\.|\\[0-7]{1,3}|\\x[a-f0-9]{1,2}|[^\\'\n])')i, Str::Char - rule %r/0x[0-9a-f]+[lu]*/i, Num::Hex - rule %r/\d+[lu]*/i, Num::Integer - rule %r(\*/), Error - rule %r([~&*+=\|?:<>/-]), Operator - rule %r/[(),.]/, Punctuation - rule %r/\[[a-zA-Z0-9]*\]/, Punctuation - rule %r/%[0-9]+/, Keyword::Reserved - rule %r/[a-zA-Z]+%[0-9]+/, Name::Function #labels/subroutines/functions - - #rule %r/(?+*\/%~-]+/, Operator + rule %r/\$+/, Keyword::Constant + rule %r/seg|wrt|strict/i, Operator::Word + rule %r/byte|[dq]?word/i, Keyword::Type end end end diff --git a/spec/visual/samples/nasm b/spec/visual/samples/nasm index 686dfd1b88..4d8e760082 100644 --- a/spec/visual/samples/nasm +++ b/spec/visual/samples/nasm @@ -52,3 +52,9 @@ irq_common_stub: IRQ i, i+32 %assign i i+1 %endrep + +mov eax, [eax] +mov eax, [eax + ebx] +mov eax, [eax + ebx*2] +mov eax, [eax + 10] +mov eax, [eax + ebx*2 + 10]