# EMV is primarily a compatibility standard, not security

- It is designed to allow terminals and cards to work together, even without fully understanding the data processed
- *Be conservative in what you do, be liberal in what you accept from others* (Postel's law)
- This has the potential to create security vulnerabilities, by increasing complexity and risking that important data will not be properly interpreted

# One way EMV achieves compatibility is through the TLV format
- Data is encoded as **T**ag, **L**ength, **V**alue
- Very efficient, compared to JSON or XML
- Tree structure can be decoded without knowing all tags
- Unknown tags can be ignored (for better or worse)
- `0x00` values are ignored between TLV items, allowing in-place deletion (historically `0xff` too)

# It is sometimes helpful to manually decode TLV data
- Data might be incomplete or corrupt
- You might have to write your own decoder (though I wouldn't recommend it)
- Doing things for yourself can help you find where others might have slipped up

In [1]:
## Some helpful utilities for processing hex data
from hexutils import *

In [2]:
## Convert hex to binary
to_bin('AA')

'10101010'

In [3]:
## Strip whitespace around and within hex
strip_bytes("12F\n  456 ")

'12f456'

In [4]:
## Split hex into bytes for display
split_bytes("12F\n  456 ")

'12 f4 56'

In [5]:
## Count how many bytes in a hex string
len_bytes("12F\n  456 ")

3

In [6]:
## Convert bytes to text (using ISO8859-1)
decode_bytes("12F\n  456 ")

'\x12ôV'

In [7]:
## Format a byte into a binary table
format_bytes('aa')

0,1,2,3,4,5,6,7,8
0xaa =,b8,b7,b6,b5,b4,b3,b2,b1
,1,0,1,0,1,0,1,0


In [8]:
## Split a byte into fields of specified length
format_bytes('aa', [2,1,0,5])

0,1,2,3,4,5,6,7,8
0xaa =,b8,b7,b6,b5,b4,b3,b2,b1
,1,0,-,-,-,-,-,-
,-,-,1,-,-,-,-,-
,-,-,-,-,-,-,-,-
,-,-,-,0,1,0,1,0


## Output of cardpeek log, requesting an EMV record
C:00B2021400:6C97:
C:00B2021497:9000:7081948C219F02069F03069F1A0295055F2A029A039C019F37049F35019F45029F4C089F34038D0C910A8A0295059F37049F4C089F08020002571352AAAAAAAAAAAA47D15122011407992700000F5F20134D5552444F43482F53544556454E204A2E44525F300202019F1F183134303739303030303030303030303932373030303030309F420208269F4401029F49039F37049F470103

In [9]:
## Response as a Python string
response="7081948C219F02069F03069F1A0295055F2A029A039C019F37049F35019F45029F4C089F34038D0C910A8A0295059F37049F4C089F08020002571352AAAAAAAAAAAA47D15122011407992700000F5F20134D5552444F43482F53544556454E204A2E44525F300202019F1F183134303739303030303030303030303932373030303030309F420208269F4401029F49039F37049F470103"

Response is `7081948C219F02069F03...`

In [10]:
## Look at the first byte of the response (a tag)
take(response, 1)

'70'

In [11]:
## Application class, constructed, one-byte tag
format_bytes(_, [2,1,5])

0,1,2,3,4,5,6,7,8
0x70 =,b8,b7,b6,b5,b4,b3,b2,b1
,0,1,-,-,-,-,-,-
,-,-,1,-,-,-,-,-
,-,-,-,1,0,0,0,0


Response is `7081948C219F02069F03...`

In [12]:
## First byte of length is 0x81...
take(response, 1, 1)

'81'

In [13]:
## b8 is 1 (long form) and b7-b1 give 1, so the actual length is in the next one byte
format_bytes(_)

0,1,2,3,4,5,6,7,8
0x81 =,b8,b7,b6,b5,b4,b3,b2,b1
,1,0,0,0,0,0,0,1


In [14]:
## The actual length is 0x94...
take(response, 1, 2)

'94'

In [15]:
## which is 148 in decimal
int(_, 16)

148

Response is `7081948C219F02069F03...`

In [16]:
## The tag value is 148 bytes, starting after tag (1 byte) and length (2 bytes)...
take(response, 148, 1+2)

'8c 21 9f 02 06 9f 03 06 9f 1a 02 95 05 5f 2a 02 9a 03 9c 01 9f 37 04 9f 35 01 9f 45 02 9f 4c 08 9f 34 03 8d 0c 91 0a 8a 02 95 05 9f 37 04 9f 4c 08 9f 08 02 00 02 57 13 52 aa aa aa aa aa aa 47 d1 51 22 01 14 07 99 27 00 00 0f 5f 20 13 4d 55 52 44 4f 43 48 2f 53 54 45 56 45 4e 20 4a 2e 44 52 5f 30 02 02 01 9f 1f 18 31 34 30 37 39 30 30 30 30 30 30 30 30 30 30 39 32 37 30 30 30 30 30 30 9f 42 02 08 26 9f 44 01 02 9f 49 03 9f 37 04 9f 47 01 03'

In [17]:
## which is the whole of the rest of the response from the card
len_bytes(response) - 3

148

Response is `7081948C219F02069F03...`

In [18]:
## The value is constructed so the next byte is a tag
take(response, 1, 1+2)

'8c'

In [19]:
## Context-specific class, primitive, 1-byte tag
format_bytes(_, [2,1,5]) # 0x8c - CDOL1

0,1,2,3,4,5,6,7,8
0x8c =,b8,b7,b6,b5,b4,b3,b2,b1
,1,0,-,-,-,-,-,-
,-,-,0,-,-,-,-,-
,-,-,-,0,1,1,0,0


Response is `7081948C219F02069F03...`

In [20]:
## Next byte will be the length
take(response, 1, 1+2+1)

'21'

In [21]:
## b8 is 0 so this is a 1 byte length (0x21)...
format_bytes(_)

0,1,2,3,4,5,6,7,8
0x21 =,b8,b7,b6,b5,b4,b3,b2,b1
,0,0,1,0,0,0,0,1


In [22]:
## which is 33 in decimal
int(_, 16)

33

Response is `7081948C219F02069F03...`

In [23]:
## The CDOL1 is 33 bytes, skipping the tags and lengths 
cdol1 = take(response, 33, 1+2+1+1)
cdol1

'9f 02 06 9f 03 06 9f 1a 02 95 05 5f 2a 02 9a 03 9c 01 9f 37 04 9f 35 01 9f 45 02 9f 4c 08 9f 34 03'

In [24]:
## After the CDOL1 the next tag is the CDOL2
take(response, 1, 1+2+1+1 + 33) # 0x8d - CDOL2

'8d'

In [25]:
## with length 0x0c
take(response, 1, 1+2+1+1 + 33 + 1)

'0c'

In [26]:
## Which is 12 in decimal
int(_, 16)

12

In [27]:
## So the CDOL2 can be extracted
cdol2 = take(response, 12, 1+2+1+1 + 33 + 1+1)
cdol2

'91 0a 8a 02 95 05 9f 37 04 9f 4c 08'

CDOL1 is `9f 02 06 9f 03 06 9f 1a 02 95 05...`

In [28]:
## DOL objects are a list of tags and lengths
take(cdol1, 1)

'9f'

In [29]:
## 9f starts a context-specific class, primitive, multi-byte tag
format_bytes(_, [2,1,5])

0,1,2,3,4,5,6,7,8
0x9f =,b8,b7,b6,b5,b4,b3,b2,b1
,1,0,-,-,-,-,-,-
,-,-,0,-,-,-,-,-
,-,-,-,1,1,1,1,1


In [30]:
## The next byte of the tag is 0x02
take(cdol1, 1, 1)

'02'

In [31]:
## 0x02 is the last byte of the tag, giving 0x9f02
format_bytes(_, [1,7]) # 0x9f02 - Amount, Authorised (Numeric)

0,1,2,3,4,5,6,7,8
0x02 =,b8,b7,b6,b5,b4,b3,b2,b1
,0,-,-,-,-,-,-,-
,-,0,0,0,0,0,1,0


In [32]:
## Next is the length of the data expected 0x06
take(cdol1, 1, 1 + 1)

'06'

In [33]:
## Going back to the response, another 2-byte tag is at offset 78...
take(response, 2, 78) # 0x5f20 – Cardholder Name

'5f 20'

In [34]:
## which has length 0x13
take(response, 1, 80)

'13'

In [35]:
## This tag is ASCII encoded
take(response, 0x13, 81)

'4d 55 52 44 4f 43 48 2f 53 54 45 56 45 4e 20 4a 2e 44 52'

In [36]:
decode_bytes(_)

'MURDOCH/STEVEN J.DR'

In [37]:
## Another 2-byte tag is at offset 100...
take(response, 2, 100) # 0x5f30 – Service Code

'5f 30'

In [38]:
## with length 0x02
take(response, 1, 102)

'02'

In [39]:
## and in binary-coded decimal format: 201
strip_bytes(take(response, 2, 103))

'0201'

In [40]:
## At offset 57 we have a 1-byte tag (its length byte, 0x13, is at offset 58, so the value starts at offset 59)...
take(response, 1, 57) # 0x57 – Track 2 Equivalent Data

'57'

In [41]:
## which is also in binary-coded decimal - I've removed the middle of my card number ;-)
strip_bytes(take(response, 0x13, 59))

'52aaaaaaaaaaaa47d15122011407992700000f'