In [18]:
from construct import *
import enum

data = None
tiff_file = 'data/data.NEF'
with open(tiff_file, 'rb') as f:
    data = f.read()

# TIFF (Tag Image File Format)

参考标准 [TIFF6.0](https://web.archive.org/web/20180810205359/https://www.adobe.io/content/udp/en/open/standards/TIFF/_jcr_content/contentbody/download/file.res/TIFF6.pdf)

# TIFF Struct

TIFF 文件最大为 2**32 bytes, 也就是 4GB.

TIFF 由一个 **Image File Header 和若干个 **Image File Directory(IFD)** 以及实际的数据组成

结构图如下:

![图 1](../asset/bf5757e490b74e3036f7972526d2a99880ec61076cf0aa2a13c01d45854089ac.png)  

## Image File Header

头部信息一共 8 个字节

- Bytes 0-1: 指示大小端
    - II(0x49 0x49): 小端字节序
    - MM(0x4D 0x4D): 大端字节序
- Bytes 2-3: 固定值 42, 进一步标识该文件为 TIFF 文件
    - 小端序: 0x2A 0x00
    - 大端序: 0x00 0x2A
- Bytes 4-7: 指向第一个 IFD 的 offset, 第一个 IFD 可能存在于 Image File Header 之后的任意地方, 但其偏移必须是偶数

![图 2](../asset/4371d66ff58c949220a99dd41aa537f9404921f1914bd995d0ecb6f6bab07a43.png)  


## Image File Directory(IFD)

每个 TIFF 文件必须要有至少一个 IFD, 而每个 IFD 至少有一个 IFD Entry 

IFD的结构为:

- Bytes 0-1: 指示 IFD 中包含 IFD Entry 的数量, 这里假设为 B
- 随后的 12 * B 个字节: 这里包含了 B 个 DE, 每个 DE 大小为 12 Bytes
- 随后的 4 个字节: 下一个 IFD 的偏移量, 如果为 0, 则表示当前为最后一个 IFD 

![图 1](../asset/bf5757e490b74e3036f7972526d2a99880ec61076cf0aa2a13c01d45854089ac.png)  

### IFD Entry

每个 IFD Entry 为 12 Bytes

结构如下:
- Bytes 0-1: Tag
- Bytes 2-3: Type
- Bytes 4-7: Value 的数量
- Bytes 8-11: Value 的值 或 Value 的起始 offset, 取决于 Type, offset 可以在文件的任何地方, offset 必须是偶数

![图 3](../asset/2e76318ec5802dd5faeebad564cce5fd2b35b16e95b76ae3b2cb3f7c8b3029ce.png)  

#### Type

| Type |   description |
| --- | --- |
| 1=BYTE | 8-bit unsigned integer. |
| 2=ASCII | 8-bit byte that contains a 7-bit ASCII code; the last byte shall be NUL (binary zero). |
| 3=SHORT | 16-bit (2-byte) unsigned integer. |
| 4=LONG | 32-bit (4-byte) unsigned integer. |
| 5=RATIONAL | Two LONGs: the first represents the numerator of a fraction; the second, the denominator. |
| 6=SBYTE | An 8-bit signed (twos-complement) integer. |
| 7=UNDEFINED | An 8-bit byte that may contain anything, depending on the definition of the field. |
| 8=SSHORT | A 16-bit (2-byte) signed (twos-complement) integer. |
| 9=SLONG | A 32-bit (4-byte) signed (twos-complement) integer. |
| 10=SRATIONAL | Two SLONGs: the first represents the numerator of the fraction, the second the denominator. |
| 11=FLOAT | Single precision (4-byte) IEEE format. |
| 12=DOUBLE | Double precision (8-byte) IEEE format. |

In [26]:
%run TiffTag.py

class TagType(enum.IntEnum):
    BYTE = 1
    ASCII = 2
    SHORT = 3
    LONG = 4
    RATIONAL = 5
    SBYTE = 6
    UNDEFINED = 7
    SSHORT = 8
    SLONG = 9
    SRATIONAL = 10
    FLOAT = 11
    DOUBLE = 12

kTagFmt = {
    TagType.BYTE : Byte,
    TagType.ASCII : Byte,
    TagType.SHORT : Short,
    # TODO: finish this
}

class Tag:
    def __init__(self, entry, data):
        self.tag = entry.Tag
        self.type = TagType(entry.Type)
        self.count = entry.Count
        self.data = entry.Data
        self.name = TAG[self.tag] if self.tag in TAG else 'Unknown'

    def parse_value(self):
        if self.count == 1:
            return self.data
        

    def __str__(self):
        ret = f'{self.name}({self.tag}/{hex(self.tag)}) {self.type.name} count:{self.count} {self.data}/{hex(self.data)}\n'
        return ret

class IFD:
    def __init__(self, data, offset, is_little):
        self.data = data
        self.is_little = is_little
        
        self.IFD_entry_fmt = Struct(
            "Tag" / BytesInteger(2, swapped=self.is_little),
            "Type" / BytesInteger(2, swapped=self.is_little),
            "Count" / BytesInteger(4, swapped=self.is_little),
            "Data" / BytesInteger(4, swapped=self.is_little),
        )

        self.IFD_fmt = Struct(
            "IFDEntryCount" / BytesInteger(2, swapped=self.is_little),
            "IFDEntries" / Array(this.IFDEntryCount, self.IFD_entry_fmt),
            "NextIFDOffset" / BytesInteger(4, swapped=self.is_little)
        )

        self.sub_ifd = None

        self.ifd = self.IFD_fmt.parse(self.data[offset:])
        self.tags = self.parse_tags()

    def get_tag(self, tag):
        return self.tags[tag][1]

    def next(self):
        next_offset = self.ifd.NextIFDOffset
        if next_offset != 0:
            return IFD(self.data[next_offset:])
        else:
            return None

    def parse_tags(self):
        tags_map = {}
        for entry in self.ifd.IFDEntries:
            tags_map[entry.Tag] = Tag(entry, data)
        return tags_map
    
    def __str__(self):
        ret = ''
        ret += f'tag count: {len(self.tags)}\n'
        for tag in self.tags.values():
            ret += f'{tag}\n'
        return ret 


class TIFF:
    def __init__(self, data):
        self.data = data
        self.is_little = data[:2] == b'II'

        self.header_fmt = Struct(
            "ByteOrder" / Bytes(2),
            "MagicNum" / BytesInteger(2, swapped=self.is_little),
            "FirstIFDOffset" / BytesInteger(4, swapped=self.is_little),
        )

        self.header = self.header_fmt.parse(self.data)
        offset = self.header.FirstIFDOffset
        self.ifds = self.parse_ifds(offset)

    def parse_ifds(self, offset, data=None):
        if data == None:
            data = self.data
        ifd = IFD(data, offset, self.is_little)
        ifds = [ifd]
        while True:
            ifd = ifd.next()

            if ifd == None:
                break
            else:
                ifds.append(ifd)

        return ifds

    def __str__(self):
        ret = f'Header {self.header.__str__()}\n'
        ret += f'IFDs list len: {len(self.ifds)}\n'
        ret += ('-'*50 + '\n')
        for ifd in self.ifds:
            ret += f'{ifd}\n'

        return ret

tiff = TIFF(data)
print(tiff)
# print(tiff.sub_IFDs)

NameError: name 'entry' is not defined

# TIFF/EP (Tag Image File Format / Electronic Photography)

TIFF/EP 是基于 TIFF6 标准为相机图片扩展的一个标准. 
为了完全兼容 TIFF 6.0 的解析, 其一般会在第一个 IFD 中包含一个未压缩的缩略图, 
该缩略图一般大小为 120 * 160, 存储于一个 strip 中.

TIFF/EP 的标识符为 
- Tag 37398 (TIFF/EPstandardID)

其主要数据都存储在第一个 IFD 中的 SubIFDs 中
- Tag 330 (SubIFDs)

同时其规定, TIFF/EP 中每一个图片数据段(tile 或 strip)都包含一个完整的 JPEG 数据流

In [None]:
class TIFF_EP(TIFF):
    def __init__(self, data):
        super().__init__(data)

        sub_ifds_tag = self.ifds[0].get_tag(330)

        # self.sub_ifd_tags = []
        # for offset in Array(sub_ifds_tag.Count, BytesInteger(4, swapped=self.is_little)).parse(self.data[sub_ifds_tag.Data:]):
        #     tags_map = self.parse_ifds(offset)
        #     self.ifds_tag_map[offset] = tags_map
        #     self.sub_ifd_tags.append(tags_map)

        # exif_ifds_tag = self.first_ifd_tags[34665][1]
        # offset = exif_ifds_tag.Data
        # self.exif_tags = self.parse_ifds(offset)
        # self.ifds_tag_map[offset] = self.exif_tags

tiff_ep = TIFF_EP(data)
print(tiff_ep)

Container: 
    Tag = 330
    Type = 4
    Count = 2
    Data = 404
Header Container: 
    ByteOrder = b'II' (total 2)
    MagicNum = 42
    FirstIFDOffset = 8
IFDs list len: 1
--------------------------------------------------
tag count: 25
NewSubfileType Container: 
    Tag = 254
    Type = 4
    Count = 1
    Data = 1
ImageWidth Container: 
    Tag = 256
    Type = 4
    Count = 1
    Data = 160
ImageLength Container: 
    Tag = 257
    Type = 4
    Count = 1
    Data = 120
BitsPerSample Container: 
    Tag = 258
    Type = 3
    Count = 3
    Data = 316
Compression Container: 
    Tag = 259
    Type = 3
    Count = 1
    Data = 1
PhotometricInterpretation Container: 
    Tag = 262
    Type = 3
    Count = 1
    Data = 2
Make Container: 
    Tag = 271
    Type = 2
    Count = 18
    Data = 324
Model Container: 
    Tag = 272
    Type = 2
    Count = 11
    Data = 344
StripOffsets Container: 
    Tag = 273
    Type = 4
    Count = 1
    Data = 946870
Orientation Container: 
    Tag =

## Nikon Electronic File (NEF)

NEF 格式时 Nikon 基于 TIFF/EP 标准扩展的自家的相机图片格式.
参考资料: [nef](http://lclevy.free.fr/nef/)

其主要的改动在于, SubIFDs(Tag 330) 中, 有两个偏移量
- 第一个 SubIFD 存储了一张全尺寸有损压缩的 JPEG 格式图片
- 第二个 SubIFD 存储了一张全尺寸无损压缩的 RAW 格式图片

### 0th IFD

其第一个 IFD 的定义如下:

| Tag value	| Name	| Type	| Length	| Description |
| --------- | ----- | ----- | --------- | ----------- |
| 0x00fe/254	| SubfileType	| 4	| 1	| 1=Reduced-resolution image |
| 0x0100/256	| imageWidth	| 4	| 1	| 160 |
| 0x0101/257	| ImageHeight	| 4	| 1	| 120 |
| 0x0102/258	| BitsPerSample	| 3	| 3	| [ 8, 8, 8 ] |
| 0x0103/259	| Compression	| 3	| 1	| 1=uncompressed |
| ...	| ...	| 	| 	| ... |
| 0x014a / 330	| SubIFD tag	| 4	| 2	| [ JpegImageOffset, RawOffset ] : offsets to the 2 child IFDs |
| 0x0214 / 34665	| ReferenceBlackWhite	| 5=rational	| 6	|  |
| 0x8769 / 34665	| EXIF	| 4	| 1	| offset to the EXIF IFD. the EXIF IFD contains a pointer to the Makernote IFD |
| 0x9286 / 37510	| UserComment	| 7	| variable	|  |
| ...	| ...	| 	| 	| ... |

### 0th subIFD (存储有损JPEG图片)

| Tag value	| Name	| Type	| Length	| Description |
| --------- | ----- | ----- | --------- | ----------- |
| 0x00fe / 254	| SubfileType	| 4	| 1	| 1=Reduced-resolution image |
| 0x0103 / 259	| Compression	| 3	| 1	| 6=old/jpeg |
| 0x011a / 282	| XResolution	| 5=rational	| 1	| 300 |
| 0x011b / 283	| YResolution	| 5	| 1	| 300 |
| 0x0128 / 296	| ResolutionUnit	| 3	| 1	| 2=pixel_per_inch |
| 0x0201 / 513	| JpgFromRawStart	| 4	| 1	| offset to image data |
| 0x0202 / 514	| JpgFromRawLength	| 4	| 1	| image data length |
| 0x0213 / 531	| YCbCrPositioning	| 3	| 1	| 2=co_sited |

### 1th subIFD (存储无损RAW图片)

| Tag value	| Name	| Type	| Length	| Description |
| --------- | ----- | ----- | --------- | ----------- |
| 0x00fe / 254	| SubfileType	| 4	| 1	| 0=Full-resolution Image |
| 0x0100 / 256	| ImageWidth	| 4	| 1	| 3904 for the D60 |
| 0x0101 / 257	| ImageHeight	| 4	| 1	| 2616 for the D60 |
| 0x0102 / 258	| BitsPerSample	| 1	| 1	| 12bits for the D60 |
| 0x0103 / 259	| Compression	| 3	| 1	| 1=uncompressed, 34713=Nikon NEF Compressed [[Details](http://lclevy.free.fr/nef/nikon_compression.c), [Values](http://lclevy.free.fr/nef/values.txt)] |
| 0x0106 / 262	| PhotometricInterpretation	| 3	| 1	| 32803=Color Filter Array |
| 0x0111 / 273	| JpgFromRawStart	| 4	| 1	| offset to the image data |
| 0x0115 / 277	| SamplesPerPixel	| 3	| 1	| 1 |
| 0x0116 / 278	| RowsPerStrip	| 3	| 1	| 2616 for the D60 |
| 0x0117 / 279	| JpgFromRawLength	| 4	| 1	| image data lenght |
| 0x011a / 282	| XResolution	| 5=rational	| 1	| 300 |
| 0x011b / 283	| YResolution	| 5	| 1	| 300 |
| 0x011c / 284	| PlanarConfiguration	| 3	| 1	| 1 = Chunky |
| 0x0128 / 296	| ResolutionUnit	| 3	| 1	| 2=pixel_per_inch |
| 0x828d / 33421	| CFARepeatPatternDim	| 3	| 2	| [2, 2] = 2x2 |
| 0x828e / 33422	| CFAPattern2	| 1	| 4	| [1, 2, 0, 1] = [G, B, R, G] for the D60 |
| 0x9217 / 37399	| SensingMethod	| 3	| 1	| 2 = One-chip color area (D60) |

### Makernote

Makernote 属于是 Nikon 自定义的字段了, 里面包含了一个 jpeg 的缩略图, 
同时Makernote本身包含了 tiff 6.0 的完整格式

其位于 NEF 的 EXIF IFD(0x8769 / 34665) 的 MakerNote tag(0x927C / 37500) 中,
MakerNote tag 的 value 为 Makernote 的 offset, 其 Count 为 Makernote 的字节数

Nikon 的缩略图就存放在 Makernote 的 0th ifd 的 NikonPreview(0x0011 / 17) 中

#### Makernote header

| Offset	| Length	| Type	| Description	| Value |
| --------- | --------- | ----- | ------------- | ----- |
| 0x0000	| 6	| string	| magic value	| "Nikon", zero terminated |
| 0x0006	| 1	| short	| version ?	| 0x0210 |
| 0x0008	| 1	| short	| ?	| 0x0000 |
| 0x000a	| 1	| short	| byte order	| Usually 0x4D4D / "MM", except for E5700 (0x4949 / "II") |
| 0x000c	| 1	| short	| TIFF magic value	| 0x2a |
| 0x00e	| 1	| long	| TIFF offset	| 8 |
| 0x0012	| 	| IFD	| first IFD	|  |

#### Makernote tags

这里的数据存储与 first IFD 中, Nikon 的所有自定义 tag 参考 [Nikon tags](https://exiftool.org/TagNames/Nikon.html)


| Tag value	| Name	| Type	| Length	| Description |
| --------- | --------- | ----- | ------------- | ----- |
| 0x0001 / 1	| MakerNoteVersion	| 7	| 4	| version="0210" for the D60 |
| 0x0002 / 2	| ISO	| 3	| 2	|  |
| 0x000e / 14	| ExposureDifference	| 7	| 4	|  |
| 0x0011 / 17	| NikonPreview	| 4	| 1	| offset to this IFD |
| 0x0012 / 18	| FlashExposureComp	| 7	| 4	|  |
| 0x001d / 27	| SerialNumber	| 3	| 7	| Serial numner (used for Color Balance decryption) |
| 0x0093 / 147	| NEF Compression	| 3	| 	| 1=lossy type1  3=lossless 4=lossy type 2 (d90, d3, d700, d300 ) |
| 0x0096 / 150	| Linearization table	| bytes	| 	| see format below |
| 0x0097 / 151	| Color Balance	| 7	| 572 for D60	| Color Balance (see decryption code below for versionn >= 200) |
| 0x00a7 / 167	| ShutterCount	| 4	| 1	| shutter count. used to decrypt While Balance tag |
| 0x00bb / 187	| ?	| 7	| 6	|  |


#### Makernote Nikon preview tag (0x0011)

Nikon 的缩略图就存在这里

| Tag value	| Name	| Type	| Length	| Description |
| --------- | --------- | ----- | ------------- | ----- |
| 0x0103 / 259	| Compression	| 3	| 1	| 6=old/jpeg |
| 0x011a / 282	| XResolution	| 5=rational	| 1	| 300 |
| 0x011b / 283	| YResolution	| 5	| 1	| 300 |
| 0x0128 / 296	| ResolutionUnit	| 3	| 1	| 2=pixel_per_inch |
| 0x0201 / 513	| JpgFromRawStart	| 4	| 1	| offset to image data |
| 0x0202 / 514	| JpgFromRawLength	| 4	| 1	| image data length |
| 0x0213 / 531	| YCbCrPositioning	| 3	| 1	| 2=co_sited |

In [None]:
class Makernote_Nikon(TIFF):
    def __init__(self, data):
        super().__init__(data)

    def get_jpeg_image(self):
        preview_ifd_offset = self.first_ifd_tags[17][1].Data
        preview_ifd_tags = self.parse_ifds(preview_ifd_offset)
        jpeg_offset = preview_ifd_tags[513][1].Data
        jpeg_length = preview_ifd_tags[514][1].Data

        return self.data[jpeg_offset:jpeg_length]


class NEF(TIFF_EP):
    def __init__(self, data):
        super().__init__(data)

        self.jpeg = self.get_jpeg_image()

        self.raw_compression = self.sub_ifd_tags[1][259][1].Data

        self.makernote_fmt = Struct(
            "magic_val" / Const(b"Nikon\0"),
            "version" / Bytes(2),
            Bytes(2),
            "tiff_data" / GreedyBytes,
        )

        makernote_offset = self.exif_tags[37500][1].Data
        makernote_len = self.exif_tags[37500][1].Count
        makernote_data = self.data[makernote_offset : makernote_offset + makernote_len]
        self.makernote_header = self.makernote_fmt.parse(makernote_data)

        self.makernote_tiff = Makernote_Nikon(self.makernote_header.tiff_data)
        self.thumb_jpeg = self.get_jpeg_thumb()

    def get_jpeg_image(self):
        jpeg_offset = self.sub_ifd_tags[0][513][1].Data
        jpeg_length = self.sub_ifd_tags[0][514][1].Data

        return self.data[jpeg_offset : jpeg_offset + jpeg_length]

    def get_jpeg_thumb(self):
        return self.makernote_tiff.get_jpeg_image()

    def extract_jpeg(self, out_path):
        with open(out_path, "wb") as f:
            f.write(self.jpeg)

    def extract_thumb_jpeg(self, out_path):
        with open(out_path, "wb") as f:
            f.write(self.thumb_jpeg)

    def __str__(self):
        ret = f"NEF raw image compression type: {self.raw_compression}\n"
        ret += f'{"+"*50}\n'
        ret += "markernote tags:\n"
        ret += f"{self.makernote_tiff}\n"
        ret += f'{"+"*50}\n'
        ret += super().__str__()
        return ret


nef = NEF(data)
nef.extract_jpeg("./data/data_nef.jpg")
nef.extract_thumb_jpeg("./data/data_nef_thumb.jpg")
print(nef)


NEF raw image compression type: 34713
++++++++++++++++++++++++++++++++++++++++++++++++++
markernote tags:
Header Container: 
    ByteOrder = b'II' (total 2)
    MagicNum = 42
    FirstIFDOffset = 8
IFDs list len: 1
--------------------------------------------------
IFD list len: 1
++++++++++++++++++++++++++++++++++++++++++++++++++
offset: 8
Container: 
    IFDEntryCount = 47
    IFDEntries = ListContainer: 
        Container: 
            Tag = 1
            Type = 7
            Count = 4
            Data = 808530480
        Container: 
            Tag = 2
            Type = 3
            Count = 2
            Data = 52428800
        Container: 
            Tag = 4
            Type = 2
            Count = 8
            Data = 578
        Container: 
            Tag = 5
            Type = 2
            Count = 13
            Data = 586
        Container: 
            Tag = 7
            Type = 2
            Count = 7
            Data = 602
        Container: 
            Tag = 8
       

## CR2



In [None]:
cr2_data = None
with open(r'D:\Users\Desktop\Canon_Canon PowerShot SX50 HS_2022-03-14_36569088.CR2', 'rb') as f:
    cr2_data = f.read()

cr2 = TIFF(cr2_data)
# print(cr2.IFD_fmt.parse(cr2_data[16:]))
print(cr2)

Header Container: 
    ByteOrder = b'II' (total 2)
    MagicNum = 42
    FirstIFDOffset = 16
IFDs list len: 1
--------------------------------------------------
IFD list len: 4
++++++++++++++++++++++++++++++++++++++++++++++++++
offset: 16
Container: 
    IFDEntryCount = 19
    IFDEntries = ListContainer: 
        Container: 
            Tag = 256
            Type = 3
            Count = 1
            Data = 1600
        Container: 
            Tag = 257
            Type = 3
            Count = 1
            Data = 1200
        Container: 
            Tag = 258
            Type = 3
            Count = 3
            Data = 250
        Container: 
            Tag = 259
            Type = 3
            Count = 1
            Data = 6
        Container: 
            Tag = 270
            Type = 2
            Count = 32
            Data = 256
        Container: 
            Tag = 271
            Type = 2
            Count = 6
            Data = 288
        Container: 
            Tag = 272
  