In [45]:
from construct import *

# Path of the sample Nikon raw file that the TIFF / TIFF_EP / NEF cells below parse.
tiff_file = 'data/data.NEF'

# Read the whole file into one bytes object; the parsers below slice it by offset.
data = None
with open(tiff_file, mode='rb') as f:
    data = f.read()

# TIFF (Tag Image File Format)

参考标准 [TIFF6.0](https://web.archive.org/web/20180810205359/https://www.adobe.io/content/udp/en/open/standards/TIFF/_jcr_content/contentbody/download/file.res/TIFF6.pdf)

# TIFF Struct

TIFF 文件最大为 2**32 bytes, 也就是 4GB.

TIFF 由一个 **Image File Header** 和若干个 **Image File Directory(IFD)** 以及实际的数据组成

结构图如下:

![图 1](../asset/bf5757e490b74e3036f7972526d2a99880ec61076cf0aa2a13c01d45854089ac.png)  

## Image File Header

头部信息一共 8 个字节

- Bytes 0-1: 指示大小端
    - II(0x49 0x49): 小端字节序
    - MM(0x4D 0x4D): 大端字节序
- Bytes 2-3: 固定值 42, 进一步标识该文件为 TIFF 文件
    - 小端序: 0x2A 0x00
    - 大端序: 0x00 0x2A
- Bytes 4-7: 指向第一个 IFD 的 offset, 第一个 IFD 可能存在于 Image File Header 之后的任意地方, 但其偏移必须是偶数

![图 2](../asset/4371d66ff58c949220a99dd41aa537f9404921f1914bd995d0ecb6f6bab07a43.png)  


## Image File Directory(IFD)

每个 TIFF 文件必须要有至少一个 IFD, 而每个 IFD 至少有一个 IFD Entry 

IFD的结构为:

- Bytes 0-1: 指示 IFD 中包含 IFD Entry 的数量, 这里假设为 B
- 随后的 12 * B 个字节: 这里包含了 B 个 IFD Entry (即 Directory Entry, 简称 DE), 每个 DE 大小为 12 Bytes
- 随后的 4 个字节: 下一个 IFD 的偏移量, 如果为 0, 则表示当前为最后一个 IFD 

![图 1](../asset/bf5757e490b74e3036f7972526d2a99880ec61076cf0aa2a13c01d45854089ac.png)  

### IFD Entry

每个 IFD Entry 为 12 Bytes

结构如下:
- Bytes 0-1: Tag
- Bytes 2-3: Type
- Bytes 4-7: Value 的数量
- Bytes 8-11: Value 的值 或 Value 的起始 offset, 取决于 Type, offset 可以在文件的任何地方, offset 必须是偶数

![图 3](../asset/2e76318ec5802dd5faeebad564cce5fd2b35b16e95b76ae3b2cb3f7c8b3029ce.png)  

#### Type

| Type |   description |
| --- | --- |
| 1=BYTE | 8-bit unsigned integer. |
| 2=ASCII | 8-bit byte that contains a 7-bit ASCII code; the last byte shall be NUL (binary zero). |
| 3=SHORT | 16-bit (2-byte) unsigned integer. |
| 4=LONG | 32-bit (4-byte) unsigned integer. |
| 5=RATIONAL | Two LONGs: the first represents the numerator of a fraction; the second, the denominator. |
| 6=SBYTE | An 8-bit signed (twos-complement) integer. |
| 7=UNDEFINED | An 8-bit byte that may contain anything, depending on the definition of the field. |
| 8=SSHORT | A 16-bit (2-byte) signed (twos-complement) integer. |
| 9=SLONG | A 32-bit (4-byte) signed (twos-complement) integer. |
| 10=SRATIONAL | Two SLONGs: the first represents the numerator of the fraction, the second the denominator. |
| 11=FLOAT | Single precision (4-byte) IEEE format. |
| 12=DOUBLE | Double precision (8-byte) IEEE format. |

In [46]:
%run TiffTag.py

class TIFF:
    """Minimal TIFF container parser built on ``construct``.

    Parses the 8-byte image file header and the IFD chain that starts at
    ``FirstIFDOffset``. Entry values are NOT decoded: each entry's ``Data``
    field is the raw 4-byte value/offset field read as an unsigned integer in
    the file's byte order.

    Attributes:
        data: the bytes object being parsed.
        is_little: True when the file starts with ``b'II'``; any other prefix
            is treated as big-endian.
        header: parsed header (``ByteOrder``, ``MagicNum``, ``FirstIFDOffset``).
        ifds_info_map: ``{chain start offset: (ifds, tags_map)}`` as returned
            by :meth:`parse_ifds`; initially holds only the first IFD chain.
    """

    def __init__(self, data):
        """Build the byte-order dependent formats and parse the first IFD chain.

        Args:
            data: complete TIFF stream as a bytes-like object.
        """
        self.data = data
        self.is_little = data[:2] == b'II'

        def uint(width):
            # construct's BytesInteger is big-endian unless swapped=True.
            return BytesInteger(width, swapped=self.is_little)

        self.header_fmt = Struct(
            "ByteOrder" / Bytes(2),
            "MagicNum" / uint(2),
            "FirstIFDOffset" / uint(4),
        )

        self.IFD_entry_fmt = Struct(
            "Tag" / uint(2),
            "Type" / uint(2),
            "Count" / uint(4),
            "Data" / uint(4),
        )

        self.IFD_fmt = Struct(
            "IFDEntryCount" / uint(2),
            "IFDEntries" / Array(this.IFDEntryCount, self.IFD_entry_fmt),
            "NextIFDOffset" / uint(4)
        )

        self.header = self.header_fmt.parse(self.data)
        offset = self.header.FirstIFDOffset
        ifds, tags_map = self.parse_ifds(offset)

        self.ifds_info_map = {offset: (ifds, tags_map)}

    def parse_ifds(self, offset, data=None):
        """Parse the chain of IFDs that starts at ``offset``.

        Args:
            offset: byte offset of the first IFD of the chain inside ``data``.
            data: buffer to parse; defaults to ``self.data``.

        Returns:
            ``(ifds, tags_map)`` where ``ifds`` lists every parsed IFD in chain
            order and ``tags_map`` maps tag number -> ``(TAG[tag], entry)`` for
            the entries whose tag is present in ``TAG``. All IFDs of the chain
            share one map, so a later IFD overwrites an earlier entry that has
            the same tag.
        """
        if data is None:
            data = self.data
        ifds = []
        tags_map = {}
        # Offsets already parsed: a malformed file whose NextIFDOffset points
        # back into the chain would otherwise keep this loop running forever.
        visited = set()
        while True:
            visited.add(offset)
            ifd = self.IFD_fmt.parse(data[offset:])
            ifds.append(ifd)

            for entry in ifd.IFDEntries:
                if entry.Tag not in TAG:
                    continue
                tags_map[entry.Tag] = (TAG[entry.Tag], entry)

            offset = ifd.NextIFDOffset
            # 0 terminates the chain; a repeated offset means the chain loops.
            if offset == 0 or offset in visited:
                break

        return ifds, tags_map

    def __str__(self):
        """Render the header followed by the known tags of every parsed IFD chain."""
        parts = [
            f'Header {self.header!s}\n',
            f'IFDs list len: {len(self.ifds_info_map)}\n',
        ]
        for offset, (_ifds, tags_map) in self.ifds_info_map.items():
            parts.append('-' * 50 + '\n')
            parts.append(f'offset: {hex(offset)}\n')
            parts.extend(f'{name} {entry!s}\n' for name, entry in tags_map.values())
        return ''.join(parts)

# Parse the loaded file as a plain TIFF and print its header plus the known
# tags of the first IFD chain.
tiff = TIFF(data)
print(tiff)
# NOTE(review): the TIFF class defines no `sub_IFDs` attribute, so the line
# below would raise AttributeError if re-enabled; sub-IFDs are parsed by TIFF_EP.
# print(tiff.sub_IFDs)

Header Container: 
    ByteOrder = b'II' (total 2)
    MagicNum = 42
    FirstIFDOffset = 8
IFDs list len: 1
--------------------------------------------------
offset: 0x8
NewSubfileType Container: 
    Tag = 254
    Type = 4
    Count = 1
    Data = 1
ImageWidth Container: 
    Tag = 256
    Type = 4
    Count = 1
    Data = 160
ImageLength Container: 
    Tag = 257
    Type = 4
    Count = 1
    Data = 120
BitsPerSample Container: 
    Tag = 258
    Type = 3
    Count = 3
    Data = 316
Compression Container: 
    Tag = 259
    Type = 3
    Count = 1
    Data = 1
PhotometricInterpretation Container: 
    Tag = 262
    Type = 3
    Count = 1
    Data = 2
Make Container: 
    Tag = 271
    Type = 2
    Count = 18
    Data = 324
Model Container: 
    Tag = 272
    Type = 2
    Count = 11
    Data = 344
StripOffsets Container: 
    Tag = 273
    Type = 4
    Count = 1
    Data = 946870
Orientation Container: 
    Tag = 274
    Type = 3
    Count = 1
    Data = 1
SamplesPerPixel Container

# TIFF/EP (Tag Image File Format / Electronic Photography)

TIFF/EP 是基于 TIFF6 标准为相机图片扩展的一个标准. 
为了完全兼容 TIFF 6.0 的解析, 其一般会在第一个 IFD 中包含一个未压缩的缩略图, 
该缩略图一般大小为 120 * 160, 存储于一个 strip 中.

TIFF/EP 的标识符为 
- Tag 37398 (TIFF/EPstandardID)

其主要数据都存储在第一个 IFD 中的 SubIFDs 中
- Tag 330 (SubIFDs)

同时其规定, TIFF/EP 中每一个图片数据段(tile 或 strip)都包含一个完整的 JPEG 数据流

In [47]:
class TIFF_EP(TIFF):
    """TIFF/EP view of a TIFF file: also parses the SubIFDs and the Exif IFD.

    Every additional IFD chain is registered in ``ifds_info_map`` under its
    start offset, next to the first chain parsed by ``TIFF.__init__``.

    Attributes:
        sub_ifd_tags: list of ``{tag: (name, entry)}`` maps, one per offset
            listed by the SubIFDs entry (tag 330) of the first IFD, in file
            order; empty when the first IFD has no known SubIFDs entry.
        exif_tags: ``{tag: (name, entry)}`` map of the IFD referenced by the
            Exif pointer (tag 34665); empty when there is no such pointer.
    """

    _SUB_IFDS_TAG = 330
    _EXIF_IFD_TAG = 34665

    def __init__(self, data):
        """Parse ``data`` as TIFF, then follow the SubIFDs and Exif pointers.

        Args:
            data: complete TIFF/EP stream as a bytes-like object.
        """
        super().__init__(data)

        first_ifds_tags_map = self.ifds_info_map[self.header.FirstIFDOffset][1]

        self.sub_ifd_tags = []
        sub_ifds = first_ifds_tags_map.get(self._SUB_IFDS_TAG)
        if sub_ifds is not None:
            for offset in self._sub_ifd_offsets(sub_ifds[1]):
                ifds_info = self.parse_ifds(offset)
                self.ifds_info_map[offset] = ifds_info
                self.sub_ifd_tags.append(ifds_info[1])

        self.exif_tags = {}
        exif = first_ifds_tags_map.get(self._EXIF_IFD_TAG)
        if exif is not None:
            offset = exif[1].Data
            self.ifds_info_map[offset] = self.parse_ifds(offset)
            self.exif_tags = self.ifds_info_map[offset][1]

    def _sub_ifd_offsets(self, entry):
        """Return the sub-IFD offsets described by a SubIFDs directory entry."""
        if entry.Count == 1:
            # TIFF 6.0 stores a value inline when it fits in the 4-byte
            # value/offset field, so a single offset IS the field's content
            # rather than a pointer to an offset array.
            return [entry.Data]
        start = entry.Data
        # Only the 4 * Count bytes of the offset array are needed, not the
        # whole remainder of the file.
        raw = self.data[start : start + 4 * entry.Count]
        return Array(entry.Count, BytesInteger(4, swapped=self.is_little)).parse(raw)

# Parse the same file as TIFF/EP; the printout now also lists the sub-IFD and
# Exif IFD chains that TIFF_EP registered in ifds_info_map.
tiff_ep = TIFF_EP(data)
print(tiff_ep)

Header Container: 
    ByteOrder = b'II' (total 2)
    MagicNum = 42
    FirstIFDOffset = 8
IFDs list len: 4
--------------------------------------------------
offset: 0x8
NewSubfileType Container: 
    Tag = 254
    Type = 4
    Count = 1
    Data = 1
ImageWidth Container: 
    Tag = 256
    Type = 4
    Count = 1
    Data = 160
ImageLength Container: 
    Tag = 257
    Type = 4
    Count = 1
    Data = 120
BitsPerSample Container: 
    Tag = 258
    Type = 3
    Count = 3
    Data = 316
Compression Container: 
    Tag = 259
    Type = 3
    Count = 1
    Data = 1
PhotometricInterpretation Container: 
    Tag = 262
    Type = 3
    Count = 1
    Data = 2
Make Container: 
    Tag = 271
    Type = 2
    Count = 18
    Data = 324
Model Container: 
    Tag = 272
    Type = 2
    Count = 11
    Data = 344
StripOffsets Container: 
    Tag = 273
    Type = 4
    Count = 1
    Data = 946870
Orientation Container: 
    Tag = 274
    Type = 3
    Count = 1
    Data = 1
SamplesPerPixel Container

## Nikon Electronic File (NEF)

NEF 格式是 Nikon 基于 TIFF/EP 标准扩展的自家的相机图片格式.
参考资料: [nef](http://lclevy.free.fr/nef/)

其主要的改动在于, SubIFDs(Tag 330) 中, 有两个偏移量
- 第一个 SubIFD 存储了一张全尺寸有损压缩的 JPEG 格式图片
- 第二个 SubIFD 存储了一张全尺寸无损压缩的 RAW 格式图片

### 0th IFD

其第一个 IFD 的定义如下:

| Tag value	| Name	| Type	| Length	| Description |
| --------- | ----- | ----- | --------- | ----------- |
| 0x00fe/254	| SubfileType	| 4	| 1	| 1=Reduced-resolution image |
| 0x0100/256	| imageWidth	| 4	| 1	| 160 |
| 0x0101/257	| ImageHeight	| 4	| 1	| 120 |
| 0x0102/258	| BitsPerSample	| 3	| 3	| [ 8, 8, 8 ] |
| 0x0103/259	| Compression	| 3	| 1	| 1=uncompressed |
| ...	| ...	| 	| 	| ... |
| 0x014a / 330	| SubIFD tag	| 4	| 2	| [ JpegImageOffset, RawOffset ] : offsets to the 2 child IFDs |
| 0x0214 / 532	| ReferenceBlackWhite	| 5=rational	| 6	|  |
| 0x8769 / 34665	| EXIF	| 4	| 1	| offset to the EXIF IFD. the EXIF IFD contains a pointer to the Makernote IFD |
| 0x9286 / 37510	| UserComment	| 7	| variable	|  |
| ...	| ...	| 	| 	| ... |

### 0th subIFD (存储有损JPEG图片)

| Tag value	| Name	| Type	| Length	| Description |
| --------- | ----- | ----- | --------- | ----------- |
| 0x00fe / 254	| SubfileType	| 4	| 1	| 1=Reduced-resolution image |
| 0x0103 / 259	| Compression	| 3	| 1	| 6=old/jpeg |
| 0x011a / 282	| XResolution	| 5=rational	| 1	| 300 |
| 0x011b / 283	| YResolution	| 5	| 1	| 300 |
| 0x0128 / 296	| ResolutionUnit	| 3	| 1	| 2=pixel_per_inch |
| 0x0201 / 513	| JpgFromRawStart	| 4	| 1	| offset to image data |
| 0x0202 / 514	| JpgFromRawLength	| 4	| 1	| image data length |
| 0x0213 / 531	| YCbCrPositioning	| 3	| 1	| 2=co_sited |

### 1st subIFD (存储无损RAW图片)

| Tag value	| Name	| Type	| Length	| Description |
| --------- | ----- | ----- | --------- | ----------- |
| 0x00fe / 254	| SubfileType	| 4	| 1	| 0=Full-resolution Image |
| 0x0100 / 256	| ImageWidth	| 4	| 1	| 3904 for the D60 |
| 0x0101 / 257	| ImageHeight	| 4	| 1	| 2616 for the D60 |
| 0x0102 / 258	| BitsPerSample	| 1	| 1	| 12bits for the D60 |
| 0x0103 / 259	| Compression	| 3	| 1	| 1=uncompressed, 34713=Nikon NEF Compressed [[Details](http://lclevy.free.fr/nef/nikon_compression.c), [Values](http://lclevy.free.fr/nef/values.txt)] |
| 0x0106 / 262	| PhotometricInterpretation	| 3	| 1	| 32803=Color Filter Array |
| 0x0111 / 273	| StripOffsets	| 4	| 1	| offset to the image data |
| 0x0115 / 277	| SamplesPerPixel	| 3	| 1	| 1 |
| 0x0116 / 278	| RowsPerStrip	| 3	| 1	| 2616 for the D60 |
| 0x0117 / 279	| StripByteCounts	| 4	| 1	| image data length |
| 0x011a / 282	| XResolution	| 5=rational	| 1	| 300 |
| 0x011b / 283	| YResolution	| 5	| 1	| 300 |
| 0x011c / 284	| PlanarConfiguration	| 3	| 1	| 1 = Chunky |
| 0x0128 / 296	| ResolutionUnit	| 3	| 1	| 2=pixel_per_inch |
| 0x828d / 33421	| CFARepeatPatternDim	| 3	| 2	| [2, 2] = 2x2 |
| 0x828e / 33422	| CFAPattern2	| 1	| 4	| [1, 2, 0, 1] = [G, B, R, G] for the D60 |
| 0x9217 / 37399	| SensingMethod	| 3	| 1	| 2 = One-chip color area (D60) |

In [62]:
class NEF(TIFF_EP):
    """Nikon NEF reader layered on top of TIFF_EP.

    Attributes:
        jpeg: bytes of the JPEG stored by the first sub-IFD.
        raw_compression: raw ``Data`` field of the Compression entry (tag 259)
            of the second sub-IFD.
        makernote_fmt: layout of the MakerNote payload: the ``b"Nikon\\0"``
            magic, 2 version bytes, 2 skipped bytes, then the remaining bytes.
        makernote_header: MakerNote payload parsed with ``makernote_fmt``.
        makernote: ``(ifds, tags_map)`` of the first IFD chain of the TIFF
            stream embedded in the MakerNote.
    """

    def __init__(self, data):
        """Parse ``data`` as TIFF/EP, then pull out the JPEG and the MakerNote."""
        super().__init__(data)

        self.jpeg = self.get_jpeg_image()

        raw_sub_ifd = self.sub_ifd_tags[1]
        self.raw_compression = raw_sub_ifd[259][1].Data

        self.makernote_fmt = Struct(
            "magic_val" / Const(b"Nikon\0"),
            "version" / Bytes(2),
            Bytes(2),
            "tiff_data" / GreedyBytes,
        )

        # Exif tag 37500: Data is used as the payload's file offset and Count
        # as its length in bytes.
        makernote_entry = self.exif_tags[37500][1]
        begin = makernote_entry.Data
        end = begin + makernote_entry.Count
        self.makernote_header = self.makernote_fmt.parse(self.data[begin:end])

        # The bytes after the 10-byte prefix are parsed as a TIFF stream of
        # their own, so offsets inside it are relative to that stream.
        # NOTE(review): its tags are resolved through the same TAG table as
        # regular TIFF tags (TIFF.parse_ifds), so vendor-specific tag IDs may
        # be mislabeled or dropped -- confirm against TiffTag.py.
        embedded = TIFF(self.makernote_header.tiff_data)
        self.makernote = embedded.ifds_info_map[embedded.header.FirstIFDOffset]

    def get_jpeg_image(self):
        """Return the bytes addressed by tags 513 (start) and 514 (length) of the first sub-IFD."""
        jpeg_sub_ifd = self.sub_ifd_tags[0]
        begin = jpeg_sub_ifd[513][1].Data
        size = jpeg_sub_ifd[514][1].Data
        end = begin + size

        return self.data[begin:end]

    def extract_jpeg(self, out_path):
        """Write the JPEG bytes held in ``self.jpeg`` to ``out_path``."""
        with open(out_path, "wb") as out_file:
            out_file.write(self.jpeg)

    def __str__(self):
        """Render the raw compression, the MakerNote tags and IFDs, then the TIFF dump."""
        makernote_ifds, makernote_tags = self.makernote
        rule = f'{"-"*50}\n'

        parts = [f"NEF raw image compression type: {self.raw_compression}\n", rule]
        parts.append('Makernote known tags:\n')
        parts.extend(f'{tag[0]} {tag[1]}\n' for tag in makernote_tags.values())
        parts.append(rule)
        parts.append('Makernote unknown tags:\n')
        parts.extend(f'{ifd}\n' for ifd in makernote_ifds)
        parts.append(rule)
        parts.append(super().__str__())
        return "".join(parts)


# Parse the file as NEF, write the JPEG referenced by the first sub-IFD to
# disk, then print the MakerNote summary followed by the TIFF/EP dump.
nef = NEF(data)
nef.extract_jpeg("./data/data_nef.jpg")
print(nef)


NEF raw image compression type: 34713
--------------------------------------------------
Makernote known tags:
ProcessingSoftware Container: 
    Tag = 11
    Type = 8
    Count = 2
    Data = 0
--------------------------------------------------
Makernote unknown tags:
Container: 
    IFDEntryCount = 47
    IFDEntries = ListContainer: 
        Container: 
            Tag = 1
            Type = 7
            Count = 4
            Data = 808530480
        Container: 
            Tag = 2
            Type = 3
            Count = 2
            Data = 52428800
        Container: 
            Tag = 4
            Type = 2
            Count = 8
            Data = 578
        Container: 
            Tag = 5
            Type = 2
            Count = 13
            Data = 586
        Container: 
            Tag = 7
            Type = 2
            Count = 7
            Data = 602
        Container: 
            Tag = 8
            Type = 2
            Count = 13
            Data = 610
        Conta