Skip to content

Commit

Permalink
reduce redundancy in font table writing
Browse files (browse the repository at this point in the history)
  • Loading branch information
rkusa committed Aug 8, 2020
1 parent f0c3d9d commit bef32ab
Show file tree
Hide file tree
Showing 15 changed files with 472 additions and 384 deletions.
371 changes: 109 additions & 262 deletions otf/src/lib.rs

Large diffs are not rendered by default.

213 changes: 159 additions & 54 deletions otf/src/tables/cmap.rs
Expand Up @@ -2,9 +2,12 @@ mod format12;
mod format4;

use std::borrow::Cow;
use std::{io, mem};
use std::convert::TryFrom;
use std::io::{self, Cursor};
use std::mem;
use std::rc::Rc;

use super::{FontTable, Glyph};
use super::{FontTable, Glyph, NamedTable};
use crate::utils::limit_read::LimitRead;
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use format12::Format12;
Expand All @@ -31,69 +34,185 @@ use format4::Format4;
#[derive(Debug, PartialEq, Clone)]
pub struct CmapTable {
/// Table version, read as the first `u16` of the table by `unpack`.
pub(crate) version: u16,
/// Number of encoding records as read from the table header.
///
/// NOTE(review): `unpack` skips unsupported records, so this value can differ from
/// `encoding_records.len()`. This field looks like the pre-change side of the diff
/// (the new `subset` builds a `CmapTable` without it) — confirm before relying on it.
pub(crate) num_tables: u16,
/// The supported encoding records kept by `unpack`, in the order they were read.
pub(crate) encoding_records: Vec<EncodingRecord>,
}

impl NamedTable for CmapTable {
    /// Returns the name of this table, `"cmap"`.
    fn name() -> &'static str {
        const TABLE_NAME: &str = "cmap";
        TABLE_NAME
    }
}

impl<'a> FontTable<'a> for CmapTable {
type UnpackDep = ();
type SubsetDep = ();

/// Parses a `cmap` table starting at the cursor's current position.
///
/// Reads the header (`version`, `num_tables`) followed by `num_tables` raw encoding
/// records, keeping only the (platform, encoding) pairs (0, 4), (0, 3), (3, 10) and
/// (3, 1). The subtable of every kept record is then unpacked from
/// `table start + record.offset`; records that carry the same offset share a single
/// `Rc<Subtable>` instead of being parsed again.
///
/// # Errors
///
/// Returns an `io::Error` when a read or a subtable unpack fails, and an error of kind
/// `Other` when none of the records uses a supported (platform, encoding) pair.
///
/// NOTE(review): this span is taken from a rendered diff without +/- markers, so a few
/// statements appear twice (the previous version directly followed by its replacement).
/// The comments below describe the `Cursor`-based variant.
fn unpack<R: io::Read>(mut rd: &mut R, _: Self::UnpackDep) -> Result<Self, io::Error> {
fn unpack<R: io::Read + AsRef<[u8]>>(
mut rd: &mut Cursor<R>,
_: Self::UnpackDep,
) -> Result<Self, io::Error> {
// Remember where the table starts; subtable offsets are applied relative to this position.
let offset = rd.position();

let version = rd.read_u16::<BigEndian>()?;
let num_tables = rd.read_u16::<BigEndian>()?;

// The capacity is capped at 4, presumably because only four (platform, encoding) pairs
// pass the filter below.
let mut encoding_records = Vec::with_capacity(num_tables.min(4) as usize);
let mut raw_records = Vec::with_capacity(num_tables.min(4) as usize);
for _ in 0..num_tables {
let record = EncodingRecord::unpack(&mut rd, ())?;
let record = RawEncodingRecord::unpack(&mut rd, ())?;
// skip records whose (platform, encoding) pair is not supported
if !matches!(
(record.platform_id, record.encoding_id),
(0, 4) | (0, 3) | (3, 10) | (3, 1)
) {
continue;
}
encoding_records.push(record);
raw_records.push(record);
}

if encoding_records.is_empty() {
if raw_records.is_empty() {
return Err(io::Error::new(
io::ErrorKind::Other,
"Font does not contain any supported CMAP",
));
}

// Each resolved record is stored together with its raw offset so that later records
// can detect a subtable that has already been parsed.
let mut records: Vec<(u32, EncodingRecord)> = Vec::with_capacity(raw_records.len());
for raw_record in &raw_records {
// Look for an earlier record pointing at the same offset and share its subtable.
let existing_subtable = records
.iter()
.find(|(offset, _)| raw_record.offset == *offset)
.map(|(_, subtable)| Rc::clone(&subtable.subtable));
if let Some(subtable) = existing_subtable {
records.push((
raw_record.offset,
EncodingRecord {
platform_id: raw_record.platform_id,
encoding_id: raw_record.encoding_id,
subtable,
},
));
continue;
}

// First time this offset is seen: seek to the subtable and parse it.
rd.set_position(offset + (raw_record.offset) as u64);
let subtable = Subtable::unpack(&mut rd, ())?;
records.push((
raw_record.offset,
EncodingRecord {
platform_id: raw_record.platform_id,
encoding_id: raw_record.encoding_id,
subtable: Rc::new(subtable),
},
));
}
// The offsets were only needed for de-duplication; keep just the records.
let encoding_records = records.into_iter().map(|(_, st)| st).collect();
Ok(CmapTable {
version,
num_tables,
encoding_records,
})
}

/// Serializes the table into `wr`: the version, the number of encoding records, the
/// encoding records themselves and finally the packed subtable data.
///
/// Subtables are packed into a temporary buffer first so that the offset of each one is
/// known when the encoding records are written. A subtable shared by several records
/// (same `Rc` allocation) is packed once and every such record points at the same offset.
///
/// # Errors
///
/// Returns any `io::Error` raised while packing a subtable or writing to `wr`.
fn pack<W: io::Write>(&self, mut wr: &mut W) -> Result<(), io::Error> {
// cmap subtables
let mut encoding_records_data = Vec::new();
let mut raw_recods = Vec::with_capacity(self.encoding_records.len());

// reserve cmap table data
// The first subtable starts after the 4 header bytes plus 8 bytes per encoding record.
let mut subtable_offset = 4 + self.encoding_records.len() * 8;
// (offset, subtable) pairs that were already packed, used to emit shared subtables once.
let mut written_subtables = Vec::new();
for subtable in &self.encoding_records {
// Identity comparison (`Rc::ptr_eq`), not structural equality.
let prev_offset = written_subtables
.iter()
.find(|(_, other)| Rc::ptr_eq(other, &subtable.subtable))
.map(|(offset, _)| *offset);
if let Some(prev_offset) = prev_offset {
raw_recods.push(RawEncodingRecord {
platform_id: subtable.platform_id,
encoding_id: subtable.encoding_id,
offset: prev_offset,
});
continue;
}

let len_before = encoding_records_data.len();
subtable.subtable.pack(&mut encoding_records_data)?; // NOTE(review): previously annotated "align to 4 bytes", but no padding is written in this function — confirm whether alignment is required
// NOTE(review): an offset that does not fit into `u32` is silently replaced by
// `u32::MAX` here instead of being reported as an error.
let record_offset = u32::try_from(subtable_offset).ok().unwrap_or(u32::MAX);
raw_recods.push(RawEncodingRecord {
platform_id: subtable.platform_id,
encoding_id: subtable.encoding_id,
offset: record_offset,
});
written_subtables.push((record_offset, subtable.subtable.clone()));
// Advance by the number of bytes the subtable just added to the buffer.
subtable_offset += encoding_records_data.len() - len_before;
}

wr.write_u16::<BigEndian>(self.version)?;
// NOTE(review): the next three lines look like the pre-change side of the diff (they are
// immediately followed by their replacement and leave the braces unbalanced) — confirm.
wr.write_u16::<BigEndian>(self.num_tables)?;
for table in &self.encoding_records {
table.pack(&mut wr)?;
// NOTE(review): `as u16` truncates when there are more than `u16::MAX` records.
wr.write_u16::<BigEndian>(self.encoding_records.len() as u16)?;
for record in raw_recods {
record.pack(&mut wr)?;
}
wr.write_all(&encoding_records_data)?;

Ok(())
}

/// Builds a copy of this table whose subtables are subsetted to `glyphs`.
///
/// Every encoding record keeps its platform and encoding IDs. A subtable that is shared
/// by several records (same `Rc` allocation) is subsetted only once, and the resulting
/// subtable is shared between the corresponding new records as well. The result is
/// always returned as `Cow::Owned`.
fn subset(&'a self, glyphs: &[Glyph], _dep: Self::SubsetDep) -> Cow<'a, Self>
where
    Self: Clone,
{
    // (original, subsetted) pairs of the subtables processed so far.
    let mut cache: Vec<(Rc<Subtable>, Rc<Subtable>)> = Vec::new();
    let mut encoding_records = Vec::with_capacity(self.encoding_records.len());

    for record in &self.encoding_records {
        // Identity lookup: has this exact subtable allocation been subsetted already?
        let cached = cache
            .iter()
            .find(|(original, _)| Rc::ptr_eq(original, &record.subtable))
            .map(|(_, subsetted)| Rc::clone(subsetted));

        let subtable = match cached {
            Some(shared) => shared,
            None => {
                let fresh = Rc::new(record.subtable.subset(glyphs, ()).into_owned());
                cache.push((Rc::clone(&record.subtable), Rc::clone(&fresh)));
                fresh
            }
        };

        encoding_records.push(EncodingRecord {
            platform_id: record.platform_id,
            encoding_id: record.encoding_id,
            subtable,
        });
    }

    Cow::Owned(CmapTable {
        version: self.version,
        encoding_records,
    })
}
}

/// An encoding record as it is stored in the `cmap` table: the platform and encoding IDs
/// together with the offset of the subtable, before the subtable itself has been read.
#[derive(Debug, PartialEq, Clone)]
pub struct RawEncodingRecord {
    /// Platform ID of the record.
    platform_id: u16,
    /// Encoding ID of the record, interpreted together with `platform_id`.
    encoding_id: u16,
    /// Byte offset from beginning of table to the subtable for this encoding.
    offset: u32,
}

/// An encoding record of the `cmap` table together with its subtable.
#[derive(Debug, PartialEq, Clone)]
pub struct EncodingRecord {
/// Platform ID; `CmapTable::unpack` only keeps records with platform 0 or 3.
pub(crate) platform_id: u16,
/// Encoding ID, interpreted together with `platform_id`.
pub(crate) encoding_id: u16,
/// Byte offset from beginning of table to the subtable for this encoding.
///
/// NOTE(review): this field looks like the pre-change side of the diff (the records
/// built in `unpack` and `subset` do not set it) — confirm.
pub(crate) offset: u32,
/// The subtable of this record. Records that referenced the same offset in the font
/// share one allocation, which `pack` and `subset` detect via `Rc::ptr_eq`.
pub(crate) subtable: Rc<Subtable>,
}

impl<'a> FontTable<'a> for EncodingRecord {
impl<'a> FontTable<'a> for RawEncodingRecord {
type UnpackDep = ();
type SubsetDep = ();

fn unpack<R: io::Read>(rd: &mut R, _: Self::UnpackDep) -> Result<Self, io::Error> {
Ok(EncodingRecord {
fn unpack<R: io::Read + AsRef<[u8]>>(
rd: &mut Cursor<R>,
_: Self::UnpackDep,
) -> Result<Self, io::Error> {
Ok(RawEncodingRecord {
platform_id: rd.read_u16::<BigEndian>()?,
encoding_id: rd.read_u16::<BigEndian>()?,
offset: rd.read_u32::<BigEndian>()?,
Expand Down Expand Up @@ -127,23 +246,26 @@ impl<'a> FontTable<'a> for Subtable {
type UnpackDep = ();
type SubsetDep = ();

fn unpack<R: io::Read>(rd: &mut R, _: Self::UnpackDep) -> Result<Self, io::Error> {
fn unpack<R: io::Read + AsRef<[u8]>>(
rd: &mut Cursor<R>,
_: Self::UnpackDep,
) -> Result<Self, io::Error> {
let format = rd.read_u16::<BigEndian>()?;

match format {
4 => {
let mut length = rd.read_u16::<BigEndian>()?;
// length excluding format and length
length -= (mem::size_of::<u16>() * 2) as u16;
let mut rd = LimitRead::new(rd, length as usize);
let mut rd = Cursor::new(LimitRead::from_cursor(rd, length as usize));
Ok(Subtable::Format4(Format4::unpack(&mut rd, ())?))
}
12 => {
rd.read_u16::<BigEndian>()?; // reserved
let mut length = rd.read_u32::<BigEndian>()?;
// length excluding format, reserved and length
length -= (mem::size_of::<u16>() * 2 + mem::size_of::<u32>()) as u32;
let mut rd = LimitRead::new(rd, length as usize);
let mut rd = Cursor::new(LimitRead::from_cursor(rd, length as usize));
Ok(Subtable::Format12(Format12::unpack(&mut rd, ())?))
}
_ => Err(io::Error::new(
Expand Down Expand Up @@ -225,38 +347,25 @@ mod test {
.unwrap();

assert_eq!(cmap_table.version, 0);
assert_eq!(cmap_table.num_tables, 4);
assert_eq!(
cmap_table.encoding_records,
vec![
EncodingRecord {
platform_id: 0,
encoding_id: 3,
offset: 36,
},
EncodingRecord {
platform_id: 0,
encoding_id: 4,
offset: 1740,
},
EncodingRecord {
platform_id: 3,
encoding_id: 1,
offset: 36,
},
EncodingRecord {
platform_id: 3,
encoding_id: 10,
offset: 1740,
},
]
);
assert_eq!(cmap_table.encoding_records.len(), 4);

assert_eq!(cmap_table.encoding_records[0].platform_id, 0);
assert_eq!(cmap_table.encoding_records[0].encoding_id, 3);

assert_eq!(cmap_table.encoding_records[1].platform_id, 0);
assert_eq!(cmap_table.encoding_records[1].encoding_id, 4);

assert_eq!(cmap_table.encoding_records[2].platform_id, 3);
assert_eq!(cmap_table.encoding_records[2].encoding_id, 1);

assert_eq!(cmap_table.encoding_records[3].platform_id, 3);
assert_eq!(cmap_table.encoding_records[3].encoding_id, 10);

// re-pack and compare
let mut buffer = Vec::new();
cmap_table.pack(&mut buffer).unwrap();
assert_eq!(
CmapTable::unpack(&mut Cursor::new(buffer), ()).unwrap(),
CmapTable::unpack(&mut Cursor::new(&buffer[..]), ()).unwrap(),
cmap_table
);
}
Expand All @@ -266,21 +375,17 @@ mod test {
let data = include_bytes!("../../tests/fonts/Iosevka/iosevka-regular.ttf").to_vec();
let mut cursor = Cursor::new(&data[..]);
let table = OffsetTable::unpack(&mut cursor, ()).unwrap();
let cmap_record = table.get_table_record("cmap").unwrap();
let cmap_table: CmapTable = table
.unpack_required_table("cmap", (), &mut cursor)
.unwrap();

for record in &cmap_table.encoding_records {
cursor.set_position((cmap_record.offset + record.offset) as u64);
let subtable = Subtable::unpack(&mut cursor, ()).unwrap();

// re-pack and compare
let mut buffer = Vec::new();
subtable.pack(&mut buffer).unwrap();
record.subtable.pack(&mut buffer).unwrap();
assert_eq!(
Subtable::unpack(&mut Cursor::new(buffer), ()).unwrap(),
subtable
&Subtable::unpack(&mut Cursor::new(&buffer[..]), ()).unwrap(),
record.subtable.as_ref()
);
}
}
Expand Down
23 changes: 13 additions & 10 deletions otf/src/tables/cmap/format12.rs
@@ -1,6 +1,6 @@
use std::borrow::Cow;
use std::convert::TryFrom;
use std::io;
use std::io::{self, Cursor};

use crate::tables::{FontTable, Glyph};
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
Expand Down Expand Up @@ -43,7 +43,10 @@ impl<'a> FontTable<'a> for Format12 {
type UnpackDep = ();
type SubsetDep = ();

fn unpack<R: io::Read>(mut rd: &mut R, _: Self::UnpackDep) -> Result<Self, io::Error> {
fn unpack<R: io::Read + AsRef<[u8]>>(
mut rd: &mut Cursor<R>,
_: Self::UnpackDep,
) -> Result<Self, io::Error> {
let language = rd.read_u32::<BigEndian>()?;
let num_groups = rd.read_u32::<BigEndian>()?;

Expand Down Expand Up @@ -136,7 +139,10 @@ impl<'a> FontTable<'a> for SequentialMapGroup {
type UnpackDep = ();
type SubsetDep = ();

fn unpack<R: io::Read>(rd: &mut R, _: Self::UnpackDep) -> Result<Self, io::Error> {
fn unpack<R: io::Read + AsRef<[u8]>>(
rd: &mut Cursor<R>,
_: Self::UnpackDep,
) -> Result<Self, io::Error> {
Ok(SequentialMapGroup {
start_char_code: rd.read_u32::<BigEndian>()?,
end_char_code: rd.read_u32::<BigEndian>()?,
Expand All @@ -154,7 +160,7 @@ impl<'a> FontTable<'a> for SequentialMapGroup {

#[cfg(test)]
mod test {
use std::io::Cursor;
use std::rc::Rc;

use super::*;
use crate::tables::cmap::{CmapTable, Subtable};
Expand All @@ -164,20 +170,17 @@ mod test {
let data = include_bytes!("../../../tests/fonts/Iosevka/iosevka-regular.ttf").to_vec();
let mut cursor = Cursor::new(&data[..]);
let table = OffsetTable::unpack(&mut cursor, ()).unwrap();
let cmap_record = table.get_table_record("cmap").unwrap();
let cmap_table: CmapTable = table
.unpack_required_table("cmap", (), &mut cursor)
.unwrap();

let record = cmap_table
.encoding_records
.iter()
.into_iter()
.find(|r| r.platform_id == 0 && r.encoding_id == 4)
.unwrap();

cursor.set_position((cmap_record.offset + record.offset) as u64);
let subtable = Subtable::unpack(&mut cursor, ()).unwrap();
match subtable {
match Rc::try_unwrap(record.subtable).unwrap() {
Subtable::Format12(subtable) => subtable,
_ => panic!("Expected format 12 subtable"),
}
Expand All @@ -196,7 +199,7 @@ mod test {
let mut buffer = Vec::new();
format12.pack(&mut buffer).unwrap();
assert_eq!(
Format12::unpack(&mut Cursor::new(buffer), ()).unwrap(),
Format12::unpack(&mut Cursor::new(&buffer[..]), ()).unwrap(),
format12
);
}
Expand Down

0 comments on commit bef32ab

Please sign in to comment.