Skip to content

Commit

Permalink
move primary context out of main
Browse files Browse the repository at this point in the history
  • Loading branch information
thanhminhmr committed May 20, 2023
1 parent 37b23aa commit fe5baea
Show file tree
Hide file tree
Showing 5 changed files with 230 additions and 151 deletions.
162 changes: 11 additions & 151 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/

use crate::basic::{AnyError, AnyResult, Closable};
use crate::primary_context::{ByteMatched, PrimaryContext};
use crate::secondary_context::{Bit, BitDecoder, BitEncoder, SecondaryContext};
use std::cmp::min;
use std::fs::File;
Expand All @@ -31,6 +32,7 @@ use std::time::Instant;
use std::{env, thread};

mod basic;
mod primary_context;
mod secondary_context;

// =================================================================================================
Expand Down Expand Up @@ -248,160 +250,18 @@ impl<W: Write + Send + 'static> ThreadedEncoder<W> {

//endregion Secondary Context
// =================================================================================================
//region Symbol Ranking

// -------------------------------------------------------------------------------------------------
//region Matching Context

/// Which slot of a [`MatchingContext`] (if any) held the byte being coded.
enum ByteMatched {
    /// The byte equals the first stored byte.
    FIRST,
    /// The byte equals the second stored byte (and not the first).
    SECOND,
    /// The byte equals the third stored byte (and neither of the others).
    THIRD,
    /// The byte equals none of the three stored bytes.
    NONE,
}

/// Three ranked candidate bytes plus a hit counter, packed into one `u32`.
///
/// Layout, from least to most significant byte:
/// first byte, second byte, third byte, count.
#[derive(Clone)]
struct MatchingContext(u32);

impl MatchingContext {
    /// A count of exactly one, positioned in the top byte.
    const COUNT_ONE: u32 = 0x0100_0000;
    /// Smallest packed value whose count is already 255.
    const COUNT_SATURATED: u32 = 0xFF00_0000;

    /// Creates an empty context: all three bytes and the count are zero.
    fn new() -> Self {
        Self(0)
    }

    /// Unpacks the context into `(first, second, third, count)`.
    fn get(&self) -> (u8, u8, u8, usize) {
        let [first, second, third, count] = self.0.to_le_bytes();
        (first, second, third, usize::from(count))
    }

    /// Finds which stored byte equals `next_byte`, updates the context
    /// accordingly and returns the slot that matched.
    fn matching(&mut self, next_byte: u8) -> ByteMatched {
        let [first, second, third, _count] = self.0.to_le_bytes();
        let rank = if next_byte == first {
            ByteMatched::FIRST
        } else if next_byte == second {
            ByteMatched::SECOND
        } else if next_byte == third {
            ByteMatched::THIRD
        } else {
            ByteMatched::NONE
        };
        self.apply(next_byte, &rank);
        rank
    }

    /// Updates the context for `next_byte` when the matching slot is already
    /// known; `matched` is trusted and not re-checked against the stored bytes.
    fn matched(&mut self, next_byte: u8, matched: ByteMatched) {
        self.apply(next_byte, &matched);
    }

    /// Shared state transition used by both `matching` and `matched`.
    fn apply(&mut self, next_byte: u8, rank: &ByteMatched) {
        let state = self.0;
        let newest = u32::from(next_byte);
        self.0 = match rank {
            // Same top byte again: bump the count, saturating at 255.
            ByteMatched::FIRST => {
                if state < Self::COUNT_SATURATED {
                    state + Self::COUNT_ONE
                } else {
                    state
                }
            }
            // Keep the third byte, demote the old first byte to second place,
            // install the new first byte and restart the count at 1.
            ByteMatched::SECOND => {
                (state & 0x00FF_0000) | ((state & 0x0000_00FF) << 8) | newest | Self::COUNT_ONE
            }
            // Shift first/second down to second/third, install the new first
            // byte and restart the count at 1.
            ByteMatched::THIRD => ((state & 0x0000_FFFF) << 8) | newest | Self::COUNT_ONE,
            // Same shift as above, but the count is reset to 0.
            ByteMatched::NONE => ((state & 0x0000_FFFF) << 8) | newest,
        };
    }
}

// endregion Matching Context
// -------------------------------------------------------------------------------------------------
//region Matching Contexts

/// A table of [`MatchingContext`] slots addressed by a rolling hash of the
/// bytes processed so far.
struct MatchingContexts {
    /// The byte most recently passed to `matching`/`matched` (0 initially).
    last_byte: u8,
    /// Index of the slot that will be consulted for the next byte.
    hash_value: usize,
    /// The slot table; it holds `1 << size_log` entries.
    contexts: Box<[MatchingContext]>,
}

impl MatchingContexts {
    /// Allocates `1 << size_log` empty contexts and starts at slot 0.
    fn new(size_log: usize) -> Self {
        let table = vec![MatchingContext::new(); 1 << size_log].into_boxed_slice();
        Self {
            last_byte: 0,
            hash_value: 0,
            contexts: table,
        }
    }

    /// Returns the last byte that was processed.
    fn get_last_byte(&self) -> u8 {
        self.last_byte
    }

    /// Returns the index of the currently selected slot.
    fn get_hash_value(&self) -> usize {
        self.hash_value
    }

    /// Returns the currently selected slot.
    fn get_context(&self) -> &MatchingContext {
        &self.contexts[self.hash_value]
    }

    /// Looks `next_byte` up in the current slot, updates that slot, then
    /// moves on to the slot selected by the new hash.
    fn matching(&mut self, next_byte: u8) -> ByteMatched {
        let rank: ByteMatched = self.contexts[self.hash_value].matching(next_byte);
        self.advance(next_byte);
        rank
    }

    /// Same as `matching`, but the matching slot is supplied by the caller
    /// instead of being looked up.
    fn matched(&mut self, next_byte: u8, matched: ByteMatched) {
        self.contexts[self.hash_value].matched(next_byte, matched);
        self.advance(next_byte);
    }

    /// Records `next_byte` and folds it into the rolling hash.
    ///
    /// The hash is reduced with a bit mask of `len - 1`, which only yields a
    /// uniform index because the table length is a power of two.
    fn advance(&mut self, next_byte: u8) {
        let mixed = self.hash_value * (5 << 5) + next_byte as usize + 1;
        let index_mask = self.contexts.len() - 1;
        self.last_byte = next_byte;
        self.hash_value = mixed & index_mask;
        debug_assert!(self.hash_value < self.contexts.len());
    }
}

// endregion Matching Contexts
// -------------------------------------------------------------------------------------------------

//endregion Symbol Ranking
// =================================================================================================
//region Stream Encoder/Decoder

// -------------------------------------------------------------------------------------------------
//region StreamContext

const PRIMARY_CONTEXT_SIZE_LOG: usize = 24;
const SECONDARY_CONTEXT_SIZE: usize = (1024 + 32) * 768 + 0x400000;
const PRIMARY_CONTEXT_SIZE: usize = 1 << 24;
const SECONDARY_CONTEXT_SIZE: usize = 0x4000 * 256 + (1024 + 32) * 768;

struct StreamContexts(MatchingContexts);
struct StreamContexts(PrimaryContext<PRIMARY_CONTEXT_SIZE>);

impl Deref for StreamContexts {
type Target = MatchingContexts;
type Target = PrimaryContext<PRIMARY_CONTEXT_SIZE>;
fn deref(&self) -> &Self::Target {
&self.0
}
Expand All @@ -415,25 +275,25 @@ impl DerefMut for StreamContexts {

impl StreamContexts {
fn new() -> Self {
Self(MatchingContexts::new(PRIMARY_CONTEXT_SIZE_LOG))
Self(PrimaryContext::new())
}

fn calculate_context(&self) -> (u8, u8, u8, usize, usize, usize, usize) {
let (first_byte, second_byte, third_byte, count) = self.get_context().get();
let (last_byte, first_byte, second_byte, third_byte, count, hash_value) = self.get();

let bit_context: usize = if count < 4 {
((self.get_last_byte() as usize) << 2) | count
((last_byte as usize) << 2) | count
} else {
1024 + (min(count - 4, 63) >> 1)
} * 768
+ 0x400000;
+ 0x4000 * 256;

let first_context: usize = bit_context + first_byte as usize;
let second_context: usize =
bit_context + 256 + second_byte.wrapping_add(third_byte) as usize;
let third_context: usize =
bit_context + 512 + second_byte.wrapping_mul(2).wrapping_sub(third_byte) as usize;
let literal_context: usize = (self.get_hash_value() & 0x3FFF) * 256;
let literal_context: usize = (hash_value & 0x3FFF) * 256;

return (
first_byte,
Expand Down
71 changes: 71 additions & 0 deletions src/primary_context/context.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* srx: The fast Symbol Ranking based compressor.
* Copyright (C) 2023 Mai Thanh Minh (a.k.a. thanhminhmr)
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/

use crate::primary_context::matched::ByteMatched;
use crate::primary_context::history::ByteHistory;

// -----------------------------------------------

/// A table of `SIZE` [`ByteHistory`] slots addressed by a rolling hash of the
/// bytes processed so far.
///
/// `SIZE` must be a non-zero power of two; this is enforced at compile time
/// when [`PrimaryContext::new`] is instantiated.
pub struct PrimaryContext<const SIZE: usize> {
    /// The byte most recently passed to `matching`/`matched` (0 initially).
    last_byte: u8,
    /// Index of the slot that will be consulted for the next byte.
    hash_value: usize,
    /// The slot table, kept on the heap.
    contexts: Box<[ByteHistory; SIZE]>,
}

impl<const SIZE: usize> PrimaryContext<SIZE> {
    // Assert that SIZE is a power of 2.
    //
    // An associated const of a generic impl is only evaluated when something
    // refers to it, so `new` references it explicitly; otherwise this check
    // would never run.
    const _SIZE_CHECK: () = assert!(
        SIZE != 0 && (SIZE & (SIZE - 1)) == 0,
        "PrimaryContext SIZE must be a non-zero power of two"
    );

    /// Creates a context whose slots are all empty, positioned at slot 0.
    pub fn new() -> Self {
        // Force evaluation of the compile-time size check for this SIZE.
        let () = Self::_SIZE_CHECK;

        // Build the table directly on the heap. `Box::new([..; SIZE])` creates
        // the array as a temporary first, which may be placed on the stack and
        // overflow it for large tables (e.g. 1 << 24 slots).
        let slots: Box<[ByteHistory]> = vec![ByteHistory::new(); SIZE].into_boxed_slice();
        let contexts: Box<[ByteHistory; SIZE]> =
            match <Box<[ByteHistory; SIZE]> as std::convert::TryFrom<Box<[ByteHistory]>>>::try_from(
                slots,
            ) {
                Ok(table) => table,
                // The conversion only fails on a length mismatch.
                Err(_) => unreachable!("the vector was created with exactly SIZE elements"),
            };

        Self {
            last_byte: 0,
            hash_value: 0,
            contexts,
        }
    }

    /// Returns `(last_byte, first_byte, second_byte, third_byte, count,
    /// hash_value)`, where the middle four values come from the currently
    /// selected slot.
    pub fn get(&self) -> (u8, u8, u8, u8, usize, usize) {
        let (first_byte, second_byte, third_byte, count): (u8, u8, u8, usize) =
            self.contexts[self.hash_value].get();
        (
            self.last_byte,
            first_byte,
            second_byte,
            third_byte,
            count,
            self.hash_value,
        )
    }

    /// Records `next_byte` and folds it into the rolling hash that selects
    /// the slot for the following byte.
    fn update(&mut self, next_byte: u8) {
        self.last_byte = next_byte;
        self.hash_value = (self.hash_value * (5 << 5) + next_byte as usize + 1) % SIZE;
        debug_assert!(self.hash_value < SIZE);
    }

    /// Looks `next_byte` up in the current slot, updates that slot, advances
    /// to the next slot and returns which stored byte matched.
    pub fn matching(&mut self, next_byte: u8) -> ByteMatched {
        let matching_byte: ByteMatched = self.contexts[self.hash_value].matching(next_byte);
        self.update(next_byte);
        matching_byte
    }

    /// Same as `matching`, but the matching slot is supplied by the caller
    /// instead of being looked up.
    pub fn matched(&mut self, next_byte: u8, matched: ByteMatched) {
        self.contexts[self.hash_value].matched(next_byte, matched);
        self.update(next_byte);
    }
}
99 changes: 99 additions & 0 deletions src/primary_context/history.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* srx: The fast Symbol Ranking based compressor.
* Copyright (C) 2023 Mai Thanh Minh (a.k.a. thanhminhmr)
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/

use crate::primary_context::matched::ByteMatched;

// -----------------------------------------------

/// Three ranked candidate bytes plus a hit counter, packed into one `u32`.
///
/// Layout, from least to most significant byte:
/// first byte, second byte, third byte, count.
#[derive(Clone, Copy)]
pub struct ByteHistory(u32);

impl ByteHistory {
    /// A count of exactly one, positioned in the top byte.
    const COUNT_ONE: u32 = 0x0100_0000;
    /// Smallest packed value whose count is already 255.
    const COUNT_SATURATED: u32 = 0xFF00_0000;

    /// Creates an empty history: all three bytes and the count are zero.
    pub fn new() -> Self {
        Self(0)
    }

    /// Unpacks the history into `(first, second, third, count)`.
    pub fn get(&self) -> (u8, u8, u8, usize) {
        // TODO would a state machine here be better (for the count)?
        let [first, second, third, count] = self.0.to_le_bytes();
        (first, second, third, usize::from(count))
    }

    /// Finds which stored byte equals `next_byte`, updates the history
    /// accordingly and returns the slot that matched.
    pub fn matching(&mut self, next_byte: u8) -> ByteMatched {
        let [first, second, third, _count] = self.0.to_le_bytes();
        let rank = if next_byte == first {
            ByteMatched::FIRST
        } else if next_byte == second {
            ByteMatched::SECOND
        } else if next_byte == third {
            ByteMatched::THIRD
        } else {
            ByteMatched::NONE
        };
        self.apply(next_byte, &rank);
        rank
    }

    /// Updates the history for `next_byte` when the matching slot is already
    /// known; `matched` is trusted and not re-checked against the stored bytes.
    pub fn matched(&mut self, next_byte: u8, matched: ByteMatched) {
        self.apply(next_byte, &matched);
    }

    /// Shared state transition used by both `matching` and `matched`.
    fn apply(&mut self, next_byte: u8, rank: &ByteMatched) {
        let state = self.0;
        let newest = u32::from(next_byte);
        self.0 = match rank {
            // Same top byte again: bump the count, saturating at 255.
            ByteMatched::FIRST => {
                if state < Self::COUNT_SATURATED {
                    state + Self::COUNT_ONE
                } else {
                    state
                }
            }
            // Keep the third byte, demote the old first byte to second place,
            // install the new first byte and restart the count at 1.
            ByteMatched::SECOND => {
                (state & 0x00FF_0000) | ((state & 0x0000_00FF) << 8) | newest | Self::COUNT_ONE
            }
            // Shift first/second down to second/third, install the new first
            // byte and restart the count at 1.
            ByteMatched::THIRD => ((state & 0x0000_FFFF) << 8) | newest | Self::COUNT_ONE,
            // Same shift as above, but the count is reset to 0.
            ByteMatched::NONE => ((state & 0x0000_FFFF) << 8) | newest,
        };
    }
}

0 comments on commit fe5baea

Please sign in to comment.