Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ edition = "2024"
members = [
"base64lab",
"bit_stream"
]
, "huffman_codes"]
resolver = "2"

[profile.test]
Expand Down
96 changes: 88 additions & 8 deletions bit_stream/src/bit_stream.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
use std::fs::File;
use std::io::{Read, Write, Seek, SeekFrom};
use std::os::unix::fs::FileExt;

#[derive(Debug)]
pub struct BitStreamWriter {
file: File,
bit_len: usize,
current_byte: u8,
bit_count: usize,
pub file: File,
pub bytes_written: usize,
pub current_byte: u8,
pub bit_count: usize,
}

impl BitStreamWriter {
pub fn create(path: &str) -> std::io::Result<Self> {
let file = File::create(path)?;
Ok(Self {
file,
bit_len: 0,
bytes_written: 0,
current_byte: 0u8,
bit_count: 0,
})
Expand Down Expand Up @@ -46,9 +47,35 @@ impl BitStreamWriter {
buffer.push(self.current_byte);
}

self.file.seek(SeekFrom::Start(self.bit_len as u64))?;
self.file.seek(SeekFrom::Start(self.bytes_written as u64))?;

if self.bit_count % 8 == 0 {
self.bytes_written += buffer.len();
} else {
self.bytes_written += buffer.len() - 1
}

self.file.write_all(&buffer)?;
self.bit_len += to_take / 8;

// self.bit_len += to_take / 8;

Ok(())
}

/// Writes `bytes` verbatim at the absolute byte offset `start` of the underlying file.
///
/// This call does not modify `bytes_written`, `current_byte` or `bit_count`; it only
/// seeks the file and writes the given slice there.
///
/// # Errors
/// Returns any I/O error reported by the seek or by the write.
pub fn write_byte_sequence_unchecked_at(&mut self, bytes: &[u8], start: u64) -> std::io::Result<()> {
    let target = SeekFrom::Start(start);
    self.file.seek(target)?;
    self.file.write_all(bytes)
}

/// Advances the writer's byte counter by `amount` without writing any data.
///
/// Only `bytes_written` is changed here; nothing is sent to the file by this call.
pub fn skip_bytes(&mut self, amount: usize) {
self.bytes_written += amount;
}

/// Consumes the writer.
///
/// The body performs no work of its own and always returns `Ok(())`; because `self`
/// is taken by value, the writer (and the `File` it owns) is dropped on return.
pub fn finish(self) -> std::io::Result<()> {


Ok(())
}
}
Expand Down Expand Up @@ -96,6 +123,59 @@ impl BitStreamReader {

Ok(out)
}

/// Opens `path` through `Self::open` and returns every byte that `read_to_end`
/// yields from the reader's underlying file.
///
/// # Errors
/// Propagates any error from opening the file or from reading it.
pub fn read_all(path: &str) -> std::io::Result<Vec<u8>> {
    let mut contents = Vec::new();
    let mut source = Self::open(path)?;
    source.file.read_to_end(&mut contents)?;
    Ok(contents)
}

/// Returns the value of the reader's `bit_len` field.
pub fn bit_len(&self) -> usize {
self.bit_len
}

/// Returns the number of whole bytes needed to hold `bit_len` bits (rounded up).
///
/// Uses `div_ceil` rather than `(bit_len + 7) / 8` so the computation cannot
/// overflow when `bit_len` is within 7 of `usize::MAX`.
pub fn byte_len(&self) -> usize {
    self.bit_len.div_ceil(8)
}
}

/// Iterator over the individual bits of a borrowed byte slice.
///
/// Bytes are visited in slice order; within each byte the bits are produced
/// starting from bit 0 (the least-significant bit) up to bit 7.
pub struct BitReader<'a> {
    /// The bytes being scanned.
    bytes: &'a [u8],
    /// Index of the byte the next bit will be taken from.
    current_byte: usize,
    /// Position (0..=7) of the next bit inside `bytes[current_byte]`.
    current_bit: u8,
}

impl<'a> BitReader<'a> {
    /// Creates a reader positioned at bit 0 of the first byte of `bytes`.
    pub fn new(bytes: &'a [u8]) -> Self {
        Self { bytes, current_byte: 0, current_bit: 0 }
    }
}

impl<'a> Iterator for BitReader<'a> {
    type Item = bool;

    /// Yields `true` for a set bit and `false` for a cleared one, or `None` once
    /// every byte of the slice has been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        let byte = *self.bytes.get(self.current_byte)?;
        let is_set = byte & (1u8 << self.current_bit) != 0;

        // Step to the next bit, rolling over to the following byte after bit 7.
        if self.current_bit == 7 {
            self.current_bit = 0;
            self.current_byte += 1;
        } else {
            self.current_bit += 1;
        }

        Some(is_set)
    }
}


Expand Down Expand Up @@ -162,4 +242,4 @@ mod tests {
[0x77],
);
}
}
}
9 changes: 9 additions & 0 deletions decoded_file
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas posuere vehicula metus id convallis. Etiam consequat ut quam non hendrerit. Nullam sit amet lobortis quam. Aliquam rhoncus feugiat pellentesque. Fusce dictum sem at mauris hendrerit bibendum. Integer vitae nisl nisl. Suspendisse potenti. Vestibulum mollis dui eget facilisis laoreet. Pellentesque nec odio interdum, pharetra nisl a, luctus magna. Aliquam erat volutpat.

Suspendisse eu ligula dui. Praesent auctor urna volutpat, sollicitudin turpis non, tempus justo. Nulla malesuada ex ac aliquam laoreet. Integer aliquet nisl egestas finibus mattis. Duis at congue massa. Vivamus sed mi eget turpis imperdiet venenatis. Curabitur pulvinar lectus enim, ac convallis nulla imperdiet eu. Cras ultrices feugiat nisl, quis aliquet elit maximus quis. Cras scelerisque ac nisl vel venenatis.

Etiam vel risus diam. Proin pellentesque purus eget augue commodo, a aliquam odio fermentum. Quisque maximus convallis egestas. Aliquam tempus sed arcu eget varius. Sed libero nulla, tempor hendrerit nulla ut, scelerisque molestie ligula. Aliquam erat volutpat. Vestibulum ut nulla magna. Integer id facilisis diam, ac viverra erat. Praesent molestie consectetur lacus, et auctor eros rhoncus eget. Donec arcu turpis, dictum dignissim condimentum rhoncus, ornare non massa. In facilisis, justo eget aliquam placerat, mi magna lacinia eros, ac pretium velit nibh id metus. Nulla vel diam eget augue auctor elementum. Praesent porta sodales arcu id sollicitudin.

Sed eget orci eu purus fringilla ullamcorper. Cras consequat arcu ac tincidunt cursus. Donec lorem ex, lobortis nec sapien eu, consectetur pretium tellus. Proin accumsan quam nisi. Nullam viverra ante vitae sapien convallis, eget tempor diam fringilla. Nulla eu bibendum risus, vehicula venenatis metus. In tortor dui, tincidunt imperdiet molestie sed, pharetra ut enim. Fusce hendrerit placerat sem ac blandit. Proin condimentum sapien quis justo finibus sodales. Phasellus vestibulum tristique ex at commodo. Integer scelerisque justo ut felis accumsan vulputate. Nulla ac imperdiet turpis, non ultricies leo.

Fusce vitae felis eget nisi semper dapibus sed ut tortor. Duis euismod laoreet justo, quis cursus lectus pulvinar ut. Interdum et malesuada fames ac ante ipsum primis in faucibus. Pellentesque feugiat erat ut erat pellentesque semper. Fusce et mauris ac tortor rhoncus tincidunt et et velit. Vivamus bibendum eros urna, eu fringilla nunc blandit sed. Pellentesque non quam id lorem euismod porttitor mattis non sem. Etiam dictum dapibus massa malesuada placerat. Duis pharetra tristique lacus vel vestibulum. Duis non libero molestie, tristique velit vitae, lobortis turpis. Curabitur luctus ante quis tortor luctus dignissim ac ac enim.
Binary file added encoded_file
Binary file not shown.
11 changes: 11 additions & 0 deletions huffman_codes/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "huffman_codes"
version = "0.1.0"
edition = "2024"

[[bin]]
name = "lab3"
path = "src/bin/main.rs"

[dependencies]
bit_stream = { path = "../bit_stream" }
116 changes: 116 additions & 0 deletions huffman_codes/src/algorithms.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
use std::io::ErrorKind;

use bit_stream::{BitStreamReader, BitStreamWriter, BitReader};
use crate::tree::{Direction, Node};

pub fn encode(input_path: &str, output_path: &str) -> std::io::Result<()> {
let file_bytes = BitStreamReader::read_all(input_path)?;

let mut node_weights= vec![0; 256];
for byte in file_bytes {
node_weights[byte as usize] += 1;
}

println!("node_weights: {:?}", node_weights);

let mut non_zero_nodes = Vec::new();
for (byte, weight) in node_weights.iter().enumerate() {
if *weight != 0 {
non_zero_nodes.push(Node::Leaf {
value: byte as u8,
weight: *weight,
});
}
}

let huffman_root = Node::huffman_tree(non_zero_nodes).unwrap();

// write dict
let mut writer = BitStreamWriter::create(output_path)?;
writer.skip_bytes(8);
for weight in node_weights {
writer.write_bit_sequence(weight.to_le_bytes().as_slice(), 32)?
}

let dictionary = huffman_root.generate_dict();

// encode file
let bytes = BitStreamReader::read_all(input_path)?;
for byte in bytes {
let (numeric_repr, bit_len) = dictionary[&byte];
let bytes = numeric_repr.to_le_bytes();
writer.write_bit_sequence(&bytes, bit_len as usize)?
}

let data_length = (writer.bytes_written - 8 - 1024) * 8 + writer.bit_count;
println!("data_length: {}", data_length);
writer.write_byte_sequence_unchecked_at(&data_length.to_le_bytes(), 0)?;

Ok(())
}

/// Decodes a file produced by `encode` and writes the recovered bytes to `output_path`.
///
/// The input is read as: a 64-bit little-endian payload length in bits, then 256
/// little-endian `u32` byte frequencies, then the Huffman-coded payload. The tree is
/// rebuilt from the frequencies and walked bit by bit (`false` = left, `true` = right).
///
/// # Errors
/// Returns `ErrorKind::InvalidData` when the length field is not 8 bytes, when the
/// frequency table is not exactly 1024 bytes, or when every frequency is zero.
/// Propagates any I/O error from reading or writing.
///
/// # Panics
/// Panics if `Node::huffman_tree` does not produce a tree, or if a node reporting
/// itself as a leaf has no value (both results are unwrapped).
pub fn decode(input_path: &str, output_path: &str) -> std::io::Result<()> {
    // 256 counters of 4 bytes each.
    const WEIGHT_TABLE_BYTES: usize = 256 * 4;

    let mut reader: BitStreamReader = BitStreamReader::open(input_path)?;

    let length_bytes = reader.read_bit_sequence(64)?;
    let length_bytes_slice = <&[u8; 8]>::try_from(&*length_bytes)
        .map_err(|_| std::io::Error::new(ErrorKind::InvalidData, "slice length is not 8"))?;

    let data_length = u64::from_le_bytes(*length_bytes_slice);
    println!("data_length: {}", data_length);

    let dict_files_bytes: Vec<u8> = reader.read_bit_sequence(WEIGHT_TABLE_BYTES * 8)?;
    if dict_files_bytes.len() != WEIGHT_TABLE_BYTES {
        // A short table would otherwise cause an out-of-range slice or a silently
        // incomplete frequency list.
        return Err(std::io::Error::new(
            ErrorKind::InvalidData,
            "frequency table is truncated",
        ));
    }
    let byte_weights: Vec<u32> = dict_files_bytes
        .chunks_exact(4)
        .map(|chunk| u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
        .collect();

    println!("byte_weights: {:?}", byte_weights);

    let nodes: Vec<Node> = byte_weights
        .iter()
        .enumerate()
        .filter(|&(_, &weight)| weight != 0)
        .map(|(byte, &weight)| Node::Leaf {
            value: byte as u8,
            weight,
        })
        .collect();
    if nodes.is_empty() {
        return Err(std::io::Error::new(
            ErrorKind::InvalidData,
            "frequency table contains no symbols",
        ));
    }

    let huffman_root = Node::huffman_tree(nodes).unwrap();

    // decode file
    let mut writer: BitStreamWriter = BitStreamWriter::create(output_path)?;
    let mut current_node = &huffman_root;
    let mut processed_bits: u64 = 0;
    let read_window = 8 * 2048;

    // The loop condition (not just the inner `break`) stops decoding once exactly
    // `data_length` bits were consumed, so padding or trailing bytes are never
    // interpreted as payload, and a zero-length payload decodes nothing.
    while processed_bits < data_length {
        let bytes = reader.read_bit_sequence(read_window)?;
        if bytes.is_empty() {
            break;
        }

        for bit in BitReader::new(&bytes) {
            let dir = match bit {
                true => Direction::Right,
                false => Direction::Left,
            };
            if let Some(potential_leaf) = current_node.get_child(dir) {
                if potential_leaf.is_leaf() {
                    writer.write_bit_sequence(&[potential_leaf.value().unwrap()], 8)?;
                    // A symbol is complete; restart the walk from the root.
                    current_node = &huffman_root;
                } else {
                    current_node = potential_leaf;
                }
            }

            processed_bits += 1;
            if processed_bits == data_length {
                // Leaves the `for`; the `while` condition then ends the outer loop.
                break;
            }
        }
    }

    Ok(())
}
23 changes: 23 additions & 0 deletions huffman_codes/src/bin/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
use huffman_codes::algorithms::{encode, decode};
use std::time::Instant;

/// Encodes a fixed sample file, decodes the result again, and prints how long
/// each phase took.
///
/// Panics if either `encode` or `decode` returns an error (both results are unwrapped).
fn run() {
    // Alternative sample input: "sample3.xls"
    let input_path = "test.txt";
    let encoded_file_path = "encoded_file";
    let decoded_file_path = "decoded_file";

    println!("Encoding file ...");
    let encode_timer = Instant::now();
    encode(input_path, encoded_file_path).unwrap();
    println!("Encoding time: {:?}", encode_timer.elapsed());

    println!("Decode file ...");
    let decode_timer = Instant::now();
    decode(encoded_file_path, decoded_file_path).unwrap();
    println!("Decoding time: {:?}", decode_timer.elapsed());
}

/// Program entry point: delegates to `run`.
fn main() {
run();
}
2 changes: 2 additions & 0 deletions huffman_codes/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod tree;
pub mod algorithms;
Loading