Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ext/parsekit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ tesseract-rs = "0.1" # Tesseract with optional bundling
image = "0.25" # Image processing library (match rusty-tesseract's version)
calamine = "0.30" # Excel parsing
docx-rs = "0.4" # Word document parsing
quick-xml = "0.38" # XML parsing
zip = "2.1" # ZIP archive handling for PPTX
quick-xml = "0.36" # XML parsing
serde_json = "1.0" # JSON parsing
regex = "1.10" # Text parsing
encoding_rs = "0.8" # Encoding detection
Expand All @@ -33,4 +33,4 @@ bundled-tesseract = []
[profile.release]
opt-level = 3
lto = true
codegen-units = 1
codegen-units = 1
14 changes: 7 additions & 7 deletions ext/parsekit/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use magnus::{exception, Error, RModule, Ruby, Module};
use magnus::{Error, RModule, Ruby, Module};

/// Custom error types for ParseKit
#[derive(Debug)]
Expand All @@ -15,13 +15,13 @@ impl ParserError {
pub fn to_error(&self) -> Error {
match self {
ParserError::ParseError(msg) => {
Error::new(exception::runtime_error(), msg.clone())
Error::new(Ruby::get().unwrap().exception_runtime_error(), msg.clone())
}
ParserError::ConfigError(msg) => {
Error::new(exception::arg_error(), msg.clone())
Error::new(Ruby::get().unwrap().exception_arg_error(), msg.clone())
}
ParserError::IoError(msg) => {
Error::new(exception::io_error(), msg.clone())
Error::new(Ruby::get().unwrap().exception_io_error(), msg.clone())
}
}
}
Expand All @@ -37,9 +37,9 @@ pub fn init(_ruby: &Ruby, module: RModule) -> Result<(), Error> {

// Define error classes as regular Ruby classes
// Users can still rescue them by name in Ruby code
let _error = module.define_class("Error", magnus::class::object())?;
let _parse_error = module.define_class("ParseError", magnus::class::object())?;
let _config_error = module.define_class("ConfigError", magnus::class::object())?;
let _error = module.define_class("Error", Ruby::get().unwrap().class_object())?;
let _parse_error = module.define_class("ParseError", Ruby::get().unwrap().class_object())?;
let _config_error = module.define_class("ConfigError", Ruby::get().unwrap().class_object())?;

Ok(())
}
36 changes: 18 additions & 18 deletions ext/parsekit/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use magnus::{
class, function, method, prelude::*, scan_args, Error, Module, RHash, RModule, Ruby, Value,
function, method, prelude::*, scan_args, Error, Module, RHash, RModule, Ruby, Value,
};
use std::path::Path;

Expand Down Expand Up @@ -59,7 +59,7 @@ impl Parser {
// Check size limit
if data.len() > self.config.max_size {
return Err(Error::new(
magnus::exception::runtime_error(),
Ruby::get().unwrap().exception_runtime_error(),
format!(
"File size {} exceeds maximum allowed size {}",
data.len(),
Expand Down Expand Up @@ -192,7 +192,7 @@ impl Parser {

if let Err(e) = init_result {
return Err(Error::new(
magnus::exception::runtime_error(),
Ruby::get().unwrap().exception_runtime_error(),
format!("Failed to initialize Tesseract: {:?}", e),
))
}
Expand All @@ -201,7 +201,7 @@ impl Parser {
let img = match image::load_from_memory(&data) {
Ok(img) => img,
Err(e) => return Err(Error::new(
magnus::exception::runtime_error(),
Ruby::get().unwrap().exception_runtime_error(),
format!("Failed to load image: {}", e),
))
};
Expand All @@ -220,7 +220,7 @@ impl Parser {
(width * 4) as i32, // bytes per line
) {
return Err(Error::new(
magnus::exception::runtime_error(),
Ruby::get().unwrap().exception_runtime_error(),
format!("Failed to set image: {}", e),
))
}
Expand All @@ -229,7 +229,7 @@ impl Parser {
match tesseract.get_utf8_text() {
Ok(text) => Ok(text.trim().to_string()),
Err(e) => Err(Error::new(
magnus::exception::runtime_error(),
Ruby::get().unwrap().exception_runtime_error(),
format!("Failed to perform OCR: {}", e),
)),
}
Expand All @@ -251,7 +251,7 @@ impl Parser {
Ok(count) => count,
Err(e) => {
return Err(Error::new(
magnus::exception::runtime_error(),
Ruby::get().unwrap().exception_runtime_error(),
format!("Failed to get page count: {}", e),
))
}
Expand Down Expand Up @@ -284,7 +284,7 @@ impl Parser {
}
}
Err(e) => Err(Error::new(
magnus::exception::runtime_error(),
Ruby::get().unwrap().exception_runtime_error(),
format!("Failed to parse PDF: {}", e),
)),
}
Expand Down Expand Up @@ -323,7 +323,7 @@ impl Parser {
Ok(result.trim().to_string())
}
Err(e) => Err(Error::new(
magnus::exception::runtime_error(),
Ruby::get().unwrap().exception_runtime_error(),
format!("Failed to parse DOCX file: {}", e),
)),
}
Expand All @@ -339,7 +339,7 @@ impl Parser {
Ok(archive) => archive,
Err(e) => {
return Err(Error::new(
magnus::exception::runtime_error(),
Ruby::get().unwrap().exception_runtime_error(),
format!("Failed to open PPTX as ZIP: {}", e),
))
}
Expand Down Expand Up @@ -441,7 +441,7 @@ impl Parser {
}
Ok(Event::Text(e)) => {
if in_text_element {
if let Ok(text) = e.unescape() {
if let Ok(text) = e.decode() {
let text_str = text.trim();
if !text_str.is_empty() {
text_parts.push(text_str.to_string());
Expand Down Expand Up @@ -493,7 +493,7 @@ impl Parser {
Ok(result)
}
Err(e) => Err(Error::new(
magnus::exception::runtime_error(),
Ruby::get().unwrap().exception_runtime_error(),
format!("Failed to parse Excel file: {}", e),
)),
}
Expand Down Expand Up @@ -522,13 +522,13 @@ impl Parser {
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Text(e)) => {
txt.push_str(&e.unescape().unwrap_or_default());
txt.push_str(&e.decode().unwrap_or_default());
txt.push(' ');
}
Ok(Event::Eof) => break,
Err(e) => {
return Err(Error::new(
magnus::exception::runtime_error(),
Ruby::get().unwrap().exception_runtime_error(),
format!("XML parse error: {}", e),
))
}
Expand Down Expand Up @@ -558,7 +558,7 @@ impl Parser {
fn parse(&self, input: String) -> Result<String, Error> {
if input.is_empty() {
return Err(Error::new(
magnus::exception::arg_error(),
Ruby::get().unwrap().exception_arg_error(),
"Input cannot be empty",
));
}
Expand All @@ -578,7 +578,7 @@ impl Parser {

let data = fs::read(&path).map_err(|e| {
Error::new(
magnus::exception::io_error(),
Ruby::get().unwrap().exception_io_error(),
format!("Failed to read file: {}", e),
)
})?;
Expand All @@ -590,7 +590,7 @@ impl Parser {
fn parse_bytes(&self, data: Vec<u8>) -> Result<String, Error> {
if data.is_empty() {
return Err(Error::new(
magnus::exception::arg_error(),
Ruby::get().unwrap().exception_arg_error(),
"Data cannot be empty",
));
}
Expand Down Expand Up @@ -668,7 +668,7 @@ fn parse_bytes_direct(data: Vec<u8>) -> Result<String, Error> {

/// Initialize the Parser class
pub fn init(_ruby: &Ruby, module: RModule) -> Result<(), Error> {
let class = module.define_class("Parser", class::object())?;
let class = module.define_class("Parser", Ruby::get().unwrap().class_object())?;

// Instance methods
class.define_singleton_method("new", function!(Parser::new, -1))?;
Expand Down