diff --git a/src/main.rs b/src/main.rs index cc16f30..33eada1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -263,6 +263,17 @@ fn main() -> Result<(), Box> { .about("Stem word from stdin") .arg_required_else_help(false) .arg(Arg::new("words")), + ) + .subcommand( + Command::new("update") + .about("Update document from modified file") + .arg_required_else_help(true) + .arg( + Arg::new("file") + .help("edited document file") + .num_args(1) + .action(ArgAction::Append), + ), ); let matches = cmd.get_matches(); @@ -445,28 +456,35 @@ fn main() -> Result<(), Box> { true => print!("{}", s.text), // formatted output false => { - println!( - "uuid: {}\nname: {}\ntimestamp: {}\n----", - s.uuid, s.name, s.timestamp - ); - - // add a newline if not already present - match s.text.chars().last() { - Some(v) if v == '\n' => println!("{}----", s.text), - _ => println!("{}\n----", s.text), - } - - // show attachments s.collect_attachments(&conn)?; - if !s.attachments.is_empty() { - println!("attachments:"); - - println!("{:<36} {:>10} name", "uuid", "bytes"); - for a in &s.attachments { - println!("{} {:>10} {}", a.uuid, a.size, a.name); - } - } - } + s.print(); + } /* + false => { + println!( + "uuid: {}\nname: {}\ntimestamp: {}\n----", + s.uuid, + s.name, + s.timestamp.to_rfc3339() + ); + + // add a newline if not already present + match s.text.chars().last() { + Some(v) if v == '\n' => println!("{}----", s.text), + _ => println!("{}\n----", s.text), + } + + // show attachments + s.collect_attachments(&conn)?; + if !s.attachments.is_empty() { + println!("attachments:"); + + println!("{:<36} {:>10} name", "uuid", "bytes"); + for a in &s.attachments { + println!("{} {:>10} {}", a.uuid, a.size, a.name); + } + } + } + */ } } @@ -755,6 +773,25 @@ fn main() -> Result<(), Box> { println!("{:?}", stems); } + // UPDATE + if let Some(("update", sub_matches)) = matches.subcommand() { + if let Some(file) = sub_matches.get_one::("file") { + let s = snip::from_file(file)?; + s.update(&conn)?; + let mut s = snip::get_from_uuid(&conn, &s.uuid)?; + // re-index due to changed content + s.index(&conn)?; + eprintln!("update successful"); + + // collect attachments before printing so they are included in output + s.collect_attachments(&conn)?; + s.print(); + } else { + eprintln!("update failed"); + std::process::exit(1); + } + } + Ok(()) } diff --git a/src/snip/doc.rs b/src/snip/doc.rs index bc0802e..36395a7 100644 --- a/src/snip/doc.rs +++ b/src/snip/doc.rs @@ -141,6 +141,31 @@ impl Snip { Ok(()) } + pub fn print(&self) { + println!( + "uuid: {}\nname: {}\ntimestamp: {}\n----", + self.uuid, + self.name, + self.timestamp.to_rfc3339() + ); + + // add a newline if not already present + match self.text.chars().last() { + Some(v) if v == '\n' => println!("{}----", self.text), + _ => println!("{}\n----", self.text), + } + + // show attachments + if !self.attachments.is_empty() { + println!("attachments:"); + + println!("{:<36} {:>10} name", "uuid", "bytes"); + for a in &self.attachments { + println!("{} {:>10} {}", a.uuid, a.size, a.name); + } + } + } + /// scans and assigns all prefix and suffix strings to all analyzed words pub fn scan_fragments(&mut self) -> Result<(), SnipError> { // scan the document for tokens, in order collecting surrounding data for each token @@ -253,12 +278,18 @@ impl Snip { /// Writes all fields to the database, overwriting existing data pub fn update(&self, conn: &Connection) -> Result<(), Box> { let mut stmt = conn.prepare("UPDATE snip SET (data, timestamp, name) = (:data, :timestamp, :name) WHERE uuid = :uuid")?; - let _ = stmt.execute(&[ + let rows_affected = stmt.execute(&[ (":data", &self.text.to_string()), (":timestamp", &self.timestamp.to_rfc3339()), (":name", &self.name.to_string()), (":uuid", &self.uuid.to_string()), ])?; + if rows_affected != 1 { + return Err(Box::new(SnipError::General(format!( + "expected 1 row to be updated, got {}", + rows_affected + )))); + } Ok(()) } @@ -286,6 +317,11 @@ impl Snip { Ok(()) } } +struct SnipHeader { + uuid: Uuid, + name: String, + timestamp: DateTime, +} /// Clear the search index pub fn clear_index(conn: &Connection) -> Result> { @@ -344,6 +380,35 @@ pub fn find_by_graph(word: &str, text: Vec<&str>) -> Option { } None } +pub fn from_file(path: &str) -> Result> { + // read from file, parse header and body + let file_data = std::fs::read_to_string(path)?; + + // read header + let header = parse_header(file_data.as_str())?; + + // read document text + // find the end marker for the body text + // collect from line[4] to final line + + // read from bottom of file through possible attachments until line == "----" + // let lines: Vec<&str> = file_data.split('\n').collect(); + let text = parse_text(file_data.as_str())?; + + // assign headers for now + // The attachment vector is inconsequential for editing purposes. The database should reflect + // the associations between documents and their attachments. + let s = Snip { + uuid: header.uuid, + name: header.name, + timestamp: header.timestamp, + analysis: SnipAnalysis { words: Vec::new() }, + text, + attachments: Vec::new(), + }; + + Ok(s) +} /// Generate document name from provided text pub fn generate_name(text: &String, count: usize) -> Result> { @@ -424,34 +489,106 @@ pub fn insert_snip(conn: &Connection, s: &Snip) -> Result<(), Box> { Ok(()) } -/// Return a vector of Uuid of all documents in the database -pub fn uuid_list(conn: &Connection, limit: usize) -> Result, Box> { - let mut ids: Vec = Vec::new(); +fn parse_text(data: &str) -> Result> { + let lines: Vec<&str> = data.split('\n').collect(); - if limit != 0 { - let mut stmt = - conn.prepare("SELECT uuid FROM snip ORDER BY datetime(timestamp) DESC LIMIT :limit")?; - let query_iter = stmt.query_map(&[(":limit", &limit)], |row| { - let id_str: String = row.get(0)?; - Ok(id_str) - })?; + // find start + let mut text_start = 0; + for (i, line) in lines.iter().enumerate() { + if *line == "----" { + text_start = i + 1; + break; + } + } - for id_str in query_iter.flatten() { - let id = Uuid::try_parse(id_str.as_str())?; - ids.push(id); + let mut text_end = 0; + // locate text end from bottom (read in reverse) + for (i, line) in lines.iter().rev().enumerate() { + if *line == "----" { + text_end = lines.len() - (i + 1); + break; } - } else { - let mut stmt = conn.prepare("SELECT uuid FROM snip ORDER BY datetime(timestamp) DESC")?; - let mut rows = stmt.query([])?; + } - while let Some(row) = rows.next()? { - let id_str: String = row.get(0)?; - let id = Uuid::try_parse(id_str.as_str())?; - ids.push(id); + let text = match lines.get(text_start..text_end) { + Some(v) => v.join("\n"), + None => { + return Err(Box::new(SnipError::General( + "parsing document text from file".to_string(), + ))) } + }; + + Ok(text) +} + +/// Parses a document header from supplied data +fn parse_header(data: &str) -> Result> { + let default_error = Box::new(SnipError::General(format!("malformed header: {}", data))); + let lines = data.split('\n').collect::>(); + let lines = match lines.get(0..4) { + Some(v) => v, + None => return Err(default_error), + }; + + let uuid_parsed = parse_field("uuid", lines[0])?; + let name_parsed = parse_field("name", lines[1])?; + let timestamp_parsed = parse_field("timestamp", lines[2])?; + + let uuid = Uuid::try_parse(uuid_parsed.as_str())?; + let name = name_parsed.to_string(); + let timestamp = DateTime::parse_from_rfc3339(timestamp_parsed.as_str())?; + + let header = SnipHeader { + uuid, + name, + timestamp, + }; + + Ok(header) +} + +/// Parses a field string value from a single line. +fn parse_field(key_name: &str, line: &str) -> Result> { + let split_pos = match line.find(": ") { + Some(v) => v, + None => { + return Err(Box::new(SnipError::General(format!( + "could not find delimiting characters in line: {}", + line + )))); + } + }; + + // parse key and value + let key = match line.get(0..split_pos) { + Some(v) => v.trim(), + None => { + return Err(Box::new(SnipError::General(format!( + "malformed document header line: {}", + line + )))); + } + }; + // verify that parsed key is the same as requested + if key != key_name { + return Err(Box::new(SnipError::General(format!( + "parsed key: {} does not match requested key: {}", + key, key_name, + )))); } - Ok(ids) + let value = match line.get(split_pos + 2..) { + Some(v) => v.trim(), + None => { + return Err(Box::new(SnipError::General(format!( + "malformed document header line: {}", + line + )))); + } + }; + + Ok(value.to_string()) } /// Read all data from standard input, line by line, and return it as a String. @@ -532,6 +669,36 @@ pub fn strip_punctuation(s: &str) -> &str { clean } +/// Return a vector of Uuid of all documents in the database +pub fn uuid_list(conn: &Connection, limit: usize) -> Result, Box> { + let mut ids: Vec = Vec::new(); + + if limit != 0 { + let mut stmt = + conn.prepare("SELECT uuid FROM snip ORDER BY datetime(timestamp) DESC LIMIT :limit")?; + let query_iter = stmt.query_map(&[(":limit", &limit)], |row| { + let id_str: String = row.get(0)?; + Ok(id_str) + })?; + + for id_str in query_iter.flatten() { + let id = Uuid::try_parse(id_str.as_str())?; + ids.push(id); + } + } else { + let mut stmt = conn.prepare("SELECT uuid FROM snip ORDER BY datetime(timestamp) DESC")?; + let mut rows = stmt.query([])?; + + while let Some(row) = rows.next()? { + let id_str: String = row.get(0)?; + let id = Uuid::try_parse(id_str.as_str())?; + ids.push(id); + } + } + + Ok(ids) +} + #[cfg(test)] mod tests { use super::*; @@ -566,6 +733,13 @@ mod tests { Ok(()) } + #[test] + fn test_from_file() -> Result<(), Box> { + let s = from_file("test_data/document.txt")?; + println!("{} {} {}", s.uuid, s.name, s.timestamp); + Ok(()) + } + #[test] fn test_get_from_uuid() -> Result<(), Box> { let conn = prepare_database().expect("preparing in-memory database"); @@ -653,6 +827,29 @@ mod tests { Ok(()) } + #[test] + fn test_parse_field() -> Result<(), Box> { + let data = format!("uuid: {}", ID_STR); + let result = parse_field("uuid", &data)?; + let expect = ID_STR; + + assert_eq!(result, expect); + Ok(()) + } + + #[test] + fn test_parse_header() -> Result<(), Box> { + let data = concat!( + "uuid: 80fb4982-3a12-4804-9226-e54ffda66431\n", + "name: uname output\n", + "timestamp: 2023-06-10T13:35:39.142965-07:00\n", + "----\n", + ); + parse_header(data)?; + + Ok(()) + } + #[test] fn test_remove_snip() -> Result<(), Box> { let conn = prepare_database().expect("preparing in-memory database"); diff --git a/test_data/document.txt b/test_data/document.txt new file mode 100644 index 0000000..a9ea740 --- /dev/null +++ b/test_data/document.txt @@ -0,0 +1,17 @@ +uuid: ba652e2d-b248-4bcc-b36e-c26c0d0e8002 +name: Lorem ipsum dolor sit amet +timestamp: 2023-06-16T13:48:55.963691-07:00 +---- +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc finibus felis dolor, nec vestibulum libero rhoncus at. Ut non turpis sit amet nisi malesuada commodo. Phasellus eget tellus in nunc eleifend dapibus. Curabitur accumsan ornare sem, quis consectetur risus sollicitudin sit amet. Vivamus sit amet felis laoreet, tincidunt quam sed, finibus libero. Maecenas sit amet neque velit. Aliquam ac lobortis nunc. Nam ultrices, erat ac feugiat scelerisque, nibh urna malesuada mi, at posuere nunc orci vitae ipsum. Etiam eget ante vel justo condimentum malesuada ac sit amet dui. Fusce eleifend risus ut fringilla malesuada. Nulla facilisi. + +Aliquam cursus velit imperdiet ultricies laoreet. Quisque sem ante, vulputate quis elementum vitae, gravida a mauris. Vivamus vel tellus a neque porttitor porta quis nec nulla. Morbi nec sem risus. Integer ut ex vitae lacus vestibulum vulputate eget sit amet ipsum. Cras laoreet convallis imperdiet. Nunc dignissim ex a ligula auctor, mattis convallis erat semper. Ut purus tellus, luctus sollicitudin nisl a, rutrum finibus magna. Ut ac condimentum felis, at mattis ante. Donec maximus dignissim efficitur. Nulla eget odio euismod, accumsan est maximus, mollis purus. Nullam sed arcu mattis metus interdum suscipit. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Cras blandit arcu a ligula porta condimentum. Ut sit amet dapibus ex. Pellentesque ut nibh non lorem bibendum porta at ut quam. + +Donec gravida, massa interdum euismod accumsan, diam nunc dictum nisl, sed molestie mauris erat a arcu. Suspendisse hendrerit lectus et justo faucibus tincidunt. Suspendisse potenti. Suspendisse quis ipsum sed libero commodo porttitor id et tellus. Fusce eu nisl dapibus elit dignissim mattis. Nam tincidunt mauris leo, at malesuada mi ultricies id. Nulla in vestibulum dui. Nulla sit amet urna pharetra, pretium ligula sit amet, interdum libero. Suspendisse ornare maximus pretium. Vivamus sodales ornare sapien, eu cursus felis consequat vel. Proin arcu nulla, feugiat in molestie at, malesuada et ante. In hac habitasse platea dictumst. + +Fusce malesuada ipsum eget mi faucibus iaculis. Maecenas non tellus accumsan risus iaculis efficitur. Suspendisse auctor sem sed nulla sollicitudin lobortis. Proin dapibus consequat orci et viverra. Maecenas purus eros, malesuada et mauris nec, efficitur rhoncus ante. Maecenas urna est, dapibus eget blandit sit amet, egestas in risus. In sit amet bibendum ligula. Sed condimentum maximus tortor, et ultricies magna pretium a. Maecenas elementum, neque sit amet posuere pharetra, nibh nulla facilisis tellus, in sagittis massa sapien quis enim. Mauris aliquam nibh eget varius posuere. Suspendisse viverra augue ac urna aliquet, non consectetur libero ultricies. In hac habitasse platea dictumst. Nunc cursus porttitor mattis. Vestibulum vel congue orci. Fusce ac sodales risus, nec ultrices tortor. Mauris fringilla convallis tortor sit amet molestie. + +Etiam pharetra eros sit amet felis condimentum, vel molestie eros congue. Donec erat mi, egestas in nibh a, placerat ornare leo. Donec eu fermentum nisi, a scelerisque lectus. Donec in eleifend metus, ac rutrum nibh. Aliquam porta condimentum ante vel hendrerit. In bibendum at elit at finibus. Etiam mollis mollis leo, ut porta ligula tincidunt eu. Duis vel pharetra nisl. Etiam viverra nulla eu ligula tincidunt porttitor. Donec molestie placerat ante vitae placerat. Etiam sollicitudin varius est, sit amet laoreet ante ultricies sed. Etiam aliquet, metus a aliquet suscipit, purus leo dapibus est, eget gravida ante leo quis velit. +---- +attachments: +uuid bytes name +9cfc5a2d-2946-48ee-82e0-227ba4bcdbd5 100 Lorem ipsum - Wikipedia.pdf