diff --git a/Cargo.lock b/Cargo.lock index 61f68437b7a1..7cac8e8f3ad4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -258,6 +258,7 @@ version = "1.2.1" dependencies = [ "criterion", "libc", + "lsh", "stdext", "toml-span", "windows-sys", diff --git a/crates/edit/Cargo.toml b/crates/edit/Cargo.toml index fcf8edeb79d1..3858b585cce7 100644 --- a/crates/edit/Cargo.toml +++ b/crates/edit/Cargo.toml @@ -19,6 +19,7 @@ harness = false debug-latency = [] [dependencies] +lsh.workspace = true stdext.workspace = true [target.'cfg(unix)'.dependencies] @@ -26,6 +27,7 @@ libc = "0.2" [build-dependencies] stdext.workspace = true +lsh.workspace = true # The default toml crate bundles its dependencies with bad compile times. Thanks. # Thankfully toml-span exists. FWIW the alternative is yaml-rust (without the 2 suffix). toml-span = { version = "0.6", default-features = false } diff --git a/crates/edit/build/main.rs b/crates/edit/build/main.rs index 4acb58a87817..96ae2f201421 100644 --- a/crates/edit/build/main.rs +++ b/crates/edit/build/main.rs @@ -3,6 +3,8 @@ #![allow(irrefutable_let_patterns)] +use stdext::arena::scratch_arena; + use crate::helpers::env_opt; mod helpers; @@ -24,12 +26,31 @@ fn main() { _ => TargetOs::Unix, }; + compile_lsh(); compile_i18n(); configure_icu(target_os); #[cfg(windows)] configure_windows_binary(target_os); } +fn compile_lsh() { + let scratch = scratch_arena(None); + + let lsh_path = lsh::compiler::builtin_definitions_path(); + let out_dir = env_opt("OUT_DIR"); + let out_path = format!("{out_dir}/lsh_definitions.rs"); + + let mut generator = lsh::compiler::Generator::new(&scratch); + match generator.read_directory(lsh_path).and_then(|_| generator.generate_rust()) { + Ok(c) => std::fs::write(out_path, c).unwrap(), + Err(err) => { + panic!("failed to compile lsh definitions: {err}"); + } + }; + + println!("cargo::rerun-if-changed={}", lsh_path.display()); +} + fn compile_i18n() { let i18n_path = "../../i18n/edit.toml"; diff --git a/crates/edit/src/bin/edit/documents.rs b/crates/edit/src/bin/edit/documents.rs index 23de9a77c33a..974963db8713 100644 --- a/crates/edit/src/bin/edit/documents.rs +++ b/crates/edit/src/bin/edit/documents.rs @@ -8,6 +8,7 @@ use std::{fs, io}; use edit::buffer::{RcTextBuffer, TextBuffer}; use edit::helpers::{CoordType, Point}; +use edit::lsh::{FILE_ASSOCIATIONS, Language, process_file_associations}; use edit::{path, sys}; use crate::apperr; @@ -20,6 +21,7 @@ pub struct Document { pub filename: String, pub file_id: Option, pub new_file_counter: usize, + pub language_override: Option>, } impl Document { @@ -62,15 +64,41 @@ impl Document { fn set_path(&mut self, path: PathBuf) { let filename = path.file_name().unwrap_or_default().to_string_lossy().into_owned(); let dir = path.parent().map(ToOwned::to_owned).unwrap_or_default(); + self.filename = filename; self.dir = Some(DisplayablePathBuf::from_path(dir)); self.path = Some(path); - self.update_file_mode(); + + self.buffer.borrow_mut().set_ruler(if self.filename == "COMMIT_EDITMSG" { 72 } else { 0 }); + self.update_language(); + } + + pub fn auto_detect_language(&mut self) { + self.language_override = None; + self.update_language(); } - fn update_file_mode(&mut self) { - let mut tb = self.buffer.borrow_mut(); - tb.set_ruler(if self.filename == "COMMIT_EDITMSG" { 72 } else { 0 }); + pub fn override_language(&mut self, lang: Option<&'static Language>) { + self.language_override = Some(lang); + self.update_language(); + } + + fn update_language(&mut self) { + self.buffer.borrow_mut().set_language(self.get_language()); + } + + fn get_language(&self) -> Option<&'static Language> { + if let Some(lang) = self.language_override { + return lang; + } + + if let Some(path) = &self.path + && let Some(lang) = process_file_associations(FILE_ASSOCIATIONS, path) + { + return Some(lang); + } + + None } } @@ -140,6 +168,7 @@ impl DocumentManager { filename: Default::default(), file_id: None, new_file_counter: 0, + language_override: None, }; self.gen_untitled_name(&mut doc); @@ -201,6 +230,7 @@ impl DocumentManager { filename: Default::default(), file_id, new_file_counter: 0, + language_override: None, }; doc.set_path(path); diff --git a/crates/edit/src/bin/edit/draw_statusbar.rs b/crates/edit/src/bin/edit/draw_statusbar.rs index 44a688f834c5..f061527932af 100644 --- a/crates/edit/src/bin/edit/draw_statusbar.rs +++ b/crates/edit/src/bin/edit/draw_statusbar.rs @@ -6,6 +6,7 @@ use edit::fuzzy::score_fuzzy; use edit::helpers::*; use edit::icu; use edit::input::vk; +use edit::lsh::LANGUAGES; use edit::tui::*; use stdext::arena::scratch_arena; use stdext::arena_format; @@ -28,15 +29,21 @@ pub fn draw_statusbar(ctx: &mut Context, state: &mut State) { ctx.table_next_row(); - if ctx.button("newline", if tb.is_crlf() { "CRLF" } else { "LF" }, ButtonStyle::default()) { - let is_crlf = tb.is_crlf(); - tb.normalize_newlines(!is_crlf); - } + state.wants_language_picker |= ctx.button( + "language", + tb.language().map_or("Plain Text", |l| l.name), + ButtonStyle::default(), + ); if state.wants_statusbar_focus { state.wants_statusbar_focus = false; ctx.steal_focus(); } + if ctx.button("newline", if tb.is_crlf() { "CRLF" } else { "LF" }, ButtonStyle::default()) { + let is_crlf = tb.is_crlf(); + tb.normalize_newlines(!is_crlf); + } + state.wants_encoding_picker |= ctx.button("encoding", tb.encoding(), ButtonStyle::default()); if state.wants_encoding_picker { @@ -201,6 +208,55 @@ pub fn draw_statusbar(ctx: &mut Context, state: &mut State) { ctx.table_end(); } +pub fn draw_dialog_language_change(ctx: &mut Context, state: &mut State) { + let doc = state.documents.active_mut(); + let mut done = doc.is_none(); + + ctx.modal_begin("language", loc(LocId::LanguageSelectMode)); + if let Some(doc) = doc { + let width = (ctx.size().width - 20).max(10); + let height = (ctx.size().height - 10).max(10); + + ctx.scrollarea_begin("scrollarea", Size { width, height }); + ctx.attr_background_rgba(ctx.indexed_alpha(IndexedColor::Black, 1, 4)); + ctx.inherit_focus(); + { + ctx.list_begin("languages"); + ctx.inherit_focus(); + + let auto_detect = doc.language_override.is_none(); + let selected = if auto_detect { None } else { doc.buffer.borrow().language() }; + + if ctx.list_item(auto_detect, loc(LocId::LanguageAutoDetect)) + == ListSelection::Activated + { + doc.auto_detect_language(); + done = true; + } + + if ctx.list_item(selected.is_none(), "Plain Text") == ListSelection::Activated { + doc.override_language(None); + done = true; + } + + for lang in LANGUAGES { + if ctx.list_item(Some(lang) == selected, lang.name) == ListSelection::Activated { + doc.override_language(Some(lang)); + done = true; + } + } + ctx.list_end(); + } + ctx.scrollarea_end(); + } + done |= ctx.modal_end(); + + if done { + state.wants_language_picker = false; + ctx.needs_rerender(); + } +} + pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { let encoding = state.documents.active_mut().map_or("", |doc| doc.buffer.borrow().encoding()); let reopen = state.wants_encoding_change == StateEncodingChange::Reopen; diff --git a/crates/edit/src/bin/edit/main.rs b/crates/edit/src/bin/edit/main.rs index ea77d0af19e3..8444df43dee0 100644 --- a/crates/edit/src/bin/edit/main.rs +++ b/crates/edit/src/bin/edit/main.rs @@ -325,6 +325,9 @@ fn draw(ctx: &mut Context, state: &mut State) { if state.wants_save { draw_handle_save(ctx, state); } + if state.wants_language_picker { + draw_dialog_language_change(ctx, state); + } if state.wants_encoding_change != StateEncodingChange::None { draw_dialog_encoding_change(ctx, state); } diff --git a/crates/edit/src/bin/edit/state.rs b/crates/edit/src/bin/edit/state.rs index c8d45bd8ca64..b3ac2806b42c 100644 --- a/crates/edit/src/bin/edit/state.rs +++ b/crates/edit/src/bin/edit/state.rs @@ -152,6 +152,8 @@ pub struct State { pub search_options: buffer::SearchOptions, pub search_success: bool, + pub wants_language_picker: bool, + pub wants_encoding_picker: bool, pub wants_encoding_change: StateEncodingChange, pub encoding_picker_needle: String, @@ -200,6 +202,8 @@ impl State { search_options: Default::default(), search_success: true, + wants_language_picker: false, + wants_encoding_picker: false, encoding_picker_needle: Default::default(), encoding_picker_results: Default::default(), diff --git a/crates/edit/src/buffer/line_cache.rs b/crates/edit/src/buffer/line_cache.rs deleted file mode 100644 index af7cd59493f5..000000000000 --- a/crates/edit/src/buffer/line_cache.rs +++ /dev/null @@ -1,116 +0,0 @@ -use std::ops::Range; - -use crate::{document::ReadableDocument, simd::memchr2}; - -/// Cache a line/offset pair every CACHE_EVERY lines to speed up line/offset calculations -const CACHE_EVERY: usize = 1024 * 64; - -#[derive(Clone)] -pub struct CachePoint { - pub index: usize, - pub line: usize, - // pub snapshot: ParserSnapshot -} - -pub struct LineCache { - cache: Vec, -} - -impl LineCache { - pub fn new() -> Self { - Self { cache: vec![] } - } - - pub fn from_document(&mut self, document: &T) { - self.cache.clear(); - - let mut offset = 0; - let mut line = 0; - loop { - let text = document.read_forward(offset); - if text.is_empty() { return; } - - let mut off = 0; - loop { - off = memchr2(b'\n', b'\n', text, off); - if off == text.len() { break; } - - if line % CACHE_EVERY == 0 { - self.cache.push(CachePoint { index: offset+off, line }); - } - line += 1; - off += 1; - } - - offset += text.len(); - } - } - - /// Updates the cache after a deletion. - /// `range` is the deleted byte range, and `text` is the content that was deleted. - pub fn delete(&mut self, range: Range, text: &Vec) { - let mut newlines = 0; - for c in text { - if *c == b'\n' { - newlines += 1; - } - } - - let mut beg_del = None; - let mut end_del = None; - for (i, point) in self.cache.iter_mut().enumerate() { - if point.index >= range.start { - if point.index < range.end { - // cache point is within the deleted range - if beg_del.is_none() { beg_del = Some(i); } - end_del = Some(i + 1); - } - else { - point.index -= text.len(); - point.line -= newlines; - } - } - } - - if let (Some(beg), Some(end)) = (beg_del, end_del) { - self.cache.drain(beg..end); - } - } - - /// Updates the cache after an insertion. - /// `offset` is where the insertion occurs, and `text` is the inserted content. - pub fn insert(&mut self, offset: usize, text: &[u8]) { - // Count how many newlines were inserted - let mut newlines = 0; - for c in text { - if *c == b'\n' { - newlines += 1; - } - } - - let len = text.len(); - for point in &mut self.cache { - if point.index > offset { - point.index += len; - point.line += newlines; - } - } - - // TODO: This also needs to insert new cache points - } - - /// Finds the nearest cached line-offset pair relative to a target line. - /// If `reverse` is false, it returns the closest *before* the target. - /// If `reverse` is true, it returns the closest *after or at* the target. - pub fn nearest_offset(&self, target_count: usize, reverse: bool) -> Option { - match self.cache.binary_search_by_key(&target_count, |p| p.line) { - Ok(i) => Some(self.cache[i].clone()), - Err(i) => { - if i == 0 || i == self.cache.len() { None } // target < lowest cache point || target > highest cache point - else { - Some(self.cache[ if reverse {i} else {i-1} ].clone()) - } - } - } - } -} diff --git a/crates/edit/src/buffer/mod.rs b/crates/edit/src/buffer/mod.rs index e20eac76e531..b63e72eac087 100644 --- a/crates/edit/src/buffer/mod.rs +++ b/crates/edit/src/buffer/mod.rs @@ -44,6 +44,8 @@ use crate::clipboard::Clipboard; use crate::document::{ReadableDocument, WriteableDocument}; use crate::framebuffer::{Framebuffer, IndexedColor}; use crate::helpers::*; +use crate::lsh::cache::HighlighterCache; +use crate::lsh::{HighlightKind, Highlighter, Language}; use crate::oklab::StraightRgba; use crate::simd::memchr2; use crate::unicode::{self, Cursor, MeasurementConfig}; @@ -250,6 +252,7 @@ pub struct TextBuffer { selection: Option, selection_generation: u32, search: Option>, + highlighter_cache: HighlighterCache, width: CoordType, margin_width: CoordType, @@ -259,6 +262,7 @@ pub struct TextBuffer { tab_size: CoordType, indent_with_tabs: bool, line_highlight_enabled: bool, + language: Option<&'static Language>, ruler: CoordType, encoding: &'static str, newlines_are_crlf: bool, @@ -298,6 +302,7 @@ impl TextBuffer { selection: None, selection_generation: 0, search: None, + highlighter_cache: HighlighterCache::new(), width: 0, margin_width: 0, @@ -307,6 +312,7 @@ impl TextBuffer { tab_size: 4, indent_with_tabs: false, line_highlight_enabled: false, + language: None, ruler: 0, encoding: "UTF-8", newlines_are_crlf: cfg!(windows), // Windows users want CRLF @@ -599,6 +605,15 @@ impl TextBuffer { self.line_highlight_enabled = enabled; } + pub fn language(&self) -> Option<&'static Language> { + self.language + } + + pub fn set_language(&mut self, language: Option<&'static Language>) { + self.language = language; + self.highlighter_cache.invalidate_from(0); + } + /// Sets a ruler column, e.g. 80. pub fn set_ruler(&mut self, column: CoordType) { self.ruler = column; @@ -677,6 +692,7 @@ impl TextBuffer { self.set_selection(None); self.mark_as_clean(); self.reflow(); + self.highlighter_cache.invalidate_from(0); } /// Copies the contents of the buffer into a string. @@ -1993,6 +2009,10 @@ impl TextBuffer { cursor = cursor_end; } + let logical_y_beg = self.cursor_for_rendering.unwrap().logical_pos.y; + let logical_y_end = cursor.logical_pos.y + 1; + self.render_apply_highlights(origin, destination, logical_y_beg..logical_y_end, fb); + // Colorize the margin that we wrote above. if self.margin_width > 0 { let margin = Rect { @@ -2058,6 +2078,132 @@ impl TextBuffer { Some(RenderResult { visual_pos_x_max }) } + fn render_apply_highlights( + &mut self, + origin: Point, + destination: Rect, + logical_y_range: Range, + fb: &mut Framebuffer, + ) { + let Some(language) = self.language else { + return; + }; + + let mut highlighter = Highlighter::new(&self.buffer, language); + + // Track cursor position for efficient offset-to-position conversions. + // Start from the rendering cursor which is at the beginning of the visible area. + let mut cursor = self.cursor_for_rendering.unwrap(); + + // Visible vertical range in visual coordinates. + let visible_top = origin.y; + let visible_bottom = origin.y + destination.height(); + + // Text area boundaries in screen coordinates (excluding margin). + let text_left = destination.left + self.margin_width; + let text_right = destination.right; + + for logical_y in logical_y_range { + // Seek cursor to the start of this logical line for efficient lookups. + // This is important because highlights are sorted by offset within + // each logical line. + cursor = self.goto_line_start(cursor, logical_y); + + let scratch = scratch_arena(None); + let highlights = + self.highlighter_cache.parse_line(&scratch, &mut highlighter, logical_y); + + for pair in highlights.windows(2) { + let curr = &pair[0]; + let next = &pair[1]; + + // Skip highlights with no visual effect. + if curr.kind == HighlightKind::Other { + continue; + } + + // Convert byte offsets to cursor positions. Since highlights are + // sorted by offset, we chain from cursor -> beg -> end for efficiency. + let beg = self.cursor_move_to_offset_internal(cursor, curr.start); + let end = self.cursor_move_to_offset_internal(beg, next.start); + cursor = end; + + let color = match curr.kind { + HighlightKind::Other => None, + HighlightKind::Comment => Some(IndexedColor::Green), + HighlightKind::ConstantNumeric => Some(IndexedColor::BrightGreen), + HighlightKind::KeywordControl => Some(IndexedColor::BrightMagenta), + HighlightKind::MarkupChanged => Some(IndexedColor::BrightBlue), + HighlightKind::MarkupDeleted => Some(IndexedColor::BrightRed), + HighlightKind::MarkupInserted => Some(IndexedColor::BrightGreen), + HighlightKind::MetaHeader => Some(IndexedColor::BrightBlue), + }; + + // Handle the case where the highlight spans multiple visual lines + // due to word wrapping. The range is [beg, end) in terms of offsets, + // which maps to visual lines [beg.visual_pos.y, end.visual_pos.y]. + // + // When beg and end are on the same visual line, we highlight + // [beg.visual_pos.x, end.visual_pos.x). + // + // When they span multiple lines: + // - First line: [beg.visual_pos.x, end_of_line) + // - Middle lines: [0, end_of_line) + // - Last line: [0, end.visual_pos.x) + // + // However, if end.visual_pos.x == 0, the last line has no content + // to highlight (the span ends exactly at the line boundary). + let visual_y_end = if end.visual_pos.x == 0 && end.visual_pos.y > beg.visual_pos.y { + // The span ends at position 0 of a new visual line, meaning + // it actually ends at the end of the previous visual line. + end.visual_pos.y - 1 + } else { + end.visual_pos.y + }; + + // Use min/max to skip visual lines outside the visible vertical range. + for visual_y in + beg.visual_pos.y.max(visible_top)..(visual_y_end + 1).min(visible_bottom) + { + let vis_left = if visual_y == beg.visual_pos.y { + beg.visual_pos.x + } else { + // Wrapped continuation lines start at visual x=0. + 0 + }; + let vis_right = if visual_y == end.visual_pos.y { + end.visual_pos.x + } else { + // Line extends to the word wrap column or beyond. + COORD_TYPE_SAFE_MAX + }; + + // Convert to screen coordinates. + let screen_left = text_left + vis_left - origin.x; + let screen_right = (text_left + vis_right - origin.x).min(text_right); + let screen_y = destination.top + visual_y - origin.y; + + // Create the target rectangle, clamped to the text area. + let rect = Rect { + left: screen_left.max(text_left), + top: screen_y, + right: screen_right, + bottom: screen_y + 1, + }; + + // Skip empty or invalid rectangles. + if rect.left >= rect.right { + continue; + } + + if let Some(color) = color { + fb.blend_fg(rect, fb.indexed(color)); + } + } + } + } + } + pub fn cut(&mut self, clipboard: &mut Clipboard) { self.cut_copy(clipboard, true); } @@ -2613,6 +2759,7 @@ impl TextBuffer { } self.active_edit_off = cursor.offset; + self.highlighter_cache.invalidate_from(cursor.logical_pos.y); // If word-wrap is enabled, the visual layout of all logical lines affected by the write // may have changed. This includes even text before the insertion point up to the line @@ -2861,6 +3008,8 @@ impl TextBuffer { return; } + self.highlighter_cache.invalidate_from(damage_start); + if entry_buffer_generation.is_some() { self.recalc_after_content_changed(); } diff --git a/crates/edit/src/framebuffer.rs b/crates/edit/src/framebuffer.rs index 464d11941e52..f6640e344fdb 100644 --- a/crates/edit/src/framebuffer.rs +++ b/crates/edit/src/framebuffer.rs @@ -509,6 +509,13 @@ impl Framebuffer { if last_attr != attr { let diff = last_attr ^ attr; + if diff.is(Attributes::Bold) { + if attr.is(Attributes::Bold) { + result.push_str(arena, "\x1b[1m"); + } else { + result.push_str(arena, "\x1b[22m"); + } + } if diff.is(Attributes::Italic) { if attr.is(Attributes::Italic) { result.push_str(arena, "\x1b[3m"); @@ -523,6 +530,13 @@ impl Framebuffer { result.push_str(arena, "\x1b[24m"); } } + if diff.is(Attributes::Strikethrough) { + if attr.is(Attributes::Strikethrough) { + result.push_str(arena, "\x1b[9m"); + } else { + result.push_str(arena, "\x1b[29m"); + } + } last_attr = attr; } @@ -838,9 +852,11 @@ pub struct Attributes(u8); #[allow(non_upper_case_globals)] impl Attributes { pub const None: Self = Self(0); - pub const Italic: Self = Self(0b1); - pub const Underlined: Self = Self(0b10); - pub const All: Self = Self(0b11); + pub const Bold: Self = Self(1); + pub const Italic: Self = Self(2); + pub const Underlined: Self = Self(4); + pub const Strikethrough: Self = Self(8); + pub const All: Self = Self(16 - 1); pub const fn is(self, attr: Self) -> bool { (self.0 & attr.0) == attr.0 diff --git a/crates/edit/src/lib.rs b/crates/edit/src/lib.rs index 72cddd83343f..585b3bc928fd 100644 --- a/crates/edit/src/lib.rs +++ b/crates/edit/src/lib.rs @@ -20,6 +20,7 @@ pub mod helpers; pub mod icu; pub mod input; pub mod json; +pub mod lsh; pub mod oklab; pub mod path; pub mod simd; diff --git a/crates/edit/src/lsh/cache.rs b/crates/edit/src/lsh/cache.rs new file mode 100644 index 000000000000..95788d95ddda --- /dev/null +++ b/crates/edit/src/lsh/cache.rs @@ -0,0 +1,87 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use lsh::runtime::Highlight; +use stdext::arena::{Arena, scratch_arena}; +use stdext::collections::BVec; + +use crate::helpers::CoordType; +use crate::lsh::{HighlightKind, Highlighter, HighlighterState}; + +#[cfg(debug_assertions)] +const INTERVAL: CoordType = 16; +#[cfg(not(debug_assertions))] +const INTERVAL: CoordType = 1024; + +#[derive(Default)] +pub struct HighlighterCache { + checkpoints: Vec, +} + +impl HighlighterCache { + pub fn new() -> Self { + Self::default() + } + + /// Drop any cached states starting at (including) the given logical line. + pub fn invalidate_from(&mut self, line: CoordType) { + self.checkpoints.truncate(Self::ceil_line_to_offset(line)); + } + + /// Parse the given logical line. Returns the highlight spans. + pub fn parse_line<'a>( + &mut self, + arena: &'a Arena, + highlighter: &mut Highlighter, + line: CoordType, + ) -> BVec<'a, Highlight> { + // Do we need to random seek? + if line != highlighter.logical_pos_y() { + // If so, restore the nearest, preceding checkpoint... + if !self.checkpoints.is_empty() { + let n = Self::floor_line_to_offset(line); + let n = n.min(self.checkpoints.len() - 1); + highlighter.restore(&self.checkpoints[n]); + } else { + // The assumption is that you pass in a default constructed highlighter, + // and this class handles random seeking for you. As such, there should + // never be a case where we don't have a checkpoint for line 0, + // but you have a highlighter for line >0. + debug_assert!(highlighter.logical_pos_y() == 0); + } + + // ...and then seek in front of the requested line. + while highlighter.logical_pos_y() < line { + // There's a bit of waste here, because we just throw away the results, + // but that's better than duplicating the logic. The arena is very fast. + let scratch = scratch_arena(Some(arena)); + _ = self.parse_line_impl(&scratch, highlighter); + } + } + + self.parse_line_impl(arena, highlighter) + } + + fn parse_line_impl<'a>( + &mut self, + arena: &'a Arena, + highlighter: &mut Highlighter, + ) -> BVec<'a, Highlight> { + // If we need to store a checkpoint for the start of the next line, do so now. + if Self::floor_line_to_offset(highlighter.logical_pos_y()) == self.checkpoints.len() { + self.checkpoints.push(highlighter.snapshot()); + } + + highlighter.parse_next_line(arena) + } + + /// Since this line cache is super simplistic (no insertions, only append), + /// we can directly map from line numbers to offsets in the cache. + fn floor_line_to_offset(line: CoordType) -> usize { + (line / INTERVAL).try_into().unwrap_or(0) + } + + fn ceil_line_to_offset(line: CoordType) -> usize { + ((line + INTERVAL - 1) / INTERVAL).try_into().unwrap_or(0) + } +} diff --git a/crates/edit/src/lsh/definitions.rs b/crates/edit/src/lsh/definitions.rs new file mode 100644 index 000000000000..7c3cea5cfff6 --- /dev/null +++ b/crates/edit/src/lsh/definitions.rs @@ -0,0 +1,4 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +include!(concat!(env!("OUT_DIR"), "/lsh_definitions.rs")); diff --git a/crates/edit/src/lsh/highlighter.rs b/crates/edit/src/lsh/highlighter.rs new file mode 100644 index 000000000000..376f9cc50643 --- /dev/null +++ b/crates/edit/src/lsh/highlighter.rs @@ -0,0 +1,147 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use lsh::runtime::*; +use stdext::arena::{Arena, scratch_arena}; +use stdext::collections::BVec; + +use crate::document::ReadableDocument; +use crate::helpers::*; +use crate::lsh::definitions::*; +use crate::{simd, unicode}; + +const MAX_LINE_LEN: usize = 32 * KIBI; + +#[derive(Clone)] +pub struct Highlighter<'a> { + doc: &'a dyn ReadableDocument, + offset: usize, + logical_pos_y: CoordType, + runtime: Runtime<'static, 'static, 'static>, +} + +#[derive(Clone)] +pub struct HighlighterState { + offset: usize, + logical_pos_y: CoordType, + state: RuntimeState, +} + +impl<'doc> Highlighter<'doc> { + pub fn new(doc: &'doc dyn ReadableDocument, language: &'static Language) -> Self { + Self { + doc, + offset: 0, + logical_pos_y: 0, + runtime: Runtime::new(&ASSEMBLY, &STRINGS, &CHARSETS, language.entrypoint), + } + } + + pub fn logical_pos_y(&self) -> CoordType { + self.logical_pos_y + } + + /// Create a restorable snapshot of the current highlighter state + /// so we can resume highlighting from this point later. + pub fn snapshot(&self) -> HighlighterState { + HighlighterState { + offset: self.offset, + logical_pos_y: self.logical_pos_y, + state: self.runtime.snapshot(), + } + } + + /// Restore the highlighter state from a previously captured snapshot. + pub fn restore(&mut self, snapshot: &HighlighterState) { + self.offset = snapshot.offset; + self.logical_pos_y = snapshot.logical_pos_y; + self.runtime.restore(&snapshot.state); + } + + pub fn parse_next_line<'a>(&mut self, arena: &'a Arena) -> BVec<'a, Highlight> { + let scratch = scratch_arena(Some(arena)); + let (line_off, line) = self.read_next_line(&scratch); + + // Empty lines can be somewhat common. + // + // If the line is too long, we don't highlight it. + // This is to prevent performance issues with very long lines. + if line.is_empty() || line.len() >= MAX_LINE_LEN { + return BVec::empty(); + } + + let line = unicode::strip_newline(line); + let mut res = self.runtime.parse_next_line(arena, line); + + // Adjust the range to account for the line offset. + for h in res.iter_mut() { + h.start = line_off + h.start.min(line.len()); + } + + res + } + + fn read_next_line<'a>(&mut self, arena: &'a Arena) -> (usize, &'a [u8]) + where + 'doc: 'a, + { + self.logical_pos_y += 1; + + let line_beg = self.offset; + let mut chunk; + let mut line_buf; + + // Try to read a chunk and see if it contains a newline. + // In that case we can skip concatenating chunks. + { + chunk = self.doc.read_forward(self.offset); + if chunk.is_empty() { + return (line_beg, chunk); + } + + let (off, line) = simd::lines_fwd(chunk, 0, 0, 1); + self.offset += off; + + if line == 1 { + return (line_beg, &chunk[..off]); + } + + let next_chunk = self.doc.read_forward(self.offset); + if next_chunk.is_empty() { + return (line_beg, &chunk[..off]); + } + + line_buf = BVec::empty(); + + // Ensure we don't overflow the heap size with a 1GB long line. + let end = off.min(MAX_LINE_LEN - line_buf.len()); + let end = end.min(chunk.len()); + line_buf.extend_from_slice(arena, &chunk[..end]); + + chunk = next_chunk; + } + + // Concatenate chunks until we get a full line. + while line_buf.len() < MAX_LINE_LEN { + let (off, line) = simd::lines_fwd(chunk, 0, 0, 1); + self.offset += off; + + // Ensure we don't overflow the heap size with a 1GB long line. + let end = off.min(MAX_LINE_LEN - line_buf.len()); + let end = end.min(chunk.len()); + line_buf.extend_from_slice(arena, &chunk[..end]); + + // Start of the next line found. + if line == 1 { + break; + } + + chunk = self.doc.read_forward(self.offset); + if chunk.is_empty() { + break; + } + } + + (line_beg, line_buf.leak()) + } +} diff --git a/crates/edit/src/lsh/mod.rs b/crates/edit/src/lsh/mod.rs new file mode 100644 index 000000000000..986083bfce85 --- /dev/null +++ b/crates/edit/src/lsh/mod.rs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Microsoft Edit's adapter to LSH. + +pub mod cache; +mod definitions; +mod highlighter; + +use std::path::Path; + +pub use definitions::{FILE_ASSOCIATIONS, HighlightKind, LANGUAGES}; +pub use highlighter::*; +pub use lsh::runtime::Language; +use stdext::glob::glob_match; + +pub fn process_file_associations( + associations: &[(T, &'static Language)], + path: &Path, +) -> Option<&'static Language> +where + T: AsRef<[u8]>, +{ + let path = path.as_os_str().as_encoded_bytes(); + + for a in associations { + if glob_match(a.0.as_ref(), path) { + return Some(a.1); + } + } + + None +} diff --git a/crates/lsh/definitions/diff.lsh b/crates/lsh/definitions/diff.lsh new file mode 100644 index 000000000000..7b71c3dd2cc1 --- /dev/null +++ b/crates/lsh/definitions/diff.lsh @@ -0,0 +1,12 @@ +#[display_name = "Diff"] +#[path = "**/*.diff"] +#[path = "**/*.patch"] +pub fn diff() { + if /(?:diff|---|\+\+\+).*/ { + yield meta.header; + } else if /-.*/ { + yield markup.deleted; + } else if /\+.*/ { + yield markup.inserted; + } +} diff --git a/i18n/edit.toml b/i18n/edit.toml index 01248b1577ad..f0eaff969a15 100644 --- a/i18n/edit.toml +++ b/i18n/edit.toml @@ -1661,6 +1661,32 @@ vi = "Đóng" zh_hans = "关闭" zh_hant = "關閉" +[LanguageSelectMode] +en = "Select Language Mode" +de = "Sprachmodus auswählen" +es = "Seleccionar modo de lenguaje" +fr = "Sélectionner le mode du langage" +it = "Seleziona modalità del linguaggio" +ja = "言語モードの選択" +ko = "언어 모드 선택" +pt_br = "Selecionar modo de linguagem" +ru = "Выбрать режим языка" +zh_hans = "选择语言模式" +zh_hant = "選擇語言模式" + +[LanguageAutoDetect] +en = "Auto Detect" +de = "Automatisch erkennen" +es = "Detección automática" +fr = "Détection automatique" +it = "Rilevamento automatico" +ja = "自動検出" +ko = "자동 감지" +pt_br = "Detectar automaticamente" +ru = "Определить автоматически" +zh_hans = "自动检测" +zh_hant = "自動偵測" + [EncodingReopen] en = "Reopen with encoding…" ar = "إعادة فتح مع الترميز…"