Skip to content

Commit

Permalink
Refactor BibLaTeX parser
Browse files Browse the repository at this point in the history
  • Loading branch information
pfoerster committed Mar 5, 2020
1 parent ca5f23c commit 71ce0a2
Show file tree
Hide file tree
Showing 9 changed files with 942 additions and 6 deletions.
1 change: 1 addition & 0 deletions benches/bench_main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ use criterion::Criterion;
/// Benchmark entry point: runs the LaTeX suite, then the BibTeX suite, and finally
/// asks Criterion to print its summary.
#[tokio::main]
async fn main() {
    benchmarks::open_latex::benches().await;
    benchmarks::open_bibtex::benches().await;

    // Configure from the command line first, then emit the final summary.
    let criterion = Criterion::default().configure_from_args();
    criterion.final_summary();
}
1 change: 1 addition & 0 deletions benches/benchmarks/mod.rs
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pub mod open_bibtex;
pub mod open_latex;
24 changes: 24 additions & 0 deletions benches/benchmarks/open_bibtex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use criterion::Criterion;
use texlab::{syntax::bibtex, tex::Distribution};
use tokio::fs;

/// Benchmarks `bibtex::open` on the contents of `biblatex-examples.bib`.
///
/// The file is looked up by name through the resolver of the detected TeX
/// distribution and read once up front, so only `bibtex::open` runs inside the
/// measured closure.
///
/// # Panics
///
/// Panics if the distribution cannot be loaded, if the resolver has no entry for
/// `biblatex-examples.bib`, or if the file cannot be read.
async fn criterion_benchmark(criterion: &mut Criterion) {
    let distro = Distribution::detect().await;
    distro
        .load()
        .await
        .expect("failed to load TeX distribution");
    let resolver = distro.resolver().await;
    let path = resolver
        .files_by_name
        .get("biblatex-examples.bib")
        .expect("unable to retrieve biblatex-examples.bib");

    // Use a descriptive message like the other failure points above, so a broken
    // setup reports which step failed instead of a bare `unwrap` panic.
    let text = fs::read_to_string(&path)
        .await
        .expect("unable to read biblatex-examples.bib");
    criterion.bench_function("biblatex-examples.bib", |b| b.iter(|| bibtex::open(&text)));
}

/// Runs the BibTeX benchmark with a `Criterion` instance configured from the
/// command-line arguments.
pub async fn benches() {
    let mut configured = Criterion::default().configure_from_args();
    criterion_benchmark(&mut configured).await;
}
326 changes: 326 additions & 0 deletions src/syntax/bibtex/ast.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,326 @@
use crate::{
protocol::{Position, Range, RangeExt},
syntax::{Span, SyntaxNode},
};
use itertools::Itertools;
use petgraph::graph::{Graph, NodeIndex};
use serde::{Deserialize, Serialize};

/// Kind tag carried by every [`Token`].
///
/// NOTE(review): the lexer that assigns these kinds is not visible in this file; the
/// per-variant notes below are inferred from the variant names and should be
/// confirmed against it.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize, Deserialize)]
pub enum TokenKind {
// presumably the `@preamble` keyword
PreambleKind,
// presumably the `@string` keyword
StringKind,
// presumably any other `@...` entry type keyword
EntryKind,
// presumably a plain word
Word,
// presumably a TeX command
Command,
// presumably `=`
Assign,
// presumably `,`
Comma,
// presumably the concatenation operator `#`
Concat,
// presumably `"`
Quote,
// presumably `{`
BeginBrace,
// presumably `}`
EndBrace,
// presumably `(`
BeginParen,
// presumably `)`
EndParen,
}

/// A single token: a source [`Span`] tagged with its [`TokenKind`].
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct Token {
/// Source span of the token; supplies both its range and its text.
pub span: Span,
/// Classification of the token.
pub kind: TokenKind,
}

impl SyntaxNode for Token {
/// Returns the range of the underlying span.
fn range(&self) -> Range {
self.span.range()
}
}

impl Token {
/// Creates a token from a span and its kind.
pub fn new(span: Span, kind: TokenKind) -> Self {
Self { span, kind }
}

/// Borrows the token's text from `span.text`.
pub fn text(&self) -> &str {
&self.span.text
}
}

/// Payload of the [`Node::Root`] variant; it stores nothing but a range.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize, Deserialize)]
pub struct Root {
/// Range reported for the root node.
// presumably spans the whole document — confirm against the parser
pub range: Range,
}

impl SyntaxNode for Root {
/// Returns the stored range.
fn range(&self) -> Range {
self.range
}
}

/// Payload of the [`Node::Comment`] variant: a node made of exactly one token.
// NOTE(review): which source text the parser turns into a `Comment` is not visible here.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct Comment {
/// The token this node consists of.
pub token: Token,
}

impl SyntaxNode for Comment {
/// Returns the range of the wrapped token.
fn range(&self) -> Range {
self.token.range()
}
}

/// Payload of the [`Node::Preamble`] variant.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct Preamble {
/// Range reported for the whole node.
pub range: Range,
/// Type token of the declaration.
// presumably the `@preamble` keyword (`TokenKind::PreambleKind`) — confirm
pub ty: Token,
/// Optional left delimiter token.
// presumably the opening brace/paren, `None` when absent from the source — confirm
pub left: Option<Token>,
/// Optional right delimiter token.
// presumably the closing brace/paren, `None` when absent from the source — confirm
pub right: Option<Token>,
}

impl SyntaxNode for Preamble {
/// Returns the stored range.
fn range(&self) -> Range {
self.range
}
}

/// Payload of the [`Node::String`] variant.
///
/// Note that within this module the name `String` refers to this type, not to
/// `std::string::String`.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct String {
/// Range reported for the whole node.
pub range: Range,
/// Type token of the declaration.
// presumably the `@string` keyword (`TokenKind::StringKind`) — confirm
pub ty: Token,
/// Optional left delimiter token.
pub left: Option<Token>,
/// Optional name token.
// presumably the name being defined — confirm against the parser
pub name: Option<Token>,
/// Optional assignment token.
pub assign: Option<Token>,
/// Optional right delimiter token.
pub right: Option<Token>,
}

impl SyntaxNode for String {
/// Returns the stored range.
fn range(&self) -> Range {
self.range
}
}

/// Payload of the [`Node::Entry`] variant.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct Entry {
/// Range reported for the whole node.
pub range: Range,
/// Type token of the entry; its text includes the leading `@`
/// (see `is_comment`, which compares it against `@comment`).
pub ty: Token,
/// Optional left delimiter token.
pub left: Option<Token>,
/// Optional key token.
// presumably the citation key — confirm against the parser
pub key: Option<Token>,
/// Optional comma token.
// presumably the comma following the key — confirm against the parser
pub comma: Option<Token>,
/// Optional right delimiter token.
pub right: Option<Token>,
}

impl SyntaxNode for Entry {
/// Returns the stored range.
fn range(&self) -> Range {
self.range
}
}

impl Entry {
    /// Returns `true` when the entry's type token spells `@comment`, ignoring case.
    ///
    /// The comparison is done in place with `eq_ignore_ascii_case` rather than by
    /// building a lowercased copy of the token text on every call; because the
    /// literal `@comment` is pure ASCII the outcome is the same.
    pub fn is_comment(&self) -> bool {
        self.ty.text().eq_ignore_ascii_case("@comment")
    }
}

/// Payload of the [`Node::Field`] variant.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct Field {
/// Range reported for the whole node.
pub range: Range,
/// Name token of the field; `Tree::field_by_name` matches against its text.
pub name: Token,
/// Optional assignment token.
pub assign: Option<Token>,
/// Optional comma token.
// presumably the trailing comma after the field's value — confirm against the parser
pub comma: Option<Token>,
}

impl SyntaxNode for Field {
/// Returns the stored range.
fn range(&self) -> Range {
self.range
}
}

/// Payload of the [`Node::Word`] variant: a node made of exactly one token.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct Word {
/// The token this node consists of.
pub token: Token,
}

impl SyntaxNode for Word {
/// Returns the range of the wrapped token.
fn range(&self) -> Range {
self.token.range()
}
}

/// Payload of the [`Node::Command`] variant: a node made of exactly one token.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct Command {
/// The token this node consists of.
pub token: Token,
}

impl SyntaxNode for Command {
/// Returns the range of the wrapped token.
fn range(&self) -> Range {
self.token.range()
}
}

/// Payload of the [`Node::QuotedContent`] variant.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct QuotedContent {
/// Range reported for the whole node.
pub range: Range,
/// Left delimiter token; always present.
// presumably the opening `"` — confirm against the parser
pub left: Token,
/// Optional right delimiter token.
// presumably the closing `"`, `None` when unterminated — confirm against the parser
pub right: Option<Token>,
}

impl SyntaxNode for QuotedContent {
/// Returns the stored range.
fn range(&self) -> Range {
self.range
}
}

/// Payload of the [`Node::BracedContent`] variant.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct BracedContent {
/// Range reported for the whole node.
pub range: Range,
/// Left delimiter token; always present.
// presumably the opening `{` — confirm against the parser
pub left: Token,
/// Optional right delimiter token.
// presumably the closing `}`, `None` when unterminated — confirm against the parser
pub right: Option<Token>,
}

impl SyntaxNode for BracedContent {
/// Returns the stored range.
fn range(&self) -> Range {
self.range
}
}

/// Payload of the [`Node::Concat`] variant.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub struct Concat {
/// Range reported for the whole node.
pub range: Range,
/// The operator token.
// presumably of kind `TokenKind::Concat` — confirm against the parser
pub operator: Token,
}

impl SyntaxNode for Concat {
/// Returns the stored range.
fn range(&self) -> Range {
self.range
}
}

/// Node weight stored in the graph of a [`Tree`]; one variant per node type
/// declared in this module, each wrapping the struct of the same name.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub enum Node {
Root(Root),
Comment(Comment),
Preamble(Preamble),
String(String),
Entry(Entry),
Field(Field),
Word(Word),
Command(Command),
QuotedContent(QuotedContent),
BracedContent(BracedContent),
Concat(Concat),
}

impl SyntaxNode for Node {
fn range(&self) -> Range {
match self {
Self::Root(root) => root.range(),
Self::Comment(comment) => comment.range(),
Self::Preamble(preamble) => preamble.range(),
Self::String(string) => string.range(),
Self::Entry(entry) => entry.range(),
Self::Field(field) => field.range(),
Self::Word(word) => word.range(),
Self::Command(cmd) => cmd.range(),
Self::QuotedContent(content) => content.range(),
Self::BracedContent(content) => content.range(),
Self::Concat(concat) => concat.range(),
}
}
}

/// Syntax tree stored as a petgraph `Graph` whose node weights are [`Node`]s.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tree {
/// Graph holding the nodes; edges carry no data (`()`).
pub graph: Graph<Node, ()>,
/// Index of the node at which `find` starts its search.
pub root: NodeIndex,
}

impl Tree {
    /// Returns the neighbors of `parent` in the graph, ordered by the start
    /// position of their ranges.
    ///
    /// The sort makes the order follow the nodes' start positions rather than
    /// whatever order the graph happens to yield its neighbors in.
    pub fn children(&self, parent: NodeIndex) -> impl Iterator<Item = NodeIndex> {
        self.graph
            .neighbors(parent)
            .sorted_by_key(|child| self.graph[*child].start())
    }

    /// Calls `visitor.visit` once for every child of `parent`, in the order
    /// produced by [`Tree::children`].
    ///
    /// Only the direct children are visited here; descending further is up to
    /// the visitor (typically by calling `walk` again from inside `visit`).
    pub fn walk<V: Visitor>(&self, visitor: &mut V, parent: NodeIndex) {
        for child in self.children(parent) {
            visitor.visit(self, child);
        }
    }

    /// Returns every node whose range contains `position`, outermost first.
    ///
    /// The search starts at `self.root` and only descends into nodes that
    /// themselves contain the position, so the result is empty when the root
    /// does not contain it.
    pub fn find(&self, position: Position) -> Vec<NodeIndex> {
        let mut finder = Finder::new(position);
        finder.visit(self, self.root);
        finder.results
    }

    /// Returns the [`Preamble`] stored at `node`, or `None` for any other node type.
    pub fn as_preamble(&self, node: NodeIndex) -> Option<&Preamble> {
        match &self.graph[node] {
            Node::Preamble(preamble) => Some(preamble),
            _ => None,
        }
    }

    /// Returns the [`String`] stored at `node`, or `None` for any other node type.
    pub fn as_string(&self, node: NodeIndex) -> Option<&String> {
        match &self.graph[node] {
            Node::String(string) => Some(string),
            _ => None,
        }
    }

    /// Returns the [`Entry`] stored at `node`, or `None` for any other node type.
    pub fn as_entry(&self, node: NodeIndex) -> Option<&Entry> {
        match &self.graph[node] {
            Node::Entry(entry) => Some(entry),
            _ => None,
        }
    }

    /// Returns the [`Field`] stored at `node`, or `None` for any other node type.
    pub fn as_field(&self, node: NodeIndex) -> Option<&Field> {
        match &self.graph[node] {
            Node::Field(field) => Some(field),
            _ => None,
        }
    }

    /// Finds the first field child of the entry `parent` whose name equals `name`,
    /// compared case-insensitively.
    ///
    /// Returns `None` when `parent` is not an entry or when no field matches.
    ///
    /// Both sides of the comparison are lowercased: token text keeps the casing
    /// used in the source file, so comparing a lowercased query against the raw
    /// field name would miss fields written as e.g. `Author` or `TITLE`.
    pub fn field_by_name(&self, parent: NodeIndex, name: &str) -> Option<NodeIndex> {
        let name = name.to_lowercase();
        // Bail out early unless `parent` really is an entry.
        self.as_entry(parent)?;
        self.children(parent).find(|node| {
            self.as_field(*node)
                .map_or(false, |field| field.name.text().to_lowercase() == name)
        })
    }
}

/// Callback interface driven by `Tree::walk`.
pub trait Visitor {
/// Called with the tree and the index of the node being visited.
///
/// `Tree::walk` invokes this only for the direct children of the node it is
/// given, so an implementation that wants to go deeper calls `tree.walk(self, node)`.
fn visit(&mut self, tree: &Tree, node: NodeIndex);
}

/// Visitor that collects every node whose range contains a fixed position.
#[derive(Debug)]
struct Finder {
    /// Position being searched for.
    position: Position,
    /// Matching nodes, in the order they were visited.
    results: Vec<NodeIndex>,
}

impl Finder {
    /// Creates a finder for `position` with no results yet.
    fn new(position: Position) -> Self {
        let results = Vec::new();
        Finder { position, results }
    }
}

impl Visitor for Finder {
    /// Records `node` and descends into its children when its range contains the
    /// searched position; otherwise the node and everything below it is skipped.
    fn visit(&mut self, tree: &Tree, node: NodeIndex) {
        let covers_position = tree.graph[node].range().contains(self.position);
        if !covers_position {
            return;
        }

        // Push before walking so that parents precede their children in `results`.
        self.results.push(node);
        tree.walk(self, node);
    }
}
Loading

0 comments on commit 71ce0a2

Please sign in to comment.