Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

I made some improvements to the LaTeX parser to make it more useful

since the current support is a bit lacking. In particular:

- Colons are now allowed in identifiers; this is a common way of
  separating parts in a label.
- LaTeX doesn't have a concept of strings like other languages, so
  quotes shouldn't be handled specially, otherwise this can lead to
  whole passages of text being ignored. This fixes bug #2886871.
- Sectioning commands now report the scope they're in; for example, a
  subsection will report the section it's in using a 'section:'
  extension field. This allows, for example, the Tagbar Vim plugin to
  display the sections in a properly nested way.
  At the moment the scopes are separated by two double quotes ('""')
  as that is a character sequence that shouldn't really appear in
  normal LaTeX text, but if anyone has a better idea please tell me.
- Labels can now be output as an additional kind. They're not listed
  under their enclosing sections, since other editors don't seem to nest
  them either, and I think it makes more sense this way.

It's working fine for me (and another guy who tested it together with my
Tagbar plugin), but it's still possible that I overlooked something --
if so please tell me.

Cheers,
Jan Larres
  • Loading branch information...
commit 3a5c93ccd9d41ca6b1f99f61492a22c4a9c1f5a2 1 parent 1ff9779
David Fishburn authored
Showing with 150 additions and 81 deletions.
  1. +150 −81 tex.c
View
231 tex.c
@@ -2,6 +2,7 @@
* $Id: tex.c 666 2008-05-15 17:47:31Z dfishburn $
*
* Copyright (c) 2008, David Fishburn
+ * Copyright (c) 2012, Jan Larres
*
* This source code is released for free distribution under the terms of the
* GNU General Public License.
@@ -47,13 +48,14 @@ typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
*/
typedef enum eKeywordId {
KEYWORD_NONE = -1,
+ KEYWORD_part,
KEYWORD_chapter,
KEYWORD_section,
KEYWORD_subsection,
KEYWORD_subsubsection,
- KEYWORD_part,
KEYWORD_paragraph,
KEYWORD_subparagraph,
+ KEYWORD_label,
KEYWORD_include
} keywordId;
@@ -99,38 +101,47 @@ static langType Lang_js;
static jmp_buf Exception;
+static vString *lastPart;
+static vString *lastChapter;
+static vString *lastSection;
+static vString *lastSubS;
+static vString *lastSubSubS;
+
typedef enum {
+ TEXTAG_PART,
TEXTAG_CHAPTER,
TEXTAG_SECTION,
TEXTAG_SUBSECTION,
TEXTAG_SUBSUBSECTION,
- TEXTAG_PART,
TEXTAG_PARAGRAPH,
TEXTAG_SUBPARAGRAPH,
+ TEXTAG_LABEL,
TEXTAG_INCLUDE,
TEXTAG_COUNT
} texKind;
static kindOption TexKinds [] = {
+ { TRUE, 'p', "part", "parts" },
{ TRUE, 'c', "chapter", "chapters" },
{ TRUE, 's', "section", "sections" },
{ TRUE, 'u', "subsection", "subsections" },
{ TRUE, 'b', "subsubsection", "subsubsections" },
- { TRUE, 'p', "part", "parts" },
{ TRUE, 'P', "paragraph", "paragraphs" },
{ TRUE, 'G', "subparagraph", "subparagraphs" },
+ { TRUE, 'l', "label", "labels" },
{ TRUE, 'i', "include", "includes" }
};
static const keywordDesc TexKeywordTable [] = {
/* keyword keyword ID */
+ { "part", KEYWORD_part },
{ "chapter", KEYWORD_chapter },
{ "section", KEYWORD_section },
{ "subsection", KEYWORD_subsection },
{ "subsubsection", KEYWORD_subsubsection },
- { "part", KEYWORD_part },
{ "paragraph", KEYWORD_paragraph },
{ "subparagraph", KEYWORD_subparagraph },
+ { "label", KEYWORD_label },
{ "include", KEYWORD_include }
};
@@ -141,8 +152,8 @@ static const keywordDesc TexKeywordTable [] = {
static boolean isIdentChar (const int c)
{
return (boolean)
- (isalpha (c) || isdigit (c) || c == '$' ||
- c == '_' || c == '#' || c == '-' || c == '.');
+ (isalpha (c) || isdigit (c) || c == '$' ||
+ c == '_' || c == '#' || c == '-' || c == '.' || c == ':');
}
static void buildTexKeywordHash (void)
@@ -178,15 +189,76 @@ static void deleteToken (tokenInfo *const token)
eFree (token);
}
+/*
+ * Work out the enclosing sectioning scope for a tag of the given kind.
+ *
+ * kind        kind of the tag that is about to be emitted.
+ * parentKind  receives the kind name ("part", "chapter", "section",
+ *             "subsection" or "subsubsection") of the innermost enclosing
+ *             level that currently has a recorded title; it is left
+ *             untouched when no such level exists.
+ * parentName  gets the recorded titles of all enclosing levels appended,
+ *             outermost first, joined by '""'. The separator is only
+ *             written when parentName is already non-empty, so the caller
+ *             is expected to pass in an empty string.
+ *
+ * The recorded titles are the file-scope lastPart, lastChapter, lastSection,
+ * lastSubS and lastSubSubS strings. Kinds from TEXTAG_LABEL upwards get no
+ * scope: both output strings are left as they were.
+ */
+static void getScopeInfo(texKind kind, vString *const parentKind,
+        vString *const parentName)
+{
+    int i;
+
+    /*
+     * Put labels separately instead of under their scope.
+     * Is this The Right Thing To Do?
+     */
+    if (kind >= TEXTAG_LABEL) {
+        return;
+    }
+
+    /*
+     * This abuses the enum internals somewhat, but it should be ok in this
+     * case: texKind lists the sectioning levels from outermost (part) to
+     * innermost, so walking downwards from kind - 1 visits the candidate
+     * parents from the closest one outwards. The first level with a
+     * non-empty recorded title wins.
+     *
+     * The subsubsection level has to be checked as well, otherwise a
+     * paragraph or subparagraph inside a subsubsection would be reported
+     * as a direct child of the subsection.
+     */
+    for (i = kind - 1; i >= TEXTAG_PART; --i) {
+        if (i == TEXTAG_SUBSUBSECTION && vStringLength(lastSubSubS) > 0) {
+            vStringCopyS(parentKind, "subsubsection");
+            break;
+        } else if (i == TEXTAG_SUBSECTION && vStringLength(lastSubS) > 0) {
+            vStringCopyS(parentKind, "subsection");
+            break;
+        } else if (i == TEXTAG_SECTION && vStringLength(lastSection) > 0) {
+            vStringCopyS(parentKind, "section");
+            break;
+        } else if (i == TEXTAG_CHAPTER && vStringLength(lastChapter) > 0) {
+            vStringCopyS(parentKind, "chapter");
+            break;
+        } else if (i == TEXTAG_PART && vStringLength(lastPart) > 0) {
+            vStringCopyS(parentKind, "part");
+            break;
+        }
+    }
+
+    /*
+     * Is '""' the best way to separate scopes? It has to be something that
+     * should ideally never occur in normal LaTeX text.
+     *
+     * Build the scope path from the outermost level inwards, skipping
+     * levels that have no recorded title.
+     */
+    for (i = TEXTAG_PART; i < (int)kind; ++i) {
+        if (i == TEXTAG_PART && vStringLength(lastPart) > 0) {
+            vStringCat(parentName, lastPart);
+        } else if (i == TEXTAG_CHAPTER && vStringLength(lastChapter) > 0) {
+            if (vStringLength(parentName) > 0) {
+                vStringCatS(parentName, "\"\"");
+            }
+            vStringCat(parentName, lastChapter);
+        } else if (i == TEXTAG_SECTION && vStringLength(lastSection) > 0) {
+            if (vStringLength(parentName) > 0) {
+                vStringCatS(parentName, "\"\"");
+            }
+            vStringCat(parentName, lastSection);
+        } else if (i == TEXTAG_SUBSECTION && vStringLength(lastSubS) > 0) {
+            if (vStringLength(parentName) > 0) {
+                vStringCatS(parentName, "\"\"");
+            }
+            vStringCat(parentName, lastSubS);
+        } else if (i == TEXTAG_SUBSUBSECTION && vStringLength(lastSubSubS) > 0) {
+            if (vStringLength(parentName) > 0) {
+                vStringCatS(parentName, "\"\"");
+            }
+            vStringCat(parentName, lastSubSubS);
+        }
+    }
+}
+
/*
* Tag generation functions
*/
-static void makeConstTag (tokenInfo *const token, const texKind kind)
+static void makeTexTag (tokenInfo *const token, texKind kind)
{
- if (TexKinds [kind].enabled )
+ if (TexKinds [kind].enabled)
{
const char *const name = vStringValue (token->string);
+ vString *parentKind = vStringNew();
+ vString *parentName = vStringNew();
tagEntryInfo e;
initTagEntry (&e, name);
@@ -195,31 +267,13 @@ static void makeConstTag (tokenInfo *const token, const texKind kind)
e.kindName = TexKinds [kind].name;
e.kind = TexKinds [kind].letter;
- makeTagEntry (&e);
- }
-}
-
-static void makeTexTag (tokenInfo *const token, texKind kind)
-{
- vString * fulltag;
-
- if (TexKinds [kind].enabled)
- {
- /*
- * If a scope has been added to the token, change the token
- * string to include the scope when making the tag.
- */
- if ( vStringLength (token->scope) > 0 )
- {
- fulltag = vStringNew ();
- vStringCopy (fulltag, token->scope);
- vStringCatS (fulltag, ".");
- vStringCatS (fulltag, vStringValue (token->string));
- vStringTerminate (fulltag);
- vStringCopy (token->string, fulltag);
- vStringDelete (fulltag);
+ getScopeInfo(kind, parentKind, parentName);
+ if (vStringLength(parentKind) > 0) {
+ e.extensionFields.scope [0] = vStringValue(parentKind);
+ e.extensionFields.scope [1] = vStringValue(parentName);
}
- makeConstTag (token, kind);
+
+ makeTagEntry (&e);
}
}
@@ -227,28 +281,7 @@ static void makeTexTag (tokenInfo *const token, texKind kind)
* Parsing functions
*/
-static void parseString (vString *const string, const int delimiter)
-{
- boolean end = FALSE;
- while (! end)
- {
- int c = fileGetc ();
- if (c == EOF)
- end = TRUE;
- else if (c == '\\')
- {
- c = fileGetc(); /* This maybe a ' or ". */
- vStringPut (string, c);
- }
- else if (c == delimiter)
- end = TRUE;
- else
- vStringPut (string, c);
- }
- vStringTerminate (string);
-}
-
-/*
+/*
* Read a C identifier beginning with "firstChar" and place it into
* "name".
*/
@@ -296,14 +329,6 @@ static void readToken (tokenInfo *const token)
case ']': token->type = TOKEN_CLOSE_SQUARE; break;
case '*': token->type = TOKEN_STAR; break;
- case '\'':
- case '"':
- token->type = TOKEN_STRING;
- parseString (token->string, c);
- token->lineNumber = getSourceLineNumber ();
- token->filePosition = getInputFilePosition ();
- break;
-
case '\\':
/*
* All Tex tags start with a backslash.
@@ -423,7 +448,7 @@ static boolean parseTag (tokenInfo *const token, texKind kind)
}
readToken (token);
}
- if (useLongName)
+ if (useLongName)
{
vStringTerminate (fullname);
vStringCopy (name->string, fullname);
@@ -431,6 +456,41 @@ static boolean parseTag (tokenInfo *const token, texKind kind)
}
}
+ /*
+ * save the name of the last section definitions for scope-resolution
+ * later
+ */
+ switch (kind)
+ {
+ case TEXTAG_PART:
+ vStringCopy(lastPart, fullname);
+ vStringClear(lastChapter);
+ vStringClear(lastSection);
+ vStringClear(lastSubS);
+ vStringClear(lastSubSubS);
+ break;
+ case TEXTAG_CHAPTER:
+ vStringCopy(lastChapter, fullname);
+ vStringClear(lastSection);
+ vStringClear(lastSubS);
+ vStringClear(lastSubSubS);
+ break;
+ case TEXTAG_SECTION:
+ vStringCopy(lastSection, fullname);
+ vStringClear(lastSubS);
+ vStringClear(lastSubSubS);
+ break;
+ case TEXTAG_SUBSECTION:
+ vStringCopy(lastSubS, fullname);
+ vStringClear(lastSubSubS);
+ break;
+ case TEXTAG_SUBSUBSECTION:
+ vStringCopy(lastSubSubS, fullname);
+ break;
+ default:
+ break;
+ }
+
deleteToken (name);
vStringDelete (fullname);
return TRUE;
@@ -446,34 +506,37 @@ static void parseTexFile (tokenInfo *const token)
{
switch (token->keyword)
{
- case KEYWORD_chapter:
- parseTag (token, TEXTAG_CHAPTER);
+ case KEYWORD_part:
+ parseTag (token, TEXTAG_PART);
break;
- case KEYWORD_section:
- parseTag (token, TEXTAG_SECTION);
+ case KEYWORD_chapter:
+ parseTag (token, TEXTAG_CHAPTER);
break;
- case KEYWORD_subsection:
- parseTag (token, TEXTAG_SUBSECTION);
+ case KEYWORD_section:
+ parseTag (token, TEXTAG_SECTION);
break;
- case KEYWORD_subsubsection:
- parseTag (token, TEXTAG_SUBSUBSECTION);
+ case KEYWORD_subsection:
+ parseTag (token, TEXTAG_SUBSECTION);
break;
- case KEYWORD_part:
- parseTag (token, TEXTAG_PART);
+ case KEYWORD_subsubsection:
+ parseTag (token, TEXTAG_SUBSUBSECTION);
break;
- case KEYWORD_paragraph:
- parseTag (token, TEXTAG_PARAGRAPH);
+ case KEYWORD_paragraph:
+ parseTag (token, TEXTAG_PARAGRAPH);
break;
- case KEYWORD_subparagraph:
- parseTag (token, TEXTAG_SUBPARAGRAPH);
+ case KEYWORD_subparagraph:
+ parseTag (token, TEXTAG_SUBPARAGRAPH);
break;
- case KEYWORD_include:
- parseTag (token, TEXTAG_INCLUDE);
+ case KEYWORD_label:
+ parseTag (token, TEXTAG_LABEL);
+ break;
+ case KEYWORD_include:
+ parseTag (token, TEXTAG_INCLUDE);
break;
default:
break;
}
- }
+ }
} while (TRUE);
}
@@ -482,13 +545,19 @@ static void initialize (const langType language)
Assert (sizeof (TexKinds) / sizeof (TexKinds [0]) == TEXTAG_COUNT);
Lang_js = language;
buildTexKeywordHash ();
+
+ lastPart = vStringNew();
+ lastChapter = vStringNew();
+ lastSection = vStringNew();
+ lastSubS = vStringNew();
+ lastSubSubS = vStringNew();
}
static void findTexTags (void)
{
tokenInfo *const token = newToken ();
exception_t exception;
-
+
exception = (exception_t) (setjmp (Exception));
while (exception == ExceptionNone)
parseTexFile (token);
Please sign in to comment.
Something went wrong with that request. Please try again.