merged several changes submitted by @edbacher

mangini · Oct 9, 2013 · 7637168 · 7637168
1 parent c2139cf
commit 7637168
Showing 1 changed file with 71 additions and 17 deletions.
diff --git a/converttomarkdown.gapps b/converttomarkdown.gapps
@@ -1,19 +1,40 @@
+/*
+Usage: 
+  Adding this script to your doc: 
+    - Tools > Script Manager > New
+    - Select "Blank Project", then paste this code in and save.
+  Running the script:
+    - Tools > Script Manager
+    - Select "ConvertToMarkdown" function.
+    - Click Run button.
+    - Converted doc will be mailed to you. Subject will be "[MARKDOWN_MAKER]...".
+*/
+
 function ConvertToMarkdown() {
   var numChildren = DocumentApp.getActiveDocument().getActiveSection().getNumChildren();
   var text = "";
   var inSrc = false;
   var inClass = false;
   var globalImageCounter = 0;
   var globalListCounters = {};
+  // edbacher: added a variable for indent in src <pre> block. Let style sheet do margin.
+  var srcIndent = "";
 
   var attachments = [];
 
-  for (var i=0; i<numChildren; i++) {
+  // Walk through all the child elements of the doc.
+  for (var i = 0; i < numChildren; i++) {
     var child = DocumentApp.getActiveDocument().getActiveSection().getChild(i);
     var result = processParagraph(i, child, inSrc, globalImageCounter, globalListCounters);
     globalImageCounter += (result && result.images) ? result.images.length : 0;
     if (result!==null) {
-      if (result.source==="start" && !inSrc) {
+      if (result.sourcePretty==="start" && !inSrc) {
+        inSrc=true;
+        text+="<pre class=\"prettyprint\">\n";
+      } else if (result.sourcePretty==="end" && inSrc) {
+        inSrc=false;
+        text+="</pre>\n\n";
+      } else if (result.source==="start" && !inSrc) {
         inSrc=true;
         text+="<pre>\n";
       } else if (result.source==="end" && inSrc) {
@@ -28,7 +49,7 @@ function ConvertToMarkdown() {
       } else if (inClass) {
         text+=result.text+"\n\n";
       } else if (inSrc) {
-        text+=("    "+escapeHTML(result.text)+"\n");
+        text+=(srcIndent+escapeHTML(result.text)+"\n");
       } else if (result.text && result.text.length>0) {
         text+=result.text+"\n\n";
       }
@@ -52,33 +73,55 @@ function ConvertToMarkdown() {
   MailApp.sendEmail(Session.getActiveUser().getEmail(), 
                     "[MARKDOWN_MAKER] "+DocumentApp.getActiveDocument().getName(), 
                     "Your converted markdown document is attached (converted from "+DocumentApp.getActiveDocument().getUrl()+")"+
-                    "\n\nDon't know how to use the format options? See http://github.com/mangini/gdocs2md#readme\n",
+                    "\n\nDon't know how to use the format options? See http://github.com/mangini/gdocs2md\n",
                     { "attachments": attachments });
 }
 
 /**
  * Escapes the characters that are significant in HTML markup so that the
  * given text is displayed literally (for example, source lines emitted
  * inside a <pre> block).
  *
  * @param {string} text - Raw text to escape.
  * @returns {string} The text with "&", "<" and ">" replaced by HTML entities.
  */
 function escapeHTML(text) {
   // "&" must be replaced first; otherwise the ampersands introduced by the
   // "&lt;" / "&gt;" replacements below would themselves be escaped again.
   return text
     .replace(/&/g, '&amp;')
     .replace(/</g, '&lt;')
     .replace(/>/g, '&gt;');
 }
 
+// Process each child element (not just paragraphs).
 function processParagraph(index, element, inSrc, imageCounter, listCounters) {
+  // First, check for things that require no processing.
   if (element.getNumChildren()==0) {
     return null;
-  }
-
+  }  
+  // Punt on TOC.
   if (element.getType() === DocumentApp.ElementType.TABLE_OF_CONTENTS) {
     return {"text": "[[TOC]]"};
   }
 
+  // Set up for real results.
   var result = {};
-
   var pOut = "";
   var textElements = [];
   var imagePrefix = "image_";
 
-  for (var i=0; i<element.getNumChildren(); i++) {
+  // Handle Table elements. Pretty simple-minded now, but works for simple tables.
+  // Note that Markdown does not process within block-level HTML, so it probably 
+  // doesn't make sense to add markup within tables.
+  if (element.getType() === DocumentApp.ElementType.TABLE) {
+    textElements.push("<table>\n");
+    var nCols = element.getChild(0).getNumCells();
+    for (var i = 0; i < element.getNumChildren(); i++) {
+      textElements.push("  <tr>\n");
+      // process this row
+      for (var j = 0; j < nCols; j++) {
+        textElements.push("    <td>" + element.getChild(i).getChild(j).getText() + "</td>\n");
+      }
+      textElements.push("  </tr>\n");
+    }
+    textElements.push("</table>\n");
+  }
+
+  // Process various types (ElementType).
+  for (var i = 0; i < element.getNumChildren(); i++) {
     var t=element.getChild(i).getType();
-    if (t === DocumentApp.ElementType.TEXT || t === DocumentApp.ElementType.TABLE_ROW) {
-      // TODO: proper handling of table_row
+
+    if (t === DocumentApp.ElementType.TABLE_ROW) {
+      // do nothing: already handled TABLE_ROW
+    } else if (t === DocumentApp.ElementType.TEXT) {
       var txt=element.getChild(i);
       pOut += txt.getText();
       textElements.push(txt);
@@ -109,23 +152,28 @@ function processParagraph(index, element, inSrc, imageCounter, listCounters) {
     } else if (t === DocumentApp.ElementType.FOOTNOTE) {
       textElements.push(' (NOTE: '+element.getChild(i).getFootnoteContents().getText()+')');
     } else {
-      throw "Paragraph "+index+" of type "+element.getType()+" has an unsupported child: "+t+" "+(element.getChild(i)["getText"] ? element.getChild(i).getText():'')+" index="+index;
+      throw "Paragraph "+index+" of type "+element.getType()+" has an unsupported child: "
+      +t+" "+(element.getChild(i)["getText"] ? element.getChild(i).getText():'')+" index="+index;
     }
   }
 
   if (textElements.length==0) {
+    // Isn't result empty now?
     return result;
   }
 
-
+  // evb: Add source pretty too. (And abbreviations: src and srcp.)
   // process source code block:
-  if (/^\s*---\s+source code\s*$/.test(pOut)) {
+  if (/^\s*---\s+srcp\s*$/.test(pOut) || /^\s*---\s+source pretty\s*$/.test(pOut)) {
+    result.sourcePretty = "start";
+  } else if (/^\s*---\s+src\s*$/.test(pOut) || /^\s*---\s+source code\s*$/.test(pOut)) {
     result.source = "start";
   } else if (/^\s*---\s+class\s+([^ ]+)\s*$/.test(pOut)) {
     result.inClass = "start";
     result.className = RegExp.$1;
   } else if (/^\s*---\s*$/.test(pOut)) {
     result.source = "end";
+    result.sourcePretty = "end";
     result.inClass = "end";
   } else if (/^\s*---\s+jsperf\s*([^ ]+)\s*$/.test(pOut)) {
     result.text = '<iframe style="width: 100%; height: 340px; overflow: hidden; border: 0;" '+
@@ -149,12 +197,14 @@ function processParagraph(index, element, inSrc, imageCounter, listCounters) {
   return result;
 }
 
+// Add correct prefix to list items.
 function findPrefix(inSrc, element, listCounters) {
   var prefix="";
   if (!inSrc) {
     if (element.getType()===DocumentApp.ElementType.PARAGRAPH) {
       var paragraphObj = element;
       switch (paragraphObj.getHeading()) {
+        // Add a # for each heading level. No break, so we accumulate the right number.
         case DocumentApp.ParagraphHeading.HEADING6: prefix+="#";
         case DocumentApp.ParagraphHeading.HEADING5: prefix+="#";
         case DocumentApp.ParagraphHeading.HEADING4: prefix+="#";
@@ -170,10 +220,13 @@ function findPrefix(inSrc, element, listCounters) {
         prefix += "    ";
       }
       var gt = listItem.getGlyphType();
-      if (gt === DocumentApp.GlyphType.BULLET || gt === DocumentApp.GlyphType.HOLLOW_BULLET || 
-          gt === DocumentApp.GlyphType.SQUARE_BULLET) {
+      // Bullet list (<ul>):
+      if (gt === DocumentApp.GlyphType.BULLET
+          || gt === DocumentApp.GlyphType.HOLLOW_BULLET
+          || gt === DocumentApp.GlyphType.SQUARE_BULLET) {
         prefix += "* ";
       } else {
+        // Ordered list (<ol>):
         var key = listItem.getListId() + '.' + listItem.getNestingLevel();
         var counter = listCounters[key] || 0;
         counter++;
@@ -197,7 +250,7 @@ function processTextElement(inSrc, txt) {
 
   var attrs=txt.getTextAttributeIndices();
   var lastOff=pOut.length;
-  
+
   for (var i=attrs.length-1; i>=0; i--) {
     var off=attrs[i];
     var url=txt.getLinkUrl(off);
@@ -223,7 +276,8 @@ function processTextElement(inSrc, txt) {
     if (txt.isBold(off)) {
       var d1 = d2 = "**";
       if (txt.isItalic(off)) {
-        d1 = "** *"; d2 = "* **";
+        // edbacher: changed this to handle bold italic properly.
+        d1 = "**_"; d2 = "_**";
       }
       pOut=pOut.substring(0, off)+d1+pOut.substring(off, lastOff)+d2+pOut.substring(lastOff);
     } else if (txt.isItalic(off)) {