-
Notifications
You must be signed in to change notification settings - Fork 4
/
index.js
123 lines (114 loc) · 3.61 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/**
* Extracts all attachments from a .mbox file.
* This is designed to process HUGE mbox files; it was created to process an 80.6 GB file exported from Gmail.
*
* Created by Rick Brown 2017-06-10.
*/
var fs = require("fs"),
MailParser = require("mailparser").MailParser,
Mbox = require("node-mbox"),
path = require("path");
/**
 * Extracts attachments from an mbox and writes them to an output directory.
 *
 * The output directory is created if it does not exist (this happens even when
 * `dryRun` is set). When no `outputDir` is supplied - including when `config`
 * itself is missing - a message is printed and nothing else happens.
 *
 * @param {Object} config An object with properties as shown below:
 * @param {String} config.outputDir The path to the output directory.
 * @param {Boolean} [config.dryRun] If true do not write any files to the output directory.
 * @param {Boolean} [config.subDirs] If true create a sub directory for each day.
 * @param {String} [config.mboxFile] The path to the mbox file. If not provided you must pipe the mbox on stdin.
 */
function extract(config) {
    var mbox;
    // Guard against a missing config object as well as a missing outputDir so
    // that callers get the usage message rather than a TypeError.
    if (!config || !config.outputDir) {
        console.log("Must specify outputDir");
        return;
    }
    ensureDirectoryExistence(config.outputDir);
    // Coerce the optional flags to real booleans before handing them on.
    mbox = instantiateMbox(config.outputDir, !!config.dryRun, !!config.subDirs);
    if (!config.mboxFile) {
        console.log("No mbox file provided. Waiting for stdin.");
    }
    streamMbox(mbox, config.mboxFile);
}
/**
 * Creates an instance of Mbox ready to run.
 *
 * A "message" listener is registered that feeds every message into a fresh
 * MailParser. Each attachment that has a filename is logged and, unless
 * `dryRun` is set, piped to a file of that name. Existing files with the same
 * name are overwritten (fs.createWriteStream is used with default flags).
 *
 * @param {String} outputDir The path to the output directory.
 * @param {Boolean} dryRun If true do not write any files to the output directory.
 * @param {Boolean} subDirs If true create a sub directory for each day (YYYY-MM-DD, local time).
 * @returns {Mbox} An instance of node-mbox.
 */
function instantiateMbox(outputDir, dryRun, subDirs) {
    var mbox = new Mbox();
    mbox.on("message", function (msg) {
        // currentDir is per message; the "headers" listener below may redirect
        // it to a per-day sub directory before any attachment is handled.
        var currentDir = outputDir,
            mailParser = new MailParser({ streamAttachments: true });
        if (subDirs) {
            mailParser.on("headers", function (headers) {
                var dayDir, mailDate, headerDate = headers.get("date");
                if (headerDate) {
                    mailDate = new Date(headerDate);
                    // new Date() does not throw on unparseable input, it yields
                    // an Invalid Date - check explicitly so we never build a
                    // "NaN-aN-aN" directory.
                    if (isNaN(mailDate.getTime())) {
                        console.error("Could not parse date ", headerDate);
                    } else {
                        // Local-time getters: the day bucket follows this machine's timezone.
                        dayDir = path.join(outputDir, [
                            mailDate.getFullYear(),
                            pad(mailDate.getMonth() + 1),
                            pad(mailDate.getDate())
                        ].join("-"));
                        try {
                            ensureDirectoryExistence(dayDir);
                            // Only switch directories once we know it exists.
                            currentDir = dayDir;
                        } catch (ex) {
                            console.error("Could not create directory ", dayDir, ex.message);
                        }
                    }
                }
                console.log(headers.get("date"));
            });
        }
        mailParser.on("data", function (data) {
            var filename, fileToWrite, myFile;
            if (data.type !== "attachment") {
                return;
            }
            if (data.filename) {
                filename = data.filename;
                if (process.platform !== "win32") {
                    filename = filename.replace(/\//g, "-");
                }
                // Attachment names come from untrusted mail; drop any remaining
                // directory components so the file cannot escape currentDir.
                filename = path.basename(filename);
                fileToWrite = path.join(currentDir, filename);
                console.log(filename);
                if (!dryRun) {
                    myFile = fs.createWriteStream(fileToWrite);
                    // Without a listener a single unwritable name (too long,
                    // illegal characters, ...) would crash the whole run.
                    myFile.on("error", function (err) {
                        console.error("Could not write ", fileToWrite, err.message);
                    });
                    data.content.pipe(myFile);
                }
            }
            // Release every attachment, including ones without a filename:
            // mailparser's streaming API expects release() for each attachment
            // it emits before it carries on with the message.
            data.release();
        });
        mailParser.write(msg);
        mailParser.end();
    });
    return mbox;
}
/**
 * Left-pads a value with a zero and keeps the last two characters.
 * Intended for month / day numbers, e.g. 7 becomes "07" and 12 stays "12".
 * Note that anything longer than two characters is truncated to its last two.
 * @param {Number|String} num The value to pad.
 * @returns {String} The last (up to) two characters of "0" + num.
 */
function pad(num) {
    var text = "0" + num;
    return text.substring(text.length - 2);
}
/**
 * Starts feeding mbox data into the given Mbox instance; call this once its
 * event listeners are in place.
 *
 * With no `mboxFile` the data is read from stdin. With a path that does not
 * exist a message is printed and nothing is piped. No "error" listener is
 * attached to the source stream here.
 *
 * @param {Mbox} mbox An instance of node-mbox.
 * @param {String} [mboxFile] The path to the mbox file. If not provided you must pipe the mbox on stdin.
 */
function streamMbox(mbox, mboxFile) {
    var source;
    if (mboxFile) {
        if (!fs.existsSync(mboxFile)) {
            console.log("Can't find your mbox file", mboxFile);
            return;
        }
        source = fs.createReadStream(mboxFile);
    } else {
        source = process.stdin;
    }
    source.pipe(mbox);
}
/**
 * Ensures the directory exists and creates it if it doesn't.
 *
 * Missing parent directories are created as well, so a nested output path
 * such as "out/2017/attachments" works even when "out" does not exist yet.
 * If something already exists at the path nothing is done.
 *
 * @param {String} dirName The path to the directory.
 */
function ensureDirectoryExistence(dirName) {
    if (!fs.existsSync(dirName)) {
        // recursive: create any missing ancestors instead of failing with ENOENT.
        fs.mkdirSync(dirName, { recursive: true });
    }
}
// Public API of this module: exposes extract as its only export.
module.exports = {
extract: extract
};