-
Notifications
You must be signed in to change notification settings - Fork 0
/
export-tweets.js
executable file
·116 lines (94 loc) · 2.64 KB
/
export-tweets.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env node
const fs = require('fs');
// Input file read by the script; a relative path, so it is resolved against
// the current working directory. Presumably the tweet data file from a
// Twitter archive export — TODO confirm.
const fileName = 'tweet.js';
// Root directory under which the per-tweet JSON files are written, in
// <year>/<month>/<day> subdirectories.
const pathPrefix = './converted_tweets';
/**
 * Sort comparator that orders two objects by their `created_at` date,
 * oldest first.
 *
 * @param {{created_at: string}} a - First item; `created_at` is read directly off it.
 * @param {{created_at: string}} b - Second item.
 * @returns {number} Millisecond difference `a - b`; NaN when either date
 *   cannot be parsed (including a missing `created_at`).
 */
const chronTweets = (a, b) => {
  const first = new Date(a.created_at);
  const second = new Date(b.created_at);
  // Subtracting Date objects coerces both to epoch milliseconds.
  return first - second;
};
/**
 * Left-pads a value with a single "0" when it compares as less than 10,
 * e.g. for month and day path segments.
 *
 * @param {number|string} n - Value to format; numeric strings are compared numerically.
 * @returns {string} `n` as a string, prefixed with "0" when `n < 10`.
 */
const humanize = (n) => {
  const prefix = n < 10 ? "0" : "";
  return prefix + n;
};
/**
 * Flattens one wrapped tweet record into the shape used by the rest of the
 * script.
 *
 * @param {{tweet: object}} t - Record whose `tweet` property holds the tweet data.
 * @returns {object} Object with `id_str`, `created_at`, `full_text` copied from
 *   the tweet, plus `epochDate` (ms since epoch), `year`, `month` (1-12) and
 *   `day` derived from `created_at`, and `original_tweet` referencing the
 *   untouched tweet object.
 */
const presenter = (t) => {
  const { tweet } = t;
  const { id_str, created_at, full_text } = tweet;
  const createdOn = new Date(created_at);

  return {
    id_str,
    created_at,
    full_text,
    epochDate: createdOn.valueOf(),
    // The calendar parts use the Date local-time getters, so they follow the
    // time zone of the machine running the script.
    year: createdOn.getFullYear(),
    month: createdOn.getMonth() + 1, // getMonth() is zero-based
    day: createdOn.getDate(),
    original_tweet: tweet,
  };
};
/**
 * Converts raw archive records into presented tweets, ordered oldest first.
 *
 * The records are mapped through `presenter` before sorting: `chronTweets`
 * reads `created_at` directly off its arguments, and that field only sits at
 * the top level once the `{ tweet: ... }` wrapper has been removed. Sorting
 * the wrapped records makes the comparator return NaN, which leaves the order
 * unchanged. Sorting the mapped copy also leaves the caller's array untouched.
 *
 * @param {Array<{tweet: object}>} tweets - Raw wrapped tweet records.
 * @returns {object[]} New array of presented tweets in chronological order.
 */
const processedTweets = (tweets) => tweets.map(presenter).sort(chronTweets);
/**
 * Builds a function that groups a collection by the value of one property.
 *
 * Group membership is tested with an own-property check rather than `in`,
 * so a key that matches something inherited from Object.prototype (for
 * example "toString" or "constructor") starts a new group instead of
 * failing when `.push` is called on the inherited function.
 * Note: a key of "__proto__" is still not supported on the plain result object.
 *
 * @param {string} token - Name of the property to group by.
 * @returns {function(Iterable<object>): Object<string, object[]>} Function that
 *   maps a collection to an object whose keys are the distinct `item[token]`
 *   values and whose values are the matching items in input order.
 */
const grouper = token => (coll) => {
  const groups = {};
  for (const item of coll) {
    const key = item[token];
    if (Object.prototype.hasOwnProperty.call(groups, key)) {
      groups[key].push(item);
    } else {
      groups[key] = [item];
    }
  }
  return groups;
};
/**
 * Writes every tweet in a year -> month -> day tree to its own JSON file at
 * `<pathPrefix>/<year>/<MM>/<DD>/<year>-<MM>-<DD>-<id_str>.tweet.json`.
 *
 * Directories are created with `recursive: true`, so re-running the export
 * over an existing output tree does not fail with EEXIST. Files are written
 * synchronously: starting one asynchronous write per tweet without any bound
 * can exhaust file descriptors on a large archive, and a synchronous failure
 * propagates to the caller instead of being thrown from inside a callback.
 *
 * @param {Object<string, Object<string, Object<string, object[]>>>} o - Tweets
 *   grouped by year, then month, then day; each tweet needs `id_str` and
 *   `original_tweet`.
 * @returns {void}
 * @throws {Error} If a directory cannot be created or a file cannot be written.
 */
const statsify = (o) => {
  for (const year in o) {
    for (const month in o[year]) {
      for (const day in o[year][month]) {
        const mm = humanize(month);
        const dd = humanize(day);
        const dir = `${pathPrefix}/${year}/${mm}/${dd}`;
        // Creates the year and month levels as needed; a no-op when present.
        fs.mkdirSync(dir, { recursive: true });
        for (const tweet of o[year][month][day]) {
          const fp = `${dir}/${year}-${mm}-${dd}-${tweet.id_str}.tweet.json`;
          fs.writeFileSync(fp, JSON.stringify(tweet.original_tweet));
          console.log(`Wrote ${fp}`);
        }
      }
    }
  }
};
/**
 * Arranges presented tweets into a nested year -> month -> day tree, with
 * the tweets of each day ordered by ascending `epochDate`.
 *
 * @param {object[]} tweets - Tweets carrying `year`, `month`, `day` and `epochDate`.
 * @returns {Object<string, Object<string, Object<string, object[]>>>} The tree,
 *   keyed by year, then month, then day.
 */
const regroupByYMD = (tweets) => {
  const oldestFirst = (left, right) => left.epochDate - right.epochDate;
  const tree = {};
  const years = grouper('year')(tweets);

  Object.keys(years).forEach((year) => {
    const months = grouper('month')(years[year]);
    Object.keys(months).forEach((month) => {
      const days = grouper('day')(months[month]);
      Object.keys(days).forEach((day) => {
        // sort() works in place, so the day's array is ordered where it sits.
        days[day].sort(oldestFirst);
      });
      // Swap the flat month list for its per-day breakdown.
      months[month] = days;
    });
    tree[year] = months;
  });

  return tree;
};
/**
 * Strips everything before the first "[" so that a file whose JSON array is
 * preceded by other text (such as a JavaScript assignment) can be handed to
 * JSON.parse.
 *
 * When the text contains no "[", it is returned unchanged. Passing the -1
 * from indexOf straight to slice would return only the last character and
 * hide the real content from whatever error JSON.parse reports next.
 *
 * @param {string} dirty - Raw file contents.
 * @returns {string} The text from the first "[" onwards, or `dirty` itself
 *   when it has no "[".
 */
const sanitizeData = (dirty) => {
  const bracketPos = dirty.indexOf('[');
  if (bracketPos === -1) {
    return dirty;
  }
  return dirty.slice(bracketPos);
};
// Create the output root. An already-existing directory is fine; any other
// failure (permissions, a missing parent, ...) is reported and aborts the run.
try {
  fs.mkdirSync(pathPrefix);
} catch (e) {
  // `!e.code === 'EEXIST'` negates e.code first, giving a boolean that never
  // equals the string, so the check has to be written with `!==`.
  if (e.code !== 'EEXIST') {
    console.error(e.code);
    process.exit(1);
  }
}
// Entry point: read the input file, strip any text before the JSON array,
// parse it, then present, regroup and write the tweets out.
fs.readFile(fileName, 'utf8', (err, data) => {
  // On a failed read `data` is undefined; stop here with the actual cause
  // rather than failing later inside sanitizeData with an unrelated TypeError.
  if (err) {
    console.error(`Could not read ${fileName}: ${err.message}`);
    process.exit(1);
  }
  const unprocessedTweets = JSON.parse(sanitizeData(data));
  const tweets = processedTweets(unprocessedTweets);
  statsify(regroupByYMD(tweets));
});