-
Notifications
You must be signed in to change notification settings - Fork 9
/
getljxml.rb
executable file
·44 lines (35 loc) · 2.05 KB
/
getljxml.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/ruby
require 'cgi'
# Downloads a LiveJournal journal as one XML file per month.
#
# Flow: log in through the flat API with curl, store the returned session
# value in a Netscape-format cookies.txt, then POST to the export URL once per
# month from January of `firstyear` through December of the current year,
# writing each response to "<year>-<month>.xml" in the working directory.

# Put your username and password here
lj_username = 'USERNAME' # replace USERNAME with your actual username
lj_password = 'PASSWORD' # replace PASSWORD with your actual password
firstyear = 2013 # Change this to the year your LJ starts

# You shouldn't have to change these, but here they are just in case!
# NOTE(review): both URLs are plain http, so the credentials and the session
# cookie are sent unencrypted -- consider https if the endpoints accept it
# (TODO confirm).
lj_login_url = 'http://www.livejournal.com/interface/flat' # LJ API url
lj_archive_url = 'http://www.livejournal.com/export_do.bml' # XML download URL

# Build login string, then log into LJ and save the cookie.
loginstring = "mode=sessiongenerate&user=#{CGI.escape(lj_username)}&password=#{CGI.escape(lj_password)}"

# curl is started from an argument array, so no shell is involved and the
# arguments need no quoting (String#dump produces Ruby escapes, not shell ones).
lj_session_cookie = IO.popen(['curl', '--data', loginstring, lj_login_url], &:read).lines

# A successful login is detected by the first response line containing
# "ljsession"; the second line is then used as the cookie value. Requiring the
# second line to be present turns a truncated response into a login failure
# rather than a TypeError.
if lj_session_cookie[0] =~ /ljsession/ && lj_session_cookie[1]
  File.open('cookies.txt', 'w') do |f|
    # The value still carries its trailing newline, so puts adds no extra one.
    f.puts("#HttpOnly_.livejournal.com\tTRUE\t/\tFALSE\t0\tljsession\t#{lj_session_cookie[1]}")
  end
else
  abort('ERROR: Could not log in to LiveJournal.')
end

# Make sure we actually logged in.
# File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
unless File.exist?('cookies.txt')
  abort('Error: Could not log in to LiveJournal')
end

# Iterate over years, starting with firstyear and running up to the current year
(firstyear..Time.now.year).each do |current_year|
  # In each month of each year, send POST data that will fetch the LJ XML for that month.
  (1..12).each do |current_month|
    poststring = "what=journal&year=#{current_year}&month=#{current_month}" \
                 '&format=xml&header=on&encid=2&field_eventtime=on&field_subject=on&field_event=on'

    # Run curl with the current month's POST info, following redirects (-L)
    # and sending the saved session cookie.
    xml = IO.popen(['curl', '-L', '--cookie', 'cookies.txt', '--data', poststring, lj_archive_url], &:read)

    # Surface transport failures instead of silently leaving an empty file;
    # the file is still written so the set of output files is unchanged.
    warn "WARNING: curl failed for #{current_year}-#{current_month}" unless $?.success?

    # Dump the result into a file named e.g. "2006-4.xml"
    File.open("#{current_year}-#{current_month}.xml", 'w') do |f|
      f.puts xml.encode('UTF-8')
    end

    puts "Waiting for 1 second so Livejournal doesn't have a fit..."
    sleep(1)
  end
end