Skip to content
This repository has been archived by the owner on Jul 16, 2018. It is now read-only.

Commit

Permalink
Support multiple users.
Browse files: browse the repository at this point in the history
  • Loading branch information
kvs committed Jan 9, 2013
1 parent d2bf1af commit 2888cc0
Showing 1 changed file with 66 additions and 47 deletions.
113 changes: 66 additions & 47 deletions eboks-backup.rb
Expand Up @@ -17,6 +17,7 @@
require 'bundler'; Bundler.setup
require 'selenium-webdriver'
require 'mechanize'
require 'fileutils'

## CONFIG

Expand Down Expand Up @@ -54,55 +55,73 @@ def cookies_from_selenium
# Re-fetch page now that the login-cookies have been added
page = agent.get('https://min.e-boks.dk/inbox.aspx')

# Process each folder - the second set of <div class="nodes"> is "Arkivmapper", which is what we're going to back up
page.search('//div[@id="folders"]/div[@class="nodes"][2]//span[@class="node"]/a').each do |folderlink|
next if folderlink["title"] == "Arkivmapper"

foldertitle = folderlink["title"]
folderpath = "#{BACKUP_PATH}/#{foldertitle}"
folderpath_hidden = "#{folderpath}/.documents"

puts "\nNavigating to folder '#{foldertitle}'"
Dir.mkdir folderpath unless Dir.exist? folderpath
Dir.mkdir folderpath_hidden unless Dir.exist? folderpath_hidden

folderpage = agent.get("https://min.e-boks.dk/inbox.aspx#{folderlink['href']}")
folderpage.search('//div[@id="messages"]/ul/li/dl').each do |msg|
elm = msg.xpath('.//dt/label/input').first
if elm["name"] != "did"
$stderr.puts "Error. HTML may have changed, and script needs updating."
$stderr.puts reason
exit 1
end
page.search('//div[@id="folders_options_toolbar"]//a[@class="archive"]').each do |userlink|
username = userlink['title']
userlink = userlink['href']

next if userlink =~ /^javascript/

page = agent.get(userlink)

# Process each folder - the second set of <div class="nodes"> is "Arkivmapper", which is what we're going to back up
page.search('//div[@id="folders"]/div[@class="nodes"][2]//span[@class="node"]/a').each do |folderlink|
next if folderlink["title"] == "Arkivmapper"

foldertitle = folderlink["title"]
folderpath = "#{BACKUP_PATH}/#{username}/#{foldertitle}"
folderpath_hidden = "#{folderpath}/.documents"

puts "\nNavigating to folder '#{username}/#{foldertitle}'"
FileUtils.mkdir_p folderpath unless Dir.exist? folderpath
FileUtils.mkdir_p folderpath_hidden unless Dir.exist? folderpath_hidden

folderpage = agent.get("https://min.e-boks.dk/inbox.aspx#{folderlink['href']}")
folderpage.search('//div[@id="messages"]/ul/li/dl').each do |msg|
elm = msg.xpath('.//dt/label/input').first
if elm["name"] != "did"
$stderr.puts "Error. HTML may have changed, and script needs updating."
$stderr.puts reason
exit 1
end

did = elm["value"]
title = msg.xpath('.//dt/label/span').first.content
sender = msg.xpath('.//dd[@class="content"]/span').first.content
links = msg.xpath('.//dd[@class="content"]/ul/li/a')
date = msg.xpath('.//dd[@class="actions"]/span').first.content.split('-').reverse.join('-')

puts " - found Document ID: #{did} from #{date} (\"#{sender} - #{title}\")"

links.each do |link|
doctitle = title
doctitle = "#{title} (#{link["title"]})" if link["title"] != title # Attachment
query_args = link["href"].split('?', 2)[1]
duid = query_args.match(/duid=(\w+)&/).captures.first
url = "https://download.e-boks.dk/privat/download.aspx?#{query_args.gsub('&', '&amp;')}" # don't ask - the link is pseudo-escaped from e-boks's side
file = "#{did}-#{duid}.pdf"

if File.exist? "#{folderpath_hidden}/#{file}"
puts " already downloaded, skipping."
next
else
puts " downloading #{did} (#{doctitle})"
File.open("#{folderpath_hidden}/#{file}", "w") { |f| f.write agent.get_file(url) }

doctitle.gsub!(/\//, ':')
if SYMLINK
File.symlink(".documents/#{file}", "#{folderpath}/#{date} - #{sender} - #{doctitle}.pdf")
did = elm["value"]
title = msg.xpath('.//dt/label/span').first.content
sender = msg.xpath('.//dd[@class="content"]/span').first.content
links = msg.xpath('.//dd[@class="content"]/ul/li/a')
date = msg.xpath('.//dd[@class="actions"]/span').first.content.split('-').reverse.join('-')

puts " - found Document ID: #{did} from #{date} (\"#{sender} - #{title}\")"

links.each do |link|
doctitle = title
doctitle = "#{title} (#{link["title"]})" if link["title"] != title # Attachment
query_args = link["href"].split('?', 2)[1]
duid = query_args.match(/duid=(\w+)&/).captures.first
url = "https://download.e-boks.dk/privat/download.aspx?#{query_args.gsub('&', '&amp;')}" # don't ask - the link is pseudo-escaped from e-boks's side
file = "#{did}-#{duid}.pdf"

if File.exist? "#{folderpath_hidden}/#{file}"
puts " already downloaded, skipping."
next
else
File.link("#{folderpath_hidden}/#{file}", "#{folderpath}/#{date} - #{sender} - #{doctitle}.pdf")
puts " downloading #{did} (#{doctitle})"
File.open("#{folderpath_hidden}/#{file}", "w") { |f| f.write agent.get_file(url) }

doctitle.gsub!(/\//, ':')

# Determine filename, uniquifying if necessary
filename = "#{date} - #{sender} - #{doctitle}"
i = 2
while File.exist?("#{folderpath}/#{filename}.pdf")
filename = "#{date} - #{sender} - #{doctitle} (#{i})"
i += 1
end

if SYMLINK
File.symlink(".documents/#{file}", "#{folderpath}/#{filename}.pdf")
else
File.link("#{folderpath_hidden}/#{file}", "#{folderpath}/#{filename}.pdf")
end
end
end
end
Expand Down

0 comments on commit 2888cc0

Please sign in to comment.