Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 208 lines (170 sloc) 5.32 KB
#!/usr/bin/env ruby
require 'rubygems'
require 'slugify'
require 'bundler/setup'
require_relative '../lib/helper_download'
require_relative '../lib/helper_extract'
require_relative '../lib/helper_selector'
# Extracts the Paris-Web 2009 programme (conference sessions, workshops,
# speaker profiles and talk resources) from HTML pages stored under
# ./data/2009 and records the merged result as JSON.
#
# DOM lookups go through the `element` and `attribute` helpers, which are
# expected to come from HelperSelector (defined outside this file).
class Selector2009
  include HelperSelector

  # Local file name => page name on the public site, used to rebuild URLs.
  DAY_PAGES = {
    'sessions01.html' => '-Jeudi-8-octobre-.html',
    'sessions02.html' => '-Vendredi-9-octobre-.html',
    'workshops.html' => '-Samedi-10-octobre-.html'
  }.freeze

  # Slugs built from session authors that must be rewritten to match the
  # slugs produced from the resources table (which yields a double dash).
  RESSOURCE_SLUG_FIXES = {
    'tristan-nitot-paul-rouget' => 'tristan-nitot--paul-rouget',
    'aurelien-levy-sebastien-delorme' => 'aurelien-levy--sebastien-delorme',
    'florent-verschelde-benjamin-de-cock' => 'florent-verschelde--benjamin-de-cock'
  }.freeze

  # Resolves the input file locations relative to the current directory.
  def initialize
    @sessions_filepath = Dir[File.expand_path('./data/2009/sessions*.html')]
    @authors_filepath = Dir[File.expand_path('./data/2009/authors/*.html')]
    @workshop_filepath = File.expand_path('./data/2009/workshops.html')
    @ressources_filepath = File.expand_path('./data/2009/ressources.html')
  end

  # Removes tabs, turns CR/LF into spaces and strips surrounding whitespace.
  #
  # @param text [String]
  # @return [String]
  def clean_text(text)
    text.delete("\t").tr("\r\n", ' ').strip
  end

  # Returns the event title taken from its <h3>.
  # Note: this removes the span/abbr/small children of the <h3> from the
  # document, and every '(' and ')' character from the resulting text.
  #
  # @param event [Object] event node as returned by `element`
  # @return [String]
  def title(event)
    h3 = element(event, 'h3')
    element(h3, 'span,abbr,small').remove
    clean_text(h3.text).delete('()').strip
  end

  # Returns the text of every `.description p` paragraph except the first,
  # or nil when the event has fewer than two such paragraphs.
  #
  # @param event [Object] event node
  # @return [String, nil]
  def description(event)
    paragraphs = element(event, '.description p')
    return nil if paragraphs.length < 2

    clean_text(paragraphs[1..-1].text)
  end

  # Lists the authors linked from the event description. Links whose href
  # starts with "http" are skipped; the remaining links are treated as
  # speaker links.
  #
  # @param event [Object] event node
  # @return [Array<Hash>, nil] `{ name: String }` entries, or nil when the
  #   event has fewer than two description paragraphs
  def authors(event)
    paragraphs = element(event, '.description p')
    return nil if paragraphs.length < 2

    speaker_links = element(paragraphs, 'a').reject do |link|
      link.attr('href') =~ /^http/
    end
    speaker_links.map { |link| { name: clean_text(link.text) } }
  end

  # Builds the public URL of an event from its id attribute and the name of
  # the local file it was read from.
  #
  # @param title [Object] the event node (despite the name); must respond
  #   to `attr`
  # @param filepath [String] path of the local HTML file
  # @return [String, nil] nil when the node has no id or the file name is
  #   not one of the known pages
  def url(title, filepath)
    anchor = title.attr('id')
    return nil if anchor.nil?

    page = DAY_PAGES[File.basename(filepath)]
    return nil if page.nil?

    "https://www.paris-web.fr/2009/#{page}##{anchor}"
  end

  # Collects conference sessions from every sessions file, appends the
  # workshops, and drops entries that have no description.
  #
  # @return [Array<Hash>]
  def sessions
    conferences = @sessions_filepath.flat_map do |filepath|
      doc = parse_html(filepath)
      element(doc, '#contenu div[id].vevent').map do |event|
        event_record(event, 'conference', title(event), filepath)
      end
    end

    (conferences + workshops).reject { |session| session[:description].nil? }
  end

  # Collects workshops from the workshops page. Only events whose cleaned
  # heading still contains "()" once its span/small/abbr children are
  # removed are kept.
  #
  # @return [Array<Hash>]
  def workshops
    doc = parse_html(@workshop_filepath)
    element(doc, '.vevent[id^="hcalendar"]').each_with_object([]) do |event, found|
      element(event, 'h3 span,h3 small,h3 abbr').remove
      heading = clean_text(element(event, 'h3').text)
      next unless heading['()']

      found << event_record(event, 'workshop', heading.delete('()').strip,
                            @workshop_filepath)
    end
  end

  # Replaces each session's name-only authors with the full profile (name,
  # picture, description) read from the speaker pages, matched by slugified
  # name. Authors without a matching profile are dropped, so the list never
  # contains nil entries (add_ressources reads author[:name] on each one).
  #
  # @param sessions [Array<Hash>] sessions whose :authors is an Array
  # @return [Array<Hash>] the same session hashes, updated in place
  def add_authors(sessions)
    profiles = author_profiles
    sessions.map do |session|
      matched = session[:authors].map { |author| profiles[author[:name].slugify] }
      session[:authors] = matched.compact
      session
    end
  end

  # Attaches slides/video links from the resources table to every
  # non-workshop session, matched on the slug of the joined author names.
  # When nothing matches, the known slugs and the missing slug are printed
  # with `ap` and the session gets `ressources: nil`.
  #
  # @param sessions [Array<Hash>]
  # @return [Array<Hash>] the same array, updated in place
  def add_ressources(sessions)
    ressources = ressources_by_slug

    sessions.each do |session|
      next if session[:type] == 'workshop'

      slug = session[:authors].map { |author| author[:name] }.join('-').slugify
      slug = RESSOURCE_SLUG_FIXES.fetch(slug, slug)
      if ressources[slug].nil?
        ap ressources.keys
        ap slug
      end
      session[:ressources] = ressources[slug]
    end
    sessions
  end

  # Returns the first non-empty `#orateur p` paragraph of a speaker page.
  #
  # @param author [Object] parsed speaker document
  # @return [String, nil] nil when every paragraph is empty
  def author_description(author)
    element(author, '#orateur p')
      .map { |para| clean_text(para.text) }
      .find { |text| !text.empty? }
  end

  # Runs the whole extraction, prints the result with `ap`, and writes it as
  # pretty-printed JSON to the path given by HelperPath.record('2009').
  def run
    content = add_ressources(add_authors(sessions))
    ap content
    output = HelperPath.record('2009')
    FileUtils.mkdir_p(File.dirname(output))
    File.write(output, JSON.pretty_generate(content))
  end

  private

  # Parses an HTML file. File.read closes the file once it has been read,
  # so no handle is left open per parsed page.
  def parse_html(filepath)
    Nokogiri::HTML(File.read(filepath))
  end

  # Builds the hash recorded for one event. The title is passed in already
  # computed because extracting it mutates the event's <h3>.
  def event_record(event, type, title, filepath)
    {
      year: 2009,
      type: type,
      title: title,
      url: url(event, filepath),
      description: description(event),
      authors: authors(event)
    }
  end

  # Reads every speaker page into a hash keyed by slugified speaker name.
  def author_profiles
    @authors_filepath.each_with_object({}) do |filepath, profiles|
      doc = parse_html(filepath)
      name = clean_text(element(doc, '#orateur h2').text)
      picture = attribute(doc, '#orateur p.photo img', 'src')
      profiles[name.slugify] = {
        name: name,
        picture: "http://www.paris-web.fr/2009/#{picture}",
        description: author_description(doc)
      }
    end
  end

  # Reads the resources table into a hash keyed by the slug of the second
  # cell of each row (after the rowspan="14" cell and any <small> element
  # have been removed).
  def ressources_by_slug
    doc = parse_html(@ressources_filepath)
    element(doc, 'tbody tr').each_with_object({}) do |line, ressources|
      element(line, 'td[rowspan="14"]').remove
      author_cell = element(line, 'td')[1]
      element(author_cell, 'small').remove
      slug = clean_text(author_cell.text).slugify
      ressources[slug] = {
        slides: attribute(line, 'a[href$="pdf"],a[href$="zip"]', 'href'),
        video: attribute(line, 'a[href^="http://www.dailymotion"]', 'href')
      }
    end
  end
end
# Script entry point: build the 2009 selector and run the full extraction.
selector = Selector2009.new
selector.run