Permalink
Browse files

GH#4 refine show 362+ shownote format handling

  • Loading branch information...
1 parent da4c14e commit 014ea81c3130b3058310a0b3a903ea8c323ccecb @tardate committed Jan 2, 2012
View
2 app/controllers/notes_controller.rb
@@ -12,7 +12,7 @@ def collection
@notes ||= if search_params.present?
end_of_association_chain.search(search_params)
else
- end_of_association_chain.scoped
+ end_of_association_chain.show_meme_note_order
end
end
View
36 app/models/meme.rb
@@ -53,6 +53,8 @@ def non_trending_arel
# Tries to coalesce different spellings to one standard form
def normalize_name(value)
case value
+ when /2tth/i
+ '2TTH'
when /arab.+prin/i
'Arab Spring'
when /agen.+21/i
@@ -63,22 +65,38 @@ def normalize_name(value)
'Biodiversitée'
when /cyb.+war/i
'CyberWar$'
+ when /devil.+weed/i
+ 'Devil Weed & Powder'
when /eq.+chine/i
'EQ Machine$'
- when /fal[s\$]e.*flag/i
+ when /fal[s\$]{1,2}e.*flag/i
'Fal$e Flag'
- when /hollywood.+whack/i
+ when /hiker/i
+ 'Hikers'
+ when /hollywood.+whack|hollywood.+shoot/i
'Hollywood Whacker$'
when /l.b.*a/i
'Libya'
+ when /lone.+wolf/i
+ 'Lone Wolf'
+ when /lucifer/i
+ 'Lucifer'
when /magic.*number/i
'Magic Numbers'
when /military.+industrial.+compl/i
'Military Industrial Complex'
when /minist.*truth/i
'Ministry of Truth'
- when /monsant/i
+ when /monsa.*t/i
'Monsantooo'
+ when /^pedo|pedo.*bear/i
+ 'PedoBear'
+ when /ron.+paul/i
+ 'Ron Paul'
+ when /science/i
+ 'The Science Is In!'
+ when /hadow.*puppet.*thea/i
+ 'Shadow Puppet Theatre'
when /shut.*up.*slave/i
'Shut Up Slave!'
when /squirrel/i
@@ -89,12 +107,20 @@ def normalize_name(value)
'Trains Good, Planes Bad (Whoo Hoo!)'
when /united.*tate.*euro/i
'United $tates of EUROpe'
+ when /vaccin/i
+ 'Vaccine$'
when /vagina/i
'Vagina'
+ when /we.+can.+wait/i
+ "We Can't Wait!"
when /word.+matter/i
'Words Matter'
+ when /zombie/i
+ 'Zombie Nation'
+ when /clip|stuff/i
+ 'Clips'
else
- value
+ value.length > 60 ? 'Other' : value
end
end
@@ -104,7 +130,7 @@ def normalize_name(value)
def factory(name)
normalized_name = normalize_name(name)
meme = find_or_create_by_name(normalized_name)
- if meme && name == 'VIDEO'
+ if meme && name =~ /VIDEO|Clips/
meme.update_attribute(:trending,false)
end
meme
View
98 lib/navd/scraper/show_loader.rb
@@ -69,7 +69,7 @@ def show_notes
def get_nested_show_notes
notes = []
current_meme = nil
- p_shownotes_detail_all.at_css('ul.ulDirectory').children.each do |n|
+ show_notes_collection(:nested).each do |n|
if n.name=='li' && n[:class]=='directoryItem'
current_meme = n.text
elsif n.name=='ul' && n[:class]=='ulDirectory'
@@ -95,54 +95,86 @@ def get_nested_show_notes
def get_flat_show_notes
show_notes = []
# scan all the top-level li elements under the shownotes node
- p_shownotes_menu.css('div.divOutlineBody > ul > div.hide:last > ul > li').each do |n|
+ show_notes_collection(:flat).each do |n|
if n.next_element && n.next_element.name=='div' && (current_meme = n.css('span').text)
notes = n.next_element.xpath('./ul/li') # top level li elements
- notes.each do |note|
+ notes.each do |note|
current_title = note.text
+ name = current_title.truncate(255)
+ partial_description = current_title == name ? nil : current_title
if (note_collection = note.next_element) && note_collection.name=='div' && (subnotes = note_collection.css('li'))
- anchor = subnotes.at_css('a') || {}
- description = subnotes.text
+ subnotes.each do |subnote|
+ anchor = (subnote.at_css('a') || {})[:href]
+ description = [partial_description,subnote.text].compact.join(': ')
+ show_notes << {
+ :name => name,
+ :meme_name => current_meme,
+ :description => description,
+ :url => anchor
+ }
+ end
else
- anchor = note.at_css('a') || {}
- description = current_title
+ anchor = (note.at_css('a') || {})[:href]
+ show_notes << {
+ :name => name,
+ :meme_name => current_meme,
+ :description => partial_description,
+ :url => anchor
+ }
end
- show_notes << {
- :name => current_title.truncate(255),
- :meme_name => current_meme,
- :description => description,
- :url => anchor[:href]
- }
end
end
end
show_notes
end
- # Returns a text representation of the show credits
- def credits
- @credits ||= credits_list.try(:join,'<br/>')
+ def show_notes_collection(format = :flat)
+ case format
+ when :nested
+ p_shownotes_detail_all.at_css('ul.ulDirectory').children
+ when :flat
+ p_shownotes_menu.css('div.divOutlineBody > ul > li').map{|n| n.at_css('span').text =~ /notes/i ? n.next_element : nil }.compact.first.xpath('./ul/li')
+ else
+ []
+ end
+ end
+
+ def show_credits_collection(format = :flat)
+ case format
+ when :nested
+ p_credits.css('.directoryComment').children
+ when :flat
+ p_shownotes_menu.css('div.divOutlineBody > ul > li').map{|n| n.at_css('span').text =~ /credit/i ? n.next_element : nil }.compact.first.css('*').children
+ else
+ []
+ end
end
+
# Returns an array of credit strings given a collection of Nokogiri nodes (only text nodes are used; blank entries are dropped)
def normalize_credit_list(collection_root)
nbsp = Nokogiri::HTML("&nbsp;").text
- c = collection_root.children.map{|c| c.is_a?(Nokogiri::XML::Text) ? c.text.gsub(nbsp,' ').gsub(/\t|\n/,'') : nil }
+ c = collection_root.map{|c| c.is_a?(Nokogiri::XML::Text) ? c.text.gsub(nbsp,' ').gsub(/\t|\n/,'') : nil }
c.reject!{|i| i.blank?}
c
end
+
+ # Returns a text representation of the show credits
+ def credits
+ @credits ||= credits_list.try(:join,'<br/>')
+ end
# Returns an array of credits for the show
def credits_list
- @credits_list ||= case shownotes_format
- when :nested
- normalize_credit_list(p_credits.css('.directoryComment'))
- when :flat
- normalize_credit_list(p_shownotes_menu.css('div.divOutlineBody > ul > div.hide').first.css('*'))
- end
+ @credits_list ||= normalize_credit_list(show_credits_collection(shownotes_format))
end
# Returns the human name of the show
def show_name
- credits_list.try(:first)
+ case number
+ when 368 # special case
+ 'Too Many Clips'
+ else
+ credits_list.try(:first)
+ end
end
# Returns the show date (as extracted from the audio file name)
@@ -186,6 +218,8 @@ def assets_url
# http://349.nashownotes.com/shownotes/na34920111020Credits
def credits_uri
@credits_uri ||= uri.merge(extract_nodes(p_shownotes_menu,:credits)[:href])
+ rescue
+ # ignore errors getting the credits url (the method then returns nil)
end
# Returns the nested shownotes page content
# e.g. http://349.nashownotes.com/shownotes ->
@@ -198,20 +232,26 @@ def p_credits
# e.g. http://349.nashownotes.com/shownotes
def shownotes_menu_uri
@shownotes_menu_uri ||= uri.merge(extract_nodes(p_shownotes_main,:notes)[:href])
+ rescue
+ # ignore errors getting the shownotes menu url (the method then returns nil)
end
# Returns Nokogiri::HTML::Document of the main shownotes menu page being processed
def p_shownotes_menu
- @p_shownotes_menu ||= spider.get_page(shownotes_menu_uri)
+ @p_shownotes_menu ||= shownotes_menu_uri && spider.get_page(shownotes_menu_uri)
end
# Returns the shownote menu page format type.
# Currently supports:
- # :nested - as for shows ~333-361
+ # :nested - as for shows ~325-361
# :flat - shows 362+
def shownotes_format
- @shownotes_format ||= if p_shownotes_menu.css('ul.ulDirectory').present?
- :nested
+ @shownotes_format ||= if p_shownotes_menu
+ if p_shownotes_menu.css('ul.ulDirectory').present?
+ :nested
+ else
+ :flat
+ end
else
- :flat
+ nil
end
end
View
9 lib/navd/scraper/spider.rb
@@ -9,8 +9,13 @@ def initialize
# Returns the url for the specific show
def get_uri_for_show(show_number)
- # TODO: prior to 301, this didn't work - need alternative method'
- url = "http://#{show_number}.nashownotes.com/"
+ url = case show_number
+ when 368
+ "http://xmas2011.nashownotes.com/"
+ else
+ # TODO: prior to 301, this didn't work - need alternative method
+ "http://#{show_number}.nashownotes.com/"
+ end
normalize_uri(url)
rescue Exception => e
self.errors << e
View
1 spec/fixtures/html/362/credits.htm
@@ -1 +0,0 @@
-<unused/>
View
1 spec/fixtures/html/362/shownotes_detail.htm
@@ -1 +0,0 @@
-<unused/>
View
1 spec/fixtures/html/364/credits.htm
@@ -1 +0,0 @@
-<unused/>
View
1 spec/fixtures/html/364/shownotes_detail.htm
@@ -1 +0,0 @@
-<unused/>
View
511 spec/fixtures/html/368/show.htm
@@ -0,0 +1,511 @@
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <title>NA-Too Many Clips</title>
+ <link rel="alternate" type="application/rss+xml" title="RSS" href="http://static.curry.com/worldoutline/admin/rss.xml" />
+
+ <link href="http://fonts.googleapis.com/css?family=IM Fell DW Pica SC" rel="stylesheet" type="text/css">
+<link href="http://fonts.googleapis.com/css?family=Ubuntu" rel="stylesheet" type="text/css">
+<link href="http://static.scripting.com/github/bootstrap/bootstrap.css" rel="stylesheet">
+<link href="http://static.scripting.com/github/bootstrap/docs.css" rel="stylesheet">
+<link href="http://static.scripting.com/github/bootstrap/prettify.css" rel="stylesheet">
+
+<script src="http://static.scripting.com/github/bootstrap/jquery.min.js"></script>
+
+<script src="http://static.scripting.com/github/bootstrap/jquery.tablesorter.min.js"></script>
+
+<script src="http://static.scripting.com/github/bootstrap/prettify.js"></script>
+<script>$(function () { prettyPrint() })</script>
+<script src="http://static.scripting.com/github/bootstrap/bootstrap-dropdown.js"></script>
+<script src="http://static.scripting.com/github/bootstrap/bootstrap-twipsy.js"></script>
+<script src="http://static.scripting.com/github/bootstrap/bootstrap-scrollspy.js"></script>
+<script src="http://static.scripting.com/github/bootstrap/application.js"></script>
+<script src="http://static.scripting.com/github/bootstrap/bootstrap-popover.js"></script>
+
+<script>
+ $(function () {
+ $("a[rel=twipsy]").twipsy({
+ live: true
+ })
+ })
+ </script>
+<script>
+ $(function () {
+ $("a[rel=popover]")
+ .popover({
+ offset: 10
+ })
+ .click(function(e) {
+ e.preventDefault()
+ })
+ })
+ </script>
+<style>
+ body {
+ margin-top: -5px;
+ }
+ table th {
+ border: 0;
+ }
+ table {
+ border: 0px;
+ }
+ .actions {
+ border: 0;
+ background: none;
+ padding: 0;
+ }
+ </style>
+<style>
+ body {
+ color: black;
+ }
+ /* eliminate borders on tables */
+ table th {
+ border: 0;
+ }
+ table td {
+ border: 0;
+ }
+ table {
+ border: 0px;
+ }
+ .divOpmlWebpage {
+ margin-left: 12%;
+ margin-right: 12%;
+ margin-bottom: 65px;
+ color: black;
+ }
+ .divOpmlHeader {
+ margin-top: 25px;
+ margin-bottom: 25px;
+ }
+ .divOpmlHeader h1 {
+ font-family: 'IM Fell DW Pica SC';
+ text-align: center;
+ text-shadow: 4px 4px 4px #AAA;
+ font-size: 68px;
+ padding: 0px;
+ margin: 0px;
+ }
+ .divOpmlHeader red {
+ color: red;
+ font-size: 110%;
+ }
+ .divOpmlHeader a:link {
+ text-decoration: none;
+ color:#000;
+ }
+ .divOpmlHeader a:hover {
+ text-decoration: none;
+ color:#000;
+ }
+ .divOpmlHeader a:visited {
+ text-decoration: none;
+ color:#000;
+ }
+
+ .divOpmlMenubar a, .divOpmlMenubar a:link, .divOpmlMenubar a:visited {
+ color: lightgray;
+ }
+ .divOpmlMenubar white {
+ color: white;
+ }
+ .divOpmlMenubarStatus {
+ color: lightgray;
+ float: right;
+ line-height: 19px;
+ padding: 10px 0px 11px;
+ font-size: 13px;
+ }
+
+ .divOpmlWebpageBody {
+ padding-top: 0px;
+ }
+ .divOpmlWebpageBody h3 {
+ font-family: Ubuntu;
+ font-size: 24px;
+ }
+
+ .divIntroPgf {
+ font-size: 14px;
+ line-height: 120%;
+ color: black;
+ margin-top: 10px;
+ margin-bottom: 15px;
+ }
+ .prefsTable th, .prefsTable td {
+ border: 0;
+ font-size: 140%;
+ line-height: 100%;
+ }
+ .prefsTable td.input: {
+ height: 100%;
+ padding: 0;
+ border: 0;
+ }
+
+ /* sign-in form */
+ .divSigninForm .signinLabel {
+ text-align: right;
+ font-family: Ubuntu;
+ font-size: 18px;
+ }
+ .divSigninForm td {
+ border: none;
+ }
+ .divSigninForm input {
+ height: 32px;
+ padding: 3px;
+ font-family: Ubuntu;
+ font-size: 18px;
+ }
+ .divSigninForm table {
+ padding-top: 30px;
+ width: auto;
+ }
+ /* prefs */
+ .divPrefs {
+ font-family: Ubuntu;
+ font-size: 17px;
+ }
+ .divPrefs label {
+ font-family: Ubuntu;
+ font-size: 17px;
+ }
+ .divPrefs .row {
+ line-height: 24px;
+ margin-bottom: 12px;
+ }
+ .divPrefs red {
+ color: red;
+ }
+ .divPrefs li {
+ font-family: Ubuntu;
+ font-size: 17px;
+ padding-top: 5px;
+ padding-bottom: 5px;
+ }
+ .divPrefs h4 {
+ font-family: Ubuntu;
+ font-size: 22px;
+ padding-top: 15px;
+ padding-bottom: 5px;
+ }
+ /* log pages */
+ .logEvent {
+ vertical-align: top;
+ text-align: left;
+ color: black;
+ }
+ .logDescription {
+ vertical-align: top;
+ text-align: left;
+ color: black;
+ line-height: 120%;
+ }
+ .logDate {
+ vertical-align: top;
+ text-align: right;
+ color: black;
+ font-size: 14px;
+ }
+ .logSeconds {
+ vertical-align: top;
+ text-align: right;
+ color: black;
+ font-size: 14px;
+ }
+ .divLogPage table {
+ width: 90%;
+ margin-left: auto;
+ margin-right: auto;
+ }
+ .divLogPage td {
+ border: none;
+ font-size: 16px;
+ }
+ .divLogPage th {
+ border: none;
+ font-size: 14px;
+ padding-bottom: 0px;
+ }
+ </style>
+
+ <style>
+ /* header */
+ .divGraphic {
+ margin: 0px auto;
+ }
+ .divGraphic img {
+ display: block;
+ margin-left:auto;
+ margin-right:auto;
+ padding-top:5px;
+ padding-bottom:20px;
+ }
+ .divGraphic h1 {
+ font-family: Lobster;
+ font-size: 64px;
+ line-height: 135%;
+ white-space: nowrap;
+ text-align:center;
+ padding-bottom: 10px;
+ padding-top: 0px;
+ }
+ /* directory */
+ .divDirectory {
+ width: 300px;
+ margin: 30px auto;
+ }
+ .ulDirectory {
+ font-family: Arial;
+ font-size: 22px;
+ line-height: 28px;
+ text-align: left;
+ list-style-type: none;
+ padding-left: 15px;
+ }
+ .directoryItem {
+ margin-top: 10px;
+ margin-bottom: 10px;
+ }
+ .directoryComment {
+ margin-top: 10px;
+ margin-bottom: 10px;
+ }
+ .directoryLink {
+ padding-left: 24px;
+ background: transparent url(http://static.opml.org/images/folder.gif) no-repeat top left;
+ }
+ .directoryIcon {
+ display: none;
+ padding-right: 10px;
+ }
+ .ulDirectory a:link {
+ text-decoration: underline;
+ color:#1E68A6;
+ }
+ .ulDirectory a:hover {
+ text-decoration: underline;
+ color:#1E68A6;
+ }
+ .ulDirectory a:visited {
+ text-decoration: underline;
+ color:#819fC7;
+ }
+ .directoryErrorMessage {
+ font-family: Arial;
+ font-size: 24;
+ padding-top: 24;
+ padding-bottom: 12;
+ }
+ /* directory additions */
+ .divDirectoryTitle {
+ font-family: Arial;
+ font-size: 22px;
+ line-height: 28px;
+ font-weight: bold;
+ }
+ .divLegend {
+ font-family: Arial;
+ font-size: 12px;
+ line-height: 14px;
+ color:#1E68A6;
+ }
+ /* bread crumbs */
+ .divBreadCrumbs {
+ display: inline;
+ background: none;
+ }
+ .divBreadCrumbs {
+ font-family: Arial;
+ font-size: 15px;
+ line-height: 18px;
+ padding-bottom: 15px;
+ }
+ .divBreadCrumbs a:link {
+ text-decoration: underline;
+ color: #1E68A6;
+ }
+ .divBreadCrumbs a:hover {
+ text-decoration: underline;
+ color: #1E68A6;
+ }
+ .divBreadCrumbs a:visited {
+ text-decoration: underline;
+ color: #819fC7;
+ }
+ .aCurrentCrumb {
+ font-weight: bold;
+ color: black;
+ }
+ /* blogpost nodetype */
+ .divBlogPost h3 {
+ font-family: Georgia;
+ font-size: 22px;
+ line-height: 28px;
+ padding-left: 20px;
+ padding-top: 20px;
+ padding-bottom: 10px;
+ }
+ .divBlogPost p {
+ font-family: Georgia;
+ font-size: 16px;
+ line-height: 22px;
+ padding-left: 25px;
+ padding-right: 25px;
+ padding-bottom: 3px;
+ }
+ /* river nodetype */
+ .article .footer {
+ padding: 0px;
+ background-color: white;
+ border-top: 0px;
+ text-shadow: 0 0px;
+ min-width: 0px;
+ -moz-box-shadow: 0 0px 0px;
+ }
+ .article .description {
+ font-family: Arial,sans-serif;
+ font-size: 13px;
+ line-height: 17px;
+ }
+ .article h3 {
+ font-family: Arial,sans-serif;
+ font-size: 16px;
+ line-height: 22px;
+ }
+ /* thumbList nodetype */
+ .divWorldOutlineThumbList td {
+ border: 3px red;
+ border-style: solid;
+ text-align: center;
+ padding-top: 12px;
+ padding-bottom: 8px;
+ }
+ .divWorldOutlineThumbList table {
+ border-collapse: collapse;
+ }
+ .divWorldOutlineThumbList .thumbListCaptionString {
+ font-size: 13px;
+ }
+ /* domains page */
+ .divDomains ul {
+ padding-top: 5px;
+ list-style: disc outside normal;
+ padding-left: 35px;
+ }
+ .divDomains li {
+ font-family: Ubuntu;
+ font-size: 18px;
+ line-height: 28px;
+ color: black;
+ }
+ .divLegend {
+ padding-top: 30px;
+ }
+ </style>
+ <style>html {
+ width: 980px;
+ margin: auto;
+}
+
+body {
+ width: 980px;
+ margin: auto;
+}
+
+p.blogPostPgf {
+ font-family: georgia;
+ font-size: 17px;
+ line-height: 25px;
+ margin-left: 0px;
+ }
+
+h3.blogPostTitle {
+ font-family: georgia;
+ font-size: 28px;
+ }
+
+li.directoryComment {
+ font-family: georgia;
+ font-size: 17px;
+ line-height: 25px;
+ margin-left: 0px;
+ }
+
+li.directoryItem {
+ font-family: arial;
+ font-size: 17px;
+ font-weight: plain;
+ line-height: 25px;
+ margin-left: 0px;
+ }
+
+.divDirectory {
+ width: 680px;
+ margin: 3em auto;
+}
+
+.divFooter {
+ position: auto;
+ bottom: 0;
+ left: 0;
+ right: 0;
+ background-color: white;
+ padding: 5px;
+ -webkit-box-shadow: 0 0 8px rgba(0,0,0,.3);
+ -mox-box-shadow: 0 0 8px rgba(0,0,0,.3);
+ box-shadow: 0 0 8px rgba(0,0,0,.3)
+}
+</style>
+ </head>
+
+ <body>
+
+ <div class="divOpmlWebpage">
+ <div class="divOpmlWebpageBody">
+
+ <div class="divGraphic"><a href="/"><img src="http://blog.curry.com/images/2011/12/25/NA_CLIP_REEL.jpg" width="512" height="512" border="0" align="center" hspace="15" vspace="5" alt="A picture named NA_CLIP_REEL.jpg"></a></div>
+
+ <ul class="ulDirectory">
+ <li class="directoryComment" ><a href="http://blog.curry.com/stories/2011/12/25/naChristmasCliptacular.html"><img class="icon" src="http://www.google.com/s2/favicons?domain=blog.curry.com" width="16" height="16" border="0" alt="Click here to visit the site."></a>&nbsp;&nbsp;<a href="http://blog.curry.com/stories/2011/12/25/naChristmasCliptacular.html">Episode Webpage</a></li>
+ <li class="directoryComment" ><embed type="application/x-shockwave-flash" src="http://www.dvorak.org/blog/wp-content/uploads/2008/01/playersingle.swf" id="mymovie" name="mymovie" quality="high" flashvars="autoPlay=no&amp;soundPath=http://m.podshow.com/media/15412/episodes/307185/noagenda-307185-12-25-2011.mp3&overColor=#ff0000" height="80" ></embed></li>
+ <li class="directoryComment" ><a href="http://dropbox.curry.com/ShowNotesArchive/2011/12/NA-Christmas%20Cliptacular/NA_CLIP_REEL.png"><img class="icon" src="http://www.google.com/s2/favicons?domain=dropbox.curry.com" width="16" height="16" border="0" alt="Click here to visit the site."></a>&nbsp;&nbsp;<a href="http://dropbox.curry.com/ShowNotesArchive/2011/12/NA-Christmas%20Cliptacular/NA_CLIP_REEL.png">Cover Art by <a href="http://adiosmofos.com./">Thoren</a></a></li>
+
+ <li class="directoryComment" ><a href="http://m.podshow.com/media/15412/episodes/307185/noagenda-307185-12-25-2011.mp3"><img class="icon" src="http://www.google.com/s2/favicons?domain=m.podshow.com" width="16" height="16" border="0" alt="Click here to visit the site."></a>&nbsp;&nbsp;<a href="http://m.podshow.com/media/15412/episodes/307185/noagenda-307185-12-25-2011.mp3">Direct link to the mp3 file</a></li>
+ <li class="directoryComment" ><a href="http://templates.worldoutline.org/"><img class="icon" src="http://www.google.com/s2/favicons?domain=templates.worldoutline.org" width="16" height="16" border="0" alt="Click here to visit the site."></a>&nbsp;&nbsp;<a href="http://templates.worldoutline.org/">HowTo on designing this page</a></li>
+ <li class="directoryComment" ><a href="http://blog.curry.com/stories/2011/05/27/noAgendaShownotesDesignCha.html"><img class="icon" src="http://www.google.com/s2/favicons?domain=blog.curry.com" width="16" height="16" border="0" alt="Click here to visit the site."></a>&nbsp;&nbsp;<a href="http://blog.curry.com/stories/2011/05/27/noAgendaShownotesDesignCha.html">Design this page!</a></li>
+ <li class="directoryComment" ><a href="http://dvorak.org/na"><img class="icon" src="http://www.google.com/s2/favicons?domain=dvorak.org" width="16" height="16" border="0" alt="Click here to visit the site."></a>&nbsp;&nbsp;<a href="http://dvorak.org/na"><a href="http://dvorak.org/na"></a><a href="http://dvorak.org/na">Support our work and the show</a></a></li>
+ <li class="directoryComment" ><a href="http://mevio.com/feeds/noagenda.xml"><img class="icon" src="http://www.google.com/s2/favicons?domain=mevio.com" width="16" height="16" border="0" alt="Click here to visit the site."></a>&nbsp;&nbsp;<a href="http://mevio.com/feeds/noagenda.xml">Subscribe to our <a href="http://mevio.com/feeds/noagenda.xml">Podcast Feed</a></a></li>
+ <li class="directoryComment" ><a href="http://noagendanewsnetwork.com/"><img class="icon" src="http://www.google.com/s2/favicons?domain=noagendanewsnetwork.com" width="16" height="16" border="0" alt="Click here to visit the site."></a>&nbsp;&nbsp;<a href="http://noagendanewsnetwork.com/">The No Agenda News Network- <a href="http://noagendanewsnetwork.com/">noagendanewsnetwork.com</a></a></li>
+
+ <li class="directoryComment" ><a href="http://podcasts.nashownotes.com/"><img class="icon" src="http://www.google.com/s2/favicons?domain=podcasts.nashownotes.com" width="16" height="16" border="0" alt="Click here to visit the site."></a>&nbsp;&nbsp;<a href="http://podcasts.nashownotes.com/">All the No Agenda Stream Podcasts and Feeds on One Page</a></li>
+ <li class="directoryComment" ><a href="http://www.dvorak.org/blog/no-agenda-mailing-list-signup-here/">Join</a> the Mailing List!</li>
+ </ul>
+
+
+<center>
+
+ <br>
+ <div id="disqus_thread"></div>
+
+ <script type="text/javascript" src="http://disqus.com/forums/adamcurrycomments/embed.js"></script>
+ <noscript><a href="http://adamcurrycomments.disqus.com/?url=ref">View the forum thread.</a></noscript>
+
+ </center>
+
+ <div class="divLegend">
+ <p>Editor: Adam Curry; Updated: 1/1/12; 547 reads. 11 items. Page build ran for 0.033 seconds.</p>
+ <p><a class="aOpmlSourceLink" href="http://static.curry.com/worldoutline/admin/root.opml" title="Click on this icon to view the OPML source text for this directory. More info: http://www.opml.org/spec2."><img src="http://scripting.com/images/xmlIcon2.gif" border="0" width="36" height="14" alt="XML" /></a></p>
+
+ <p><a href="http://127.0.0.1:5337/opmlEditor/openOpml?url=http://xmas2011.nashownotes.com/">View Outline</a></p>
+ <p><a href="http://xmas2011.nashownotes.com/"><img src="http://blog.curry.com/images/2011/06/14/BigAppsRounded.jpg" width="16" height="16" border="0" alt="Home"></a></p>
+ </div>
+ </div>
+
+ </div>
+ </div>
+ </body>
+ </html>
View
49 spec/lib/scraper/show_loader_examples_spec.rb
@@ -24,7 +24,7 @@
},
:shownotes_format => :nested,
:shownotes_menu_url => 'http://333.nashownotes.com/shownotes',
- :shownotes_count => 11,
+ :shownotes_count => 94, # will only load 86,
:show_name => 'Lions Stood Still',
:credits=>%(Lions Stood Still<br/>Executive Producers: Bryan Raley, Alan Thompson, Michael Kearns, Oscar Nadal, Richard Hyde, Robert Claeson, Scott Hankel, Jrdan Wyatt<br/>Associate Executive Producers: Scott Hankel<br/>),
:credits_url => 'http://333.nashownotes.com/shownotes/na33320110825Credits'
@@ -44,7 +44,7 @@
},
:shownotes_format => :flat,
:shownotes_menu_url => 'http://362.nashownotes.com/shownotes',
- :shownotes_count => 11,
+ :shownotes_count => 100, # will only load 57,
:show_name => 'Drone Journalism',
:credits=>%(Drone Journalism<br/>Executive Producers: Adam Curry & John C Dvroak<br/>),
:credits_url => nil
@@ -64,10 +64,30 @@
},
:shownotes_format => :flat,
:shownotes_menu_url => 'http://364.nashownotes.com/shownotes',
- :shownotes_count => 11,
+ :shownotes_count => 217, # will only load 81,
:show_name => 'Katy Bar The Door, Baby!',
:credits=>%(Katy Bar The Door, Baby!<br/>Executive Producers: Sir Richard Scott Bagwell),
:credits_url => nil
+ },
+ '368' => {
+ :expected_attributes => {
+ :number => 368,
+ :published => true,
+ :show_notes_url=>"http://xmas2011.nashownotes.com/",
+ :audio_url=>"http://m.podshow.com/media/15412/episodes/307185/noagenda-307185-12-25-2011.mp3",
+ :published_date=>Date.parse('2011-12-25'),
+ :cover_art_url=>"http://dropbox.curry.com/ShowNotesArchive/2011/12/NA-Christmas%20Cliptacular/NA_CLIP_REEL.png",
+ :assets_url=>nil,
+ :url=>"http://blog.curry.com/stories/2011/12/25/naChristmasCliptacular.html",
+ :credits=>nil,
+ :name=>'Too Many Clips'
+ },
+ :shownotes_format => nil,
+ :shownotes_menu_url => nil,
+ :shownotes_count => 0,
+ :show_name => 'Too Many Clips',
+ :credits=> "",
+ :credits_url => nil
}
}.each do |number,options|
context "show ##{number}" do
@@ -78,7 +98,7 @@
before {
show_loader.spider.stub(:get_page).and_return(published_show_page_html(show_number))
show_loader.stub(:p_shownotes_menu).and_return(shownotes_menu_page_html(show_number))
- show_loader.stub(:get_nested_show_notes).and_return([])
+ show_loader.stub(:p_shownotes_detail_all).and_return(shownotes_detail_page_html(show_number))
show_loader.stub(:credits_list).and_return(nil)
show_loader.scan_show_assets
}
@@ -89,7 +109,7 @@
describe "#shownotes_menu_uri [protected]" do
subject { show_loader.send(:shownotes_menu_uri) }
- let(:expected) { URI.parse(options[:shownotes_menu_url]) }
+ let(:expected) { options[:shownotes_menu_url] ? URI.parse(options[:shownotes_menu_url]) : nil }
it { should eql(expected) }
end
@@ -98,16 +118,19 @@
it { should eql(options[:shownotes_format]) }
end
+ describe "#show_notes" do
+ subject { show_loader.show_notes }
+ its(:count) { should eql(options[:shownotes_count]) }
+ end
+
end
context "with credits" do
before {
show_loader.spider.stub(:get_page).and_return(published_show_page_html(show_number))
show_loader.stub(:p_shownotes_menu).and_return(shownotes_menu_page_html(show_number))
show_loader.stub(:get_nested_show_notes).and_return([])
- if options[:credits_url]
- show_loader.stub(:p_credits).and_return(credits_page_html(show_number))
- end
+ show_loader.stub(:p_credits).and_return(credits_page_html(show_number))
show_loader.scan_show_assets
}
@@ -122,13 +145,19 @@
if options[:credits_url]
let(:expected) { URI.parse(options[:credits_url]) }
it { should eql(expected) }
+ else
+ it { should be_nil }
end
end
describe "#credits [protected]" do
subject { show_loader.send(:credits) }
- let(:expected) { options[:credits] }
- it { should include(expected) }
+ if options[:credits].present?
+ let(:expected) { options[:credits] }
+ it { should include(expected) }
+ else
+ it { should be_empty }
+ end
end
end
View
12 spec/support/scraper_mocks.rb
@@ -7,9 +7,15 @@ def html_mock_path
end
def mock_text(*path_elements)
IO.read(html_mock_path.join(*path_elements))
+ rescue
+ nil
end
def mock_html(*path_elements)
- Nokogiri::HTML(mock_text(*path_elements))
+ if doc = mock_text(*path_elements)
+ Nokogiri::HTML(doc)
+ else
+ nil
+ end
end
def unpublished_show_page_html
@@ -24,6 +30,10 @@ def shownotes_menu_page_html(show_number=333)
mock_html(show_number.to_s,'shownotes_menu.htm')
end
+ def shownotes_detail_page_html(show_number=333)
+ mock_html(show_number.to_s,'shownotes_detail.htm')
+ end
+
def credits_page_html(show_number=333)
mock_html(show_number.to_s,'credits.htm')
end

0 comments on commit 014ea81

Please sign in to comment.