Skip to content

Commit

Permalink
Add support for RSS syndication module
Browse files Browse the repository at this point in the history
The RSS syndication module allows feeds to specify how often they are
updated as a hint to crawlers.

This commit adds support for this module as an extension.
  • Loading branch information
markpritchard committed May 6, 2019
1 parent 91c0c03 commit baa9b36
Show file tree
Hide file tree
Showing 13 changed files with 316 additions and 58 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ channel.validate().unwrap();

Elements which have non-default namespaces will be considered extensions. Extensions are stored in `Channel.extensions` and `Item.extensions`.

For conveninence, [Dublin Core](http://dublincore.org/documents/dces/) and [iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390) extensions are extracted to structs and stored in as properties on channels and items.
For convenience, [Dublin Core](http://dublincore.org/documents/dces/), [Syndication](http://web.resource.org/rss/1.0/modules/syndication/) and [iTunes](https://help.apple.com/itc/podcasts_connect/#/itcb54353390) extensions are extracted to structs and stored as properties on channels and items.

## Invalid Feeds

Expand Down
8 changes: 8 additions & 0 deletions benches/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,11 @@ fn read_dublincore(b: &mut Bencher) {
let _ = Channel::read_from(input).expect("failed to parse feed");
});
}

#[bench]
fn read_syndication(b: &mut Bencher) {
    // The fixture is embedded at compile time so the benchmark measures parsing only.
    let feed: &[u8] = include_bytes!("../tests/data/syndication.xml");
    b.iter(|| {
        // The parsed channel is dropped immediately; only the parse itself is timed.
        Channel::read_from(feed).expect("failed to parse feed");
    });
}
9 changes: 9 additions & 0 deletions benches/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,12 @@ fn write_dublincore(b: &mut Bencher) {
let _ = channel.write_to(sink()).expect("failed to write");
});
}

#[bench]
fn write_syndication(b: &mut Bencher) {
    // Parse once up front so that only serialisation is inside the timed closure.
    let feed: &[u8] = include_bytes!("../tests/data/syndication.xml");
    let parsed = Channel::read_from(feed).expect("failed to parse feed");
    b.iter(|| {
        // Output goes to a sink; the returned writer is dropped immediately.
        parsed.write_to(sink()).expect("failed to write");
    });
}
102 changes: 60 additions & 42 deletions src/channel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,18 @@ use std::io::{BufRead, Write};
use std::str::{self, FromStr};

use quick_xml::Error as XmlError;
use quick_xml::events::attributes::Attributes;
use quick_xml::events::{BytesEnd, BytesStart, Event};
use quick_xml::events::attributes::Attributes;
use quick_xml::Reader;
use quick_xml::Writer;

use category::Category;
use cloud::Cloud;
use error::Error;
use extension::{self, ExtensionMap};
use extension::dublincore::DublinCoreExtension;
use extension::itunes::ITunesChannelExtension;
use extension::ExtensionMap;
use extension::dublincore;
use extension::itunes;
use extension::syndication;
use extension::util::{extension_name, parse_extension};
use image::Image;
use item::Item;
Expand Down Expand Up @@ -76,9 +77,11 @@ pub struct Channel {
/// The extensions for the channel.
extensions: ExtensionMap,
/// The iTunes extension for the channel.
itunes_ext: Option<ITunesChannelExtension>,
itunes_ext: Option<itunes::ITunesChannelExtension>,
/// The Dublin Core extension for the channel.
dublin_core_ext: Option<DublinCoreExtension>,
dublin_core_ext: Option<dublincore::DublinCoreExtension>,
/// The Syndication extension for the channel.
syndication_ext: Option<syndication::SyndicationExtension>,
/// The namespaces present in the RSS tag.
namespaces: HashMap<String, String>,
}
Expand Down Expand Up @@ -759,7 +762,7 @@ impl Channel {
/// channel.set_itunes_ext(ITunesChannelExtension::default());
/// assert!(channel.itunes_ext().is_some());
/// ```
pub fn itunes_ext(&self) -> Option<&ITunesChannelExtension> {
pub fn itunes_ext(&self) -> Option<&itunes::ITunesChannelExtension> {
self.itunes_ext.as_ref()
}

Expand All @@ -776,7 +779,7 @@ impl Channel {
/// ```
pub fn set_itunes_ext<V>(&mut self, itunes_ext: V)
where
V: Into<Option<ITunesChannelExtension>>,
V: Into<Option<itunes::ITunesChannelExtension>>,
{
self.itunes_ext = itunes_ext.into();
}
Expand All @@ -793,7 +796,7 @@ impl Channel {
/// channel.set_dublin_core_ext(DublinCoreExtension::default());
/// assert!(channel.dublin_core_ext().is_some());
/// ```
pub fn dublin_core_ext(&self) -> Option<&DublinCoreExtension> {
pub fn dublin_core_ext(&self) -> Option<&dublincore::DublinCoreExtension> {
self.dublin_core_ext.as_ref()
}

Expand All @@ -810,11 +813,45 @@ impl Channel {
/// ```
pub fn set_dublin_core_ext<V>(&mut self, dublin_core_ext: V)
where
V: Into<Option<DublinCoreExtension>>,
V: Into<Option<dublincore::DublinCoreExtension>>,
{
self.dublin_core_ext = dublin_core_ext.into();
}

/// Return the Syndication extension for this channel.
///
/// Returns `None` if the channel currently holds no Syndication extension,
/// otherwise a shared reference to the stored extension.
///
/// # Examples
///
/// ```
/// use rss::Channel;
/// use rss::extension::syndication::SyndicationExtension;
///
/// let mut channel = Channel::default();
/// channel.set_syndication_ext(SyndicationExtension::default());
/// assert!(channel.syndication_ext().is_some());
/// ```
pub fn syndication_ext(&self) -> Option<&syndication::SyndicationExtension> {
self.syndication_ext.as_ref()
}

/// Set the Syndication extension for this channel.
///
/// Accepts any value convertible into `Option<SyndicationExtension>`: pass an
/// extension to store it, or `None` to remove the one currently stored. Any
/// previously stored extension is replaced.
///
/// # Examples
///
/// ```
/// use rss::Channel;
/// use rss::extension::syndication::SyndicationExtension;
///
/// let mut channel = Channel::default();
/// channel.set_syndication_ext(SyndicationExtension::default());
/// assert!(channel.syndication_ext().is_some());
/// ```
pub fn set_syndication_ext<V>(&mut self, syndication_ext: V)
where
V: Into<Option<syndication::SyndicationExtension>>,
{
self.syndication_ext = syndication_ext.into();
}

/// Return the extensions for this channel.
///
/// # Examples
Expand Down Expand Up @@ -1029,32 +1066,6 @@ impl Channel {
let mut element = BytesStart::borrowed(name, name.len());
element.push_attribute(("version", "2.0"));

let mut itunes_ns = self.itunes_ext.is_some();
let mut dc_ns = self.dublin_core_ext.is_some();

if !itunes_ns || !dc_ns {
for item in &self.items {
if !itunes_ns {
itunes_ns = item.itunes_ext().is_some();
}

if !dc_ns {
dc_ns = item.dublin_core_ext().is_some();
}

if itunes_ns && dc_ns {
break;
}
}
}

if itunes_ns {
element.push_attribute(("xmlns:itunes", extension::itunes::NAMESPACE));
}

if dc_ns {
element.push_attribute(("xmlns:dc", extension::dublincore::NAMESPACE));
}
for (name, url) in &self.namespaces {
element.push_attribute((format!("xmlns:{}", &**name).as_bytes(), url.as_bytes()));
}
Expand Down Expand Up @@ -1228,11 +1239,14 @@ impl Channel {
// Process each of the namespaces we know (note that the values are not removed prior and reused to support pass-through of unknown extensions)
for (prefix, namespace) in namespaces {
match namespace.as_ref() {
"http://www.itunes.com/dtds/podcast-1.0.dtd" => {
channel.extensions.remove(prefix).map(|v| channel.itunes_ext = Some(ITunesChannelExtension::from_map(v)))
itunes::NAMESPACE => {
channel.extensions.remove(prefix).map(|v| channel.itunes_ext = Some(itunes::ITunesChannelExtension::from_map(v)))
},
dublincore::NAMESPACE => {
channel.extensions.remove(prefix).map(|v| channel.dublin_core_ext = Some(dublincore::DublinCoreExtension::from_map(v)))
},
"http://purl.org/dc/elements/1.1/" => {
channel.extensions.remove(prefix).map(|v| channel.dublin_core_ext = Some(DublinCoreExtension::from_map(v)))
syndication::NAMESPACE => {
channel.extensions.remove(prefix).map(|v| channel.syndication_ext = Some(syndication::SyndicationExtension::from_map(v)))
},
_ => None
};
Expand Down Expand Up @@ -1333,14 +1347,18 @@ impl ToXml for Channel {
}
}

if let Some(ext) = self.itunes_ext.as_ref() {
if let Some(ext) = &self.itunes_ext {
ext.to_xml(writer)?;
}

if let Some(ext) = self.dublin_core_ext.as_ref() {
if let Some(ext) = &self.dublin_core_ext {
ext.to_xml(writer)?;
}

if let Some(ext) = &self.syndication_ext {
ext.to_xml(&self.namespaces, writer)?;
}

writer.write_objects(&self.items)?;

writer.write_event(Event::End(BytesEnd::borrowed(name)))?;
Expand Down
2 changes: 1 addition & 1 deletion src/extension/dublincore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use extension::util::remove_extension_values;
use toxml::{ToXml, WriterExt};

/// The Dublin Core XML namespace.
pub static NAMESPACE: &'static str = "http://purl.org/dc/elements/1.1/";
pub const NAMESPACE: &str = "http://purl.org/dc/elements/1.1/";

/// A Dublin Core element extension.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down
2 changes: 1 addition & 1 deletion src/extension/itunes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub use self::itunes_item_extension::*;
pub use self::itunes_owner::*;

/// The iTunes XML namespace.
pub static NAMESPACE: &'static str = "http://www.itunes.com/dtds/podcast-1.0.dtd";
pub const NAMESPACE: &str = "http://www.itunes.com/dtds/podcast-1.0.dtd";

fn parse_image(map: &mut HashMap<String, Vec<Extension>>) -> Option<String> {
let mut element = match map.remove("image").map(|mut v| v.remove(0)) {
Expand Down
3 changes: 3 additions & 0 deletions src/extension/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ pub mod itunes;
/// Types and methods for [Dublin Core](http://dublincore.org/documents/dces/) extensions.
pub mod dublincore;

/// Types and methods for [Syndication](http://web.resource.org/rss/1.0/modules/syndication/) extensions.
pub mod syndication;

pub(crate) mod util;

/// A map of extension namespace prefixes to local names to elements.
Expand Down
150 changes: 150 additions & 0 deletions src/extension/syndication.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// This file is part of rss.
//
// Copyright © 2015-2017 The rust-syndication Developers
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the MIT License and/or Apache 2.0 License.

use std::collections::HashMap;
use std::fmt;
use std::io::Write;
use std::str::FromStr;

use quick_xml::Error as XmlError;
use quick_xml::Writer;

use extension::Extension;
use toxml::WriterExt;

/// The Syndication XML namespace.
pub const NAMESPACE: &str = "http://purl.org/rss/1.0/modules/syndication/";

/// The unit of time between updates/refreshes.
///
/// Each variant has a lowercase textual form (`"hourly"`, `"daily"`, ...) that is
/// accepted by the `FromStr` implementation and produced by the `Display`
/// implementation in this module.
///
/// The serde derives are enabled together with those on `SyndicationExtension`,
/// which stores an `UpdatePeriod` as a field; without them the crate would not
/// build when the `serde` feature is turned on.
// The variant names are part of the public API, so their upper-case spelling is kept.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UpdatePeriod {
    /// refresh hourly
    HOURLY,
    /// refresh daily
    DAILY,
    /// refresh weekly
    WEEKLY,
    /// refresh monthly
    MONTHLY,
    /// refresh yearly
    YEARLY,
}

impl FromStr for UpdatePeriod {
    type Err = ();

    /// Parses the lowercase period name (`"hourly"`, `"daily"`, `"weekly"`,
    /// `"monthly"` or `"yearly"`). Matching is exact and case-sensitive; any
    /// other input yields `Err(())`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let period = match s {
            "hourly" => UpdatePeriod::HOURLY,
            "daily" => UpdatePeriod::DAILY,
            "weekly" => UpdatePeriod::WEEKLY,
            "monthly" => UpdatePeriod::MONTHLY,
            "yearly" => UpdatePeriod::YEARLY,
            _ => return Err(()),
        };
        Ok(period)
    }
}

impl fmt::Display for UpdatePeriod {
    /// Writes the lowercase period name, i.e. the same text `FromStr` accepts.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let word = match *self {
            UpdatePeriod::HOURLY => "hourly",
            UpdatePeriod::DAILY => "daily",
            UpdatePeriod::WEEKLY => "weekly",
            UpdatePeriod::MONTHLY => "monthly",
            UpdatePeriod::YEARLY => "yearly",
        };
        f.write_str(word)
    }
}

/// An RSS syndication element extension.
///
/// Holds the values of the `updatePeriod`, `updateFrequency` and `updateBase`
/// elements of the syndication namespace. The `Default` implementation in this
/// module yields a daily period, a frequency of `1` and a base of
/// `1970-01-01T00:00+00:00`.
// NOTE(review): the `Builder` derive and `#[builder(...)]` attribute presumably come
// from the `derive_builder` crate used elsewhere in this project — confirm.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, PartialEq, Builder)]
#[builder(setter(into), default)]
pub struct SyndicationExtension {
/// The refresh period for this channel
period: UpdatePeriod,
/// Number of periods between refreshes
// NOTE(review): the RSS 1.0 Syndication module appears to define `updateFrequency`
// as the number of updates within one period rather than the number of periods
// between updates — confirm against the specification and align the wording.
frequency: u32,
/// Timestamp from which the refresh periods are calculated
// Stored verbatim as text; this module does not parse or validate it.
base: String,
}

impl SyndicationExtension {
    /// Retrieve the base timestamp from which the refresh periods are calculated.
    ///
    /// The value is returned exactly as stored; it is not parsed or validated.
    pub fn base(&self) -> &str {
        &self.base
    }

    /// Set the base from which the refresh periods are calculated.
    ///
    /// The text is copied and stored verbatim.
    pub fn set_base(&mut self, base: &str) {
        self.base = base.to_owned();
    }

    /// Retrieve the update frequency (the value of the `updateFrequency` element).
    pub fn frequency(&self) -> u32 {
        self.frequency
    }

    /// Set the update frequency (the value of the `updateFrequency` element).
    pub fn set_frequency(&mut self, frequency: u32) {
        self.frequency = frequency;
    }

    /// Retrieve the refresh period for this channel.
    pub fn period(&self) -> &UpdatePeriod {
        &self.period
    }

    /// Set the refresh period for this channel.
    pub fn set_period(&mut self, period: UpdatePeriod) {
        self.period = period;
    }

    /// Serialises this extension to the nominated writer.
    ///
    /// The element names are qualified with the prefix that `namespaces` (a map of
    /// prefix to namespace URI) binds to the syndication [`NAMESPACE`]. Three text
    /// elements are written: `updatePeriod`, `updateFrequency` and `updateBase`.
    ///
    /// If several prefixes are bound to the syndication namespace, the elements are
    /// written once, using the lexicographically smallest prefix, so the output is
    /// deterministic and contains no duplicated elements. If no prefix is bound to
    /// the namespace, nothing is written and `Ok(())` is returned.
    ///
    /// # Errors
    ///
    /// Returns the underlying `XmlError` if writing any element fails.
    pub fn to_xml<W: Write>(&self, namespaces: &HashMap<String, String>, writer: &mut Writer<W>) -> Result<(), XmlError> {
        // `HashMap` iteration order is unspecified, so pick the smallest matching
        // prefix rather than "whichever comes first".
        let prefix = namespaces
            .iter()
            .filter(|&(_, namespace)| namespace.as_str() == NAMESPACE)
            .map(|(prefix, _)| prefix)
            .min();

        if let Some(prefix) = prefix {
            writer.write_text_element(format!("{}:updatePeriod", prefix), &self.period.to_string())?;
            writer.write_text_element(format!("{}:updateFrequency", prefix), &self.frequency.to_string())?;
            writer.write_text_element(format!("{}:updateBase", prefix), &self.base)?;
        }
        Ok(())
    }
}

impl Default for SyndicationExtension {
    /// Builds an extension describing one update per day, counted from the Unix epoch.
    fn default() -> Self {
        let epoch = "1970-01-01T00:00+00:00".to_owned();
        Self {
            period: UpdatePeriod::DAILY,
            frequency: 1,
            base: epoch,
        }
    }
}

/// Looks up `field` in `map` and hands the text value of its first extension element to `f`.
///
/// `f` is not invoked when the key is absent, when its element list is empty, or
/// when the first element carries no value.
fn with_first_ext_value<'a, F>(map: &'a HashMap<String, Vec<Extension>>, field: &str, f: F)
where
    F: FnOnce(&'a str),
{
    let first_value = map
        .get(field)
        .and_then(|elements| elements.first())
        .and_then(|element| element.value.as_ref());

    if let Some(text) = first_value {
        f(text);
    }
}

impl SyndicationExtension {
/// Creates a `SyndicationExtension` using the specified `HashMap`.
pub fn from_map(map: HashMap<String, Vec<Extension>>) -> Self {
let mut syn = SyndicationExtension::default();

with_first_ext_value(&map, "updatePeriod", |value| syn.period = value.parse().unwrap());
with_first_ext_value(&map, "updateFrequency", |value| syn.frequency = value.parse().unwrap());
with_first_ext_value(&map, "updateBase", |value| syn.base = value.to_owned());

syn
}
}

0 comments on commit baa9b36

Please sign in to comment.