Remove rel="nofollow" for certain domains

JasonBarnabe edited this page Feb 21, 2012 · 1 revision

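This transformer removes the rel="nofollow" attribute that the basic configuration adds, but only from links that use a relative URL or that point to one of a predefined list of domains (or their subdomains). All other links keep rel="nofollow".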

# Sanitize transformer that decides, for each <a> element, whether it should
# carry rel="nofollow".
#
# The rel attribute is removed when the link has no href, is a relative
# (internal) URL, or points at one of +follow_domains+ or a subdomain of one.
# Every other link gets rel="nofollow", replacing any user-supplied rel value.
#
# env - the transformer environment Hash; this reads env[:node_name],
#       env[:node] and env[:config].
#
# Returns nil for non-<a> nodes, otherwise {:node_whitelist => [node]} so the
# calling clean pass leaves the node (and the rel set here) alone.
yes_follow = lambda do |env|
	# Must be lowercase: the href is downcased before it is compared.
	follow_domains = ['yourdomains.com', 'gohere.com']
	return unless env[:node_name] == 'a'

	node = env[:node]
	raw_href = node['href']
	# Only this local copy is normalised; the attribute itself is untouched.
	href = raw_href.downcase.strip unless raw_href.nil?

	follow =
		if href.nil?
			# missing the href, we don't want a rel here
			true
		elsif (authority = href[%r{\A(?:https?:)?[/\\]{2}([^/\\?#]*)}, 1])
			# Absolute http(s) URL or protocol-relative URL ("//host/..."). The
			# authority ends at the first "/", "\", "?" or "#", so text in the
			# query string or fragment can never pass for the host name.
			# Drop any "user:pass@" prefix and ":port" suffix to get the host.
			host = authority.sub(/\A.*@/, '').sub(/:\d*\z/, '')
			# check domain against our list, including subdomains
			follow_domains.any? { |domain| host == domain || host.end_with?(".#{domain}") }
		elsif href =~ Sanitize::REGEX_PROTOCOL
			# some other protocol (mailto:, ftp:, ...): always external
			false
		else
			# internal link
			true
		end

	if follow
		# take out any rel value the user may have provided
		node.delete('rel')
	else
		node['rel'] = 'nofollow'
	end

	# Make a config that allows the rel attribute and does not include this
	# transformer. Everything that is changed is copied first so the caller's
	# config is never mutated; missing keys are tolerated.
	config_allows_rel = env[:config].dup

	attributes = (config_allows_rel[:attributes] || {}).dup
	attributes['a'] = Array(attributes['a']) | ['rel']
	config_allows_rel[:attributes] = attributes

	add_attributes = (config_allows_rel[:add_attributes] || {}).dup
	if add_attributes['a']
		add_attributes['a'] = add_attributes['a'].dup
		add_attributes['a'].delete('rel')
	end
	config_allows_rel[:add_attributes] = add_attributes

	# Array() also copes with a single transformer that is not wrapped in an Array.
	config_allows_rel[:transformers] = Array(config_allows_rel[:transformers]) - [yes_follow]

	Sanitize.clean_node!(node, config_allows_rel)

	# whitelist so the initial clean call doesn't strip the rel
	{:node_whitelist => [node]}
end