Skip to content

Commit

Permalink
Normalize redirect URLs (which removes, e.g., a default port such as :443). Remove more tracking codes.
Browse files (browse the repository at this point in the history)
  • Loading branch information
stefansundin committed Apr 29, 2017
1 parent 5276357 commit e734c84
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions config/initializers/05-string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def short_host
end

def resolve_url(force=false)
url = Addressable::URI.parse(self).normalize!.to_s
url = Addressable::URI.parse(self).normalize.to_s
if !force
dest = @@url_cache[url]
if dest
Expand Down Expand Up @@ -106,7 +106,7 @@ def resolve_url(force=false)
# bad redirect
throw :done
end
redirect_url = Addressable::URI.escape(redirect_url) # Some redirects do not url encode properly, such as http://amzn.to/2aDg49F
redirect_url = Addressable::URI.parse(redirect_url).normalize.to_s # Some redirects do not url encode properly, such as http://amzn.to/2aDg49F
if %w[
://www.youtube.com/das_captcha
://www.nytimes.com/glogin
Expand Down Expand Up @@ -135,21 +135,22 @@ def resolve_url(force=false)
dest = dest.gsub(tracking, "")
end
# Remove youtu.be crap
dest = dest.gsub(/&feature=youtu\.be(?=&|#|$)/, "")
dest = dest.gsub(/(?<=[?&])feature=youtu\.be(?=&|#|$)/, "")
# Remove mysterious prclt tracking code
dest = dest.gsub(/(?:__)?prclt[=-][^&]+/, "")
# Remove utm_ and sc_ codes
dest = dest.gsub(/(?<=[?&])(?:__)?prclt[=-][^&#]+/, "")
# Remove Amazon tracking codes
# https://aws.amazon.com/podcasts/aws-podcast/?utm_content=bufferf4ae0&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer
# https://aws.amazon.com/about-aws/whats-new/2016/09/aws-config-console-now-displays-api-events-associated-with-configuration-changes/?sc_channel=sm&sc_campaign=launch_Config_ead85f34&sc_publisher=tw_go&sc_content=AWS_Config_add_support_for_viewing_CloudTrail_API_events_from_Config_console&sc_geo=globaly
dest = dest.gsub(/(?:utm|sc)_[^&]+/, "")
# https://aws.amazon.com/summits/washington-dc/?trkCampaign=DCSummit2017&trk=sm_twitter&adbsc=social_20170427_71906466&adbid=z123jzf53ojbjbm0l221ez2jtoeqijchx04&adbpl=gp&adbpr=100017971115449920316
dest = dest.gsub(/(?<=[?&])(?:(?:utm|sc)_[a-z]+|adb(?:sc|id|pr|pl)|trk(?:Campaign)?)=[^&#]+/, "")
# Remove #_=_
dest = dest.gsub(/#_=_$/, "")
# Remove #. tracking codes
dest = dest.gsub(/#\..*$/, "")
# Remove unnecessary ampersands (possibly caused by the above)
dest = dest.gsub(/\?&+/, "?")
# Remove trailing ?&#
dest = dest.gsub(/[?&#]+$/, "")
dest = dest.gsub(/[?&#]+($|(?=#))/, "")

if url == dest
# save some space
Expand Down

0 comments on commit e734c84

Please sign in to comment.