# Domains that don't support webmentions. Mainly just the silos.
# Subdomains are automatically blacklisted too.
# TLDs
#
# Google bought .app on 2015-02-25. No announcement on availability yet.
# https://en.wikipedia.org/wiki/.app_%28gTLD%29
# https://www.registry.google/
app
# Google owns .dev and plans to only use it internally.
# http://www.theregister.co.uk/2015/03/13/google_developer_gtld_domain_icann/
dev
# we don't support Tor. (yet! :P)
onion
# these come from the text of tweets. we also pull the expanded URL
# from the tweet entities, so ignore these instead of resolving them.
t.co
t
twitpic.com
# these show up in the categories and tags sections of wordpress.com blog
# posts. superfeedr doesn't filter them out of its 'content' field.
feeds.wordpress.com
stats.wordpress.com
# her comment spam filter rejects all of bridgy's webmentions.
# reported: https://twitter.com/schnarfed/status/558739076996554752
# blog.mahabali.me
# requested by aaronpk since they're in his twitter bio, so otherwise we'd send
# them homepage webmentions whenever he's @-mentioned on twitter.
micropub.net
oauth.net
# these have been down for a while (or always), and their owners are still
# active on silos.
kash.space
# serves big pages, e.g. >4MB, and App Engine prod doesn't get a Content-Length
# header for them. example:
# http://www.scoop.it/t/e-learning-inclusivo/p/4087456888/2017/10/24/editors-choice-where-is-the-humanity-in-the-computer-science-curriculum
scoop.it
24sessions.com
500px.com
abakus-internet-marketing.de
about.me
ador.com
amazon.com
amazon.in
amzn.com
app.net
android.com
appbrain.com
ask.fm
asdf.com
audioscrobbler.com
awriterz.org
backtype.com
battlefield.com
behance.net
belong.io
bit.ly
bitbucket.org
blogger.com
brightkite.com
cdninstagram.com
change.org
claimid.com
cliqset.com
codepen.io
codeschool.com
conferize.com
connect.me
coursera.org
dailymotion.com
del.icio.us
delicious.com
deviantart.com
diasp.org
digg.com
disqus.com
dopplr.com
dribbble.com
drupal.org
eat.ly
edison.com
elgg.org
ello.co
etsy.com
evernote.com
example.com
facebook.com
fb.com
feedburner.com
ffffound.com
findings.com
flattr.com
flickr.com
flipboard.com
foursquare.com
friendfeed.com
friendster.com
getglue.com
getsatisfaction.com
genius.com
ggpht.com
github.com
gitorious.org
gmail.com
gnolia.com
godudu.com
goodreads.com
google.com
gowalla.com
gu.com
guardian.co.uk
gumroad.com
here.com
hootsuite.com
huffduffer.com
hulu.com
hyves.nl
identi.ca
igraal.com
ink361.com
instagr.am
instagram.com
intensedebate.com
ipersonic.de
iwantmyname.com
jaiku.com
jamendo.com
joindiaspora.com
keybase.io
khanacademy.org
klout.com
kmworld.com
lanyrd.com
last.fm
lastfm.de
linkd.in
linkedin.com
live.com
livestream.com
medium.com
meetin.gs
meetup.com
mixcloud.com
mixx.com
myopenid.com
myspace.com
myvideo.de
news.ycombinator.com
openstreetmap.org
orkut.com
pandora.com
paper.li
patreon.com
personalinfocloud.com
# links here are often tweeted along with '@T COM', which mentions @t
pet.t-com.ne.jp
photobucket.com
photoshop.com
pinboard.in
pinterest.com
plancast.com
plaxo.com
plazes.com
plinky.com
plurk.com
podhost.de
podspot.de
pownce.com
prezi.com
qik.com
quora.com
qype.com
raptr.com
rdio.com
readernaut.com
readlists.com
readmill.com
rebelmouse.com
reddit.com
researchgate.net
rhapsody.com
scribd.com
secondlife.com
seesmic.com
shelfari.com
shutterfly.com
skitch.com
slideshare.net
snookerblog.de
so.cl
socialmedian.com
soundcloud.com
speakerdeck.com
spotify.com
stackexchange.com
stackoverflow.com
steamcommunity.com
striking.ly
strikingly.com
stumbleupon.com
technorati.com
tent.is
theonion.com
thesession.org
tinyletter.com
tribe.net
tripit.com
tweakers.net
twit.tv
twitch.com
twitch.tv
twitter.com
twittercounter.com
typepad.com
ubuntu.com
userscripts.org
ustream.tv
v.gd
vanderwal.net
viadeo.com
viddler.com
vimeo.com
vodspot.tv
wikipedia.org
xda-developers.com
xfire.com
xing.com
yahoo.com
ycombinator.com
yelp.com
yelp.de
youtu.be
youtube.com
wordpress.org
# URL shortener domains. Gratefully stolen from http://longurl.org/services
# See also: http://uribl.com/, https://github.com/piwik/referrer-spam-blacklist
0rz.tw
1link.in
1url.com
2.gp
2big.at
2tu.us
3.ly
307.to
4ms.me
4sq.com
4url.cc
6url.com
7.ly
a.gg
a.nf
aa.cx
abcurl.net
ad.vu
adf.ly
adjix.com
afx.cc
all.fuseurl.com
alturl.com
amzn.to
ar.gy
arst.ch
atu.ca
azc.cc
b23.ru
b2l.me
bacn.me
bcool.bz
binged.it
bit.ly
bizj.us
bloat.me
bravo.ly
bsa.ly
budurl.com
canurl.com
chilp.it
chzb.gr
cl.lk
cl.ly
clck.ru
cli.gs
cliccami.info
clickthru.ca
clop.in
conta.cc
cort.as
cot.ag
crks.me
ctvr.us
cutt.us
dai.ly
decenturl.com
dfl8.me
digbig.com
digg.com
disq.us
dld.bz
dlvr.it
do.my
doiop.com
dopen.us
easyuri.com
easyurl.net
eepurl.com
eweri.com
fa.by
fav.me
fb.me
fbshare.me
ff.im
fff.to
fire.to
firsturl.de
firsturl.net
flic.kr
flq.us
fly2.ws
fon.gs
freak.to
fuseurl.com
fuzzy.to
fwd4.me
fwib.net
g.ro.lt
gizmo.do
gl.am
go.9nl.com
go.ign.com
go.usa.gov
goo.gl
goshrink.com
gurl.es
hex.io
hiderefer.com
hmm.ph
href.in
hsblinks.com
htxt.it
huff.to
hulu.com
hurl.me
hurl.ws
icanhaz.com
idek.net
ilix.in
is.gd
its.my
ix.lt
j.mp
jijr.com
kl.am
klck.me
korta.nu
krunchd.com
l9k.net
lat.ms
liip.to
liltext.com
linkbee.com
linkbun.ch
liurl.cn
ln-s.net
ln-s.ru
lnk.gd
lnk.ms
lnkd.in
lnkurl.com
lru.jp
lt.tl
lurl.no
macte.ch
mash.to
merky.de
migre.me
miniurl.com
minurl.fr
mke.me
moby.to
moourl.com
mrte.ch
myloc.me
myurl.in
n.pr
nbc.co
nblo.gs
nn.nf
not.my
notlong.com
nsfw.in
nutshellurl.com
nxy.in
nyti.ms
o-x.fr
oc1.us
om.ly
omf.gd
omoikane.net
on.cnn.com
on.mktw.net
onforb.es
orz.se
ow.ly
ping.fm
pli.gs
pnt.me
politi.co
post.ly
pp.gg
profile.to
ptiturl.com
pub.vitrue.com
qlnk.net
qte.me
qu.tc
qy.fi
r.im
rb6.me
read.bi
readthis.ca
reallytinyurl.com
redir.ec
redirects.ca
redirx.com
retwt.me
ri.ms
rickroll.it
riz.gd
rt.nu
ru.ly
rubyurl.com
rurl.org
rww.tw
s4c.in
s7y.us
safe.mn
sameurl.com
sdut.us
shar.es
shink.de
shorl.com
short.ie
short.to
shortlinks.co.uk
shorturl.com
shout.to
show.my
shrinkify.com
shrinkr.com
shrt.fr
shrt.st
shrten.com
shrunkin.com
simurl.com
slate.me
smallr.com
smsh.me
smurl.name
sn.im
snipr.com
snipurl.com
snurl.com
sp2.ro
spedr.com
srnk.net
srs.li
starturl.com
su.pr
surl.co.uk
surl.hu
t.cn
t.co
t.lh.com
ta.gd
tbd.ly
tcrn.ch
tgr.me
tgr.ph
tighturl.com
tiniuri.com
tiny.cc
tiny.ly
tiny.pl
tinylink.in
tinyuri.ca
tinyurl.com
tk.
tl.gd
tmi.me
tnij.org
tnw.to
tny.com
to.
to.ly
togoto.us
totc.us
toysr.us
tpm.ly
tr.im
tra.kz
trunc.it
twhub.com
twirl.at
twitclicks.com
twitterurl.net
twitterurl.org
twiturl.de
twurl.cc
twurl.nl
u.mavrev.com
u.nu
u76.org
ub0.cc
ulu.lu
updating.me
ur1.ca
url.az
url.co.uk
url.ie
url360.me
url4.eu
urlborg.com
urlbrief.com
urlcover.com
urlcut.com
urlenco.de
urli.nl
urls.im
urlshorteningservicefortwitter.com
urlx.ie
urlzen.com
usat.ly
use.my
vb.ly
vgn.am
vl.am
vm.lc
w55.de
wapo.st
wapurl.co.uk
wipi.es
wp.me
x.vu
xr.com
xrl.in
xrl.us
xurl.es
xurl.jp
y.ahoo.it
yatuc.com
ye.pe
yep.it
yfrog.com
yhoo.it
yiyd.com
youtu.be
yuarel.com
z0p.de
zi.ma
zi.mu
zipmyurl.com
zud.me
zurl.ws
zz.gd
zzang.kr
›.ws
✩.ws
✿.ws
❥.ws
➔.ws
➞.ws
➡.ws
➨.ws
➯.ws
➹.ws
➽.ws
# top 500 web sites by incoming links by domain, as of jan 2014
# gratefully stolen from https://moz.com/top500
facebook.com
twitter.com
google.com
youtube.com
wordpress.org
adobe.com
# blogspot.com
wikipedia.org
linkedin.com
# wordpress.com
yahoo.com
amazon.com
flickr.com
pinterest.com
# tumblr.com
w3.org
apple.com
myspace.com
vimeo.com
microsoft.com
youtu.be
qq.com
digg.com
baidu.com
stumbleupon.com
addthis.com
statcounter.com
feedburner.com
miibeian.gov.cn
delicious.com
nytimes.com
reddit.com
weebly.com
bbc.co.uk
blogger.com
msn.com
macromedia.com
goo.gl
instagram.com
gov.uk
icio.us
yandex.ru
cnn.com
webs.com
google.de
t.co
livejournal.com
imdb.com
mail.ru
jimdo.com
sourceforge.net
go.com
tinyurl.com
vk.com
google.co.jp
fc2.com
free.fr
joomla.org
creativecommons.org
typepad.com
networkadvertising.org
technorati.com
sina.com.cn
hugedomains.com
about.com
theguardian.com
yahoo.co.jp
nih.gov
huffingtonpost.com
google.co.uk
mozilla.org
51.la
aol.com
ebay.com
ameblo.jp
wsj.com
europa.eu
taobao.com
bing.com
rambler.ru
guardian.co.uk
tripod.com
godaddy.com
issuu.com
gnu.org
geocities.com
slideshare.net
wix.com
mapquest.com
washingtonpost.com
homestead.com
reuters.com
163.com
photobucket.com
forbes.com
clickbank.net
weibo.com
etsy.com
amazon.co.uk
dailymotion.com
soundcloud.com
usatoday.com
yelp.com
cnet.com
posterous.com
telegraph.co.uk
archive.org
google.fr
constantcontact.com
phoca.cz
phpbb.com
latimes.com
e-recht24.de
rakuten.co.jp
amazon.de
opera.com
miitbeian.gov.cn
php.net
scribd.com
bbb.org
parallels.com
ning.com
dailymail.co.uk
cdc.gov
sohu.com
wikimedia.org
deviantart.com
# mit.edu
sakura.ne.jp
altervista.org
addtoany.com
time.com
google.it
# stanford.edu
live.com
alibaba.com
squidoo.com
# harvard.edu
gravatar.com
histats.com
nasa.gov
npr.org
ca.gov
eventbrite.com
wired.com
amazon.co.jp
nbcnews.com
# blog.com
amazonaws.com
bloomberg.com
narod.ru
blinklist.com
imageshack.us
kickstarter.com
hatena.ne.jp
nifty.com
angelfire.com
google.es
ocn.ne.jp
over-blog.com
dedecms.com
google.ca
a8.net
weather.com
pbs.org
ibm.com
cpanel.net
prweb.com
bandcamp.com
barnesandnoble.com
mozilla.com
noaa.gov
goo.ne.jp
comsenz.com
xrea.com
cbsnews.com
foxnews.com
discuz.net
eepurl.com
businessweek.com
# berkeley.edu
newsvine.com
bluehost.com
geocities.jp
loc.gov
yolasite.com
apache.org
mashable.com
usda.gov
nationalgeographic.com
whitehouse.gov
tripadvisor.com
ted.com
sfgate.com
biglobe.ne.jp
epa.gov
vkontakte.ru
oracle.com
seesaa.net
examiner.com
# cornell.edu
hp.com
nps.gov
disqus.com
alexa.com
mysql.com
house.gov
sphinn.com
boston.com
techcrunch.com
un.org
# squarespace.com
icq.com
freewebs.com
ezinearticles.com
ucoz.ru
independent.co.uk
mediafire.com
xinhuanet.com
google.nl
reverbnation.com
imgur.com
irs.gov
webnode.com
wunderground.com
bizjournals.com
who.int
soup.io
cloudflare.com
people.com.cn
ustream.tv
senate.gov
cbslocal.com
ycombinator.com
opensource.org
spiegel.de
oaic.gov.au
nature.com
businessinsider.com
drupal.org
last.fm
privacy.gov.au
skype.com
wikia.com
about.me
webmd.com
youku.com
gmpg.org
fda.gov
redcross.org
github.com
cbc.ca
# umich.edu
jugem.jp
shinystat.com
google.com.br
ifeng.com
mac.com
wiley.com
discovery.com
topsy.com
paypal.com
google.cn
surveymonkey.com
moonfruit.com
dropbox.com
exblog.jp
google.pl
prnewswire.com
ft.com
uol.com.br
behance.net
goodreads.com
netvibes.com
auda.org.au
marketwatch.com
ed.gov
networksolutions.com
state.gov
sitemeter.com
liveinternet.ru
ftc.gov
census.gov
quantcast.com
economist.com
nydailynews.com
zdnet.com
cafepress.com
ow.ly
meetup.com
netscape.com
chicagotribune.com
theatlantic.com
google.com.au
1688.com
skyrock.com
list-manage.com
pagesperso-orange.fr
cdbaby.com
friendfeed.com
ehow.com
patch.com
# upenn.edu
engadget.com
diigo.com
com.com
slashdot.org
# washington.edu
# columbia.edu
nhs.uk
abc.net.au
elegantthemes.com
# utexas.edu
# yale.edu
marriott.com
bigcartel.com
# ucla.edu
usgs.gov
jigsy.com
hexun.com
hubpages.com
slate.com
purevolume.com
# umn.edu
bloglines.com
so-net.ne.jp
wikispaces.com
cargocollective.com
howstuffworks.com
plala.or.jp
infoseek.co.jp
jiathis.com
usnews.com
xing.com
flavors.me
desdev.cn
hc360.com
usa.gov
edublogs.org
lycos.com
# wisc.edu
thetimes.co.uk
state.tx.us
example.com
shareasale.com
biblegateway.com
is.gd
yellowbook.com
samsung.com
businesswire.com
g.co
dion.ne.jp
dagondesign.com
theglobeandmail.com
booking.com
storify.com
salon.com
ucoz.com
gizmodo.com
# psu.edu
smh.com.au
reference.com
sun.com
unicef.org
devhub.com
artisteer.com
unesco.org
istockphoto.com
answers.com
trellian.com
cocolog-nifty.com
i2i.jp
t-online.de
intel.com
1und1.de
ebay.co.uk
sciencedaily.com
paginegialle.it
ask.com
springer.com
canalblog.com
timesonline.co.uk
de.vu
deliciousdays.com
smugmug.com
wufoo.com
globo.com
# cmu.edu
domainmarket.com
odnoklassniki.ru
twitpic.com
ovh.net
home.pl
naver.com
google.ru
# si.edu
newyorker.com
blogs.com
sciencedirect.com
hibu.com
hud.gov
hhs.gov
dmoz.org
dot.gov
cyberchimps.com
google.com.hk
jalbum.net
craigslist.org
zimbio.com
chronoengine.com
cnbc.com
# uiuc.edu
vistaprint.com
symantec.com
prlog.org
360.cn
indiatimes.com
mtv.com
webeden.co.uk
java.com
cisco.com
japanpost.jp
4shared.com
# github.io
mayoclinic.com
studiopress.com
admin.ch
# virginia.edu
printfriendly.com
mlb.com
omniture.com
simplemachines.org
dell.com
accuweather.com
# princeton.edu
fotki.com
comcast.net
chron.com
# nyu.edu
# wp.com
merriam-webster.com
nba.com
shop-pro.jp
lulu.com
furl.net
indiegogo.com
buzzfeed.com
tuttocitta.it
ox.ac.uk
mapy.cz
army.mil
csmonitor.com
bravesites.com
# tamu.edu
rediff.com
toplist.cz
yellowpages.com
va.gov
tiny.cc
netlog.com
elpais.com
oakley.com
multiply.com
tmall.com
hostgator.com
nymag.com
fema.gov
blogtalkradio.com
china.com.cn
unblog.fr
fastcompany.com
# earthlink.net
vinaora.com
# msu.edu
aboutads.info
# ucsd.edu
sogou.com
seattletimes.com
# dyndns.org
123-reg.co.uk
sbwire.com
tinypic.com
acquirethisname.com
shutterfly.com
walmart.com
# pen.io
# arizona.edu
woothemes.com
scientificamerican.com
themeforest.net
spotify.com
cam.ac.uk
# unc.edu
arstechnica.com
hao123.com
# illinois.edu
bloglovin.com
nsw.gov.au
ihg.com
pcworld.com