Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100755 302 lines (240 sloc) 7.861 kb
6d6feab Raphael Geissert Initial import
authored
1 #!/usr/bin/perl -w
2
3 ####################
4 # Copyright (C) 2011 by Raphael Geissert <geissert@debian.org>
5 #
6 # This file is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This file is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this file If not, see <http://www.gnu.org/licenses/>.
18 #
19 # On Debian systems, the complete text of the GNU General
20 # Public License 3 can be found in '/usr/share/common-licenses/GPL-3'.
21 ####################
22
23 use strict;
24 use warnings;
25
26 # Usage: redir.pl?mirror=(archive|backports|...)&url=/debian/dists/sid/...
27 # Test (make sure -debug1 is set below):
28 # ./redir.pl mirror=...
29 # REMOTE_ADDR=1.2.3.4 ./redir.pl mirror=...
30 use CGI::Simple qw(-debug1);
19a650b Raphael Geissert Disable POST data and explicitly disable file uploads
authored
31 $CGI::Simple::POST_MAX = 0;
32 $CGI::Simple::DISABLE_UPLOADS = 1;
6d6feab Raphael Geissert Initial import
authored
33 my $q = new CGI::Simple;
34
27f23fb Raphael Geissert abort POST requests ASAP
authored
35 # abort POST requests ASAP
36 if ($q->request_method() eq 'POST') {
37 print "Status: 501 Not Implemented\r\n\r\n";
38 exit;
39 }
40
6d6feab Raphael Geissert Initial import
authored
41 use Geo::IP;
42 use Storable qw(retrieve);
65c386e Raphael Geissert Randomly choose a mirror from among the valid candidates
authored
43 use List::Util qw(shuffle);
6d6feab Raphael Geissert Initial import
authored
44
9e7b920 Raphael Geissert Determine the priority based on geo distance
authored
45 our $metric = ''; # alt: taxicab (default) | euclidean
35d60ea Raphael Geissert Allow disabling the inclusion of extra headers
authored
46 our $xtra_headers = 1;
723442f Raphael Geissert Also make link rels optional
authored
47 my $add_links = 1;
3f24721 Raphael Geissert Allow disabling the random sort order too
authored
48 my $random_sort = 1;
6d6feab Raphael Geissert Initial import
authored
49 my $db_store = 'db';
aaa365e Raphael Geissert Ignore disabled mirrors
authored
50 our $mirror_type = 'archive';
6d6feab Raphael Geissert Initial import
authored
51
6be728c Raphael Geissert Add a list of "nearby" continents, in case there's no match
authored
52 my %nearby_continents = (
53 'AF' => [ qw(EU AS) ],
54 'SA' => [ qw(NA EU) ],
55 'OC' => [ qw(NA AS) ],
56 'AS' => [ qw(EU) ],
57 'NA' => [ qw(EU) ],
58 'EU' => [ qw(NA) ],
59 );
60
6d6feab Raphael Geissert Initial import
authored
61 my $g_city = Geo::IP->open('geoip/GeoLiteCity.dat', GEOIP_MMAP_CACHE);
62 my $g_as = Geo::IP->open('geoip/GeoIPASNum.dat', GEOIP_MMAP_CACHE);
63
64 # TODO: support IPv6. Basic implementation is to lookup the country in
65 # the geoipv6 db and wish the user our best
66
67 sub fullfils_request($$$$);
9e7b920 Raphael Geissert Determine the priority based on geo distance
authored
68 sub calculate_distance($$$$);
d409639 Raphael Geissert Select a subset of hosts from the main set based on stddevp
authored
69 sub stddevp;
35d60ea Raphael Geissert Allow disabling the inclusion of extra headers
authored
70 sub print_xtra($$);
b1963f7 Raphael Geissert Some cleanup
authored
71 sub find_arch($@);
6d6feab Raphael Geissert Initial import
authored
72
788ac30 Raphael Geissert Recognise architecture of cdimage requests
authored
73 my @ARCHITECTURES_REGEX;
74
75 $mirror_type = $q->param('mirror') || 'archive';
1e4594b Raphael Geissert Accept 'cd' as an equivalent to 'cdimage'
authored
76 $mirror_type = 'cdimage' if ($mirror_type eq 'cd');
788ac30 Raphael Geissert Recognise architecture of cdimage requests
authored
77
78 if ($mirror_type eq 'cdimage') {
79 @ARCHITECTURES_REGEX = (
80 qr'^(?:\d|current)[^/]*/([^/]+)/',
81 );
82 } else {
83 @ARCHITECTURES_REGEX = (
84 qr'^dists/(?:[^/]+/){2,3}binary-([^/]+)/',
09095a5 Raphael Geissert Recognise udebs in arch-detection regexes
authored
85 qr'^pool/(?:[^/]+/){3,4}.+_([^.]+)\.u?deb$',
788ac30 Raphael Geissert Recognise architecture of cdimage requests
authored
86 qr'^dists/(?:[^/]+/){1,2}Contents-([^.]+)\.gz$',
87 qr'^indices/files(?:/components)?/arch-([^.]+).*$',
88 qr'^dists/(?:[^/]+/){2}installer-([^/]+)/',
89 );
90 }
6d6feab Raphael Geissert Initial import
authored
91
92 our $db = retrieve($db_store);
93
94 ####
95 my $IP = $ENV{'REMOTE_ADDR'} || '127.0.0.1';
96 # for testing purposes
399f2bf Raphael Geissert Make it work when connecting locally
authored
97 $IP = '8.8.8.8' if ($IP eq '127.0.1.1');
98 $IP = `wget -O- -q http://myip.dnsomatic.com/` if ($IP eq '127.0.0.1');
6d6feab Raphael Geissert Initial import
authored
99 ####
100
101 # Make a shortcut
102 my $rdb = $db->{$mirror_type} or die("Invalid mirror type: $mirror_type");
103
104 my $ipv6 = ($IP =~ m/::/);
105 my $r = $g_city->record_by_addr($IP);
106 my ($as) = split /\s+/, ($g_as->org_by_addr($IP) || '');
107 my $arch = '';
108
604223a Raphael Geissert $as, even if empty, is always defined
authored
109 if (!defined($r)) {
8939004 Raphael Geissert Throw an error if the geoip lookup failed
authored
110 # sadly, we really depend on it. throw an error for now
b4168d5 Raphael Geissert In fact, let's better print the status ourselves
authored
111 print "Status: 501 Not Implemented\r\n\r\n";
8939004 Raphael Geissert Throw an error if the geoip lookup failed
authored
112 exit;
3bcccea Raphael Geissert And of course, add our missing redirection status code
authored
113 } else {
114 print "Status: 307 Temporary Redirect\r\n";
6d6feab Raphael Geissert Initial import
authored
115 }
116
8939004 Raphael Geissert Throw an error if the geoip lookup failed
authored
117 print_xtra('IP', $IP);
35d60ea Raphael Geissert Allow disabling the inclusion of extra headers
authored
118 print_xtra('AS', $as);
6d6feab Raphael Geissert Initial import
authored
119
120 my $url = $q->param('url') || '';
79275bd Raphael Geissert Be more tolerant on the input URL
authored
121 $url =~ s,//,/,g;
122 $url =~ s,^/,,;
53473a3 Raphael Geissert No file name contains the space character, replace it with +
authored
123 $url =~ s, ,+,g;
79275bd Raphael Geissert Be more tolerant on the input URL
authored
124
35d60ea Raphael Geissert Allow disabling the inclusion of extra headers
authored
125 print_xtra('URL', $url);
6d6feab Raphael Geissert Initial import
authored
126
b1963f7 Raphael Geissert Some cleanup
authored
127 $arch = find_arch($url, @ARCHITECTURES_REGEX);
4266c44 Raphael Geissert Assume i386 when multi-arch (cd image) is requested
authored
128 $arch = 'i386' if ($arch eq 'multi-arch');
35d60ea Raphael Geissert Allow disabling the inclusion of extra headers
authored
129 print_xtra('Arch', $arch);
6d6feab Raphael Geissert Initial import
authored
130
131 my $host = '';
65c386e Raphael Geissert Randomly choose a mirror from among the valid candidates
authored
132 my %hosts;
a671833 Raphael Geissert Cleanup while adding more info bits
authored
133 my $match_type = '';
6d6feab Raphael Geissert Initial import
authored
134
135 # match by AS
136 foreach my $match (@{$rdb->{'AS'}{$as}}) {
137 my $mirror = $db->{'all'}{$match};
138
139 next unless fullfils_request($rdb, $match, $arch, $ipv6);
140
141 $host = $mirror->{'site'}.$mirror->{$mirror_type.'-http'};
65c386e Raphael Geissert Randomly choose a mirror from among the valid candidates
authored
142 $hosts{$host} = 1;
a671833 Raphael Geissert Cleanup while adding more info bits
authored
143 $match_type = 'AS';
6d6feab Raphael Geissert Initial import
authored
144 }
145
35d60ea Raphael Geissert Allow disabling the inclusion of extra headers
authored
146 print_xtra('Country', $r->country_code);
6d6feab Raphael Geissert Initial import
authored
147 # match by country
a671833 Raphael Geissert Cleanup while adding more info bits
authored
148 if (!$match_type) {
6d6feab Raphael Geissert Initial import
authored
149 foreach my $match (keys %{$rdb->{'country'}{$r->country_code}}) {
150 my $mirror = $db->{'all'}{$match};
151
152 next unless fullfils_request($rdb, $match, $arch, $ipv6);
153
154 $host = $mirror->{'site'}.$mirror->{$mirror_type.'-http'};
9e7b920 Raphael Geissert Determine the priority based on geo distance
authored
155 $hosts{$host} = calculate_distance($mirror->{'lon'}, $mirror->{'lat'},
156 $r->longitude, $r->latitude);
a671833 Raphael Geissert Cleanup while adding more info bits
authored
157 $match_type = 'country';
6d6feab Raphael Geissert Initial import
authored
158 }
159 }
160
35d60ea Raphael Geissert Allow disabling the inclusion of extra headers
authored
161 print_xtra('Continent', $r->continent_code);
062e6d8 Raphael Geissert Add a per-continent index
authored
162 # match by continent
a671833 Raphael Geissert Cleanup while adding more info bits
authored
163 if (!$match_type) {
6be728c Raphael Geissert Add a list of "nearby" continents, in case there's no match
authored
164 my @continents = ($r->continent_code, @{$nearby_continents{$r->continent_code}});
062e6d8 Raphael Geissert Add a per-continent index
authored
165
6be728c Raphael Geissert Add a list of "nearby" continents, in case there's no match
authored
166 for my $continent (@continents) {
031fe4e Raphael Geissert Only check the nearby continents if necessary
authored
167 last if ($match_type);
6be728c Raphael Geissert Add a list of "nearby" continents, in case there's no match
authored
168 foreach my $match (keys %{$rdb->{'continent'}{$continent}}) {
169 my $mirror = $db->{'all'}{$match};
062e6d8 Raphael Geissert Add a per-continent index
authored
170
6be728c Raphael Geissert Add a list of "nearby" continents, in case there's no match
authored
171 next unless fullfils_request($rdb, $match, $arch, $ipv6);
172
173 $host = $mirror->{'site'}.$mirror->{$mirror_type.'-http'};
174 $hosts{$host} = calculate_distance($mirror->{'lon'}, $mirror->{'lat'},
175 $r->longitude, $r->latitude);
176
177 if ($continent eq $r->continent_code) {
178 $match_type = 'continent';
179 } else {
180 $match_type = 'nearby-continent';
181 }
182 }
062e6d8 Raphael Geissert Add a per-continent index
authored
183 }
184 }
185
6d6feab Raphael Geissert Initial import
authored
186 # something went awry, we don't know how to handle this user, we failed
187 # let's make another attempt:
a671833 Raphael Geissert Cleanup while adding more info bits
authored
188 if (!$match_type && $mirror_type eq 'archive') {
73da606 Raphael Geissert Make the cdn.d.n fallback still work
authored
189 $hosts{'cdn.debian.net/debian/'} = 1;
a671833 Raphael Geissert Cleanup while adding more info bits
authored
190 $match_type = 'catch-all';
6d6feab Raphael Geissert Initial import
authored
191 }
192
193 # TODO: if ($host eq '') { not a request for archive, but we don't know
194 # where we should redirect the user to }
195
d409639 Raphael Geissert Select a subset of hosts from the main set based on stddevp
authored
196 my @sorted_hosts = sort { $hosts{$a} <=> $hosts{$b} } keys %hosts;
197 my @close_hosts;
198 my $dev = stddevp(values %hosts);
199
200 # Closest host (or one of many), to use as the base distance
201 $host = $sorted_hosts[0];
202
35d60ea Raphael Geissert Allow disabling the inclusion of extra headers
authored
203 print_xtra('Std-Dev', $dev);
204 print_xtra('Population', scalar(@sorted_hosts));
205 print_xtra('Closest-Distance', $hosts{$host});
d409639 Raphael Geissert Select a subset of hosts from the main set based on stddevp
authored
206
207 for my $h (@sorted_hosts) {
208 # NOTE: this might need some additional work, as we should probably
209 # guarantee a certain amount of alt hosts to choose from
210 if (($hosts{$h} - $hosts{$host}) <= $dev) {
211 push @close_hosts, $h;
d5e016a Raphael Geissert Break a loop as soon as possible
authored
212 } else {
213 # the list is sorted, if we didn't accept this one won't accept
214 # the next
215 last;
d409639 Raphael Geissert Select a subset of hosts from the main set based on stddevp
authored
216 }
217 }
218
3f24721 Raphael Geissert Allow disabling the random sort order too
authored
219 $host = (shuffle (@close_hosts))[0]
220 if ($random_sort);
35d60ea Raphael Geissert Allow disabling the inclusion of extra headers
authored
221 print_xtra('Distance', $hosts{$host});
222 print_xtra('Match-Type', $match_type);
fa1173d Raphael Geissert Add RFC6249-like link rels
authored
223 print "Location: http://".$host.$url."\r\n";
224
723442f Raphael Geissert Also make link rels optional
authored
225 if ($add_links) {
226 # RFC6249-like link rels
227 # A client strictly adhering to the RFC would ignore these since we
228 # don't provide a digest, and we wont.
229 for my $host (@close_hosts) {
230 my $priority = $hosts{$host};
9e7b920 Raphael Geissert Determine the priority based on geo distance
authored
231
723442f Raphael Geissert Also make link rels optional
authored
232 $priority *= 100;
65d5902 Raphael Geissert Priority 0 is invalid as per the RFC
authored
233 $priority = 1 if ($priority == 0);
723442f Raphael Geissert Also make link rels optional
authored
234 $priority = sprintf("%.0f", $priority);
9e7b920 Raphael Geissert Determine the priority based on geo distance
authored
235
723442f Raphael Geissert Also make link rels optional
authored
236 print "Link: http://".$host.$url."; rel=duplicate; pri=$priority\r\n";
237 }
fa1173d Raphael Geissert Add RFC6249-like link rels
authored
238 }
239
240 print "\r\n";
6d6feab Raphael Geissert Initial import
authored
241
242 exit;
243
244 sub fullfils_request($$$$) {
245 my ($rdb, $id, $arch, $ipv6) = @_;
246
247 my $mirror = $db->{'all'}{$id};
248
aaa365e Raphael Geissert Ignore disabled mirrors
authored
249 return 0 if (exists($mirror->{$mirror_type.'-disabled'}));
250
6d6feab Raphael Geissert Initial import
authored
251 return 0 if ($ipv6 && !exists($rdb->{'ipv6'}{$id}));
252
253 return 0 if ($arch ne '' && !exists($rdb->{'arch'}{$arch}{$id}) && !exists($rdb->{'arch'}{'any'}{$id}));
254
aaa365e Raphael Geissert Ignore disabled mirrors
authored
255 return 0 if ($arch ne '' && exists($mirror->{$mirror_type.'-'.$arch.'-disabled'}));
256
6d6feab Raphael Geissert Initial import
authored
257 return 1;
258 }
9e7b920 Raphael Geissert Determine the priority based on geo distance
authored
259
260 sub calculate_distance($$$$) {
261 my ($x1, $y1, $x2, $y2) = @_;
262
263 if ($metric eq 'euclidean') {
264 return sqrt(($x1-$x2)**2 + ($y1-$y2)**2);
265 } else {
7e3a19f Raphael Geissert Correct the taxicab formula
authored
266 return (abs($x1-$x2) + abs($y1-$y2));
9e7b920 Raphael Geissert Determine the priority based on geo distance
authored
267 }
268 }
d409639 Raphael Geissert Select a subset of hosts from the main set based on stddevp
authored
269
270 sub stddevp {
271 my ($avg, $var, $stddev) = (0, 0, 0);
272 local $_;
273
274 for (@_) {
275 $avg += $_;
276 }
277 $avg /= scalar(@_);
278
279 for (@_) {
280 $var += $_**2;
281 }
282 $var /= scalar(@_);
283 $var -= $avg**2;
284
285 $stddev = sqrt($var);
1975793 Raphael Geissert Add a return for consistency
authored
286 return $stddev;
d409639 Raphael Geissert Select a subset of hosts from the main set based on stddevp
authored
287 }
35d60ea Raphael Geissert Allow disabling the inclusion of extra headers
authored
288
289 sub print_xtra($$) {
290 print "X-$_[0]: $_[1]\r\n"
291 if ($xtra_headers);
292 }
b1963f7 Raphael Geissert Some cleanup
authored
293
294 sub find_arch($@) {
295 my $url = shift;
296 local $_;
297
298 foreach (@_) {
299 return $1 if ($url =~ m/$_/);
300 }
301 return '';
302 }
Something went wrong with that request. Please try again.