Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100755 289 lines (228 sloc) 7.448 kb
6d6feab @rgeissert Initial import
authored
1 #!/usr/bin/perl -w
2
3 ####################
4 # Copyright (C) 2011 by Raphael Geissert <geissert@debian.org>
5 #
6 # This file is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This file is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this file If not, see <http://www.gnu.org/licenses/>.
18 #
19 # On Debian systems, the complete text of the GNU General
20 # Public License 3 can be found in '/usr/share/common-licenses/GPL-3'.
21 ####################
22
23 use strict;
24 use warnings;
25
26 # Usage: redir.pl?mirror=(archive|backports|...)&url=/debian/dists/sid/...
27 # Test (make sure -debug1 is set below):
28 # ./redir.pl mirror=...
29 # REMOTE_ADDR=1.2.3.4 ./redir.pl mirror=...
30 use CGI::Simple qw(-debug1);
31 my $q = new CGI::Simple;
32
33 use Geo::IP;
34 use Storable qw(retrieve);
65c386e @rgeissert Randomly choose a mirror from among the valid candidates
authored
35 use List::Util qw(shuffle);
6d6feab @rgeissert Initial import
authored
36
9e7b920 @rgeissert Determine the priority based on geo distance
authored
37 our $metric = ''; # alt: taxicab (default) | euclidean
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
38 our $xtra_headers = 1;
723442f @rgeissert Also make link rels optional
authored
39 my $add_links = 1;
3f24721 @rgeissert Allow disabling the random sort order too
authored
40 my $random_sort = 1;
6d6feab @rgeissert Initial import
authored
41 my $db_store = 'db';
aaa365e @rgeissert Ignore disabled mirrors
authored
42 our $mirror_type = 'archive';
6d6feab @rgeissert Initial import
authored
43
6be728c @rgeissert Add a list of "nearby" continents, in case there's no match
authored
44 my %nearby_continents = (
45 'AF' => [ qw(EU AS) ],
46 'SA' => [ qw(NA EU) ],
47 'OC' => [ qw(NA AS) ],
48 'AS' => [ qw(EU) ],
49 'NA' => [ qw(EU) ],
50 'EU' => [ qw(NA) ],
51 );
52
6d6feab @rgeissert Initial import
authored
53 my $g_city = Geo::IP->open('geoip/GeoLiteCity.dat', GEOIP_MMAP_CACHE);
54 my $g_as = Geo::IP->open('geoip/GeoIPASNum.dat', GEOIP_MMAP_CACHE);
55
56 # TODO: support IPv6. Basic implementation is to lookup the country in
57 # the geoipv6 db and wish the user our best
58
59 sub fullfils_request($$$$);
9e7b920 @rgeissert Determine the priority based on geo distance
authored
60 sub calculate_distance($$$$);
d409639 @rgeissert Select a subset of hosts from the main set based on stddevp
authored
61 sub stddevp;
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
62 sub print_xtra($$);
b1963f7 @rgeissert Some cleanup
authored
63 sub find_arch($@);
6d6feab @rgeissert Initial import
authored
64
788ac30 @rgeissert Recognise architecture of cdimage requests
authored
65 my @ARCHITECTURES_REGEX;
66
67 $mirror_type = $q->param('mirror') || 'archive';
68
69 if ($mirror_type eq 'cdimage') {
70 @ARCHITECTURES_REGEX = (
71 qr'^(?:\d|current)[^/]*/([^/]+)/',
72 );
73 } else {
74 @ARCHITECTURES_REGEX = (
75 qr'^dists/(?:[^/]+/){2,3}binary-([^/]+)/',
09095a5 @rgeissert Recognise udebs in arch-detection regexes
authored
76 qr'^pool/(?:[^/]+/){3,4}.+_([^.]+)\.u?deb$',
788ac30 @rgeissert Recognise architecture of cdimage requests
authored
77 qr'^dists/(?:[^/]+/){1,2}Contents-([^.]+)\.gz$',
78 qr'^indices/files(?:/components)?/arch-([^.]+).*$',
79 qr'^dists/(?:[^/]+/){2}installer-([^/]+)/',
80 );
81 }
6d6feab @rgeissert Initial import
authored
82
83 our $db = retrieve($db_store);
84
85 ####
86 my $IP = $ENV{'REMOTE_ADDR'} || '127.0.0.1';
87 # for testing purposes
399f2bf @rgeissert Make it work when connecting locally
authored
88 $IP = '8.8.8.8' if ($IP eq '127.0.1.1');
89 $IP = `wget -O- -q http://myip.dnsomatic.com/` if ($IP eq '127.0.0.1');
6d6feab @rgeissert Initial import
authored
90 ####
91
92 # Make a shortcut
93 my $rdb = $db->{$mirror_type} or die("Invalid mirror type: $mirror_type");
94
95 my $ipv6 = ($IP =~ m/::/);
96 my $r = $g_city->record_by_addr($IP);
97 my ($as) = split /\s+/, ($g_as->org_by_addr($IP) || '');
98 my $arch = '';
99
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
100 print_xtra('IP', $IP);
604223a @rgeissert $as, even if empty, is always defined
authored
101 if (!defined($r)) {
6d6feab @rgeissert Initial import
authored
102 # TODO: handle error
103 $r = undef;
104 }
105
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
106 print_xtra('AS', $as);
6d6feab @rgeissert Initial import
authored
107
108 my $url = $q->param('url') || '';
79275bd @rgeissert Be more tolerant on the input URL
authored
109 $url =~ s,//,/,g;
110 $url =~ s,^/,,;
53473a3 @rgeissert No file name contains the space character, replace it with +
authored
111 $url =~ s, ,+,g;
79275bd @rgeissert Be more tolerant on the input URL
authored
112
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
113 print_xtra('URL', $url);
6d6feab @rgeissert Initial import
authored
114
b1963f7 @rgeissert Some cleanup
authored
115 $arch = find_arch($url, @ARCHITECTURES_REGEX);
4266c44 @rgeissert Assume i386 when multi-arch (cd image) is requested
authored
116 $arch = 'i386' if ($arch eq 'multi-arch');
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
117 print_xtra('Arch', $arch);
6d6feab @rgeissert Initial import
authored
118
119 my $host = '';
65c386e @rgeissert Randomly choose a mirror from among the valid candidates
authored
120 my %hosts;
a671833 @rgeissert Cleanup while adding more info bits
authored
121 my $match_type = '';
6d6feab @rgeissert Initial import
authored
122
123 # match by AS
124 foreach my $match (@{$rdb->{'AS'}{$as}}) {
125 my $mirror = $db->{'all'}{$match};
126
127 next unless fullfils_request($rdb, $match, $arch, $ipv6);
128
129 $host = $mirror->{'site'}.$mirror->{$mirror_type.'-http'};
65c386e @rgeissert Randomly choose a mirror from among the valid candidates
authored
130 $hosts{$host} = 1;
a671833 @rgeissert Cleanup while adding more info bits
authored
131 $match_type = 'AS';
6d6feab @rgeissert Initial import
authored
132 }
133
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
134 print_xtra('Country', $r->country_code);
6d6feab @rgeissert Initial import
authored
135 # match by country
a671833 @rgeissert Cleanup while adding more info bits
authored
136 if (!$match_type) {
6d6feab @rgeissert Initial import
authored
137 foreach my $match (keys %{$rdb->{'country'}{$r->country_code}}) {
138 my $mirror = $db->{'all'}{$match};
139
140 next unless fullfils_request($rdb, $match, $arch, $ipv6);
141
142 $host = $mirror->{'site'}.$mirror->{$mirror_type.'-http'};
9e7b920 @rgeissert Determine the priority based on geo distance
authored
143 $hosts{$host} = calculate_distance($mirror->{'lon'}, $mirror->{'lat'},
144 $r->longitude, $r->latitude);
a671833 @rgeissert Cleanup while adding more info bits
authored
145 $match_type = 'country';
6d6feab @rgeissert Initial import
authored
146 }
147 }
148
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
149 print_xtra('Continent', $r->continent_code);
062e6d8 @rgeissert Add a per-continent index
authored
150 # match by continent
a671833 @rgeissert Cleanup while adding more info bits
authored
151 if (!$match_type) {
6be728c @rgeissert Add a list of "nearby" continents, in case there's no match
authored
152 my @continents = ($r->continent_code, @{$nearby_continents{$r->continent_code}});
062e6d8 @rgeissert Add a per-continent index
authored
153
6be728c @rgeissert Add a list of "nearby" continents, in case there's no match
authored
154 for my $continent (@continents) {
031fe4e @rgeissert Only check the nearby continents if necessary
authored
155 last if ($match_type);
6be728c @rgeissert Add a list of "nearby" continents, in case there's no match
authored
156 foreach my $match (keys %{$rdb->{'continent'}{$continent}}) {
157 my $mirror = $db->{'all'}{$match};
062e6d8 @rgeissert Add a per-continent index
authored
158
6be728c @rgeissert Add a list of "nearby" continents, in case there's no match
authored
159 next unless fullfils_request($rdb, $match, $arch, $ipv6);
160
161 $host = $mirror->{'site'}.$mirror->{$mirror_type.'-http'};
162 $hosts{$host} = calculate_distance($mirror->{'lon'}, $mirror->{'lat'},
163 $r->longitude, $r->latitude);
164
165 if ($continent eq $r->continent_code) {
166 $match_type = 'continent';
167 } else {
168 $match_type = 'nearby-continent';
169 }
170 }
062e6d8 @rgeissert Add a per-continent index
authored
171 }
172 }
173
6d6feab @rgeissert Initial import
authored
174 # something went awry, we don't know how to handle this user, we failed
175 # let's make another attempt:
a671833 @rgeissert Cleanup while adding more info bits
authored
176 if (!$match_type && $mirror_type eq 'archive') {
73da606 @rgeissert Make the cdn.d.n fallback still work
authored
177 $hosts{'cdn.debian.net/debian/'} = 1;
a671833 @rgeissert Cleanup while adding more info bits
authored
178 $match_type = 'catch-all';
6d6feab @rgeissert Initial import
authored
179 }
180
181 # TODO: if ($host eq '') { not a request for archive, but we don't know
182 # where we should redirect the user to }
183
d409639 @rgeissert Select a subset of hosts from the main set based on stddevp
authored
184 my @sorted_hosts = sort { $hosts{$a} <=> $hosts{$b} } keys %hosts;
185 my @close_hosts;
186 my $dev = stddevp(values %hosts);
187
188 # Closest host (or one of many), to use as the base distance
189 $host = $sorted_hosts[0];
190
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
191 print_xtra('Std-Dev', $dev);
192 print_xtra('Population', scalar(@sorted_hosts));
193 print_xtra('Closest-Distance', $hosts{$host});
d409639 @rgeissert Select a subset of hosts from the main set based on stddevp
authored
194
195 for my $h (@sorted_hosts) {
196 # NOTE: this might need some additional work, as we should probably
197 # guarantee a certain amount of alt hosts to choose from
198 if (($hosts{$h} - $hosts{$host}) <= $dev) {
199 push @close_hosts, $h;
d5e016a @rgeissert Break a loop as soon as possible
authored
200 } else {
201 # the list is sorted, if we didn't accept this one won't accept
202 # the next
203 last;
d409639 @rgeissert Select a subset of hosts from the main set based on stddevp
authored
204 }
205 }
206
3f24721 @rgeissert Allow disabling the random sort order too
authored
207 $host = (shuffle (@close_hosts))[0]
208 if ($random_sort);
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
209 print_xtra('Distance', $hosts{$host});
210 print_xtra('Match-Type', $match_type);
fa1173d @rgeissert Add RFC6249-like link rels
authored
211 print "Location: http://".$host.$url."\r\n";
212
723442f @rgeissert Also make link rels optional
authored
213 if ($add_links) {
214 # RFC6249-like link rels
215 # A client strictly adhering to the RFC would ignore these since we
216 # don't provide a digest, and we wont.
217 for my $host (@close_hosts) {
218 my $priority = $hosts{$host};
9e7b920 @rgeissert Determine the priority based on geo distance
authored
219
723442f @rgeissert Also make link rels optional
authored
220 $priority *= 100;
221 $priority = sprintf("%.0f", $priority);
9e7b920 @rgeissert Determine the priority based on geo distance
authored
222
723442f @rgeissert Also make link rels optional
authored
223 print "Link: http://".$host.$url."; rel=duplicate; pri=$priority\r\n";
224 }
fa1173d @rgeissert Add RFC6249-like link rels
authored
225 }
226
227 print "\r\n";
6d6feab @rgeissert Initial import
authored
228
229 exit;
230
231 sub fullfils_request($$$$) {
232 my ($rdb, $id, $arch, $ipv6) = @_;
233
234 my $mirror = $db->{'all'}{$id};
235
aaa365e @rgeissert Ignore disabled mirrors
authored
236 return 0 if (exists($mirror->{$mirror_type.'-disabled'}));
237
6d6feab @rgeissert Initial import
authored
238 return 0 if ($ipv6 && !exists($rdb->{'ipv6'}{$id}));
239
240 return 0 if ($arch ne '' && !exists($rdb->{'arch'}{$arch}{$id}) && !exists($rdb->{'arch'}{'any'}{$id}));
241
aaa365e @rgeissert Ignore disabled mirrors
authored
242 return 0 if ($arch ne '' && exists($mirror->{$mirror_type.'-'.$arch.'-disabled'}));
243
6d6feab @rgeissert Initial import
authored
244 return 1;
245 }
9e7b920 @rgeissert Determine the priority based on geo distance
authored
246
247 sub calculate_distance($$$$) {
248 my ($x1, $y1, $x2, $y2) = @_;
249
250 if ($metric eq 'euclidean') {
251 return sqrt(($x1-$x2)**2 + ($y1-$y2)**2);
252 } else {
7e3a19f @rgeissert Correct the taxicab formula
authored
253 return (abs($x1-$x2) + abs($y1-$y2));
9e7b920 @rgeissert Determine the priority based on geo distance
authored
254 }
255 }
d409639 @rgeissert Select a subset of hosts from the main set based on stddevp
authored
256
257 sub stddevp {
258 my ($avg, $var, $stddev) = (0, 0, 0);
259 local $_;
260
261 for (@_) {
262 $avg += $_;
263 }
264 $avg /= scalar(@_);
265
266 for (@_) {
267 $var += $_**2;
268 }
269 $var /= scalar(@_);
270 $var -= $avg**2;
271
272 $stddev = sqrt($var);
1975793 @rgeissert Add a return for consistency
authored
273 return $stddev;
d409639 @rgeissert Select a subset of hosts from the main set based on stddevp
authored
274 }
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
275
276 sub print_xtra($$) {
277 print "X-$_[0]: $_[1]\r\n"
278 if ($xtra_headers);
279 }
b1963f7 @rgeissert Some cleanup
authored
280
281 sub find_arch($@) {
282 my $url = shift;
283 local $_;
284
285 foreach (@_) {
286 return $1 if ($url =~ m/$_/);
287 }
288 return '';
289 }
Something went wrong with that request. Please try again.