Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 267 lines (211 sloc) 6.899 kb
6d6feab @rgeissert Initial import
authored
1 #!/usr/bin/perl -w
2
3 ####################
4 # Copyright (C) 2011 by Raphael Geissert <geissert@debian.org>
5 #
6 # This file is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This file is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this file If not, see <http://www.gnu.org/licenses/>.
18 #
19 # On Debian systems, the complete text of the GNU General
20 # Public License 3 can be found in '/usr/share/common-licenses/GPL-3'.
21 ####################
22
23 use strict;
24 use warnings;
25
26 # Usage: redir.pl?mirror=(archive|backports|...)&url=/debian/dists/sid/...
27 # Test (make sure -debug1 is set below):
28 # ./redir.pl mirror=...
29 # REMOTE_ADDR=1.2.3.4 ./redir.pl mirror=...
30 use CGI::Simple qw(-debug1);
31 my $q = new CGI::Simple;
32
33 use Geo::IP;
34 use Storable qw(retrieve);
65c386e @rgeissert Randomly choose a mirror from among the valid candidates
authored
35 use List::Util qw(shuffle);
6d6feab @rgeissert Initial import
authored
36
9e7b920 @rgeissert Determine the priority based on geo distance
authored
37 our $metric = ''; # alt: taxicab (default) | euclidean
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
38 our $xtra_headers = 1;
723442f @rgeissert Also make link rels optional
authored
39 my $add_links = 1;
3f24721 @rgeissert Allow disabling the random sort order too
authored
40 my $random_sort = 1;
6d6feab @rgeissert Initial import
authored
41 my $db_store = 'db';
42 my $mirror_type = 'archive';
43 my %mirror_prefixes = (
44 'archive' => '',
45 'security' => '-security',
46 'cdimage' => '-cd',
47 'volatile' => '-volatile',
48 'backports' => '-backports',
49 );
50
51 my $g_city = Geo::IP->open('geoip/GeoLiteCity.dat', GEOIP_MMAP_CACHE);
52 my $g_as = Geo::IP->open('geoip/GeoIPASNum.dat', GEOIP_MMAP_CACHE);
53
54 # TODO: support IPv6. Basic implementation is to lookup the country in
55 # the geoipv6 db and wish the user our best
56
57 sub fullfils_request($$$$);
9e7b920 @rgeissert Determine the priority based on geo distance
authored
58 sub calculate_distance($$$$);
d409639 @rgeissert Select a subset of hosts from the main set based on stddevp
authored
59 sub stddevp;
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
60 sub print_xtra($$);
6d6feab @rgeissert Initial import
authored
61
788ac30 @rgeissert Recognise architecture of cdimage requests
authored
62 my @ARCHITECTURES_REGEX;
63
64 $mirror_type = $q->param('mirror') || 'archive';
65
66 if ($mirror_type eq 'cdimage') {
67 @ARCHITECTURES_REGEX = (
68 qr'^(?:\d|current)[^/]*/([^/]+)/',
69 );
70 } else {
71 @ARCHITECTURES_REGEX = (
72 qr'^dists/(?:[^/]+/){2,3}binary-([^/]+)/',
09095a5 @rgeissert Recognise udebs in arch-detection regexes
authored
73 qr'^pool/(?:[^/]+/){3,4}.+_([^.]+)\.u?deb$',
788ac30 @rgeissert Recognise architecture of cdimage requests
authored
74 qr'^dists/(?:[^/]+/){1,2}Contents-([^.]+)\.gz$',
75 qr'^indices/files(?:/components)?/arch-([^.]+).*$',
76 qr'^dists/(?:[^/]+/){2}installer-([^/]+)/',
77 );
78 }
6d6feab @rgeissert Initial import
authored
79
80 our $db = retrieve($db_store);
81
82 ####
83 my $IP = $ENV{'REMOTE_ADDR'} || '127.0.0.1';
84 # for testing purposes
399f2bf @rgeissert Make it work when connecting locally
authored
85 $IP = '8.8.8.8' if ($IP eq '127.0.1.1');
86 $IP = `wget -O- -q http://myip.dnsomatic.com/` if ($IP eq '127.0.0.1');
6d6feab @rgeissert Initial import
authored
87 ####
88
89 # Make a shortcut
90 my $rdb = $db->{$mirror_type} or die("Invalid mirror type: $mirror_type");
91
92 my $ipv6 = ($IP =~ m/::/);
93 my $r = $g_city->record_by_addr($IP);
94 my ($as) = split /\s+/, ($g_as->org_by_addr($IP) || '');
95 my $arch = '';
96
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
97 print_xtra('IP', $IP);
6d6feab @rgeissert Initial import
authored
98 if (!defined($r) || !defined($as)) {
99 # TODO: handle error
100 $as = '';
101 $r = undef;
102 }
103
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
104 print_xtra('AS', $as);
6d6feab @rgeissert Initial import
authored
105
106 my $url = $q->param('url') || '';
79275bd @rgeissert Be more tolerant on the input URL
authored
107 $url =~ s,//,/,g;
108 $url =~ s,^/,,;
109
6d6feab @rgeissert Initial import
authored
110 if (defined($mirror_prefixes{$mirror_type})) {
111 # FIXME: ugly
112 $url =~ s,^debian$mirror_prefixes{$mirror_type}/,,;
113 }
114
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
115 print_xtra('URL', $url);
6d6feab @rgeissert Initial import
authored
116
117 foreach my $r (@ARCHITECTURES_REGEX) {
118 if ($url =~ m/$r/) {
119 $arch = $1;
120 last;
121 }
122 }
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
123 print_xtra('Arch', $arch);
6d6feab @rgeissert Initial import
authored
124
125 my $host = '';
65c386e @rgeissert Randomly choose a mirror from among the valid candidates
authored
126 my %hosts;
a671833 @rgeissert Cleanup while adding more info bits
authored
127 my $match_type = '';
6d6feab @rgeissert Initial import
authored
128
129 # match by AS
130 foreach my $match (@{$rdb->{'AS'}{$as}}) {
131 my $mirror = $db->{'all'}{$match};
132
133 next unless fullfils_request($rdb, $match, $arch, $ipv6);
134
135 $host = $mirror->{'site'}.$mirror->{$mirror_type.'-http'};
65c386e @rgeissert Randomly choose a mirror from among the valid candidates
authored
136 $hosts{$host} = 1;
a671833 @rgeissert Cleanup while adding more info bits
authored
137 $match_type = 'AS';
6d6feab @rgeissert Initial import
authored
138 }
139
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
140 print_xtra('Country', $r->country_code);
6d6feab @rgeissert Initial import
authored
141 # match by country
a671833 @rgeissert Cleanup while adding more info bits
authored
142 if (!$match_type) {
6d6feab @rgeissert Initial import
authored
143 foreach my $match (keys %{$rdb->{'country'}{$r->country_code}}) {
144 my $mirror = $db->{'all'}{$match};
145
146 next unless fullfils_request($rdb, $match, $arch, $ipv6);
147
148 $host = $mirror->{'site'}.$mirror->{$mirror_type.'-http'};
9e7b920 @rgeissert Determine the priority based on geo distance
authored
149 $hosts{$host} = calculate_distance($mirror->{'lon'}, $mirror->{'lat'},
150 $r->longitude, $r->latitude);
a671833 @rgeissert Cleanup while adding more info bits
authored
151 $match_type = 'country';
6d6feab @rgeissert Initial import
authored
152 }
153 }
154
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
155 print_xtra('Continent', $r->continent_code);
062e6d8 @rgeissert Add a per-continent index
authored
156 # match by continent
a671833 @rgeissert Cleanup while adding more info bits
authored
157 if (!$match_type) {
062e6d8 @rgeissert Add a per-continent index
authored
158 foreach my $match (keys %{$rdb->{'continent'}{$r->continent_code}}) {
159 my $mirror = $db->{'all'}{$match};
160
161 next unless fullfils_request($rdb, $match, $arch, $ipv6);
162
163 $host = $mirror->{'site'}.$mirror->{$mirror_type.'-http'};
9e7b920 @rgeissert Determine the priority based on geo distance
authored
164 $hosts{$host} = calculate_distance($mirror->{'lon'}, $mirror->{'lat'},
165 $r->longitude, $r->latitude);
a671833 @rgeissert Cleanup while adding more info bits
authored
166 $match_type = 'continent';
062e6d8 @rgeissert Add a per-continent index
authored
167 }
168 }
169
6d6feab @rgeissert Initial import
authored
170 # something went awry, we don't know how to handle this user, we failed
171 # let's make another attempt:
a671833 @rgeissert Cleanup while adding more info bits
authored
172 if (!$match_type && $mirror_type eq 'archive') {
73da606 @rgeissert Make the cdn.d.n fallback still work
authored
173 $hosts{'cdn.debian.net/debian/'} = 1;
a671833 @rgeissert Cleanup while adding more info bits
authored
174 $match_type = 'catch-all';
6d6feab @rgeissert Initial import
authored
175 }
176
177 # TODO: if ($host eq '') { not a request for archive, but we don't know
178 # where we should redirect the user to }
179
d409639 @rgeissert Select a subset of hosts from the main set based on stddevp
authored
180 my @sorted_hosts = sort { $hosts{$a} <=> $hosts{$b} } keys %hosts;
181 my @close_hosts;
182 my $dev = stddevp(values %hosts);
183
184 # Closest host (or one of many), to use as the base distance
185 $host = $sorted_hosts[0];
186
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
187 print_xtra('Std-Dev', $dev);
188 print_xtra('Population', scalar(@sorted_hosts));
189 print_xtra('Closest-Distance', $hosts{$host});
d409639 @rgeissert Select a subset of hosts from the main set based on stddevp
authored
190
191 for my $h (@sorted_hosts) {
192 # NOTE: this might need some additional work, as we should probably
193 # guarantee a certain amount of alt hosts to choose from
194 if (($hosts{$h} - $hosts{$host}) <= $dev) {
195 push @close_hosts, $h;
196 }
197 }
198
3f24721 @rgeissert Allow disabling the random sort order too
authored
199 $host = (shuffle (@close_hosts))[0]
200 if ($random_sort);
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
201 print_xtra('Distance', $hosts{$host});
202 print_xtra('Match-Type', $match_type);
fa1173d @rgeissert Add RFC6249-like link rels
authored
203 print "Location: http://".$host.$url."\r\n";
204
723442f @rgeissert Also make link rels optional
authored
205 if ($add_links) {
206 # RFC6249-like link rels
207 # A client strictly adhering to the RFC would ignore these since we
208 # don't provide a digest, and we wont.
209 for my $host (@close_hosts) {
210 my $priority = $hosts{$host};
9e7b920 @rgeissert Determine the priority based on geo distance
authored
211
723442f @rgeissert Also make link rels optional
authored
212 $priority *= 100;
213 $priority = sprintf("%.0f", $priority);
9e7b920 @rgeissert Determine the priority based on geo distance
authored
214
723442f @rgeissert Also make link rels optional
authored
215 print "Link: http://".$host.$url."; rel=duplicate; pri=$priority\r\n";
216 }
fa1173d @rgeissert Add RFC6249-like link rels
authored
217 }
218
219 print "\r\n";
6d6feab @rgeissert Initial import
authored
220
221 exit;
222
223 sub fullfils_request($$$$) {
224 my ($rdb, $id, $arch, $ipv6) = @_;
225
226 my $mirror = $db->{'all'}{$id};
227
228 return 0 if ($ipv6 && !exists($rdb->{'ipv6'}{$id}));
229
230 return 0 if ($arch ne '' && !exists($rdb->{'arch'}{$arch}{$id}) && !exists($rdb->{'arch'}{'any'}{$id}));
231
232 return 1;
233 }
9e7b920 @rgeissert Determine the priority based on geo distance
authored
234
235 sub calculate_distance($$$$) {
236 my ($x1, $y1, $x2, $y2) = @_;
237
238 if ($metric eq 'euclidean') {
239 return sqrt(($x1-$x2)**2 + ($y1-$y2)**2);
240 } else {
7e3a19f @rgeissert Correct the taxicab formula
authored
241 return (abs($x1-$x2) + abs($y1-$y2));
9e7b920 @rgeissert Determine the priority based on geo distance
authored
242 }
243 }
d409639 @rgeissert Select a subset of hosts from the main set based on stddevp
authored
244
245 sub stddevp {
246 my ($avg, $var, $stddev) = (0, 0, 0);
247 local $_;
248
249 for (@_) {
250 $avg += $_;
251 }
252 $avg /= scalar(@_);
253
254 for (@_) {
255 $var += $_**2;
256 }
257 $var /= scalar(@_);
258 $var -= $avg**2;
259
260 $stddev = sqrt($var);
261 }
35d60ea @rgeissert Allow disabling the inclusion of extra headers
authored
262
263 sub print_xtra($$) {
264 print "X-$_[0]: $_[1]\r\n"
265 if ($xtra_headers);
266 }
Something went wrong with that request. Please try again.