-
Notifications
You must be signed in to change notification settings - Fork 8
/
nph-proxy.cgi
5966 lines (4851 loc) · 244 KB
/
nph-proxy.cgi
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!e:\Perl\bin\perl.exe
#
# CGIProxy 2.0
#
# nph-proxy.cgi-- CGIProxy 2.0: a proxy in the form of a CGI script.
# Retrieves the resource at any HTTP or FTP URL, updating embedded URLs
# in HTML resources to point back through this script. By default, no
# user info is sent to the server. Options include text-only proxying
# to save bandwidth, cookie filtering, ad filtering, script removal,
# user-defined encoding of the target URL, and more. Requires Perl 5.
#
# Copyright (C) 1996, 1998-2002 by James Marshall, james@jmarshall.com
# All rights reserved.
#
# For the latest, see http://www.jmarshall.com/tools/cgiproxy/
#
#
# IMPORTANT NOTE ABOUT ANONYMOUS BROWSING:
# CGIProxy was originally made for indirect browsing more than
# anonymity, but since people are using it for anonymity, I've tried
# to make it as anonymous as possible. Suggestions welcome. For best
# anonymity, browse with JavaScript turned off. In fact, that's the
# only reliable way, in spite of what certain anonymity vendors claim.
# Anonymity is pretty good, but may not be bulletproof. For example,
# if even a single JavaScript statement can be run, your anonymity can
# be compromised. I've tried to remove JS from every place it can
# exist, but please tell me if I missed any. Also, browser plugins or
# other executable extensions may be able to reveal you to a server.
# If you find any way your anonymity can be compromised even with scripts
# turned off, please let me know.
#
#
# CONFIGURATION:
#
# None required in most situations. On some servers, these might be
# required (all in the "user configuration" section):
# . If this is running on an SSL server that doesn't use port 443, set
# $RUNNING_ON_SSL_SERVER=1 (otherwise, the default of '' is fine).
# . If this is running on Windows, set $RUNNING_ON_WINDOWS=1 to work
# around a couple of Windows and IIS bugs.
# . If you're using another HTTP proxy, set $HTTP_PROXY and $NO_PROXY.
# If that proxy uses authentication, set $PROXY_AUTH.
#
# Options include:
# . Set $TEXT_ONLY, $REMOVE_COOKIES, $REMOVE_SCRIPTS, $FILTER_ADS,
# $HIDE_REFERER, and $INSERT_ENTRY_FORM as desired. Set
# $REMOVE_SCRIPTS if anonymity is important.
# . To let the user choose all of those settings (except $TEXT_ONLY),
# set $ALLOW_USER_CONFIG=1.
# . To change the encoding format of the URL, modify the
# proxy_encode() and proxy_decode() routines. The default
# routines are suitable for simple PATH_INFO compliance.
# . To encode cookies, modify the cookie_encode() and cookie_decode()
# routines.
# . You can restrict which servers this proxy will access, with
# @ALLOWED_SERVERS and @BANNED_SERVERS.
# . Similarly, you can specify allowed and denied server lists for
# both cookies and scripts.
# . For security, you can ban access to private IP ranges, with
# @BANNED_NETWORKS.
# . If filtering ads, you can customize this with a few settings.
# . To insert your own block of HTML into each page, set $INSERT_HTML
# or $INSERT_FILE.
# . As a last resort, if you really can't run this script as NPH,
# you can try to run it as non-NPH by setting $NOT_RUNNING_AS_NPH=1.
# BUT, read the notes and warnings above that line. Caveat surfor.
# . For crude load-balancing among a set of proxies, set @PROXY_GROUP.
# . Other config is possible; see the user configuration section.
# . If heavy use of this proxy puts a load on your server, see the
# "NOTES ON PERFORMANCE" section below.
#
# For more info, read the comments regarding any config options you set.
#
# This script MUST be installed as a non-parsed header (NPH) script.
# In Apache and many other servers, this is done by simply starting the
# filename with "nph-". It MAY be possible to fake it as a non-NPH
# script, MOST of the time, by using the $NOT_RUNNING_AS_NPH feature.
# This is not advised. See the comments by that option for warnings.
#
#
# TO USE:
# Start a browsing session by calling the script with no parameters.
# You can bookmark pages you browse to through the proxy, or link to
# the URLs that are generated.
#
#
# NOTES ON PERFORMANCE:
# Unfortunately, this has gotten slower through the versions, mostly
# because of optional new features. Configured equally, version 1.3
# takes 25% longer to run than 1.0 or 1.1 (based on *cough* highly
# abbreviated testing). Compiling takes about 50% longer.
# Leaving $REMOVE_SCRIPTS=1 adds 25-50% to the running time.
# Remember that we're talking about tenths of a second here. Most of
# the delay experienced by the user is from waiting on two network
# connections. These performance issues only matter if your server
# CPU is getting overloaded. Also, these only matter when retrieving
# HTML, because it's the HTML modification that takes all the time.
# If you can, use mod_perl. Starting with version 1.3.1, this should
# work under mod_perl, which requires Perl 5.004 or later. If you use
# mod_perl, be careful to install this as an NPH script, i.e. set the
# "PerlSendHeader Off" configuration directive. For more info, see the
# mod_perl documentation.
# If you use mod_perl and modify this script, see the note near the
# "reset 'a-z'" line below, regarding UPPER_CASE and lower_case
# variables.
#
#
# TO DO:
# What I want to hear about:
# . Any HTML tags not being converted here.
# . Any method of introducing JavaScript or other script, that's not
# being filtered out here.
# . Any script MIME types other than those already in @SCRIPT_MIME_TYPES.
# . Any MIME types other than text/html that have links that need to
# be converted.
#
# plug any other script holes (e.g. MSIE-proprietary, other MIME types?)
# This could use cleaner URL-encoding all over ($base_url, etc.)
# more error checking?
# find a simple encryption technique for proxy_encode()
# support more protocols, like mailto: or gopher:
# For ad filtering, add option to disable images from servers other than
# that of the containing HTML page? Is it worth it?
#
#
# BUGS:
# Anonymity may not be perfect. In particular, there may be some remaining
# JavaScript holes.
# URLs generated by JavaScript or similar mechanisms won't be re-proxy'ed
# correctly. JavaScript in general may not work as expected.
# Since ALL of your cookies are sent to this script (which then chooses
# the relevant ones), some cookies could conceivably be dropped if
# you accumulate a whole lot. I haven't seen this happen yet.
#
#
# I first wrote this in 1996 as an experiment to allow indirect browsing.
# The original seed was a program I wrote for Rich Morin's article
# in the June 1996 issue of Unix Review, online at
# http://www.cfcl.com/tin/P/199606.shtml.
#
# Confession: I didn't originally write this with the spec for HTTP
# proxies in mind, and there are probably some violations of the protocol
# (at least for proxies). This whole thing is one big violation of the
# proxy model anyway, so I hereby rationalize that the spec can be widely
# interpreted here. If there is demand, I can make it more conformant.
# The HTTP client and server components should be fine; it's just the
# special requirements for proxies that may not be followed.
#
#--------------------------------------------------------------------------
use strict ;
use Socket ;
# First block below is config variables, second block is sort-of config
# variables, third block is persistent constants, fourth block is would-be
# persistent constants (not set until needed), and last block is variables.
use vars qw(
$TEXT_ONLY
$REMOVE_COOKIES $REMOVE_SCRIPTS $FILTER_ADS $HIDE_REFERER
$INSERT_ENTRY_FORM $ALLOW_USER_CONFIG
@ALLOWED_SERVERS @BANNED_SERVERS @BANNED_NETWORKS
$NO_COOKIE_WITH_IMAGE @ALLOWED_COOKIE_SERVERS @BANNED_COOKIE_SERVERS
@ALLOWED_SCRIPT_SERVERS @BANNED_SCRIPT_SERVERS
@BANNED_IMAGE_URL_PATTERNS $RETURN_EMPTY_GIF
$INSERT_HTML $INSERT_FILE $ANONYMIZE_INSERTION $FORM_AFTER_INSERTION
$INSERTION_FRAME_HEIGHT
$RUNNING_ON_SSL_SERVER $RUNNING_ON_WINDOWS $NOT_RUNNING_AS_NPH
$HTTP_PROXY $NO_PROXY $PROXY_AUTH
$MINIMIZE_CACHING $SESSION_COOKIES_ONLY
@PROXY_GROUP
$USER_AGENT $USE_PASSIVE_FTP_MODE $SHOW_FTP_WELCOME $USE_POST_ON_START
$REMOVE_TITLES $NO_BROWSE_THROUGH_SELF $NO_LINK_TO_START $MAX_REQUEST_SIZE
$QUIETLY_EXIT_PROXY_SESSION
$OVERRIDE_SECURITY
$PROXIFY_SCRIPTS $PROXIFY_COMMENTS
@SCRIPT_MIME_TYPES @OTHER_TYPES_TO_REGISTER @TYPES_TO_HANDLE
$NON_TEXT_EXTENSIONS
$PROXY_VERSION
@MONTH @WEEKDAY %UN_MONTH
@BANNED_NETWORK_ADDRS
$NO_CACHE_HEADERS
@ALL_TYPES %MIME_TYPE_ID $SCRIPT_TYPE_REGEX $TYPES_TO_HANDLE_REGEX
$THIS_HOST $ENV_SERVER_PORT $ENV_SCRIPT_NAME $THIS_SCRIPT_URL
$HAS_BEGUN
$CUSTOM_INSERTION
$HTTP_VERSION $HTTP_1_X
$URL
$now
$packed_flags $encoded_URL $doing_insert_here $env_accept
$e_remove_cookies $e_remove_scripts $e_filter_ads $e_insert_entry_form
$e_hide_referer
$images_are_banned_here $scripts_are_banned_here $cookies_are_banned_here
$scheme $authority $path $host $port $username $password
$cookie_to_server %auth
$script_url $url_start $url_start_inframe $url_start_noframe
$is_in_frame $expected_type
$base_url $base_scheme $base_host $base_path $base_unframes
$default_style_type $default_script_type
$status $headers $body $is_html $response_sent
$debug ) ;
# Under mod_perl, persistent constants only need to be initialized once, so
# use this one-time block to do so.
unless ($HAS_BEGUN) {
#--------------------------------------------------------------------------
# user configuration
#--------------------------------------------------------------------------
# If set, then proxy traffic will be restricted to text data only, to save
# bandwidth (though it can still be circumvented with uuencode, etc.).
$TEXT_ONLY= 0 ; # set to 1 to allow only text data, 0 to allow all
# If set, then prevent all cookies from passing through the proxy. To allow
# cookies from some servers, set this to 0 and see @ALLOWED_COOKIE_SERVERS
# and @BANNED_COOKIE_SERVERS below. You can also prevent cookies with
# images by setting $NO_COOKIE_WITH_IMAGE below.
# Note that this only affects cookies from the target server. The proxy
# script sends its own cookies for other reasons too, like to support
# authentication. This flag does not stop these cookies from being sent.
$REMOVE_COOKIES= 0 ;
# If set, then remove as much scripting as possible. If anonymity is
# important, this is strongly recommended! Better yet, turn off script
# support in your browser.
# On the HTTP level:
# . prevent transmission of script MIME types (which only works if the server
# marks them as such, so a malicious server could get around this, but
# then the browser probably wouldn't execute the script).
# . remove Link: headers that link to a resource of a script MIME type.
# Within HTML resources:
# . remove <script>...</script> .
# . remove intrinsic event attributes from tags, i.e. attributes whose names
# begin with "on".
# . remove <style>...</style> where "type" attribute is a script MIME type.
# . remove various HTML tags that appear to link to a script MIME type.
# . remove script macros (aka Netscape-specific "JavaScript entities"),
# i.e. any attributes containing the string "&{" .
# . remove "JavaScript conditional comments".
# . remove MSIE-specific "dynamic properties".
# To allow scripts from some sites but not from others, set this to 0 and
# see @ALLOWED_SCRIPT_SERVERS and @BANNED_SCRIPT_SERVERS below.
# See @SCRIPT_MIME_TYPES below for a list of which MIME types are filtered out.
# I do NOT know for certain that this removes all script content! It removes
# all that I know of, but I don't have a definitive list of places scripts
# can exist. If you do, please send it to me. EVEN RUNNING A SINGLE
# JAVASCRIPT STATEMENT CAN COMPROMISE YOUR ANONYMITY! Just so you know.
# Richard Smith has a good test site for anonymizing proxies, at
# http://users.rcn.com/rms2000/anon/test.htm
# Note that turning this on removes most popup ads! :)
$REMOVE_SCRIPTS= 1 ;
# If set, then filter out images that match one of @BANNED_IMAGE_URL_PATTERNS,
# below. Also removes cookies attached to images, as if $NO_COOKIE_WITH_IMAGE
# is set.
# To remove most popup advertisements, also set $REMOVE_SCRIPTS=1 above.
$FILTER_ADS= 0 ;
# If set, then don't send a Referer: [sic] header with each request
# (i.e. something that tells the server which page you're coming from
# that linked to it). This is a minor privacy issue, but a few sites
# won't send you pages or images if the Referer: is not what they're
# expecting. If a page is loading without images or a link seems to be
# refused, then try turning this off, and a correct Referer: header will
# be sent.
# This is only a problem in a VERY small percentage of sites, so few that
# I'm kinda hesitant to put this in the entry form. Other arrangements
# have their own problems, though.
$HIDE_REFERER= 1 ;
# If set, insert a compact version of the URL entry form at the top of each
# page. This will also display the URL currently being viewed.
# When viewing a page with frames, then a new top frame is created and the
# insertion goes there.
# If you want to customize the appearance of the form, modify the routine
# mini_start_form() near the end of the script.
# If you want to insert something other than this form, see $INSERT_HTML and
# $INSERT_FILE below.
# Users should realize that options changed via the form only take effect when
# the form is submitted by entering a new URL or pressing the "Go" button.
# Selecting an option, then following a link on the page, will not cause
# the option to take effect.
# Users should also realize that anything inserted into a page may throw
# off any precise layout. The insertion will also be subject to
# background colors and images, and any other page-wide settings.
$INSERT_ENTRY_FORM= 1 ;
# If set, then allow the user to control $REMOVE_COOKIES, $REMOVE_SCRIPTS,
# $FILTER_ADS, $HIDE_REFERER, and $INSERT_ENTRY_FORM. Note that they
# can't fine-tune any related options, such as the various @ALLOWED... and
# @BANNED... lists.
$ALLOW_USER_CONFIG= 1 ;
# Create your own proxy_encode() and proxy_decode() to transform the target
# URL to and from the format that will be stored in PATH_INFO. The encoded
# form should only contain characters that are legal in PATH_INFO. This
# varies by server, but using only printable chars, no "?" or "#", and no
# two adjacent slashes ("//") works on most servers. Don't let PATH_INFO
# contain the strings "./", "/.", "../", or "/..", or else it may get
# compressed like a pathname somewhere. Try not to make the resulting
# string too long, either.
# Of course, proxy_decode() must exactly undo whatever proxy_encode() does.
# Make proxy_encode() as fast as possible-- it's a major bottleneck for the
# whole program.
# Because of the simplified absolute URL resolution in full_url(), there may
# be ".." segments in the default encoding here, notably in the first path
# segment. Normally, that's just an HTML mistake, but please tell me if
# you see any privacy exploit with it.
# Note that a few sites have embedded applications (like applets or Shockwave)
# that expect to access URLs relative to the page's URL. This means they
# may not work if the encoded target URL can't be treated like a base URL,
# e.g. that it can't be appended with something like "../data/foo.data"
# to get that expected data file. In such cases, the default encoding below
# should let these sites work fine, as should any other encoding that can
# support URLs relative to it.
# Default target-URL encoder.
#   Argument: the target URL as a string.
#   Returns:  a copy in which a leading "scheme://" has been rewritten to
#             "scheme/".  A string that does not start with "scheme://" is
#             returned unchanged.
# The commented-out statements are optional extra transformations; if you
# enable one here, enable its counterpart in proxy_decode() as well.
sub proxy_encode {
    my $target = shift ;

    $target =~ s{^([\w+.-]+)://}{$1/} ;               # http://xxx -> http/xxx
#   $target =~ s/(.)/ sprintf('%02x',ord($1)) /ge ;   # each char -> 2-hex
#   $target =~ tr/a-zA-Z/n-za-mN-ZA-M/ ;              # rot-13

    return $target ;
}
# Default target-URL decoder; reverses proxy_encode().
#   Argument: the encoded URL string.
#   Returns:  a copy in which a leading "scheme/" has been rewritten to
#             "scheme://".  A string with no "/" after its leading run of
#             scheme characters is returned unchanged.
# The commented-out statements undo the optional transformations in
# proxy_encode(), in the opposite order from how they are applied there.
sub proxy_decode {
    my $encoded = shift ;

#   $encoded =~ tr/a-zA-Z/n-za-mN-ZA-M/ ;                        # rot-13
#   $encoded =~ s/([0-9A-Fa-f]{2})/ sprintf("%c",hex($1)) /ge ;  # 2-hex -> each char
    $encoded =~ s{^([\w+.-]+)/}{$1://} ;                         # http/xxx -> http://xxx

    return $encoded ;
}
# Encode cookies before they're sent back to the user.
# The return value must only contain characters that are legal in cookie
# names and values, i.e. only printable characters, and no ";", ",", "=",
# or white space.
# cookie_encode() is called twice for each cookie: once to encode the cookie
# name, and once to encode the cookie value. The two are then joined with
# "=" and sent to the user.
# cookie_decode() must exactly undo whatever cookie_encode() does.
# Also, cookie_encode() must always encode a given input string into the
# same output string. This is because browsers need the cookie name to
# identify and manage a cookie, so the name must be consistent.
# This is not a bottleneck like proxy_encode() is, so speed is not critical.
# Default cookie encoder: simple URL-style escaping.
#   Argument: a cookie name or a cookie value.
#   Returns:  a copy in which every non-word character (\W) is replaced by
#             "%" followed by its ordinal in lowercase hex, zero-padded to
#             two digits.  Word characters pass through untouched.
# The commented-out statements are alternative encodings; if you switch to
# one, make the matching change in cookie_decode().
sub cookie_encode {
    my $plain = shift ;

#   $plain =~ s/(.)/ sprintf('%02x',ord($1)) /ge ;            # each char -> 2-hex
#   $plain =~ tr/a-zA-Z/n-za-mN-ZA-M/ ;                       # rot-13
    $plain =~ s{(\W)}{ '%' . sprintf('%02x',ord($1)) }ge ;    # simple URL-encoding

    return $plain ;
}
# Default cookie decoder; reverses cookie_encode().
#   Argument: an encoded cookie name or value.
#   Returns:  a copy in which each "%" followed by exactly two hex digits
#             (either case) is replaced by the single character with that
#             ordinal.  Any other text, including a "%" not followed by two
#             hex digits, is left as it is.
# The commented-out statements undo the alternative encodings offered in
# cookie_encode().
sub cookie_decode {
    my $escaped = shift ;

    $escaped =~ s{%([\da-fA-F]{2})}{ pack('C', hex($1)) }ge ;     # URL-decode
#   $escaped =~ tr/a-zA-Z/n-za-mN-ZA-M/ ;                         # rot-13
#   $escaped =~ s/([0-9A-Fa-f]{2})/ sprintf("%c",hex($1)) /ge ;   # 2-hex -> each char

    return $escaped ;
}
# Use @ALLOWED_SERVERS and @BANNED_SERVERS to restrict which servers a user
# can visit through this proxy. Any URL at a host matching a pattern in
# @BANNED_SERVERS will be forbidden. In addition, if @ALLOWED_SERVERS is
# not empty, then access is allowed *only* to servers that match a pattern
# in it. In other words, @BANNED_SERVERS means "ban these servers", and
# @ALLOWED_SERVERS (if not empty) means "allow only these servers". If a
# server matches both lists, it is banned.
# These are each a list of Perl 5 regular expressions (aka patterns or
# regexes), not literal host names. To turn a hostname into a pattern,
# replace every "." with "\.", add "^" to the beginning, and add "$" to the
# end. For example, "www.example.com" becomes "^www\.example\.com$". To
# match *every* host ending in something, leave out the "^". For example,
# "\.example\.com$" matches every host ending in ".example.com". For more
# details about Perl regular expressions, see the Perl documentation. (They
# may seem cryptic at first, but they're very powerful once you know how to
# use them.)
@ALLOWED_SERVERS= () ;
@BANNED_SERVERS= () ;
# If @BANNED_NETWORKS is set, then forbid access to these hosts or networks.
# This is done by IP address, not name, so it provides more certain security
# than @BANNED_SERVERS above.
# Specify each element as a decimal IP address-- all four integers for a host,
# or one to three integers for a network. For example, '127.0.0.1' bans
# access to the local host, and '192.168' bans access to all IP addresses
# in the 192.168 network. Sorry, no banning yet for subnets other than
# 8, 16, or 24 bits.
# IF YOU'RE RUNNING THIS ON OR INSIDE A FIREWALL, THIS SETTING IS STRONGLY
# RECOMMENDED!! In particular, you should ban access to other machines
# inside the firewall that the firewall machine itself may have access to.
# Otherwise, external users will be able to access any internal hosts that
# the firewall can access. Even if that's what you intend, you should ban
# access to any hosts that you don't explicitly want to expose to outside
# users.
# In addition to the recommended defaults below, add all IP addresses of your
# server machine if you want to protect it like this.
# After you set this, YOU SHOULD TEST to verify that the proxy can't access
# the IP addresses you're banning!
# This feature is simple now but will be more complete in future releases.
# How would you like this to be extended? What would be useful to you?
# Defaults: the entire loopback network (127/8, not just 127.0.0.1), the
# link-local network (169.254/16), and all three RFC 1918 private ranges:
# 10/8, 192.168/16, and 172.16/12.  Because only 8-, 16-, and 24-bit
# networks can be listed, 172.16/12 is spelled out as the sixteen 16-bit
# networks 172.16 through 172.31.
@BANNED_NETWORKS= ('127', '169.254', '10', '192.168',
                   (map { "172.$_" } 16 .. 31)) ;
# Settings to fine-tune cookie filtering, if cookies are not banned altogether
# (by user checkbox or $REMOVE_COOKIES above).
# Use @ALLOWED_COOKIE_SERVERS and @BANNED_COOKIE_SERVERS to restrict which
# servers can send cookies through this proxy. They work like
# @ALLOWED_SERVERS and @BANNED_SERVERS above, both in how their precedence
# works, and that they're lists of Perl 5 regular expressions. See the
# comments there for details.
# If non-empty, only allow cookies from servers matching one of these patterns.
# Comment this out to allow all cookies (subject to @BANNED_COOKIE_SERVERS).
#@ALLOWED_COOKIE_SERVERS= ('\bslashdot\.org$') ;
# Reject cookies from servers matching these patterns.
@BANNED_COOKIE_SERVERS= (
'\.doubleclick\.net$',
'\.preferences\.com$',
'\.imgis\.com$',
'\.adforce\.com$',
'\.focalink\.com$',
'\.flycast\.com$',
'\.go\.com$',
'\.avenuea\.com$',
'\.linkexchange\.com$',
'\.pathfinder\.com$',
'\.burstnet\.com$',
'\btripod\.com$',
'\bgeocities\.yahoo\.com$',
'\.mediaplex\.com$',
) ;
# Set this to reject cookies returned with images. This actually prevents
# cookies returned with any non-text resource.
$NO_COOKIE_WITH_IMAGE= 1 ;
# Settings to fine-tune script filtering, if scripts are not banned altogether
# (by user checkbox or $REMOVE_SCRIPTS above).
# Use @ALLOWED_SCRIPT_SERVERS and @BANNED_SCRIPT_SERVERS to restrict which
# servers you'll allow scripts from. They work like @ALLOWED_SERVERS and
# @BANNED_SERVERS above, both in how their precedence works, and that
# they're lists of Perl 5 regular expressions. See the comments there for
# details.
@ALLOWED_SCRIPT_SERVERS= () ;
@BANNED_SCRIPT_SERVERS= () ;
# Various options to help filter ads and stop cookie-based privacy invasion.
# These are only effective if $FILTER_ADS is set above.
# @BANNED_IMAGE_URL_PATTERNS uses Perl patterns. If an image's URL
# matches one of the patterns, it will not be downloaded (typically for
# ad-filtering). For more information on Perl regular expressions, see
# the Perl documentation.
# Note that most popup ads will be removed if scripts are removed (see
# $REMOVE_SCRIPTS above).
# If ad-filtering is your primary motive, consider using one of the many
# proxies that specialize in that. The classic is from JunkBusters, at
# http://www.junkbusters.com .
# Reject images whose URL matches any of these patterns. This is just a
# sample list; add more depending on which sites you visit.
@BANNED_IMAGE_URL_PATTERNS= (
'ad\.doubleclick\.net/ad/',
'\b[a-z](\d+)?\.doubleclick\.net(:\d*)?/',
'\.imgis\.com\b',
'\.adforce\.com\b',
'\.avenuea\.com\b',
'\.go\.com(:\d*)?/ad/',
'\.eimg\.com\b',
'\bexcite\.netscape\.com(:\d*)?/.*/promo/',
'/excitenetscapepromos/',
'\.yimg\.com(:\d*)?.*/promo/',
'\bus\.yimg\.com/[a-z]/(\w\w)/\1',
'\bus\.yimg\.com/[a-z]/\d-/',
'\bpromotions\.yahoo\.com(:\d*)?/promotions/',
'\bcnn\.com(:\d*)?/ads/',
'ads\.msn\.com\b',
'\blinkexchange\.com\b',
'\badknowledge\.com\b',
'/SmartBanner/',
'\bdeja\.com/ads/',
'\bimage\.pathfinder\.com/sponsors',
'ads\.tripod\.com',
'ar\.atwola\.com/image/',
'\brealcities\.com/ads/',
'\bnytimes\.com/ad[sx]/',
'\busatoday\.com/sponsors/',
'\busatoday\.com/RealMedia/ads/',
'\bmsads\.net/ads/',
'\batdmt\.com/[a-z]/',
) ;
# If set, replace banned images with 1x1 transparent GIF.
$RETURN_EMPTY_GIF= 1 ;
# If either $INSERT_HTML or $INSERT_FILE is set, then that HTML text or the
# contents of that named file (respectively) will be inserted into any HTML
# page retrieved through this proxy. $INSERT_HTML takes precedence over
# $INSERT_FILE.
# When viewing a page with frames, a new top frame is created and the
# insertions go there.
# NOTE: Any HTML you insert should not have relative URLs in it! The problem
# is that there is no appropriate base URL to resolve them with. So only use
# absolute URLs in your insertion. (If you use relative URLs anyway, then
# a) if $ANONYMIZE_INSERTION is set, they'll be resolved relative to this
# script's URL, which isn't great, or b) if $ANONYMIZE_INSERTION==0,
# they'll be unchanged and the browser will simply resolve them relative
# to the current page, which is usually worse.)
# The frame handling means that it's fairly easy for a surfer to bypass this
# insertion, by pretending in effect to be in a frame. There's not much we
# can do about that, since a page is retrieved the same way regardless of
# whether it's in a frame. This script uses a parameter in the URL to
# communicate to itself between calls, but the user can merely change that
# URL to make the script think it's retrieving a page for a frame. Also,
# many browsers let the user expand a frame's contents into a full window.
# [The warning in earlier versions about setting $INSERT_HTML to '' when using
# mod_perl and $INSERT_FILE no longer applies. It's all handled elsewhere.]
# As with $INSERT_ENTRY_FORM, note that any insertion may throw off any
# precise layout, and the insertion is subject to background colors and
# other page-wide settings.
#$INSERT_HTML= "<h1>This is an inserted header</h1><hr>" ;
#$INSERT_FILE= 'insert_file_name' ;
# If your insertion has links that you want anonymized along with the rest
# of the downloaded HTML, then set this to 1. Otherwise leave it at 0.
$ANONYMIZE_INSERTION= 0 ;
# If there's both a URL entry form and an insertion via $INSERT_HTML or
# $INSERT_FILE on the same page, the entry form normally goes at the top.
# Set this to put it after the other insertion.
$FORM_AFTER_INSERTION= 0 ;
# If the insertion is put in a top frame, then this is how many pixels high
# the frame is. If the default of 80 or 50 pixels is too big or too small
# for your insertion, change this. You can use percentage of screen height
# if you prefer, e.g. "20%". (Unfortunately, you can't just tell the
# browser to "make it as high as it needs to be", but at least the frame
# will be resizable by the user.)
# This affects insertions by $INSERT_ENTRY_FORM, $INSERT_HTML, and $INSERT_FILE.
# The default here usually works for the inserted entry form, which varies in
# size depending on $ALLOW_USER_CONFIG. It also varies by browser.
$INSERTION_FRAME_HEIGHT= $ALLOW_USER_CONFIG ? 80 : 50 ;
# Set this to 1 if the script is running on an SSL server, i.e. it is
# accessed through a URL starting with "https:"; set this to 0 if it's not
# running on an SSL server. This is needed to know how to route URLs back
# through the proxy. Regrettably, standard CGI does not yet provide a way
# for scripts to determine this without help.
# If this variable is set to '' or left undefined, then the program will
# guess: SSL is assumed if and only if SERVER_PORT is 443. This fails
# if SSL is used on another port, or (less commonly) a non-SSL server uses
# port 443, but usually it works. Besides being a good default, it lets
# you install the script where both a secure server and a non-secure server
# will serve it, and it will work correctly through either server.
# This has nothing to do with retrieving pages that are on SSL servers.
$RUNNING_ON_SSL_SERVER= '' ;
# If you're running this on a Windows machine, set this. Windows lacks some
# features that exist on Unix, and there are bugs in the IIS server. If
# this variable is set, the script tries to work around these limitations.
$RUNNING_ON_WINDOWS= 0 ;
# If your server doesn't support NPH scripts, then set this variable to true
# and try running the script as a normal non-NPH script. HOWEVER, this
# won't work as well as running it as NPH; there may be bugs, maybe some
# privacy holes, and results may not be consistent. It's a hack.
# Try to install the script as NPH before you use this option, because
# this may not work. NPH is supported on almost all servers, and it's
# usually very easy to install a script as NPH (on Apache, for example,
# you just need to name the script something starting with "nph-").
# One example of a problem is that Location: headers may get messed up,
# because they mean different things in an NPH and a non-NPH script.
# You have been warned.
# For this to work, your server MUST support the "Status:" CGI response
# header.
$NOT_RUNNING_AS_NPH= 0 ;
# Set HTTP proxy if needed. Also see $USE_PASSIVE_FTP_MODE below.
# These examples are appropriate environment variables to default to, if
# they are available.
# Note that earlier versions of this script used the environment variables
# directly, instead of the $HTTP_PROXY and $NO_PROXY variables we use now.
#$HTTP_PROXY= $ENV{'http_proxy'} ;
#$NO_PROXY= $ENV{'no_proxy'} ;
# If your HTTP proxy requires authentication, this script supports it in a
# limited way: you can have a single username/password pair to authenticate
# with, regardless of realm. In other words, multiple realms aren't
# supported for proxy authentication (though they are for normal server
# authentication, elsewhere).
# Set $PROXY_AUTH either in the form of "username:password", or to the actual
# base64 string that gets sent in the Proxy-Authorization: header.
#$PROXY_AUTH= 'Aladdin:open sesame' ;
# Here's an experimental feature that may or may not be useful. It's trivial
# to add, so I added it. It was inspired in part by Mike Reiter's and Avi
# Rubin's "Crowds", at http://www.research.att.com/projects/crowds/ .
# Let me know if you find a use for it.
# The idea is that you have a number of mutually-trusting, cooperating
# proxies that you list in @PROXY_GROUP(). If that is set, then instead
# of rerouting all URLs back through this proxy, the script will choose
# one of these proxies at random to reroute all URLs through, for each
# run. This could be used to balance the load among several proxies, for
# example. Under certain conditions it could conceivably help privacy by
# making it harder to track a user's session, but under certain other
# conditions it could make it easier, depending on how many people,
# proxies, and proxy servers are involved. For each page, both its
# included images and followed links will go through the same proxy, so a
# clever target server could determine which proxy servers are in each
# group.
# proxy_encode() and proxy_decode() must be the same for all proxies in the
# group. Same goes for pack_flags() and unpack_flags() if you modified them,
# and probably certain other routines and configuration options.
# Cookies and Basic authentication can't be supported with this, sorry, since
# cookies can only be sent back to the proxy that created them.
# Set this to a list of absolute URLs of proxies, ending with "nph-proxy.cgi"
# (or whatever you named the script). Be sure to include the URL of this
# proxy, or it will never redirect back through here. Each proxy in the
# group should have the same @PROXY_GROUP.
# Alternately, you could set each proxy's @PROXY_GROUP differently for more
# creative configuration, such as to balance the load unevenly, or to send
# users through a "round-robin" cycle of proxies.
# NOTE(review): only one URL is listed here, so the random choice described
#   above always selects this single proxy. Confirm this URL is the intended
#   target, and keep in mind that cookies and Basic authentication are not
#   supported while @PROXY_GROUP is set.
@PROXY_GROUP= ('http://www.rcsi-usa.com/icronticshard/nph-proxy.cgi') ;
# Normally, your browser stores all pages you download in your computer's
# hard drive and memory, in the "cache". This saves a lot of time and
# bandwidth the next time you view the page (especially with images, which
# are bigger and may be shared among several pages). However, in some
# situations you may not want the pages you've visited to be stored. If
# $MINIMIZE_CACHING is set, then this proxy will try its best to prevent any
# caching of anything retrieved through it.
# NOTE: This cannot guarantee that no caching will happen. All we can do is
# instruct the browser not to cache anything. A faulty or malicious browser
# could cache things anyway if it chose to.
# NOTE: This has nothing to do with your browser's "history list", which may
# also store a list of URLs you've visited.
# NOTE: If you use this, you will use a lot more bandwidth than without it,
# and pages will seemingly load slower, because if a browser can't cache
# anything locally then it has to load everything across the network every
# time it needs something.
$MINIMIZE_CACHING= 0 ; # off: $NO_CACHE_HEADERS (built further down) will be empty
# Normally, each cookie includes an expiration time/date, and the cookie stays
# in effect until then, even after you exit your browser and restart it
# (which normally means the cookie is stored on the hard drive). Any cookie
# that has no explicit expiration date is a "session cookie", and stays in
# effect only as long as the browser is running, and presumably is forgotten
# after that. If you set $SESSION_COOKIES_ONLY=1, then *all* cookies that
# pass through this proxy will be changed to session cookies. This is useful
# at a public terminal, or wherever you don't want your cookies to remain
# after you exit the browser.
# NOTE: The clock on the server where this runs must be correct for this
# option to work right! It doesn't have to be exact, but don't have it off
# by hours or anything like that. The problem is that we must not alter any
# cookies set to expire in the past, because that's how sites delete cookies.
# If a cookie is being deleted, we DON'T want to turn it into a session
# cookie. So this script will not alter any cookies set to expire before the
# current time according to the system clock.
$SESSION_COOKIES_ONLY= 0 ; # off: cookies are not converted to session cookies
# Set $USER_AGENT to something generic like this if you want to be extra
# careful. Conceivably, revealing which browser you're using may be a
# slight privacy or security risk.
# However, note that some URLs serve different pages depending on which
# browser you're using, so some pages will change if you set this.
# This defaults to the user's HTTP_USER_AGENT.
#$USER_AGENT= 'Mozilla/4.05 [en] (X11; I; Linux 2.0.34 i586)' ;
# FTP transfers can happen in either passive or non-passive mode. Passive
# mode works better if the client (this script) is behind a firewall. Some
# people consider passive mode to be more secure, too. But in certain
# network configurations, if this script has trouble connecting to FTP
# servers, you can turn this off to try non-passive mode.
# See http://cr.yp.to/ftp/security.html for a discussion of security issues
# regarding passive and non-passive FTP.
$USE_PASSIVE_FTP_MODE= 1 ; # on: passive mode; set to 0 to try non-passive mode
# Unlike a normal browser which can keep an FTP session open between requests,
# this script must make a new connection with each request. Thus, the
# FTP welcome message (e.g. the README file) will be received every time;
# there's no way for this script to know if you've been here before. Set
# $SHOW_FTP_WELCOME to true to always show the welcome message, or false
# to never show it.
$SHOW_FTP_WELCOME= 1 ; # on: always show the FTP welcome message
# Apparently, some censoring filters search outgoing request URIs, but not
# POST request bodies. Set this to make the initial input form submit
# using POST instead of GET.
$USE_POST_ON_START= 1 ; # on: the initial input form submits with POST, not GET
# Apparently, some censoring filters look at titles on HTML pages. Set this
# to remove HTML page titles.
$REMOVE_TITLES= 0 ; # off: HTML page titles are not removed
# If set, this option prevents a user from calling the proxy through the
# proxy itself, i.e. looping. It's normally a mistake on the user's part,
# and a waste of resources.
# This isn't foolproof; it just catches the obvious mistakes. It's probably
# pretty easy for a malicious user to make the script call itself, or s/he
# can always use two proxies to call each other in a loop. This doesn't
# account for IP addresses or multiple hostnames for the same server.
$NO_BROWSE_THROUGH_SELF= 0 ; # off: calling the proxy through itself is not prevented
# Set this to leave out the "Restart" link at the bottom of error pages, etc.
# In some situations this could make it harder for search engines to find the
# start page.
$NO_LINK_TO_START= 0 ; # off: the "Restart" link is not left out
# For the obscure case when a POST must be repeated because of user
# authentication, this is the max size of the request body that this
# script will store locally. If CONTENT_LENGTH is bigger than this,
# the body's not saved at all-- the first POST will be correct, but
# the second will not happen at all (since a partial POST is worse than
# nothing).
$MAX_REQUEST_SIZE= 4 * 1024 * 1024 ; # 4 Meg: largest request body stored for a repeated POST
# Normally, if a user tries to access a banned server or use an unsupported
# scheme (protocol), this script will alert the user with a warning page, and
# either allow the user to click through to the URL unprotected (i.e. without
# using the proxy), or ban access altogether. However, in some VPN-like
# installations, it may be more desirable to let users follow links from
# protected pages (e.g. within an intranet) that lead to unprotected,
# unproxified pages (e.g. pages outside of the intranet), with no breaks in
# the browsing experience. (This example assumes the proxy owner intends it
# to be used for browsing only the intranet and not the Internet at large.)
# Set $QUIETLY_EXIT_PROXY_SESSION to skip any warning message and let the
# user surf directly to unproxified pages from proxified pages. Note that
# this somewhat changes the meaning of @ALLOWED_SERVERS and @BANNED_SERVERS--
# they're not allowed or banned per se, it's just whether this proxy is
# willing to handle their traffic. @BANNED_NETWORKS is unaffected, however,
# since the IP ranges it contains often make no sense outside of the LAN.
# WARNING: DO *NOT* SET THIS FLAG IF ANONYMITY IS IMPORTANT AT ALL!!! IT IS
# NOT MEANT FOR THAT KIND OF INSTALLATION. IF THIS IS SET, THEN USERS WILL
# SURF INTO UNPROXIFIED, UNANONYMIZED PAGES WITH NO WARNING, AND THEIR
# PRIVACY WILL BE COMPROMISED; THEY MAY NOT EVEN NOTICE FOR A LONG TIME.
# THIS IS EXACTLY WHAT ANONYMIZING PROXIES ARE CREATED TO AVOID.
$QUIETLY_EXIT_PROXY_SESSION= 0 ; # off: the warning message is not skipped
# WARNING:
# EXCEPT UNDER RARE CIRCUMSTANCES, ANY PROXY WHICH HANDLES SSL REQUESTS
# SHOULD *ONLY* RUN ON AN SSL SERVER!!! OTHERWISE, YOU'RE RETRIEVING
# PROTECTED PAGES BUT SENDING THEM BACK TO THE USER UNPROTECTED. THIS
# COULD EXPOSE ANY INFORMATION IN THOSE PAGES, OR ANY INFORMATION THE
# USER SUBMITS TO A SECURE SERVER. THIS COULD HAVE SERIOUS CONSEQUENCES,
# EVEN LEGAL CONSEQUENCES. IT UNDERMINES THE WHOLE PURPOSE OF SECURE
# SERVERS.
# THE *ONLY* EXCEPTION IS WHEN YOU HAVE *COMPLETE* TRUST OF THE LINK
# BETWEEN THE BROWSER AND THE SERVER THAT RUNS THE SSL-HANDLING PROXY,
# SUCH AS ON A CLOSED LAN, OR IF THE PROXY RUNS ON THE SAME MACHINE AS
# THE BROWSER.
# IF YOU ARE ABSOLUTELY SURE THAT YOU TRUST THE USER-TO-PROXY LINK, YOU
# CAN OVERRIDE THE AUTOMATIC SECURITY MEASURE BY SETTING THE FLAG BELOW.
# CONSIDER THE CONSEQUENCES VERY CAREFULLY BEFORE YOU RUN THIS SSL-ACCESSING
# PROXY ON AN INSECURE SERVER!!!
$OVERRIDE_SECURITY= 0 ; # off: the automatic SSL security measure is not overridden
# Stuff below here you probably shouldn't modify unless you're messing with
# the code.
# The framework is in place to modify script content to pass back through the
# proxy, though the actual code that modifies a single script block of a
# given type is not done. If you want to, say, modify JavaScript in
# certain ways that work for your purpose, then see the routine
# proxify_block(). If you set this $PROXIFY_SCRIPTS flag to true, then
# proxify_block() will be called for every piece of script that comes
# through this proxy.
# So, to modify script content like this: a) set this flag to true, and b) go
# write some code in proxify_block() that modifies the script content the
# way you want. You probably want to use the routine full_url(); go read
# what it does. Also see @TYPES_TO_HANDLE and @SCRIPT_MIME_TYPES below.
# Don't set this unless you actually do that programming. Without any added
# code, it won't do anything but slow down the program-- dealing with the
# script-modifying framework takes longer than merely removing scripts, and
# both take a lot longer than leaving scripts intact.
# Limited testing shows this adds 20-30% to the running time for script-heavy
# sites, and very little for script-free sites. However, this number varies
# greatly from page to page. This is only the overhead involved in
# separating out the script content to call proxify_block(); this does not
# include anything that is actually done in that routine.
# NOTE: This is still experimental. The framework should work fine, but what
# goes in proxify_block() is up to you.
# NOTE TOO: You will almost certainly not be able to anonymize JavaScript
# completely. It's not hard to do "mostly", but it turns out to be a very
# complex problem to do completely; there will almost certainly be exploits
# that a malicious server can use to get a user's identity. The purpose of
# this feature is more to allow scripts to function through the proxy, than
# to provide bulletproof anonymity. You may be able to get better anonymity
# if you remove certain script statements altogether rather than try to
# modify them, and accept that doing so may break a few scripts.
# The best advice remains: FOR BEST ANONYMITY, BROWSE WITH SCRIPTS TURNED OFF.
$PROXIFY_SCRIPTS= 0 ; # off: the experimental script-modifying framework is not enabled
# Comments may contain HTML in them, which shouldn't be rendered but may be
# relevant in some other way. Set this flag if you want the contents of
# comments to be proxified like the rest of the page, i.e. proxify URLs,
# stylesheets, scripts, etc.
$PROXIFY_COMMENTS= 0 ; # off: the contents of comments are not proxified
# This lists all MIME types that could identify a script, and which will be
# filtered out as well as possible if removing scripts: HTTP responses with
# Content-Type: set to one of these will be nixed, certain HTML which links
# to one of these types will be removed, style sheets with a type here will
# be removed, and other odds and ends.
# These are used in matching, so can't contain special regex characters.
# This list is also used for the experimental $PROXIFY_SCRIPTS function.
# This list contains all script MIME types I know of, but I can't guarantee
# it's a complete list. It's largely taken from the examples at
# http://www.robinlionheart.com/stds/html4/scripts.html
# That page describes only the first four below as valid.
# The page at ftp://ftp.isi.edu/in-notes/iana/assignments/media-types/media-types
# lists all media (MIME) types registered with the IANA, but unfortunately
# many script types (especially proprietary ones) have not registered with
# them, and that list doesn't specify which types are script content anyway.
# One whitespace-separated MIME type per entry; order is unchanged from the
#   original quoted list.
@SCRIPT_MIME_TYPES= qw(
    application/x-javascript
    application/x-ecmascript
    application/x-vbscript
    application/x-perlscript
    application/javascript
    application/ecmascript
    text/javascript
    text/ecmascript
    text/jscript
    text/livescript
    text/vbscript
    text/vbs
    text/perlscript
    text/tcl
    text/x-scriptlet
    text/scriptlet
    application/hta
) ;
# All MIME types in @SCRIPT_MIME_TYPES and @OTHER_TYPES_TO_REGISTER will be
# "registered". Registration helps the script remember which MIME type is
# expected by a page when downloading embedded URLs, e.g. style sheets. Any
# MIME types that need special treatment should be listed here if they're not
# already in @SCRIPT_MIME_TYPES.
# If you write a handler for a new MIME type in proxify_block(), and that type
# isn't already listed in @SCRIPT_MIME_TYPES, then add it here.
@OTHER_TYPES_TO_REGISTER= qw( text/css ) ; # non-script types that still get registered
# These are MIME types that we *may* try to rewrite in proxify_block(), e.g.
# to send all URLs back through this script. If a type isn't on this list,
# then we know for certain it should be sent back to the user unchanged,
# which saves time.
# If you write a handler for a new MIME type in proxify_block(), then add the
# type here.
# NOT all the types here are actually supported at this time!
# text/html is not on this list because currently it's handled specially.
# One whitespace-separated MIME type per entry; order is unchanged from the
#   original quoted list.
@TYPES_TO_HANDLE= qw(
    text/css
    application/x-javascript
    application/x-ecmascript
    application/javascript
    application/ecmascript
    text/javascript
    text/ecmascript
    text/livescript
    text/jscript
) ;
# This is a list of all file extensions that will be disallowed if
# $TEXT_ONLY is set. It's an inexact science. If you want to ban
# other file extensions, you can add more to this list. Note that
# removing extensions from this list won't necessarily allow those
# files through, since there are other ways $TEXT_ONLY is implemented,
# such as only allowing MIME types of text/* .
# The format of this list is one long string, with the extensions
# separated by "|". This is because the string is actually used as
# a regular expression. Don't worry if you don't know what that means.
# Extensions are roughly taken from Netscape's "Helper Preferences" screen
# (but that was in 1996). A more complete list might be made from a
# mime.types file.
# Built by joining the extension groups with "|", since the resulting string
#   is used as a regular expression alternation.
$NON_TEXT_EXTENSIONS= join('|',
    qw( gif jpeg jpe jpg tiff tif png bmp xbm ),    # images
    qw( mp2 mp3 wav aif aiff au snd ),              # audio
    qw( avi qt mov mpeg mpg mpe ),                  # video
    qw( gz Z exe gtar tar zip sit hqx pdf ),        # applications
    qw( ram rm ra swf ),                            # others
) ;
# This is now set directly in footer(), the only place it's used.
# $PROXY_VERSION= '2.0' ;
#--------------------------------------------------------------------------
# End of normal user configuration.
# Now, set or adjust all globals that remain constant for all runs.
#--------------------------------------------------------------------------
# First, set various constants.
# Name tables used in rfc1123_date() and date_is_after().
@MONTH= ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec') ;
# 'Sun' sits at both index 0 and index 7.
@WEEKDAY= ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun') ;
# Inverse of @MONTH: lowercased month name => index into @MONTH.
%UN_MONTH= () ;
@UN_MONTH{ map { lc($_) } @MONTH }= (0..$#MONTH) ;
# Next, make copies of any constant environment variables, and fix as needed.
# SERVER_PORT and SCRIPT_NAME will be constant, and are used in several places.
# Besides, we need SCRIPT_NAME fixed before setting $THIS_SCRIPT_URL.
# SCRIPT_NAME should have a leading slash, but the old CGI "standard" from
# NCSA was unclear on that, so some servers didn't give it a leading
# slash. Here we ensure it has a leading slash.
$ENV_SERVER_PORT= $ENV{'SERVER_PORT'} ;
# Copy SCRIPT_NAME and, in the same statement, force exactly one leading
#   slash: an existing one is replaced by itself, a missing one is added.
($ENV_SCRIPT_NAME= $ENV{'SCRIPT_NAME'})=~ s#^/?#/# ;
# Next, adjust config variables as needed, or create any needed constants from
# them.
# Derive @BANNED_NETWORK_ADDRS from @BANNED_NETWORKS: for each entry, every run
#   of digits is packed as one unsigned byte, in order.
# Nothing is validated here; the proxy owner is trusted to have set
#   @BANNED_NETWORKS correctly.
@BANNED_NETWORK_ADDRS= map { pack('C*', /(\d+)/g) } @BANNED_NETWORKS ;
# An empty $RUNNING_ON_SSL_SERVER means "guess": treat the server as SSL
#   exactly when SERVER_PORT is 443.
if ($RUNNING_ON_SSL_SERVER eq '') {
    $RUNNING_ON_SSL_SERVER= ($ENV_SERVER_PORT==443) ;
}
# A colon means $PROXY_AUTH has not been base64-encoded yet, so encode it now.
if ($PROXY_AUTH=~ /:/) {
    $PROXY_AUTH= &base64($PROXY_AUTH) ;
}
# Drop a trailing slash, if present, from each URL in @PROXY_GROUP (in place).
s#/$## foreach @PROXY_GROUP ;
# $NO_CACHE_HEADERS is placed in every response: two no-cache header lines
#   when $MINIMIZE_CACHING is set, otherwise the empty string.
# In the "here documents" used for error messages it has to share a line with
#   another header, to avoid a blank line in the response.
if ($MINIMIZE_CACHING) {
    $NO_CACHE_HEADERS= "Cache-Control: no-cache\015\012Pragma: no-cache\015\012" ;
} else {
    $NO_CACHE_HEADERS= '' ;
}
# Fold the registered MIME type lists to lowercase.
@SCRIPT_MIME_TYPES=       map { lc($_) } @SCRIPT_MIME_TYPES ;
@OTHER_TYPES_TO_REGISTER= map { lc($_) } @OTHER_TYPES_TO_REGISTER ;
# Create @ALL_TYPES and %MIME_TYPE_ID, which are inverses of each other.
# This is useful e.g. to identify the MIME type expected in a given download,