Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

- Adding a new class to fetch remote files through PHP.

  • Loading branch information...
commit ef5b38b267447e3be499915a3552ff8fe4315f17 1 parent 276a8df
authored November 27, 2003

Showing 1 changed file with 1,208 additions and 0 deletions. Show diff stats Hide diff stats

  1. 1,208  lib/snoopy/Snoopy.class.inc
1,208  lib/snoopy/Snoopy.class.inc
... ...
@@ -0,0 +1,1208 @@
  1
+<?php
  2
+
  3
+/*************************************************
  4
+
  5
+Snoopy - the PHP net client
  6
+Author: Monte Ohrt <monte@ispi.net>
  7
+Copyright (c): 1999-2000 ispi, all rights reserved
  8
+Version: 1.0
  9
+
  10
+ * This library is free software; you can redistribute it and/or
  11
+ * modify it under the terms of the GNU Lesser General Public
  12
+ * License as published by the Free Software Foundation; either
  13
+ * version 2.1 of the License, or (at your option) any later version.
  14
+ *
  15
+ * This library is distributed in the hope that it will be useful,
  16
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18
+ * Lesser General Public License for more details.
  19
+ *
  20
+ * You should have received a copy of the GNU Lesser General Public
  21
+ * License along with this library; if not, write to the Free Software
  22
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  23
+
  24
+You may contact the author of Snoopy by e-mail at:
  25
+monte@ispi.net
  26
+
  27
+Or, write to:
  28
+Monte Ohrt
  29
+CTO, ispi
  30
+237 S. 70th suite 220
  31
+Lincoln, NE 68510
  32
+
  33
+The latest version of Snoopy can be obtained from:
  34
+http://snoopy.sourceforge.com
  35
+
  36
+*************************************************/
  37
+
  38
+class Snoopy
  39
+{
  40
+	/**** Public variables ****/
  41
+	
  42
+	/* user definable vars */
  43
+
  44
+	var $host			=	"www.moodle.org";		// host name we are connecting to
  45
+	var $port			=	80;					// port we are connecting to
  46
+	var $proxy_host		=	"";					// proxy host to use
  47
+	var $proxy_port		=	"";					// proxy port to use
  48
+	var $agent			=	"Snoopy v1.0";		// agent we masquerade as
  49
+	var	$referer		=	"";					// referer info to pass
  50
+	var $cookies		=	array();			// array of cookies to pass
  51
+												// $cookies["username"]="joe";
  52
+	var	$rawheaders		=	array();			// array of raw headers to send
  53
+												// $rawheaders["Content-type"]="text/html";
  54
+
  55
+	var $maxredirs		=	5;					// http redirection depth maximum. 0 = disallow
  56
+	var $lastredirectaddr	=	"";				// contains address of last redirected address
  57
+	var	$offsiteok		=	true;				// allows redirection off-site
  58
+	var $maxframes		=	0;					// frame content depth maximum. 0 = disallow
  59
+	var $expandlinks	=	true;				// expand links to fully qualified URLs.
  60
+												// this only applies to fetchlinks()
  61
+												// or submitlinks()
  62
+	var $passcookies	=	true;				// pass set cookies back through redirects
  63
+												// NOTE: this currently does not respect
  64
+												// dates, domains or paths.
  65
+	
  66
+	var	$user			=	"";					// user for http authentication
  67
+	var	$pass			=	"";					// password for http authentication
  68
+	
  69
+	// http accept types
  70
+	var $accept			=	"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  71
+	
  72
+	var $results		=	"";					// where the content is put
  73
+		
  74
+	var $error			=	"";					// error messages sent here
  75
+	var	$response_code	=	"";					// response code returned from server
  76
+	var	$headers		=	array();			// headers returned from server sent here
  77
+	var	$maxlength		=	500000;				// max return data length (body)
  78
+	var $read_timeout	=	0;					// timeout on read operations, in seconds
  79
+												// supported only since PHP 4 Beta 4
  80
+												// set to 0 to disallow timeouts
  81
+	var $timed_out		=	false;				// if a read operation timed out
  82
+	var	$status			=	0;					// http request status
  83
+	
  84
+	var	$curl_path		=	"/usr/local/bin/curl";
  85
+												// Snoopy will use cURL for fetching
  86
+												// SSL content if a full system path to
  87
+												// the cURL binary is supplied here.
  88
+												// set to false if you do not have
  89
+												// cURL installed. See http://curl.haxx.se
  90
+												// for details on installing cURL.
  91
+												// Snoopy does *not* use the cURL
  92
+												// library functions built into php,
  93
+												// as these functions are not stable
  94
+												// as of this Snoopy release.
  95
+	
  96
+	// send Accept-encoding: gzip?
  97
+	var $use_gzip		= true;	
  98
+	
  99
+	/**** Private variables ****/	
  100
+	
  101
+	var	$_maxlinelen	=	4096;				// max line length (headers)
  102
+	
  103
+	var $_httpmethod	=	"GET";				// default http request method
  104
+	var $_httpversion	=	"HTTP/1.0";			// default http request version
  105
+	var $_submit_method	=	"POST";				// default submit method
  106
+	var $_submit_type	=	"application/x-www-form-urlencoded";	// default submit type
  107
+	var $_mime_boundary	=   "";					// MIME boundary for multipart/form-data submit type
  108
+	var $_redirectaddr	=	false;				// will be set if page fetched is a redirect
  109
+	var $_redirectdepth	=	0;					// increments on an http redirect
  110
+	var $_frameurls		= 	array();			// frame src urls
  111
+	var $_framedepth	=	0;					// increments on frame depth
  112
+	
  113
+	var $_isproxy		=	false;				// set if using a proxy server
  114
+	var $_fp_timeout	=	30;					// timeout for socket connection
  115
+
  116
/*======================================================================*\
	Function:	fetch
	Purpose:	fetch the contents of a web page over http or https,
				then follow any redirect and fetch any frame URLs
				that the request recorded
	Input:		$URI	the location of the page to fetch
	Output:		returns false when the protocol is not http or https
				(message stored in $this->error), when https is
				requested but $this->curl_path is unset or not
				executable, or when _connect() reports failure;
				returns true otherwise.
				$this->results	the output text from the fetch
\*======================================================================*/

	function fetch($URI)
	{
		$URI_PARTS = parse_url($URI);
		// parse_url() returns false for seriously malformed URLs; treat that
		// the same as a URL without any recognisable parts.
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();

		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];

		$scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
		if ($scheme !== "http" && $scheme !== "https")
		{
			// not a valid protocol
			$this->error = 'Invalid protocol "'.$scheme.'"'."\n";
			return false;
		}

		// https requests need the external cURL binary
		if ($scheme === "https" && (!$this->curl_path || !is_executable($this->curl_path)))
			return false;

		$this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
		if (!empty($URI_PARTS["port"]))
			$this->port = $URI_PARTS["port"];

		if ($scheme === "http" && !$this->_connect($fp))
			return false;

		// The proxy flag is read only after _connect() has run: _connect()
		// is defined outside this block and may update $this->_isproxy.
		if ($this->_isproxy)
		{
			// using proxy, send entire URI
			$target = $URI;
		}
		else
		{
			// no proxy, send only the path (plus query string, if any)
			$target = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "";
			if (isset($URI_PARTS["query"]))
			{
				// "http://host?x=1" has no path; the request line still needs one
				if ($target === "")
					$target = "/";
				$target .= "?".$URI_PARTS["query"];
			}
		}

		if ($scheme === "http")
		{
			$this->_httprequest($target, $fp, $URI, $this->_httpmethod);
			$this->_disconnect($fp);
		}
		else
			$this->_httpsrequest($target, $URI, $this->_httpmethod);

		/* url was redirected, check if we've hit the max depth */
		if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// only follow redirect if it's on this site, or offsiteok is true.
			// The host must be followed by a URL delimiter so that
			// "host.example.org" is not mistaken for "host".
			$onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?=[:/?#]|$)~i', $this->_redirectaddr);
			if ($onsite || $this->offsiteok)
			{
				/* follow the redirect */
				$this->_redirectdepth++;
				$this->lastredirectaddr = $this->_redirectaddr;
				$this->fetch($this->_redirectaddr);
			}
		}

		if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// work on a copy: the nested fetch() calls refill $this->_frameurls
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			foreach ($frameurls as $frameurl)
			{
				if ($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}
  256
+
  257
/*======================================================================*\
	Function:	submit
	Purpose:	submit an http form over http or https, then follow
				any redirect (re-submitting the same data) and
				fetch any frame URLs that the request recorded
	Input:		$URI	the location to post the data
				$formvars	the formvars to use.
					format: $formvars["var"] = "val";
				$formfiles	passed through unchanged to
					_prepare_post_body() together with $formvars
	Output:		returns false when the protocol is not http or https
				(message stored in $this->error), when https is
				requested but $this->curl_path is unset or not
				executable, or when _connect() reports failure;
				returns true otherwise.
				$this->results	the text output from the post
\*======================================================================*/

	function submit($URI, $formvars="", $formfiles="")
	{
		$postdata = $this->_prepare_post_body($formvars, $formfiles);

		$URI_PARTS = parse_url($URI);
		// parse_url() returns false for seriously malformed URLs; treat that
		// the same as a URL without any recognisable parts.
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();

		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];

		$scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
		if ($scheme !== "http" && $scheme !== "https")
		{
			// not a valid protocol
			$this->error = 'Invalid protocol "'.$scheme.'"'."\n";
			return false;
		}

		// https requests need the external cURL binary
		if ($scheme === "https" && (!$this->curl_path || !is_executable($this->curl_path)))
			return false;

		$this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
		if (!empty($URI_PARTS["port"]))
			$this->port = $URI_PARTS["port"];

		if ($scheme === "http" && !$this->_connect($fp))
			return false;

		// The proxy flag is read only after _connect() has run: _connect()
		// is defined outside this block and may update $this->_isproxy.
		if ($this->_isproxy)
		{
			// using proxy, send entire URI
			$target = $URI;
		}
		else
		{
			// no proxy, send only the path (plus query string, if any)
			$target = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "";
			if (isset($URI_PARTS["query"]))
			{
				// "http://host?x=1" has no path; the request line still needs one
				if ($target === "")
					$target = "/";
				$target .= "?".$URI_PARTS["query"];
			}
		}

		if ($scheme === "http")
		{
			$this->_httprequest($target, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			$this->_disconnect($fp);
		}
		else
			$this->_httpsrequest($target, $URI, $this->_submit_method, $this->_submit_type, $postdata);

		/* url was redirected, check if we've hit the max depth */
		if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// Qualify the redirect only when it carries no scheme at all; an
			// absolute URL with a different scheme must not be glued onto
			// the current host.
			if (!preg_match('~^[a-z][a-z0-9+.\-]*://~i', $this->_redirectaddr))
				$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme."://".$this->host);

			// only follow redirect if it's on this site, or offsiteok is true.
			// The host must be followed by a URL delimiter so that
			// "host.example.org" is not mistaken for "host".
			$onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?=[:/?#]|$)~i', $this->_redirectaddr);
			if ($onsite || $this->offsiteok)
			{
				/* follow the redirect */
				$this->_redirectdepth++;
				$this->lastredirectaddr = $this->_redirectaddr;
				$this->submit($this->_redirectaddr, $formvars, $formfiles);
			}
		}

		if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// work on a copy: the nested fetch() calls refill $this->_frameurls
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			foreach ($frameurls as $frameurl)
			{
				if ($this->_framedepth >= $this->maxframes)
					break;
				// frames are retrieved with a plain fetch(), not re-submitted
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}
  407
+
  408
/*======================================================================*\
	Function:	fetchlinks
	Purpose:	fetch the links from a web page
	Input:		$URI	where you are fetching from
	Output:		returns false when fetch() fails, true otherwise.
				$this->results	an array of the URLs; when fetch()
				left an array of documents in $this->results, each
				element is replaced by that document's array of URLs.
				Links are passed through _expandlinks() when
				$this->expandlinks is set.
\*======================================================================*/

	function fetchlinks($URI)
	{
		if (!$this->fetch($URI))
			return false;

		if (is_array($this->results))
		{
			for ($x = 0; $x < count($this->results); $x++)
			{
				$this->results[$x] = $this->_striplinks($this->results[$x]);
				// expand each document's link list on its own, as
				// submitlinks() does; expanding the outer array would hand
				// nested arrays to preg_replace()
				if ($this->expandlinks)
					$this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
			}
		}
		else
		{
			$this->results = $this->_striplinks($this->results);
			if ($this->expandlinks)
				$this->results = $this->_expandlinks($this->results, $URI);
		}

		return true;
	}
  435
+
  436
/*======================================================================*\
	Function:	fetchform
	Purpose:	fetch a web page and keep only its form elements
	Input:		$URI	where you are fetching from
	Output:		returns false when fetch() fails, true otherwise.
				$this->results	the resulting html form markup (each
				element is processed separately when results is
				an array)
\*======================================================================*/

	function fetchform($URI)
	{
		if (!$this->fetch($URI))
			return false;

		// single document: reduce it in place and finish
		if (!is_array($this->results))
		{
			$this->results = $this->_stripform($this->results);
			return true;
		}

		// several documents: reduce each one in turn
		$idx = 0;
		while ($idx < count($this->results))
		{
			$this->results[$idx] = $this->_stripform($this->results[$idx]);
			$idx++;
		}

		return true;
	}
  462
+	
  463
+	
  464
/*======================================================================*\
	Function:	fetchtext
	Purpose:	fetch a web page and reduce it to text by running
				it through _striptext()
	Input:		$URI	where you are fetching from
	Output:		returns false when fetch() fails, true otherwise.
				$this->results	the text from the web page (each
				element is processed separately when results is
				an array)
\*======================================================================*/

	function fetchtext($URI)
	{
		$fetched = $this->fetch($URI);
		if (!$fetched)
			return false;

		if (is_array($this->results))
		{
			// several documents: convert each one in turn
			$pos = 0;
			while ($pos < count($this->results))
			{
				$this->results[$pos] = $this->_striptext($this->results[$pos]);
				$pos++;
			}
			return true;
		}

		$this->results = $this->_striptext($this->results);
		return true;
	}
  487
+
  488
/*======================================================================*\
	Function:	submitlinks
	Purpose:	submit a form and grab the links from the response
	Input:		$URI	where you are submitting from
				$formvars, $formfiles	handed to submit() unchanged
	Output:		returns false when submit() fails, true otherwise.
				$this->results	an array of the links from the post,
				passed through _expandlinks() when
				$this->expandlinks is set
\*======================================================================*/

	function submitlinks($URI, $formvars="", $formfiles="")
	{
		if (!$this->submit($URI, $formvars, $formfiles))
			return false;

		if (!is_array($this->results))
		{
			// single document
			$links = $this->_striplinks($this->results);
			$this->results = $this->expandlinks ? $this->_expandlinks($links, $URI) : $links;
			return true;
		}

		// several documents: handle each one separately
		for ($i = 0; $i < count($this->results); $i++)
		{
			$links = $this->_striplinks($this->results[$i]);
			if ($this->expandlinks)
				$links = $this->_expandlinks($links, $URI);
			$this->results[$i] = $links;
		}

		return true;
	}
  519
+
  520
/*======================================================================*\
	Function:	submittext
	Purpose:	submit a form and grab the text from the response
	Input:		$URI	where you are submitting from
				$formvars, $formfiles	handed to submit() unchanged
	Output:		returns false when submit() fails, true otherwise.
				$this->results	the text from the web page, passed
				through _expandlinks() as well when
				$this->expandlinks is set
\*======================================================================*/

	function submittext($URI, $formvars = "", $formfiles = "")
	{
		if (!$this->submit($URI, $formvars, $formfiles))
			return false;

		if (!is_array($this->results))
		{
			// single document
			$text = $this->_striptext($this->results);
			$this->results = $this->expandlinks ? $this->_expandlinks($text, $URI) : $text;
			return true;
		}

		// several documents: handle each one separately
		for ($i = 0; $i < count($this->results); $i++)
		{
			$text = $this->_striptext($this->results[$i]);
			if ($this->expandlinks)
				$text = $this->_expandlinks($text, $URI);
			$this->results[$i] = $text;
		}

		return true;
	}
  551
+
  552
+	
  553
+
  554
/*======================================================================*\
	Function:	set_submit_multipart
	Purpose:	Switch the content type used for form submissions
				(read by submit()) to multipart/form-data
	Input:		none
	Output:		$this->_submit_type is updated; nothing is returned
\*======================================================================*/
	function set_submit_multipart()
	{
		$multipart_type = "multipart/form-data";
		$this->_submit_type = $multipart_type;
	}
  563
+
  564
+	
  565
/*======================================================================*\
	Function:	set_submit_normal
	Purpose:	Switch the content type used for form submissions
				(read by submit()) back to
				application/x-www-form-urlencoded
	Input:		none
	Output:		$this->_submit_type is updated; nothing is returned
\*======================================================================*/
	function set_submit_normal()
	{
		$urlencoded_type = "application/x-www-form-urlencoded";
		$this->_submit_type = $urlencoded_type;
	}
  574
+
  575
+	
  576
+	
  577
+
  578
+/*======================================================================*\
  579
+	Private functions
  580
+\*======================================================================*/
  581
+	
  582
+	
  583
/*======================================================================*\
	Function:	_striplinks
	Purpose:	strip the hyperlinks from an html document
	Input:		$document	document to strip.
	Output:		$match		an array of the href values: all quoted
							values first, then all unquoted ones,
							each group in document order. Empty
							array when the document has no links.
\*======================================================================*/

	function _striplinks($document)
	{
		// The ".*?" between "<a" and "href" must be lazy: with the /s
		// modifier a greedy ".*" runs on to the LAST href in the document,
		// so only a single link would ever be reported.
		preg_match_all("'<\s*a\s+.*?href\s*=\s*			# find <a href=
						([\"\'])?					# find single or double quote
						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
													# quote, otherwise match up to next space
						'isx",$document,$links);

		// always hand back an array, even when nothing matched
		$match = array();

		// catenate the non-empty matches from the conditional subpattern:
		// group 2 holds quoted values, group 3 holds unquoted values
		foreach ($links[2] as $val)
		{
			if(!empty($val))
				$match[] = $val;
		}

		foreach ($links[3] as $val)
		{
			if(!empty($val))
				$match[] = $val;
		}

		// return the links
		return $match;
	}
  616
+
  617
/*======================================================================*\
	Function:	_stripform
	Purpose:	strip the form elements from an html document
	Input:		$document	document to strip.
	Output:		a single string holding every matched piece of
				form markup (FORM, INPUT, SELECT, TEXTAREA and
				OPTION tags), joined with CRLF
\*======================================================================*/

	function _stripform($document)
	{
		$form_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

		// index 0 of the result holds the complete text of every match
		preg_match_all($form_pattern, $document, $found);

		// catenate the matches and return them
		return implode("\r\n", $found[0]);
	}
  634
+
  635
+	
  636
+	
  637
/*======================================================================*\
	Function:	_striptext
	Purpose:	strip the text from an html document: remove script
				blocks and tags, collapse whitespace that follows a
				line break, and decode a handful of common entities
	Input:		$document	document to strip.
	Output:		$text		the resulting text
\*======================================================================*/

	function _striptext($document)
	{

		// I didn't use preg eval (//e) since that is only available in PHP 4.0.
		// so, list your entities one by one here. I included some of the
		// more common ones.
		//
		// The patterns are applied in order, each on the output of the
		// previous one. $search and $replace are parallel arrays.

		$search = array("'<script[^>]*?>.*?</script>'si",	// strip out javascript
						"'<[\/\!]*?[^<>]*?>'si",			// strip out html tags
						"'([\r\n])[\s]+'",					// strip out white space
						"'&(quote?|#34);'i",				// replace html entities; the real
															// entity is &quot; - the misspelt
															// &quote; is still accepted
						"'&(amp|#38);'i",
						"'&(lt|#60);'i",
						"'&(gt|#62);'i",
						"'&(nbsp|#160);'i",
						"'&(iexcl|#161);'i",
						"'&(cent|#162);'i",
						"'&(pound|#163);'i",
						"'&(copy|#169);'i"
						);
		$replace = array(	"",
							"",
							"\\1",
							"\"",
							"&",
							"<",
							">",
							" ",
							chr(161),
							chr(162),
							chr(163),
							chr(169));

		$text = preg_replace($search,$replace,$document);

		return $text;
	}
  681
+
  682
/*======================================================================*\
	Function:	_expandlinks
	Purpose:	expand each link into a fully qualified URL
	Input:		$links			the links to qualify (whatever is
								given is handed to preg_replace()
								as the subject)
				$URI			the full URI to get the base from
	Output:		$expandedLinks	the expanded links
\*======================================================================*/

	function _expandlinks($links,$URI)
	{
		// base = the URI without its query string ...
		preg_match("/^[^\?]+/",$URI,$uri_match);
		// ... and without a trailing "name.ext" path segment
		$base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$uri_match[0]);

		// the rules run in this order, each on the result of the previous one
		$search = array();
		$replace = array();

		// drop a leading "http://<current host>"
		$search[] = "|^http://".preg_quote($this->host)."|i";
		$replace[] = "";

		// prefix the base to anything that does not start with http://
		// (mailto: links are excluded by the second lookahead)
		$search[] = "|^(?!http://)(\/)?(?!mailto:)|i";
		$replace[] = $base."/";

		// collapse "/./"
		$search[] = "|/\./|";
		$replace[] = "/";

		// collapse "/segment/../"
		$search[] = "|/[^\/]+/\.\./|";
		$replace[] = "/";

		return preg_replace($search,$replace,$links);
	}
  713
+
  714
/*======================================================================*\
	Function:	_httprequest
	Purpose:	go get the http data from the server: write the
				request to the open connection, read the response
				headers and body, and record redirect and frame
				targets for the caller
	Input:		$url			the url to put on the request line
								("/" is used when empty)
				$fp				the current open file pointer
				$URI			the full URI
				$http_method	the request method (e.g. GET, POST)
				$content_type	value for the Content-type header,
								omitted when empty
				$body			body contents to send if any (POST)
	Output:		returns false (with $this->status set to -100) when
				a read timed out, true otherwise.
				Sets $this->headers, $this->response_code,
				$this->status, $this->_redirectaddr,
				$this->_frameurls and $this->results.
\*======================================================================*/

	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
	{
		// setcookies() is defined outside this block
		if($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$URI_PARTS = parse_url($URI);
		$scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "";
		if(empty($url))
			$url = "/";
		$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
		if(!empty($this->agent))
			$headers .= "User-Agent: ".$this->agent."\r\n";
		if(!empty($this->host) && !isset($this->rawheaders['Host']))
			$headers .= "Host: ".$this->host."\r\n";
		if(!empty($this->accept))
			$headers .= "Accept: ".$this->accept."\r\n";

		if($this->use_gzip) {
			// make sure PHP was built with --with-zlib
			// and we can handle gzipp'ed data
			if ( function_exists('gzinflate') ) {
			   $headers .= "Accept-encoding: gzip\r\n";
			}
			else {
			   trigger_error(
			   	"use_gzip is on, but PHP was built without zlib support.".
				"  Requesting file(s) without gzip encoding.",
				E_USER_NOTICE);
			}
		}

		if(!empty($this->referer))
			$headers .= "Referer: ".$this->referer."\r\n";
		if(!empty($this->cookies))
		{
			if(!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			reset($this->cookies);
			if ( count($this->cookies) > 0 ) {
				$cookie_pairs = array();
				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
					$cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
				}
				$headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
			}
		}
		if(!empty($this->rawheaders))
		{
			if(!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach always starts at the first element, so the raw
			// headers are also sent on follow-up requests (redirects, frames)
			foreach ($this->rawheaders as $headerKey => $headerVal)
				$headers .= $headerKey.": ".$headerVal."\r\n";
		}
		if(!empty($content_type)) {
			$headers .= "Content-type: $content_type";
			if ($content_type == "multipart/form-data")
				$headers .= "; boundary=".$this->_mime_boundary;
			$headers .= "\r\n";
		}
		if(!empty($body))
			$headers .= "Content-length: ".strlen($body)."\r\n";
		if(!empty($this->user) || !empty($this->pass))
			$headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";

		// blank line ends the header section
		$headers .= "\r\n";

		// set the read timeout if needed
		if ($this->read_timeout > 0)
			socket_set_timeout($fp, $this->read_timeout);
		$this->timed_out = false;

		fwrite($fp,$headers.$body,strlen($headers.$body));

		$this->_redirectaddr = false;
		$this->headers = array();

		// content was returned gzip encoded?
		$is_gzipped = false;

		while($currentHeader = fgets($fp,$this->_maxlinelen))
		{
			if ($this->read_timeout > 0 && $this->_check_timeout($fp))
			{
				$this->status=-100;
				return false;
			}

			// an empty line separates the headers from the body
			if($currentHeader == "\r\n")
				break;

			// if a header begins with Location: or URI:, set the redirect.
			// Header names are matched case-insensitively and the
			// whitespace after the colon is optional.
			if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($currentHeader),$matches))
			{
				// look for :// in the Location header to see if hostname is included
				if(!preg_match("|\:\/\/|",$matches[2]))
				{
					// no host in the path, so prepend
					$this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
					// eliminate double slash
					if(!preg_match("|^/|",$matches[2]))
							$this->_redirectaddr .= "/".$matches[2];
					else
							$this->_redirectaddr .= $matches[2];
				}
				else
					$this->_redirectaddr = $matches[2];
			}

			if(preg_match("|^HTTP/|",$currentHeader))
			{
				// the token after the protocol version is the status code
				if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
				{
					$this->status= $status[1];
				}
				$this->response_code = $currentHeader;
			}

			if (preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader) ) {
				$is_gzipped = true;
			}

			$this->headers[] = $currentHeader;
		}

		// read the body; stop once more than maxlength bytes have arrived
		$results = "";
		while ( $data = fread($fp, $this->maxlength) ) {
			$results .= $data;
			if ( strlen($results) > $this->maxlength ) {
				break;
			}
		}

		// gunzip (only possible when zlib is available)
		if ( $is_gzipped && function_exists('gzinflate') ) {
			// per http://www.php.net/manual/en/function.gzencode.php
			// skip the 10-byte gzip header, then inflate the raw stream
			$results = substr($results, 10);
			$results = gzinflate($results);
		}

		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
		{
			$this->status=-100;
			return false;
		}

		// check if there is a a redirect meta tag

		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
		{
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
		}

		// have we hit our frame depth and is there frame src to fetch?
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
		{
			$this->results[] = $results;
			for($x=0; $x<count($match[1]); $x++)
				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
		}
		// have we already fetched framed content?
		elseif(is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
  896
+
  897
+/*======================================================================*\
  898
+	Function:	_httpsrequest
  899
+	Purpose:	go get the https data from the server using curl
  900
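+	Input:		$url		the url to fetch
  901
+				$URI		the full URI
  902
+				$http_method	the request method (unused here: curl POSTs when $body is non-empty, GETs otherwise)
+				$content_type	the content type of the body, if any
+				$body		body contents to send if any (POST)
  903
+	Output:		true on success; false (with $this->error set) if curl fails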
  904
+\*======================================================================*/
  905
+	
  906
+	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
  907
+	{
  908
+		if($this->passcookies && $this->_redirectaddr)
  909
+			$this->setcookies();
  910
+
  911
+		$headers = array();		
  912
+					
  913
+		$URI_PARTS = parse_url($URI);
  914
+		if(empty($url))
  915
+			$url = "/";
  916
+		// GET ... header not needed for curl
  917
+		//$headers[] = $http_method." ".$url." ".$this->_httpversion;		
  918
+		if(!empty($this->agent))
  919
+			$headers[] = "User-Agent: ".$this->agent;
  920
+		if(!empty($this->host))
  921
+			$headers[] = "Host: ".$this->host;
  922
+		if(!empty($this->accept))
  923
+			$headers[] = "Accept: ".$this->accept;
  924
+		if(!empty($this->referer))
  925
+			$headers[] = "Referer: ".$this->referer;
  926
+		if(!empty($this->cookies))
  927
+		{			
  928
+			if(!is_array($this->cookies))
  929
+				$this->cookies = (array)$this->cookies;
  930
+	
  931
+			reset($this->cookies);
  932
+			if ( count($this->cookies) > 0 ) {
  933
+				$cookie_str = 'Cookie: ';
  934
+				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
  935
+				$cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
  936
+				}
  937
+				$headers[] = substr($cookie_str,0,-2);
  938
+			}
  939
+		}
  940
+		if(!empty($this->rawheaders))
  941
+		{
  942
+			if(!is_array($this->rawheaders))
  943
+				$this->rawheaders = (array)$this->rawheaders;
  944
+			while(list($headerKey,$headerVal) = each($this->rawheaders))
  945
+				$headers[] = $headerKey.": ".$headerVal;
  946
+		}
  947
+		if(!empty($content_type)) {
  948
+			if ($content_type == "multipart/form-data")
  949
+				$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
  950
+			else
  951
+				$headers[] = "Content-type: $content_type";
  952
+		}
  953
+		if(!empty($body))	
  954
+			$headers[] = "Content-length: ".strlen($body);
  955
+		if(!empty($this->user) || !empty($this->pass))	
  956
+			$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
  957
+			
  958
+		for($curr_header = 0; $curr_header < count($headers); $curr_header++)
  959
+			$cmdline_params .= " -H \"".$headers[$curr_header]."\"";
  960
+		
  961
+		if(!empty($body))
  962
+			$cmdline_params .= " -d \"$body\"";
  963
+		
  964
+		if($this->read_timeout > 0)
  965
+			$cmdline_params .= " -m ".$this->read_timeout;
  966
+		
  967
+		$headerfile = uniqid(time());
  968
+
  969
+		exec($this->curl_path." -D \"/tmp/$headerfile\"".$cmdline_params." ".$URI,$results,$return);
  970
+		
  971
+		if($return)
  972
+		{
  973
+			$this->error = "Error: cURL could not retrieve the document, error $return.";
  974
+			return false;
  975
+		}
  976
+			
  977
+			
  978
+		$results = implode("\r\n",$results);
  979
+		
  980
+		$result_headers = file("/tmp/$headerfile");
  981
+						
  982
+		$this->_redirectaddr = false;
  983
+		unset($this->headers);
  984
+						
  985
+		for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
  986
+		{
  987
+			
  988
+			// if a header begins with Location: or URI:, set the redirect
  989
+			if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
  990
+			{
  991
+				// get URL portion of the redirect
  992
+				preg_match("/^(Location: |URI:)(.*)/",chop($result_headers[$currentHeader]),$matches);
  993
+				// look for :// in the Location header to see if hostname is included
  994
+				if(!preg_match("|\:\/\/|",$matches[2]))
  995
+				{
  996
+					// no host in the path, so prepend
  997
+					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
  998
+					// eliminate double slash
  999
+					if(!preg_match("|^/|",$matches[2]))
  1000
+							$this->_redirectaddr .= "/".$matches[2];
  1001
+					else
  1002
+							$this->_redirectaddr .= $matches[2];
  1003
+				}
  1004
+				else
  1005
+					$this->_redirectaddr = $matches[2];
  1006
+			}
  1007
+		
  1008
+			if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
  1009
+				$this->response_code = $result_headers[$currentHeader];
  1010
+
  1011
+			$this->headers[] = $result_headers[$currentHeader];
  1012
+		}
  1013
+
  1014
+		// check if there is a a redirect meta tag
  1015
+		
  1016
+		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
  1017
+		{
  1018
+			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);	
  1019
+		}
  1020
+
  1021
+		// have we hit our frame depth and is there frame src to fetch?
  1022
+		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
  1023
+		{
  1024
+			$this->results[] = $results;
  1025
+			for($x=0; $x<count($match[1]); $x++)
  1026
+				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
  1027
+		}
  1028
+		// have we already fetched framed content?
  1029
+		elseif(is_array($this->results))
  1030
+			$this->results[] = $results;
  1031
+		// no framed content
  1032
+		else
  1033
+			$this->results = $results;
  1034
+
  1035
+		unlink("/tmp/$headerfile");
  1036
+		
  1037
+		return true;
  1038
+	}
  1039
+
  1040
+/*======================================================================*\
  1041
+	Function:	setcookies()
  1042
+	Purpose:	set cookies for a redirection
  1043
+\*======================================================================*/
  1044
+	
  1045
+	function setcookies()
  1046
+	{
  1047
+		for($x=0; $x<count($this->headers); $x++)
  1048
+		{
  1049
+		if(preg_match("/^set-cookie:[\s]+([^=]+)=([^;]+)/i", $this->headers[$x],$match))
  1050
+			$this->cookies[$match[1]] = $match[2];
  1051
+		}
  1052
+	}
  1053
+
  1054
+	
  1055
+/*======================================================================*\
  1056
+	Function:	_check_timeout
  1057
+	Purpose:	checks whether timeout has occurred
  1058
+	Input:		$fp	file pointer
  1059
+\*======================================================================*/
  1060
+
  1061
+	function _check_timeout($fp)
  1062
+	{
  1063
+		if ($this->read_timeout > 0) {
  1064
+			$fp_status = socket_get_status($fp);
  1065
+			if ($fp_status["timed_out"]) {
  1066
+				$this->timed_out = true;
  1067
+				return true;
  1068
+			}
  1069
+		}
  1070
+		return false;
  1071
+	}
  1072
+
  1073
+/*======================================================================*\
  1074
+	Function:	_connect
  1075
+	Purpose:	make a socket connection
  1076
+	Input:		$fp	file pointer
  1077
+\*======================================================================*/
  1078
+	
  1079
+	function _connect(&$fp)
  1080
+	{
  1081
+		if(!empty($this->proxy_host) && !empty($this->proxy_port))
  1082
+			{
  1083
+				$this->_isproxy = true;
  1084
+				$host = $this->proxy_host;
  1085
+				$port = $this->proxy_port;
  1086
+			}
  1087
+		else
  1088
+		{
  1089
+			$host = $this->host;
  1090
+			$port = $this->port;
  1091
+		}
  1092
+	
  1093
+		$this->status = 0;
  1094
+		
  1095
+		if($fp = fsockopen(
  1096
+					$host,
  1097
+					$port,
  1098
+					$errno,
  1099
+					$errstr,
  1100
+					$this->_fp_timeout
  1101
+					))
  1102
+		{
  1103
+			// socket connection succeeded
  1104
+
  1105
+			return true;
  1106
+		}
  1107
+		else
  1108
+		{
  1109
+			// socket connection failed
  1110
+			$this->status = $errno;
  1111
+			switch($errno)
  1112
+			{
  1113
+				case -3:
  1114
+					$this->error="socket creation failed (-3)";
  1115
+				case -4:
  1116