Skip to content
Permalink
Browse files

Bug: https://trac.macports.org/ticket/1260

Submitted by:   simon@cotsworth.com (Simon Cotsworth)
Reviewed by:    olegb@
Approved by:
Obtained from:
new port, thanks simon

git-svn-id: https://svn.macports.org/repository/macports/trunk/dports@4127 d073be05-634f-4543-b044-5fe20cf6d1d6
  • Loading branch information
Ole Guldberg Jensen
Ole Guldberg Jensen committed Dec 6, 2003
1 parent 3c08e52 commit ed513a2168b08ca9b8336b4f33ac90f35d0d82b6
@@ -0,0 +1,116 @@
# $Id: Portfile,v 1.1 2003/12/06 00:44:36 olegb Exp $

PortSystem          1.0

# Identity of the port.
name                htdig
version             3.1.6
categories          www
platforms           darwin freebsd
maintainers         simon@cotsworth.com

# Short summary, followed by the full description.
description         WWW Search Engine Software

long_description    The ht://Dig system is a complete world wide web indexing and searching system \
                    for a domain or intranet. This system is not meant to replace the need for \
                    powerful internet-wide search systems like Lycos, Infoseek, Google and AltaVista. \
                    Instead it is meant to cover the search needs for a single company, campus, or \
                    even a particular sub section of a web site. As opposed to some WAIS-based or \
                    web-server based search engines, ht://Dig can easily span several web servers. \
                    The type of these different web servers doesn't matter as long as they \
                    understand common protocols like HTTP.

# Where the distribution is fetched from, how it is verified, and the
# local patches applied to it.
homepage            http://www.htdig.org/
master_sites        ${homepage}files/
checksums           md5 7a2f20d8d6149efd8d119bb2ebf55f23
patchfiles          patch-metadate.0 \
                    patch-Makefile.0 \
                    patch-documentation.1 \
                    patch-documentation.2 \
                    patch-Date-viewing.0 \
                    patch-ExternalParser-max_doc_size.0 \
                    patch-htnotifyNull.0 \
                    patch-JavaScript.0 \
                    patch-CONFIG.in

# Installation locations below the port prefix.
set doc_path        ${prefix}/share/doc/${name}
set conf_path       ${prefix}/etc/${name}
set share_path      ${prefix}/share/${name}
set db_path         ${prefix}/var/${name}

# Web server locations; these start out empty and are assigned by the
# platform variants (darwin, freebsd).
set cgi_path        {}
set httpd_doc_path  {}
set httpd_conf_path {}

# Stage the install by handing the build a prefix= override that points
# into the destroot.
destroot.destdir    prefix=${destroot}${prefix}
variant darwin {
    global cgi_path httpd_conf_path httpd_doc_path

    if {[file exists ${prefix}/sbin/httpd]} {
        # An httpd binary exists under the port prefix (apache port
        # interop): use the web server layout below the prefix.
        set cgi_path        "${prefix}/www/cgi-bin"
        set httpd_doc_path  "${prefix}/www/htdocs"
        set httpd_conf_path "${prefix}/etc/apache"
    } else {
        # No httpd under the port prefix: use the fixed locations under
        # /Library/WebServer and /private/etc/httpd.
        set cgi_path        "/Library/WebServer/CGI-Executables"
        set httpd_doc_path  "/Library/WebServer/Documents"
        set httpd_conf_path "/private/etc/httpd"
    }

    # The CGI directory is given to configure as a path inside the destroot.
    configure.args      --with-cgi-bin-dir=${destroot}${cgi_path}
}

variant freebsd {
    global cgi_path httpd_conf_path httpd_doc_path

    if {[file exists ${prefix}/sbin/httpd]} {
        # An httpd binary exists under the port prefix (apache port
        # interop): use the web server layout below the prefix.
        set cgi_path        "${prefix}/www/cgi-bin"
        set httpd_doc_path  "${prefix}/www/htdocs"
        set httpd_conf_path "${prefix}/etc/apache"
    } else {
        # No httpd under the port prefix: use the fixed locations under
        # /usr/local.
        set cgi_path        "/usr/local/www/cgi-bin"
        set httpd_doc_path  "/usr/local/www/data"
        set httpd_conf_path "/usr/local/etc/apache"
    }

    # The CGI directory is given to configure as a path inside the destroot.
    configure.args      --with-cgi-bin-dir=${destroot}${cgi_path}
}

post-destroot {
    # Staged locations that are referenced more than once below.
    set staged_docs     "${destroot}${doc_path}"
    set staged_contrib  "${destroot}${share_path}/contrib"
    set staged_vhost    "${destroot}${httpd_conf_path}/httpd_${name}.conf"

    # Empty-directory workaround: create the database directory and put
    # a placeholder file into it.
    file mkdir ${destroot}${db_path}
    system "touch ${destroot}${db_path}/.turd"

    # Documentation: copy the HTML, GIF and CSS files from htdoc, then
    # give the copies to the uid/gid running the build.
    file mkdir ${staged_docs}
    system "cd ${worksrcpath}/htdoc && \
            tar -cf - *.html *.gif *.css | \
            tar -xf - -C ${staged_docs}"
    system "chown -R `id -u`:`id -g` \
            ${staged_docs}"

    # Link the sample search form into the web server document root.
    file mkdir ${destroot}${httpd_doc_path}
    system "cd ${destroot}${httpd_doc_path} && \
            ln -s ${share_path}/search.html"

    # Strip the destroot prefix from the paths recorded in the rundig
    # script and in the htdig configuration file, in that order.
    foreach staged_file [list "${destroot}${prefix}/bin/rundig" \
                              "${destroot}${conf_path}/${name}.conf"] {
        reinplace "s|${destroot}${prefix}|${prefix}|g" ${staged_file}
    }

    # Keep a pristine copy of the htdig configuration file.
    system "install -m 644 ${destroot}${conf_path}/${name}.conf \
            ${destroot}${share_path}/${name}.conf.default"

    # Install the apache configuration template and fill in its
    # placeholders one after another.
    file mkdir ${destroot}${httpd_conf_path}
    system "install -m 644 ${portpath}/${filedir}/httpd.conf \
            ${staged_vhost}"
    foreach {placeholder replacement} [list DP_PREFIX ${prefix} \
                                            NAME      ${name} \
                                            VERSION   ${version}] {
        reinplace "s|${placeholder}|${replacement}|g" ${staged_vhost}
    }

    # Contributed scripts: copy the whole contrib tree, then give the
    # copies to the uid/gid running the build.
    file mkdir ${staged_contrib}
    system "cd ${worksrcpath}/contrib && \
            tar -cf - * | \
            tar -xf - -C ${staged_contrib}"
    system "chown -R `id -u`:`id -g` \
            ${staged_contrib}"
}

@@ -0,0 +1,15 @@
#
# This is a default apache configuration
# file installed by DarwinPorts for:
# ht://Dig Group WWW Search Engine
# NAME - VERSION
#
# The upper-case placeholder tokens in this template are substituted
# by the port when the file is installed, so they must keep their
# exact spelling.
#
# Map the URL path named after the port, with and without a trailing
# slash, onto the installed images directory.
Alias /NAME "DP_PREFIX/share/NAME/images"
Alias /NAME/ "DP_PREFIX/share/NAME/images/"
#
# Access settings for the images directory: directory listings and
# MultiViews are enabled, per-directory override files are ignored,
# and requests are allowed from all clients.
<Directory "DP_PREFIX/share/NAME/images">
Options Indexes MultiViews
AllowOverride None
Order allow,deny
Allow from all
</Directory>
@@ -0,0 +1,44 @@
--- CONFIG.in.bak Mon Oct 20 05:09:35 2003
+++ CONFIG.in Mon Oct 27 17:16:01 2003
@@ -35,20 +35,20 @@
# CONFIG_DIR
# This is the directory that contains ht://Dig configuration files
#
-CONFIG_DIR= @CONFIG_DIR@
+CONFIG_DIR= $(prefix)/etc/htdig

#
# COMMON_DIR
# This is the directory for files that can be shared between different
# databases.
#
-COMMON_DIR= @COMMON_DIR@
+COMMON_DIR= $(prefix)/share/htdig/common

#
# DATABASE_DIR
# The default directory where the search databases will reside.
#
-DATABASE_DIR= @DATABASE_DIR@
+DATABASE_DIR= $(prefix)/var/htdig

#
# DEFAULT_CONFIG_FILE
@@ -69,7 +69,7 @@
# Define this to be a place that can be accessed by your web server. This is
# where a couple of images will be placed.
#
-IMAGE_DIR= @IMAGE_DIR@
+IMAGE_DIR= $(prefix)/share/htdig/images

#
# IMAGE_URL_PREFIX
@@ -82,7 +82,7 @@
# Set this to the absolute path where you want the sample search form to
# be installed.
#
-SEARCH_DIR= @SEARCH_DIR@
+SEARCH_DIR= $(prefix)/share/htdig

#
# SEARCH_FORM
@@ -0,0 +1,11 @@
--- htdig/HTML.cc.orig Wed Jan 9 16:12:31 2002
+++ htdig/HTML.cc Fri May 17 16:00:00 2002
@@ -93,7 +93,7 @@ HTML::HTML()

// These are the name values of meta tags that carry date information.
metadatetags.IgnoreCase();
- metadatetags.Pattern("date|dc.date|dc.date.created|dc.data.modified");
+ metadatetags.Pattern("date|dc.date|dc.date.created|dc.date.modified");

// These are the name values of meta tags that carry descriptions.
StringList descrNames(config["description_meta_tag_names"], " \t");
@@ -0,0 +1,19 @@
--- htdig/ExternalParser.cc.orig Wed Jan 9 16:23:25 2002
+++ htdig/ExternalParser.cc Thu Aug 1 14:11:07 2002
@@ -535,8 +535,15 @@ ExternalParser::parse(Retriever &retriev
{
char buffer[2048];
int length;
- while ((length = fread(buffer, 1, sizeof(buffer), input)) > 0)
+ int nbytes = config.Value("max_doc_size");
+ while (nbytes > 0 &&
+ (length = fread(buffer, 1, sizeof(buffer), input)) > 0)
+ {
+ nbytes -= length;
+ if (nbytes < 0)
+ length += nbytes;
newcontent.append(buffer, length);
+ }
}
}
fclose(input);
@@ -0,0 +1,16 @@
--- htdig/HTML.cc.orig Wed Jan 9 16:12:31 2002
+++ htdig/HTML.cc Wed Sep 25 11:50:50 2002
@@ -308,6 +308,13 @@ HTML::parse(Retriever &retriever, URL &b
if (!q)
break; // Syntax error in the doc. Tag never ends.
position++;
+ if (noindex & TAGscript)
+ { // Special handling in case '<' is part of JavaScript code
+ while (isspace(*position))
+ position++;
+ if (mystrncasecmp((char *)position, "/script", 7) != 0)
+ continue;
+ }
tag = 0;
tag.append((char*)position, q - position);
while (isspace(*position))
@@ -0,0 +1,11 @@
--- Makefile.config.in.orig Thu Jan 31 17:47:14 2002
+++ Makefile.config.in Thu Feb 7 14:57:11 2002
@@ -23,7 +23,7 @@ AR= @AR@
PDF_PARSER= @PDF_PARSER@
SENDMAIL= @SENDMAIL@

-DEFINES= -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\"
+DEFINES= @DEFS@ -DDEFAULT_CONFIG_FILE=\"$(DEFAULT_CONFIG_FILE)\"
LIBDIRS= -L../htlib -L../htcommon -L../db/dist -L/usr/lib
INCS= -I$(top_srcdir)/htlib -I$(top_srcdir)/htcommon \
-I../db/dist -I../include
@@ -0,0 +1,73 @@
--- htdoc/cf_byname.html.orig Thu Jan 31 17:47:17 2002
+++ htdoc/cf_byname.html Thu Feb 7 14:47:49 2002
@@ -171,7 +171,7 @@
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#search_rewrite_rules">search_rewrite_rules</a><br>
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#server_aliases">server_aliases</a><br>
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#server_max_docs">server_max_docs</a><br>
- <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#server_wait_time">serrver_wait_time</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#server_wait_time">server_wait_time</a><br>
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#sort">sort</a><br>
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#sort_names">sort_names</a><br>
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#soundex_db">soundex_db</a><br>
--- htdoc/cf_byprog.html.orig Thu Jan 31 17:47:17 2002
+++ htdoc/cf_byprog.html Fri Feb 8 08:24:49 2002
@@ -181,6 +181,7 @@
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#iso_8601">iso_8601</a><br>
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#logging">logging</a><br>
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#match_method">match_method</a><br>
+ <img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#matches_per_page">matches_per_page</a><br>
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_excerpts">max_excerpts</a><br>
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_prefix_matches">max_prefix_matches</a><br>
<img src="dot.gif" alt="*" width=9 height=9> <a target="body" href="attrs.html#max_stars">max_stars</a><br>
--- htdoc/cf_types.html.orig Thu Jan 31 17:47:17 2002
+++ htdoc/cf_types.html Thu Feb 7 14:48:14 2002
@@ -32,10 +32,20 @@
<b>String List</b>
</dt>
<dd>
- A sequence of string separated by whitespace. Individual
- strings within the list may be quoted using double quotes.
- The quotes are needed when the individual strings contain
- whitespace.
+ A sequence of strings separated by whitespace. Individual
+ strings within the list cannot be quoted and therefore
+ cannot contain whitespace.
+ </dd>
+ <dt>
+ <b>Quoted String List</b>
+ </dt>
+ <dd>
+ A sequence of strings separated by whitespace. Individual
+ strings within the list may be quoted using single or double
+ quotes. The quotes are needed when the individual strings
+ contain whitespace. If you want a quote mark or a backslash
+ to be inserted as-is into a string, you must precede it with
+ a backslash.
</dd>
<dt>
<b>Number</b>
--- htdoc/htmerge.html.orig Thu Jan 31 17:47:17 2002
+++ htdoc/htmerge.html Thu Feb 7 14:48:47 2002
@@ -76,8 +76,8 @@
<dd>
Merge the databases specified by merge_configfile
into the databases specified by -c or the default.
- To use multiple databases, you will need a config
- file for each database. Then each file will set the
+ You will need a separate config file for each of
+ the two databases. Then each file will set the
<a href="attrs.html#database_dir">database_dir</a> or
<a href="attrs.html#database_base">database_base</a>
attribute to change the name of the databases, so
@@ -88,6 +88,11 @@
together with this option. This is because merging
the two wordlists together requires wordlists that
have already been cleaned up by htmerge.
+ Because the -m option allows you to specify only
+ one database to be merged into the other, and only
+ one -m option may be specified, if you need to merge
+ three or more databases together you must run htmerge
+ multiple times with the -m option.
</dd>
<dt>
-s

0 comments on commit ed513a2

Please sign in to comment.
You can’t perform that action at this time.