Skip to content

Commit

Permalink
Adding new tags metadataFileNameFormat and writeOutlinks in every set…
Browse files Browse the repository at this point in the history
…tinfs.xml
  • Loading branch information
bnfklm committed Jul 20, 2016
1 parent 43cde6c commit 3f8c68d
Show file tree
Hide file tree
Showing 23 changed files with 323 additions and 8 deletions.
1 change: 1 addition & 0 deletions archive/archive-test/tests/dk/netarkivet/test-settings.xml
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ National Library.
</heritrix> </heritrix>
<metadata> <metadata>
<metadataFormat>arc</metadataFormat> <metadataFormat>arc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata> </metadata>


</harvesting> </harvesting>
Expand Down
1 change: 1 addition & 0 deletions common/common-test/tests/dk/netarkivet/test-settings.xml
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ National Library.
</heritrix> </heritrix>
<metadata> <metadata>
<metadataFormat>arc</metadataFormat> <metadataFormat>arc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata> </metadata>


</harvesting> </harvesting>
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -658,6 +658,7 @@
<fileHeader>[ARCHIVEFILE] [Opened] [Closed] [Size]</fileHeader> <fileHeader>[ARCHIVEFILE] [Opened] [Closed] [Size]</fileHeader>
</archiveFilesReport> </archiveFilesReport>
<metadataFormat>warc</metadataFormat> <metadataFormat>warc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata> </metadata>
</harvesting> </harvesting>
<indexserver> <indexserver>
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@
<fileHeader>[ARCHIVEFILE] [Opened] [Closed] [Size]</fileHeader> <fileHeader>[ARCHIVEFILE] [Opened] [Closed] [Size]</fileHeader>
</archiveFilesReport> </archiveFilesReport>
<metadataFormat>warc</metadataFormat> <metadataFormat>warc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata> </metadata>
</harvesting> </harvesting>
<indexserver> <indexserver>
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -646,6 +646,7 @@
<fileHeader>[ARCHIVEFILE] [Opened] [Closed] [Size]</fileHeader> <fileHeader>[ARCHIVEFILE] [Opened] [Closed] [Size]</fileHeader>
</archiveFilesReport> </archiveFilesReport>
<metadataFormat>arc</metadataFormat> <metadataFormat>arc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata> </metadata>
</harvesting> </harvesting>
<indexserver> <indexserver>
Expand Down
1 change: 1 addition & 0 deletions deploy/deploy-test/tests/dk/netarkivet/test-settings.xml
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ National Library.
</heritrix> </heritrix>
<metadata> <metadata>
<metadataFormat>arc</metadataFormat> <metadataFormat>arc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata> </metadata>


</harvesting> </harvesting>
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -215,11 +215,8 @@ public void doOneCrawl(Job job, String origHarvestName, String origHarvestDesc,
if (usingWarcAsArchiveFormat()) { if (usingWarcAsArchiveFormat()) {
log.info("As we're using WARC as archiveFormat WarcInfoMetadata is now added to the template"); log.info("As we're using WARC as archiveFormat WarcInfoMetadata is now added to the template");
HeritrixTemplate ht = job.getOrderXMLdoc(); HeritrixTemplate ht = job.getOrderXMLdoc();
if (job.getContinuationOf() != null ) { ht.insertWarcInfoMetadata(job, origHarvestName, origHarvestSchedule,Settings.get(HarvesterSettings.PERFORMER));
ht.insertWarcInfoMetadata(job, origHarvestName, origHarvestSchedule, job.setOrderXMLDoc(ht);
Settings.get(HarvesterSettings.PERFORMER));
}
job.setOrderXMLDoc(ht);
} else { } else {
log.info("As we're using ARC as archiveFormat no WarcInfoMetadata was added to the template"); log.info("As we're using ARC as archiveFormat no WarcInfoMetadata was added to the template");
} }
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -98,7 +98,13 @@ public static String getMetadataArchiveFileName(String jobID, Long harvestID) th
String collectionName = ""; String collectionName = "";
boolean isPrefix = false; boolean isPrefix = false;
//try to retrieve settings for prefixing or not metadata files //try to retrieve settings for prefixing or not metadata files
if("prefix".equals(Settings.get(HarvesterSettings.METADATA_FILENAME_FORMAT))) { String metadataFilenameFormat = "";
try {
metadataFilenameFormat = Settings.get(HarvesterSettings.METADATA_FILENAME_FORMAT);
} catch (UnknownID e) {
//nothing
}
if("prefix".equals(metadataFilenameFormat)) {
try { try {
//try to retrieve in both <heritrix> and <heritrix3> tags //try to retrieve in both <heritrix> and <heritrix3> tags
collectionName = Settings.get(HarvesterSettings.HERITRIX_METADATA_PREFIX_COLLECTION_NAME); collectionName = Settings.get(HarvesterSettings.HERITRIX_METADATA_PREFIX_COLLECTION_NAME);
Expand All @@ -107,7 +113,11 @@ public static String getMetadataArchiveFileName(String jobID, Long harvestID) th
} }
isPrefix = true; isPrefix = true;
} catch(UnknownID e) { } catch(UnknownID e) {
//nothing try {
collectionName = Settings.get(HarvesterSettings.HERITRIX3_METADATA_PREFIX_COLLECTION_NAME);
} catch(UnknownID f) {
//nothing
}
} }
} }
if (metadataFormat == 0) { if (metadataFormat == 0) {
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ National Library.
</archiveFilesReport> </archiveFilesReport>
<metadataFormat>warc</metadataFormat> <metadataFormat>warc</metadataFormat>
<compression>false</compression> <compression>false</compression>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata> </metadata>
</harvesting> </harvesting>
<indexserver> <indexserver>
Expand Down
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ National Library.
</heritrix> </heritrix>
<metadata> <metadata>
<metadataFormat>arc</metadataFormat> <metadataFormat>arc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata> </metadata>


</harvesting> </harvesting>
Expand Down
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1,23 @@
filedesc://IAH20040511211249-5.arc 0.0.0.0 20040511211249 text/plain 77
1 0 InternetArchive
URL IP-address Archive-date Content-type Archive-length


http://www.fyens.dk/picturecache/article/getpicture.php?pictureid=117716&Width=198 194.255.126.118 20040511211249 image/jpeg 6669
XXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXX
XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1,102 @@
filedesc://2-2-20051214154355-00000-kb-test-har-001.kb.dk.arc.open 0.0.0.0 20051214154355 text/plain 1295
1 1 InternetArchive
URL IP-address Archive-date Content-type Archive-length
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<arcmetadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:arc="http://archive.org/arc/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://archive.org/arc/1.0/" xsi:schemaLocation="http://archive.org/arc/1.0/ http://www.archive.org/arc/1.0/arc.xsd">
<arc:software>Heritrix null http://crawler.archive.org</arc:software>
<arc:hostname>kb-test-har-001.kb.dk</arc:hostname>
<arc:ip>130.226.228.7</arc:ip>
<dcterms:isPartOf>default_orderxml</dcterms:isPartOf>
<dc:description>Default Profile</dc:description>
<arc:operator>Admin</arc:operator>
<ns0:date xmlns:ns0="http://purl.org/dc/elements/1.1/" xsi:type="dcterms:W3CDTF">2005-11-17T18:43:23+00:00</ns0:date>
<arc:http-header-user-agent>Mozilla/5.0 (compatible; heritrix/1.5.0-200506132127 +http://netarkivet.dk/website/info.html)</arc:http-header-user-agent>
<arc:http-header-from>netarkivet-svar@netarkivet.dk</arc:http-header-from>
<arc:robots>ignore</arc:robots>
<dc:format>ARC file version 1.1</dc:format>
<dcterms:conformsTo xsi:type="dcterms:URI">http://www.archive.org/web/researcher/ArcFileFormat.php</dcterms:conformsTo>
</arcmetadata>

dns:www.kb.dk 130.226.220.16 20051214154355 text/dns 55
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.netarkivet.dk 130.226.220.16 20051214154355 text/dns 59
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.sy-jonna.dk 130.226.220.16 20051214154355 text/dns 57
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.trineogkaare.dk 130.226.220.16 20051214154355 text/dns 56
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.sulnudu.dk 130.226.220.16 20051214154355 text/dns 52
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

http://www.statsbiblioteket.dk/robots.txt 130.225.24.24 20051214154356 text/plain 645
HTTP/1.1 200 OK

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
X
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX


http://www.sy-jonna.dk/robots.txt 195.47.247.71 20051214154356 text/html 347
HTTP/1.1 404 Not Found

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX

http://www.pligtaflevering.dk/robots.txt 130.226.231.6 20051214154356 text/html 476
HTTP/1.1 404 Not Found

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX

Binary file not shown.
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1,23 @@
filedesc://IAH20040511211249-5.arc 0.0.0.0 20040511211249 text/plain 77
1 0 InternetArchive
URL IP-address Archive-date Content-type Archive-length


http://www.fyens.dk/picturecache/article/getpicture.php?pictureid=117716&Width=198 194.255.126.118 20040511211249 image/jpeg 6669
XXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXX
XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1,23 @@
filedesc://IAH20040511211249-5.arc 0.0.0.0 20040511211249 text/plain 77
1 0 InternetArchive
URL IP-address Archive-date Content-type Archive-length


http://www.fyens.dk/picturecache/article/getpicture.php?pictureid=117716&Width=198 194.255.126.118 20040511211249 image/jpeg 6669
XXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXX
XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Original file line number Original file line Diff line number Diff line change
@@ -0,0 +1,102 @@
filedesc://2-2-20051214154355-00000-kb-test-har-001.kb.dk.arc.open 0.0.0.0 20051214154355 text/plain 1295
1 1 InternetArchive
URL IP-address Archive-date Content-type Archive-length
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<arcmetadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:arc="http://archive.org/arc/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://archive.org/arc/1.0/" xsi:schemaLocation="http://archive.org/arc/1.0/ http://www.archive.org/arc/1.0/arc.xsd">
<arc:software>Heritrix null http://crawler.archive.org</arc:software>
<arc:hostname>kb-test-har-001.kb.dk</arc:hostname>
<arc:ip>130.226.228.7</arc:ip>
<dcterms:isPartOf>default_orderxml</dcterms:isPartOf>
<dc:description>Default Profile</dc:description>
<arc:operator>Admin</arc:operator>
<ns0:date xmlns:ns0="http://purl.org/dc/elements/1.1/" xsi:type="dcterms:W3CDTF">2005-11-17T18:43:23+00:00</ns0:date>
<arc:http-header-user-agent>Mozilla/5.0 (compatible; heritrix/1.5.0-200506132127 +http://netarkivet.dk/website/info.html)</arc:http-header-user-agent>
<arc:http-header-from>netarkivet-svar@netarkivet.dk</arc:http-header-from>
<arc:robots>ignore</arc:robots>
<dc:format>ARC file version 1.1</dc:format>
<dcterms:conformsTo xsi:type="dcterms:URI">http://www.archive.org/web/researcher/ArcFileFormat.php</dcterms:conformsTo>
</arcmetadata>

dns:www.kb.dk 130.226.220.16 20051214154355 text/dns 55
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.netarkivet.dk 130.226.220.16 20051214154355 text/dns 59
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.sy-jonna.dk 130.226.220.16 20051214154355 text/dns 57
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.trineogkaare.dk 130.226.220.16 20051214154355 text/dns 56
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.sulnudu.dk 130.226.220.16 20051214154355 text/dns 52
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

http://www.statsbiblioteket.dk/robots.txt 130.225.24.24 20051214154356 text/plain 645
HTTP/1.1 200 OK

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
X
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX


http://www.sy-jonna.dk/robots.txt 195.47.247.71 20051214154356 text/html 347
HTTP/1.1 404 Not Found

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX

http://www.pligtaflevering.dk/robots.txt 130.226.231.6 20051214154356 text/html 476
HTTP/1.1 404 Not Found

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX

Binary file not shown.
Loading

0 comments on commit 3f8c68d

Please sign in to comment.