Skip to content
Permalink
Browse files

Adding new tags metadataFileNameFormat and writeOutlinks in every settings.xml
  • Loading branch information...
bnfklm committed Jul 20, 2016
1 parent 43cde6c commit 3f8c68d77c7adb7a5bede48bb0124ea1addc66bc
Showing with 323 additions and 8 deletions.
  1. +1 −0 archive/archive-test/tests/dk/netarkivet/test-settings.xml
  2. +1 −0 common/common-test/tests/dk/netarkivet/test-settings.xml
  3. +1 −0 deploy/deploy-core/src/main/resources/dk/netarkivet/deploy/complete_settings.xml
  4. +1 −0 deploy/deploy-test/src/test/resources/complete_settings.xml
  5. +1 −0 deploy/deploy-test/tests/dk/netarkivet/deploy/data/originals/complete_settings/complete_settings.xml
  6. +1 −0 deploy/deploy-test/tests/dk/netarkivet/test-settings.xml
  7. +2 −5 harvester/harvest-scheduler/src/main/java/dk/netarkivet/harvester/scheduler/JobDispatcher.java
  8. +12 −2 .../harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/metadata/MetadataFileWriter.java
  9. +1 −0 harvester/harvester-core/src/main/resources/dk/netarkivet/harvester/settings.xml
  10. +1 −0 harvester/harvester-test/tests/dk/netarkivet/test-settings.xml
  11. +23 −0 ...jobs/lost-files-1469003338365/42-117-20051212141240-00000-sb-test-har-001.statsbiblioteket.dk.arc
  12. +102 −0 ...jobs/lost-files-1469003338365/42-117-20051212141241-00001-sb-test-har-001.statsbiblioteket.dk.arc
  13. BIN ...heritrix1/heritrix1-controller/oldjobs/lost-files-1469003338709/NetarchiveSuite-netarkivet.arc.gz
  14. +23 −0 ...jobs/lost-files-1469003338796/43-117-20051212141241-00000-sb-test-har-001.statsbiblioteket.dk.arc
  15. +23 −0 ...jobs/lost-files-1469013586009/42-117-20051212141240-00000-sb-test-har-001.statsbiblioteket.dk.arc
  16. +102 −0 ...jobs/lost-files-1469013586009/42-117-20051212141241-00001-sb-test-har-001.statsbiblioteket.dk.arc
  17. BIN ...heritrix1/heritrix1-controller/oldjobs/lost-files-1469013586423/NetarchiveSuite-netarkivet.arc.gz
  18. +23 −0 ...jobs/lost-files-1469013586512/43-117-20051212141241-00000-sb-test-har-001.statsbiblioteket.dk.arc
  19. +1 −1 ...trix1/heritrix1-controller/src/main/java/dk/netarkivet/harvester/tools/CreateCDXMetadataFile.java
  20. +1 −0 harvester/heritrix1/heritrix1-frontier/src/test/resources/dk/netarkivet/test-settings.xml
  21. +1 −0 .../heritrix3/heritrix3-controller/src/main/resources/dk/netarkivet/harvester/heritrix3/settings.xml
  22. +1 −0 monitor/monitor-test/tests/dk/netarkivet/test-settings.xml
  23. +1 −0 wayback/wayback-test/tests/dk/netarkivet/test-settings.xml
@@ -132,6 +132,7 @@ National Library.
</heritrix>
<metadata>
<metadataFormat>arc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata>

</harvesting>
@@ -132,6 +132,7 @@ National Library.
</heritrix>
<metadata>
<metadataFormat>arc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata>

</harvesting>
@@ -658,6 +658,7 @@
<fileHeader>[ARCHIVEFILE] [Opened] [Closed] [Size]</fileHeader>
</archiveFilesReport>
<metadataFormat>warc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata>
</harvesting>
<indexserver>
@@ -654,6 +654,7 @@
<fileHeader>[ARCHIVEFILE] [Opened] [Closed] [Size]</fileHeader>
</archiveFilesReport>
<metadataFormat>warc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata>
</harvesting>
<indexserver>
@@ -646,6 +646,7 @@
<fileHeader>[ARCHIVEFILE] [Opened] [Closed] [Size]</fileHeader>
</archiveFilesReport>
<metadataFormat>arc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata>
</harvesting>
<indexserver>
@@ -132,6 +132,7 @@ National Library.
</heritrix>
<metadata>
<metadataFormat>arc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata>

</harvesting>
@@ -215,11 +215,8 @@ public void doOneCrawl(Job job, String origHarvestName, String origHarvestDesc,
if (usingWarcAsArchiveFormat()) {
log.info("As we're using WARC as archiveFormat WarcInfoMetadata is now added to the template");
HeritrixTemplate ht = job.getOrderXMLdoc();
if (job.getContinuationOf() != null ) {
ht.insertWarcInfoMetadata(job, origHarvestName, origHarvestSchedule,
Settings.get(HarvesterSettings.PERFORMER));
}
job.setOrderXMLDoc(ht);
ht.insertWarcInfoMetadata(job, origHarvestName, origHarvestSchedule,Settings.get(HarvesterSettings.PERFORMER));
job.setOrderXMLDoc(ht);
} else {
log.info("As we're using ARC as archiveFormat no WarcInfoMetadata was added to the template");
}
@@ -98,7 +98,13 @@ public static String getMetadataArchiveFileName(String jobID, Long harvestID) th
String collectionName = "";
boolean isPrefix = false;
//try to retrieve settings for prefixing or not metadata files
if("prefix".equals(Settings.get(HarvesterSettings.METADATA_FILENAME_FORMAT))) {
String metadataFilenameFormat = "";
try {
metadataFilenameFormat = Settings.get(HarvesterSettings.METADATA_FILENAME_FORMAT);
} catch (UnknownID e) {
//nothing
}
if("prefix".equals(metadataFilenameFormat)) {
try {
//try to retrieve in both <heritrix> and <heritrix3> tags
collectionName = Settings.get(HarvesterSettings.HERITRIX_METADATA_PREFIX_COLLECTION_NAME);
@@ -107,7 +113,11 @@ public static String getMetadataArchiveFileName(String jobID, Long harvestID) th
}
isPrefix = true;
} catch(UnknownID e) {
//nothing
try {
collectionName = Settings.get(HarvesterSettings.HERITRIX3_METADATA_PREFIX_COLLECTION_NAME);
} catch(UnknownID f) {
//nothing
}
}
}
if (metadataFormat == 0) {
@@ -201,6 +201,7 @@ National Library.
</archiveFilesReport>
<metadataFormat>warc</metadataFormat>
<compression>false</compression>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata>
</harvesting>
<indexserver>
@@ -132,6 +132,7 @@ National Library.
</heritrix>
<metadata>
<metadataFormat>arc</metadataFormat>
<metadataFileNameFormat>default</metadataFileNameFormat>
</metadata>

</harvesting>
@@ -0,0 +1,23 @@
filedesc://IAH20040511211249-5.arc 0.0.0.0 20040511211249 text/plain 77
1 0 InternetArchive
URL IP-address Archive-date Content-type Archive-length


http://www.fyens.dk/picturecache/article/getpicture.php?pictureid=117716&Width=198 194.255.126.118 20040511211249 image/jpeg 6669
XXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXX
XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -0,0 +1,102 @@
filedesc://2-2-20051214154355-00000-kb-test-har-001.kb.dk.arc.open 0.0.0.0 20051214154355 text/plain 1295
1 1 InternetArchive
URL IP-address Archive-date Content-type Archive-length
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<arcmetadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:arc="http://archive.org/arc/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://archive.org/arc/1.0/" xsi:schemaLocation="http://archive.org/arc/1.0/ http://www.archive.org/arc/1.0/arc.xsd">
<arc:software>Heritrix null http://crawler.archive.org</arc:software>
<arc:hostname>kb-test-har-001.kb.dk</arc:hostname>
<arc:ip>130.226.228.7</arc:ip>
<dcterms:isPartOf>default_orderxml</dcterms:isPartOf>
<dc:description>Default Profile</dc:description>
<arc:operator>Admin</arc:operator>
<ns0:date xmlns:ns0="http://purl.org/dc/elements/1.1/" xsi:type="dcterms:W3CDTF">2005-11-17T18:43:23+00:00</ns0:date>
<arc:http-header-user-agent>Mozilla/5.0 (compatible; heritrix/1.5.0-200506132127 +http://netarkivet.dk/website/info.html)</arc:http-header-user-agent>
<arc:http-header-from>netarkivet-svar@netarkivet.dk</arc:http-header-from>
<arc:robots>ignore</arc:robots>
<dc:format>ARC file version 1.1</dc:format>
<dcterms:conformsTo xsi:type="dcterms:URI">http://www.archive.org/web/researcher/ArcFileFormat.php</dcterms:conformsTo>
</arcmetadata>

dns:www.kb.dk 130.226.220.16 20051214154355 text/dns 55
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.netarkivet.dk 130.226.220.16 20051214154355 text/dns 59
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.sy-jonna.dk 130.226.220.16 20051214154355 text/dns 57
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.trineogkaare.dk 130.226.220.16 20051214154355 text/dns 56
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.sulnudu.dk 130.226.220.16 20051214154355 text/dns 52
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

http://www.statsbiblioteket.dk/robots.txt 130.225.24.24 20051214154356 text/plain 645
HTTP/1.1 200 OK

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
X
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX


http://www.sy-jonna.dk/robots.txt 195.47.247.71 20051214154356 text/html 347
HTTP/1.1 404 Not Found

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX

http://www.pligtaflevering.dk/robots.txt 130.226.231.6 20051214154356 text/html 476
HTTP/1.1 404 Not Found

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX

Binary file not shown.
@@ -0,0 +1,23 @@
filedesc://IAH20040511211249-5.arc 0.0.0.0 20040511211249 text/plain 77
1 0 InternetArchive
URL IP-address Archive-date Content-type Archive-length


http://www.fyens.dk/picturecache/article/getpicture.php?pictureid=117716&Width=198 194.255.126.118 20040511211249 image/jpeg 6669
XXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXX
XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -0,0 +1,23 @@
filedesc://IAH20040511211249-5.arc 0.0.0.0 20040511211249 text/plain 77
1 0 InternetArchive
URL IP-address Archive-date Content-type Archive-length


http://www.fyens.dk/picturecache/article/getpicture.php?pictureid=117716&Width=198 194.255.126.118 20040511211249 image/jpeg 6669
XXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXX
XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -0,0 +1,102 @@
filedesc://2-2-20051214154355-00000-kb-test-har-001.kb.dk.arc.open 0.0.0.0 20051214154355 text/plain 1295
1 1 InternetArchive
URL IP-address Archive-date Content-type Archive-length
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<arcmetadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:arc="http://archive.org/arc/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://archive.org/arc/1.0/" xsi:schemaLocation="http://archive.org/arc/1.0/ http://www.archive.org/arc/1.0/arc.xsd">
<arc:software>Heritrix null http://crawler.archive.org</arc:software>
<arc:hostname>kb-test-har-001.kb.dk</arc:hostname>
<arc:ip>130.226.228.7</arc:ip>
<dcterms:isPartOf>default_orderxml</dcterms:isPartOf>
<dc:description>Default Profile</dc:description>
<arc:operator>Admin</arc:operator>
<ns0:date xmlns:ns0="http://purl.org/dc/elements/1.1/" xsi:type="dcterms:W3CDTF">2005-11-17T18:43:23+00:00</ns0:date>
<arc:http-header-user-agent>Mozilla/5.0 (compatible; heritrix/1.5.0-200506132127 +http://netarkivet.dk/website/info.html)</arc:http-header-user-agent>
<arc:http-header-from>netarkivet-svar@netarkivet.dk</arc:http-header-from>
<arc:robots>ignore</arc:robots>
<dc:format>ARC file version 1.1</dc:format>
<dcterms:conformsTo xsi:type="dcterms:URI">http://www.archive.org/web/researcher/ArcFileFormat.php</dcterms:conformsTo>
</arcmetadata>

dns:www.kb.dk 130.226.220.16 20051214154355 text/dns 55
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.netarkivet.dk 130.226.220.16 20051214154355 text/dns 59
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.sy-jonna.dk 130.226.220.16 20051214154355 text/dns 57
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.trineogkaare.dk 130.226.220.16 20051214154355 text/dns 56
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

dns:www.sulnudu.dk 130.226.220.16 20051214154355 text/dns 52
20051214154355
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

http://www.statsbiblioteket.dk/robots.txt 130.225.24.24 20051214154356 text/plain 645
HTTP/1.1 200 OK

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
X
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX


http://www.sy-jonna.dk/robots.txt 195.47.247.71 20051214154356 text/html 347
HTTP/1.1 404 Not Found

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX

http://www.pligtaflevering.dk/robots.txt 130.226.231.6 20051214154356 text/html 476
HTTP/1.1 404 Not Found

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXX

Binary file not shown.

0 comments on commit 3f8c68d

Please sign in to comment.
You can’t perform that action at this time.