Skip to content

Commit

Permalink
Add single file download.
Browse files Browse the repository at this point in the history
  • Loading branch information
Tommy Ingulfsen committed May 30, 2017
1 parent 2c2fa77 commit dbc1753
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 1 deletion.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,9 @@ Download all crawl files created after 2014:
Download crawl files created before 2012, into /tmp/:

`./build/install/wasapi-downloader/bin/wasapi-downloader --crawlStartBefore 2012-01-01 --outputBaseDir /tmp/`

Download a single file:

`./build/install/wasapi-downloader/bin/wasapi-downloader --filename ARCHIVEIT-5425-MONTHLY-JOB302671-20170526114117181-00049.warc.gz`

**Note:** When a `--filename` argument is present, all other request parameters (`--collectionId`, `--crawlStartAfter`, `--crawlStartBefore`, `--jobIdLowerBound`) are ignored.
7 changes: 7 additions & 0 deletions src/edu/stanford/dlss/was/WasapiDownloader.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ private String getFileSetRequestUrl() {

private List<String> requestParams() {
List<String> params = new ArrayList<String>();

// If a filename is provided, other arguments are ignored
if (settings.filename() != null) {
params.add("filename=" + settings.filename());
return params;
}

if (settings.collectionId() != null)
params.add("collection=" + settings.collectionId());
if (settings.crawlStartAfter() != null)
Expand Down
8 changes: 7 additions & 1 deletion src/edu/stanford/dlss/was/WasapiDownloaderSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public class WasapiDownloaderSettings {
public static final String CRAWL_START_BEFORE_PARAM_NAME = "crawlStartBefore";
public static final String JOB_ID_LOWER_BOUND_PARAM_NAME = "jobIdLowerBound";
public static final String OUTPUT_BASE_DIR_PARAM_NAME = "outputBaseDir";
public static final String FILENAME_PARAM_NAME = "filename";

private HelpFormatter helpFormatter;
private static Options wdsOpts;
Expand All @@ -59,7 +60,8 @@ public class WasapiDownloaderSettings {
buildArgOption(CRAWL_START_AFTER_PARAM_NAME, "only download crawl files created after this date"),
buildArgOption(CRAWL_START_BEFORE_PARAM_NAME, "only download crawl files created before this date"),
buildArgOption(JOB_ID_LOWER_BOUND_PARAM_NAME, "\"last crawl downloaded\": only download crawl files with a higher job ID (not inclusive)"),
buildArgOption(OUTPUT_BASE_DIR_PARAM_NAME, "destination directory for downloaded WARC files")
buildArgOption(OUTPUT_BASE_DIR_PARAM_NAME, "destination directory for downloaded WARC files"),
buildArgOption(FILENAME_PARAM_NAME, "single filename to download")
};

static {
Expand Down Expand Up @@ -134,6 +136,10 @@ public String outputBaseDir() {
return settings.getProperty(OUTPUT_BASE_DIR_PARAM_NAME);
}

/**
 * Looks up the single-file download option in the loaded settings.
 *
 * @return the value stored under {@code FILENAME_PARAM_NAME}, exactly as
 *         {@code settings.getProperty} reports it
 */
public String filename() {
    final String requestedFilename = settings.getProperty(FILENAME_PARAM_NAME);
    return requestedFilename;
}

public String getHelpAndSettingsMessage() {
if (helpAndSettingsMessage == null)
helpAndSettingsMessage = new StringBuilder(getCliHelpMessageCharSeq()).append(getSettingsSummaryCharSeq()).toString();
Expand Down
17 changes: 17 additions & 0 deletions test/edu/stanford/dlss/was/TestWasapiDownloader.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,23 @@ public void main_executesFileSetRequest_onlyUsesArgsSettings() throws Exception
verify(mockConn, Mockito.never()).jsonQuery(ArgumentMatchers.contains("crawl-start-before="));
}

/**
 * Runs {@code main} with both a collection ID and a filename on the command
 * line, then checks that the query sent to the (mocked) connection carries the
 * filename and none of the other request parameters.
 */
@Test
public void main_singleFileDownload_onlyUsesFilename() throws Exception {
    final String warcFilename = "ARCHIVEIT-5425-MONTHLY-JOB302671-20170526114117181-00049.warc.gz";
    String[] args = {"--collectionId", "123", "--filename", warcFilename};

    // Stub the connection so no real request is made.
    WasapiConnection connectionMock = Mockito.mock(WasapiConnection.class);
    Mockito.when(connectionMock.jsonQuery(anyString())).thenReturn(null);

    // Have main() construct our spy, which hands back the stubbed connection.
    WasapiDownloader spiedDownloader =
        PowerMockito.spy(new WasapiDownloader(WasapiDownloader.SETTINGS_FILE_LOCATION, args));
    PowerMockito.doReturn(connectionMock).when(spiedDownloader).getWasapiConn();
    PowerMockito.whenNew(WasapiDownloader.class).withAnyArguments().thenReturn(spiedDownloader);

    WasapiDownloader.main(args);

    verify(connectionMock).jsonQuery(ArgumentMatchers.contains("filename=" + warcFilename));

    // None of these fragments may appear in any query once a filename is given.
    String[] forbiddenFragments = {"crawl=", "crawl-start-after=", "crawl-start-before=", "collection="};
    for (String fragment : forbiddenFragments) {
        verify(connectionMock, Mockito.never()).jsonQuery(ArgumentMatchers.contains(fragment));
    }
}

@Test
public void downloadSelectedWarcs_requestsFileSetResponse() throws Exception {
WasapiConnection mockConn = Mockito.mock(WasapiConnection.class);
Expand Down

0 comments on commit dbc1753

Please sign in to comment.