This repository has been archived by the owner on Aug 11, 2020. It is now read-only.

Fix for S3 Scraper
cstamas committed Apr 25, 2013
1 parent 639b8dc commit 8777c5c
Showing 1 changed file with 5 additions and 4 deletions.
@@ -80,6 +80,7 @@ protected List<String> diveIn( final ScrapeContext context, final Page page )
     {
         String prefix = null;
         Page initialPage = page;
+        String initialPageUrl = page.getUrl();
         if ( initialPage.getHttpResponse().getStatusLine().getStatusCode() != 200 )
         {
             // we probably have the NoSuchKey response from S3, usually when repo root is not in bucket root
@@ -93,16 +94,16 @@ protected List<String> diveIn( final ScrapeContext context, final Page page )
                 return null;
             }
             // repo.remoteUrl does not have query parameters...
-            String fixedUrl =
+            initialPageUrl =
                 context.getRemoteRepositoryRootUrl().substring( 0,
                     context.getRemoteRepositoryRootUrl().length() - prefix.length() );
-            getLogger().debug( "Retrying URL {} to scrape Amazon S3 hosted repository on remote URL {}", fixedUrl,
+            getLogger().debug( "Retrying URL {} to scrape Amazon S3 hosted repository on remote URL {}", initialPageUrl,
                 context.getRemoteRepositoryRootUrl() );
-            initialPage = Page.getPageFor( context, fixedUrl + "?prefix=" + prefix );
+            initialPage = Page.getPageFor( context, initialPageUrl + "?prefix=" + prefix );
         }

         final HashSet<String> entries = new HashSet<String>();
-        diveIn( context, initialPage, initialPage.getUrl(), prefix, entries );
+        diveIn( context, initialPage, initialPageUrl, prefix, entries );
         return new ArrayList<String>( entries );
     }
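For context, a minimal sketch of the URL arithmetic this commit changes (the bucket name, prefix value, and class name below are hypothetical examples, not taken from the Nexus sources): when the repository root is not the bucket root, the scraper strips the key prefix from the remote repository root URL to get the bucket root, retries that URL with the prefix as a query parameter, and, with this fix, keeps recursing against the query-less initialPageUrl instead of the retried page's URL.

// Minimal sketch of the prefix-stripping logic; values and class name are hypothetical.
public class S3ScrapeUrlSketch
{
    public static void main( String[] args )
    {
        // Assume the repository root sits under a key prefix inside the bucket.
        String remoteRepositoryRootUrl = "http://mybucket.s3.amazonaws.com/releases/";
        String prefix = "releases/";

        // Strip the prefix to get the bucket root, mirroring the substring call in the diff.
        String initialPageUrl =
            remoteRepositoryRootUrl.substring( 0, remoteRepositoryRootUrl.length() - prefix.length() );
        // initialPageUrl -> "http://mybucket.s3.amazonaws.com/"

        // The retry request carries the prefix as a query parameter...
        String retryUrl = initialPageUrl + "?prefix=" + prefix;
        // retryUrl -> "http://mybucket.s3.amazonaws.com/?prefix=releases/"

        // ...while the recursive diveIn call now receives the query-less initialPageUrl,
        // where before the fix it received the retried page's URL, query string included.
        System.out.println( "retry URL: " + retryUrl );
        System.out.println( "recursion base URL: " + initialPageUrl );
    }
}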

