-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
malfunct
committed
Apr 11, 2018
1 parent
e0cdc38
commit 14e6ecf
Showing
8 changed files
with
395 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio 15 | ||
VisualStudioVersion = 15.0.26206.0 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SyncCollection", "SyncCollection\SyncCollection.csproj", "{D08AC687-571B-44E1-8BD0-4BA69152467C}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Release|Any CPU = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{D08AC687-571B-44E1-8BD0-4BA69152467C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{D08AC687-571B-44E1-8BD0-4BA69152467C}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{D08AC687-571B-44E1-8BD0-4BA69152467C}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{D08AC687-571B-44E1-8BD0-4BA69152467C}.Release|Any CPU.Build.0 = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
EndGlobal |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
<?xml version="1.0" encoding="utf-8" ?> | ||
<configuration> | ||
<startup> | ||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5.2" /> | ||
</startup> | ||
</configuration> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace SyncCollection | ||
{ | ||
/// <summary>
/// Root object that the JSON reply from the archive.org advanced search
/// endpoint is deserialized into.
/// </summary>
internal class InternetArchiveSearchResult
{
    /// <summary>The "responseHeader" section of the reply.</summary>
    public ResponseHeader responseHeader { get; set; }

    /// <summary>The "response" section of the reply, which carries the matched documents.</summary>
    public Response response { get; set; }
}
|
||
/// <summary>
/// The "params" object nested inside the response header.
/// NOTE(review): presumably an echo of the query parameters that were sent
/// with the search request - confirm against a live reply.
/// </summary>
public class Params
{
    /// <summary>Value of the "q" field.</summary>
    public string q { get; set; }

    /// <summary>Value of the "qin" field.</summary>
    public string qin { get; set; }

    /// <summary>Value of the "fl" field.</summary>
    public string fl { get; set; }

    /// <summary>Value of the "wt" field.</summary>
    public string wt { get; set; }

    /// <summary>Value of the "sort" field.</summary>
    public string sort { get; set; }

    /// <summary>Value of the "rows" field; modeled as a string, unlike <see cref="start"/>.</summary>
    public string rows { get; set; }

    /// <summary>Value of the "start" field.</summary>
    public int start { get; set; }
}
|
||
/// <summary>
/// The "responseHeader" section of a search reply.
/// </summary>
public class ResponseHeader
{
    /// <summary>Value of the "status" field. NOTE(review): presumably 0 on success - confirm.</summary>
    public int status { get; set; }

    /// <summary>Value of the "QTime" field. NOTE(review): presumably the query time - confirm units.</summary>
    public int QTime { get; set; }

    /// <summary>The nested "params" object ("params" is a C# keyword, hence the @ prefix).</summary>
    public Params @params { get; set; }
}
|
||
/// <summary>
/// One entry of the "docs" array in a search reply: a single archive item.
/// </summary>
public class Doc
{
    /// <summary>The item's identifier; used as the key when tracking downloads.</summary>
    public string identifier { get; set; }

    /// <summary>
    /// The "oai_updatedate" values for the item. The search code in this project
    /// takes the last element as the item's "Updated" date.
    /// </summary>
    public List<DateTime> oai_updatedate { get; set; }
}
|
||
/// <summary>
/// The "response" section of a search reply.
/// </summary>
public class Response
{
    /// <summary>Value of the "numFound" field. NOTE(review): presumably the total match count - confirm.</summary>
    public int numFound { get; set; }

    /// <summary>Value of the "start" field. NOTE(review): presumably the offset of the first returned doc - confirm.</summary>
    public int start { get; set; }

    /// <summary>The documents returned by the search.</summary>
    public List<Doc> docs { get; set; }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
using Newtonsoft.Json; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Net; | ||
using System.Net.Http; | ||
using System.Net.Http.Headers; | ||
using System.Threading.Tasks; | ||
|
||
namespace SyncCollection | ||
{ | ||
class Program | ||
{ | ||
const string Collection = "apple_ii_library_4am"; | ||
const string Rows = "2000"; | ||
|
||
/// <summary>
/// Program entry point. Synchronizes the collection named by the first
/// command-line argument, or the built-in default collection when no
/// argument is supplied.
/// </summary>
/// <param name="args">Command-line arguments; only the first one is read.</param>
static void Main(string[] args)
{
    string collection = Collection;
    if (args.Length > 0)
    {
        collection = args[0];
    }
    else
    {
        Console.WriteLine($"Using default collection {collection}");
    }

    // GetAwaiter().GetResult() rethrows the original exception on failure,
    // whereas Task.Wait() wraps it in an AggregateException and obscures the
    // real error in the crash output.
    MainAsync(collection).GetAwaiter().GetResult();
}
|
||
/// <summary>
/// Runs one synchronization pass: queries the remote item list, loads the
/// record of what was downloaded before, then downloads what is new or updated.
/// </summary>
/// <param name="collection">Collection name; also used as the local directory name.</param>
static async Task MainAsync(string collection)
{
    // The remote query happens first, so a failed search leaves local state untouched.
    Dictionary<string, DateTime> remoteItems = await GetSearchResults(collection);
    Dictionary<string, DateTime> alreadyDownloaded = GetListOfAlreadyDownloadedFiles(collection);

    DownloadFiles(remoteItems, alreadyDownloaded, collection);
}
|
||
/// <summary>
/// Renames the collection's "fileList.txt" to "fileListOld.txt", replacing any
/// previous backup. Does nothing when there is no current list.
/// </summary>
/// <param name="collection">Directory that holds the list files.</param>
private static void ArchiveOldDownloadList(string collection)
{
    string currentList = Path.Combine(collection, "fileList.txt");
    string previousList = Path.Combine(collection, "fileListOld.txt");

    if (!File.Exists(currentList))
    {
        return;
    }

    // File.Move refuses to overwrite, so the old backup has to go first.
    if (File.Exists(previousList))
    {
        File.Delete(previousList);
    }

    File.Move(currentList, previousList);
}
|
||
/// <summary>
/// Downloads every item in <paramref name="searchResults"/> that is missing from
/// <paramref name="localFileList"/> or whose remote timestamp is newer than the
/// recorded one. Each item is saved as "{collection}/{identifier}.zip", and the
/// on-disk file list is rewritten after every attempt.
/// </summary>
/// <param name="searchResults">Remote identifiers mapped to their updated date.</param>
/// <param name="localFileList">Previously downloaded identifiers mapped to the date recorded for them.</param>
/// <param name="collection">Collection name; used as the target directory.</param>
/// <remarks>
/// A <see cref="WebException"/> on one item is logged and the loop continues with
/// the next item. Any other exception raised inside the loop is logged and ends
/// the loop. Cleanup in the finally block can still throw to the caller.
/// </remarks>
private static void DownloadFiles(Dictionary<string, DateTime> searchResults,
    Dictionary<string, DateTime> localFileList, string collection)
{
    ArchiveOldDownloadList(collection);

    Dictionary<string, DateTime> updatedFileList = new Dictionary<string, DateTime>(localFileList);
    string currentlyDownloading = null;

    try
    {
        const string resourceBase = "https://archive.org/compress";

        // WebClient is IDisposable; the using block releases it on every exit path.
        using (WebClient client = new WebClient())
        {
            foreach (KeyValuePair<string, DateTime> remoteItem in searchResults)
            {
                string identifier = remoteItem.Key;
                DateTime remoteUpdated = remoteItem.Value;

                // Single lookup: skip items whose local copy is at least as new as the remote one.
                DateTime localUpdated;
                if (localFileList.TryGetValue(identifier, out localUpdated) && remoteUpdated <= localUpdated)
                {
                    continue;
                }

                currentlyDownloading = $"{collection}/{identifier}.zip";
                var url = $"{resourceBase}/{identifier}";

                Console.WriteLine("Downloading {0}", currentlyDownloading);
                Console.WriteLine("Downloading from {0}", url);

                bool success = false;
                try
                {
                    client.DownloadFile(url, currentlyDownloading);
                    success = true;
                }
                catch (WebException e)
                {
                    // Just skip webexceptions and clean up so we can
                    // download as much of the collection as possible
                    Console.WriteLine("Error while downloading {0}", e.Message);

                    // delete failed download
                    if (File.Exists(currentlyDownloading))
                    {
                        File.Delete(currentlyDownloading);
                    }

                    // The partial file is gone; clear the marker so the finally
                    // block does not act on a stale path.
                    currentlyDownloading = null;

                    UpdateLocalFileList(updatedFileList, collection);
                }

                if (success)
                {
                    currentlyDownloading = null;
                    updatedFileList[identifier] = remoteUpdated;

                    // We churn this file a lot so we don't lose much state if the process is interrupted.
                    UpdateLocalFileList(updatedFileList, collection);
                }

                // Pause between attempts to be nice to archive.org.
                System.Threading.Thread.Sleep(500);
            }
        }
    }
    catch (Exception exc)
    {
        Console.WriteLine("Error while downloading {0}", exc.Message);
    }
    finally
    {
        // delete failed download
        if (currentlyDownloading != null && File.Exists(currentlyDownloading))
        {
            File.Delete(currentlyDownloading);
        }

        UpdateLocalFileList(updatedFileList, collection);
    }
}
|
||
/// <summary>
/// Rewrites the collection's "fileList.txt" from scratch, one line per entry in
/// the form "identifier&lt;TAB&gt;date", with the date in DateTime's default formatting.
/// </summary>
/// <param name="updatedFileList">Identifiers mapped to the date to record for them.</param>
/// <param name="collection">Directory that holds the list file.</param>
private static void UpdateLocalFileList(Dictionary<string, DateTime> updatedFileList, string collection)
{
    string listPath = Path.Combine(collection, "fileList.txt");

    bool listAlreadyPresent = File.Exists(listPath);
    if (listAlreadyPresent)
    {
        File.Delete(listPath);
    }

    using (StreamWriter output = new StreamWriter(listPath))
    {
        foreach (KeyValuePair<string, DateTime> entry in updatedFileList)
        {
            output.WriteLine($"{entry.Key}\t{entry.Value}");
        }
    }
}
|
||
/// <summary>
/// Loads the record of previously downloaded items from the collection's
/// "fileList.txt", creating the collection directory if it does not exist yet.
/// </summary>
/// <param name="collection">Collection name; used as the local directory.</param>
/// <returns>
/// Identifiers mapped to the date recorded for them; empty when no list file exists.
/// Blank or malformed lines are reported on the console and skipped, so the affected
/// items are simply treated as not yet downloaded instead of aborting the run.
/// </returns>
private static Dictionary<string, DateTime> GetListOfAlreadyDownloadedFiles(string collection)
{
    Dictionary<string, DateTime> localFileList = new Dictionary<string, DateTime>(5000);

    // CreateDirectory is a no-op when the directory already exists.
    Directory.CreateDirectory(collection);

    string fileListPath = Path.Combine(collection, "fileList.txt");
    if (!File.Exists(fileListPath))
    {
        return localFileList;
    }

    foreach (var line in File.ReadAllLines(fileListPath))
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        var split = line.Split('\t');

        // Parsed with the current culture on purpose: the list is written with
        // DateTime's default (current-culture) formatting, so this keeps the
        // round trip intact.
        DateTime recorded;
        if (split.Length < 2 || !DateTime.TryParse(split[1], out recorded))
        {
            Console.WriteLine("Ignoring malformed file list entry: {0}", line);
            continue;
        }

        localFileList[split[0]] = recorded;
    }

    return localFileList;
}
|
||
/// <summary>
/// Queries the archive.org advanced search endpoint for every item in a
/// collection and returns each item's identifier with its updated date.
/// </summary>
/// <param name="collection">Collection name; URL-escaped before it is placed in the query.</param>
/// <returns>Item identifiers mapped to the last value of their "oai_updatedate" list.</returns>
/// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
/// <exception cref="InvalidDataException">The reply did not contain a document list.</exception>
/// <remarks>
/// Results are requested in pages of <c>Rows</c> items until the number of
/// documents received reaches the reported "numFound" or a page comes back
/// empty, so collections larger than one page are not truncated.
/// </remarks>
private static async Task<Dictionary<string, DateTime>> GetSearchResults(string collection)
{
    Dictionary<string, DateTime> searchResultPairs = new Dictionary<string, DateTime>(5000);

    // The collection name can come straight from the command line; escape it so
    // characters such as '&' or '#' cannot alter the query string.
    string escapedCollection = Uri.EscapeDataString(collection);

    using (var httpClient = new HttpClient())
    {
        httpClient.DefaultRequestHeaders.Accept.Clear();
        httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));

        int docsReceived = 0;
        for (int page = 1; ; page++)
        {
            var url =
                $"https://archive.org/advancedsearch.php?q=collection%3A{escapedCollection}&fl%5B%5D=identifier&fl%5B%5D=oai_updatedate&sort%5B%5D=identifier+asc&sort%5B%5D=&sort%5B%5D=&rows={Rows}&page={page}&output=json";

            string jsonResult;
            using (var response = await httpClient.GetAsync(url))
            {
                if (!response.IsSuccessStatusCode)
                {
                    Console.WriteLine("Request for collection list failed with the error: ({0}) {1}", response.StatusCode,
                        response.ReasonPhrase);
                    throw new HttpRequestException("Request Failed");
                }

                jsonResult = await response.Content.ReadAsStringAsync();
            }

            var searchResult = JsonConvert.DeserializeObject<InternetArchiveSearchResult>(jsonResult);

            var docs = searchResult?.response?.docs;
            if (docs == null)
            {
                throw new InvalidDataException("Search response did not contain a document list");
            }

            if (docs.Count == 0)
            {
                break;
            }

            foreach (var docDescriptor in docs)
            {
                // Last element in oai_updatedate is the "Updated" date. An item without
                // any date falls back to DateTime.MinValue: it is downloaded when missing
                // locally, and never considered newer than an existing local copy.
                searchResultPairs[docDescriptor.identifier] =
                    docDescriptor.oai_updatedate?.LastOrDefault() ?? DateTime.MinValue;
            }

            docsReceived += docs.Count;
            if (docsReceived >= searchResult.response.numFound)
            {
                break;
            }
        }
    }

    return searchResultPairs;
}
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
using System.Reflection; | ||
using System.Runtime.CompilerServices; | ||
using System.Runtime.InteropServices; | ||
|
||
// General Information about an assembly is controlled through the following | ||
// set of attributes. Change these attribute values to modify the information | ||
// associated with an assembly. | ||
[assembly: AssemblyTitle("Sync4am")] | ||
[assembly: AssemblyDescription("")] | ||
[assembly: AssemblyConfiguration("")] | ||
[assembly: AssemblyCompany("")] | ||
[assembly: AssemblyProduct("Sync4am")] | ||
[assembly: AssemblyCopyright("Copyright © 2017")] | ||
[assembly: AssemblyTrademark("")] | ||
[assembly: AssemblyCulture("")] | ||
|
||
// Setting ComVisible to false makes the types in this assembly not visible | ||
// to COM components. If you need to access a type in this assembly from | ||
// COM, set the ComVisible attribute to true on that type. | ||
[assembly: ComVisible(false)] | ||
|
||
// The following GUID is for the ID of the typelib if this project is exposed to COM | ||
[assembly: Guid("d08ac687-571b-44e1-8bd0-4ba69152467c")] | ||
|
||
// Version information for an assembly consists of the following four values: | ||
// | ||
// Major Version | ||
// Minor Version | ||
// Build Number | ||
// Revision | ||
// | ||
// You can specify all the values or you can default the Build and Revision Numbers | ||
// by using the '*' as shown below: | ||
// [assembly: AssemblyVersion("1.0.*")] | ||
[assembly: AssemblyVersion("1.0.0.0")] | ||
[assembly: AssemblyFileVersion("1.0.0.0")] |
Oops, something went wrong.
14e6ecf
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The explanation on your website does not really explain how to use the downloader, which is why I can't download any files other than the default ones.
14e6ecf
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have updated the readme and hopefully the added detail helps.