-
Notifications
You must be signed in to change notification settings - Fork 2
/
ChunkDownloadLargeFileAndUploadToBlob.cs
202 lines (169 loc) · 8.2 KB
/
ChunkDownloadLargeFileAndUploadToBlob.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
using Microsoft.Azure;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;
namespace AzureSampleConsoleApp1
{
/// <summary>
/// Console host for the sample: runs a single <see cref="LargeFileProcessor"/> transfer and then
/// waits for a key press so the console output stays visible.
/// </summary>
public class ChunkDownloadLargeFileAndUploadToBlob
{
    /// <summary>
    /// Entry point. Blocks until the download/upload has finished, then waits for a key press.
    /// </summary>
    public static void Main()
    {
        var largeFileProcessor = new LargeFileProcessor();

        // GetAwaiter().GetResult() rethrows the original exception on failure;
        // Task.Wait() would wrap it in an AggregateException and obscure the real error.
        largeFileProcessor.ProcessLargeFile().GetAwaiter().GetResult();

        Console.ReadKey();
    }
}
/// <summary>
/// Downloads a file over HTTP as a stream and uploads it to an Azure block blob one block at a time,
/// computing a SHA-256 checksum of the downloaded bytes along the way.
/// </summary>
public class LargeFileProcessor
{
    /// <summary>
    /// Size of each uploaded block: 100 MB (per the original author's note, the maximum block size
    /// supported by the service at the time of writing).
    /// </summary>
    private const int BlockSizeInBytes = 104857600;

    /// <summary>
    /// Logger instance.
    /// </summary>
    private readonly ILogger logger = new Logger();

    /// <summary>
    /// Maximum number of retries for a failed stream read.
    /// </summary>
    private readonly int retryCount = 5;

    /// <summary>
    /// Time delay between read retries.
    /// </summary>
    private readonly TimeSpan delay = TimeSpan.FromSeconds(10);

    /// <summary>
    /// Downloads the file at the configured "DownloadURL" in blocks and uploads each block to the
    /// configured blob, then commits the block list.
    /// </summary>
    /// <remarks>
    /// Reads the settings "StorageAccount", "ContainerName", "BlobFileName" and "DownloadURL".
    /// The download works whether or not the server announces a Content-Length. When it does,
    /// a download that ends short of that length is rejected instead of being committed.
    /// </remarks>
    /// <returns>A task that completes when the blob has been committed.</returns>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    /// <exception cref="IOException">The download ended before the announced Content-Length was received.</exception>
    public async Task ProcessLargeFile()
    {
        // Create Storage account reference.
        CloudStorageAccount storageAccount = CloudStorageAccount.Parse(CloudConfigurationManager.GetSetting("StorageAccount"));

        // Create the blob client.
        CloudBlobClient blobClient = storageAccount.CreateCloudBlobClient();

        // Retrieve reference to a container and make sure it exists (without blocking the thread).
        CloudBlobContainer container = blobClient.GetContainerReference(CloudConfigurationManager.GetSetting("ContainerName"));
        await container.CreateIfNotExistsAsync().ConfigureAwait(false);

        // Create Blob reference.
        CloudBlockBlob blob = container.GetBlockBlobReference(CloudConfigurationManager.GetSetting("BlobFileName"));

        // Provide valid URL from where the large file can be downloaded.
        string urlToDownload = CloudConfigurationManager.GetSetting("DownloadURL");

        Stopwatch stopwatch = Stopwatch.StartNew();
        try
        {
            using (HttpClient httpClient = new HttpClient())
            using (var httpRequestMessage = new HttpRequestMessage(HttpMethod.Get, new Uri(urlToDownload)))
            {
                // To avoid error related to 'An existing connection was forcibly closed by the remote host'. Use Http1.0 instead of Http1.1.
                httpRequestMessage.Version = HttpVersion.Version10;

                // ResponseHeadersRead: start processing as soon as the headers arrive instead of buffering the whole body.
                using (HttpResponseMessage response = await httpClient.SendAsync(httpRequestMessage, HttpCompletionOption.ResponseHeadersRead).ConfigureAwait(false))
                {
                    // Fail fast on 4xx/5xx so an error page is never uploaded as the blob content.
                    response.EnsureSuccessStatusCode();

                    using (Stream stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false))
                    using (SHA256 sha256 = SHA256.Create())
                    {
                        // Content-Length is optional; null means "read until the stream ends".
                        long? expectedLength = response.Content.Headers.ContentLength;

                        // One buffer is reused for every block. This is safe because each block is
                        // fully uploaded before the next read overwrites the buffer.
                        var buffer = new byte[(int)Math.Min(expectedLength ?? BlockSizeInBytes, BlockSizeInBytes)];
                        var blockIds = new List<string>();
                        long totalBytesRead = 0;
                        int blockIdentifier = 0;

                        while (!expectedLength.HasValue || totalBytesRead < expectedLength.Value)
                        {
                            int bytesToCopy = expectedLength.HasValue
                                ? (int)Math.Min(expectedLength.Value - totalBytesRead, buffer.Length)
                                : buffer.Length;

                            int bytesCountRead = await this.ReadStreamAndAccumulate(stream, buffer, bytesToCopy).ConfigureAwait(false);
                            if (bytesCountRead == 0)
                            {
                                // End of stream: nothing left to upload.
                                break;
                            }

                            totalBytesRead += bytesCountRead;
                            this.logger.WriteLine($"bytes read: {bytesCountRead}");
                            if (expectedLength.HasValue)
                            {
                                this.logger.WriteLine($"bytes remaining: {expectedLength.Value - totalBytesRead}");
                            }

                            // Block ids must all have the same length, hence the zero-padded counter.
                            blockIdentifier++;
                            string base64BlockId = Convert.ToBase64String(Encoding.ASCII.GetBytes($"largefile1BlockId{blockIdentifier:0000000}"));
                            blockIds.Add(base64BlockId);

                            // Feed the block into the running checksum; the hash is finalized after the loop.
                            sha256.TransformBlock(buffer, 0, bytesCountRead, null, 0);

                            // Upload only the bytes actually read, not the whole buffer.
                            using (var blockStream = new MemoryStream(buffer, 0, bytesCountRead, writable: false))
                            {
                                await blob.PutBlockAsync(base64BlockId, blockStream, null).ConfigureAwait(false);
                            }

                            if (bytesCountRead < bytesToCopy)
                            {
                                // A short block means the stream ended while filling it.
                                break;
                            }
                        }

                        if (expectedLength.HasValue && totalBytesRead != expectedLength.Value)
                        {
                            // Do not commit a truncated file.
                            throw new IOException($"Download ended after {totalBytesRead} bytes but Content-Length announced {expectedLength.Value} bytes.");
                        }

                        // Finalize with an empty block so the hash is also valid for a zero-length download.
                        sha256.TransformFinalBlock(new byte[0], 0, 0);
                        var checksum = BitConverter.ToString(sha256.Hash).Replace("-", string.Empty);
                        this.logger.WriteLine($"Hash value is : {checksum}");

                        // Commit the uploaded blocks, in order, as the blob content.
                        await blob.PutBlockListAsync(blockIds).ConfigureAwait(false);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            this.logger.WriteLine(ex.Message);
            throw;
        }
        finally
        {
            stopwatch.Stop();
            this.logger.WriteLine($"Execution time in mins: {stopwatch.Elapsed.TotalMinutes}");
        }
    }

    /// <summary>
    /// Reads from the stream until the requested number of bytes has been accumulated or the
    /// stream ends, whichever comes first.
    /// </summary>
    /// <param name="stream">Stream to be read from.</param>
    /// <param name="bytesToSend">Target byte array that holds the bytes read, filled from index 0.</param>
    /// <param name="bytesCountToCopy">The number of bytes to be copied.</param>
    /// <returns>
    /// The number of bytes read; less than <paramref name="bytesCountToCopy"/> only when the
    /// end of the stream was reached.
    /// </returns>
    private async Task<int> ReadStreamAndAccumulate(Stream stream, byte[] bytesToSend, int bytesCountToCopy)
    {
        int bytesReadSoFar = 0;
        while (bytesReadSoFar < bytesCountToCopy)
        {
            int currentBytesCountRead = await this.ReadStreamWithRetry(stream, bytesToSend, bytesCountToCopy - bytesReadSoFar, bytesReadSoFar).ConfigureAwait(false);
            if (currentBytesCountRead == 0)
            {
                // A 0-byte read signals end of stream; without this check the loop would never terminate.
                break;
            }

            bytesReadSoFar += currentBytesCountRead;
        }

        return bytesReadSoFar;
    }

    /// <summary>
    /// Performs a single stream read, retrying after a delay when the read throws.
    /// </summary>
    /// <param name="stream">Stream to be read from.</param>
    /// <param name="bytesToSend">Target byte array that holds the bytes read.</param>
    /// <param name="bytesCountToCopy">The maximum number of bytes to read.</param>
    /// <param name="offset">The byte offset in buffer at which to begin writing data from the stream.</param>
    /// <returns>The number of bytes read; 0 when the end of the stream has been reached.</returns>
    private async Task<int> ReadStreamWithRetry(Stream stream, byte[] bytesToSend, int bytesCountToCopy, int offset)
    {
        int currentRetry = 0;
        for (; ; )
        {
            try
            {
                return await stream.ReadAsync(bytesToSend, offset, bytesCountToCopy).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                this.logger.WriteLine($"Operation Exception : {ex.Message}");
                currentRetry++;

                // Rethrow once the configured number of retries has been used up.
                if (currentRetry > this.retryCount)
                {
                    throw;
                }
            }

            // Wait before retrying the operation.
            await Task.Delay(this.delay).ConfigureAwait(false);
        }
    }
}
}