Skip to content

Commit

Permalink
image download integrity check
Browse files Browse the repository at this point in the history
优化判断图片下载是否完整的逻辑
  • Loading branch information
lqs1848 committed Apr 13, 2022
1 parent 0491f9c commit 6a7422e
Showing 1 changed file with 96 additions and 47 deletions.
143 changes: 96 additions & 47 deletions wnacg/Download.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using ICSharpCode.SharpZipLib.Zip;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
Expand Down Expand Up @@ -105,7 +106,7 @@ private void DownloadFun(object obj)
_syncContext.Post(DlTaskStart, c.Id + "|" + c.Title);

//封面
if (!HttpDownloadFile(qz + c.Cover, comicPath, Utils.parseNumName(0, 4)))
if (!HttpDownloadImage(qz + c.Cover, comicPath, Utils.parseNumName(0, 4)))
{
_syncContext.Post(DlTaskSchedule, c.Id + "|封面下载失败");
ExeLog.WriteLog("["+c.Title + "]封面下载失败\r\n" + "(" + (qz + c.Cover) + ")\r\n");
Expand All @@ -129,7 +130,7 @@ private void DownloadFun(object obj)
}
string photoUrl = qz + new Regex(@"<img id=""picarea"" class=""photo"" alt="".*?"" src=""(.*?)"" />").Match(photoPage).Groups[1].Value.Trim();

if (!HttpDownloadFile(photoUrl, comicPath, Utils.parseNumName(k, 4)))
if (!HttpDownloadImage(photoUrl, comicPath, Utils.parseNumName(k, 4)))
{
_syncContext.Post(DlTaskSchedule, c.Id + "|第"+x+"页下载失败");
ExeLog.WriteLog("[" + c.Title + "]第" + x + "页下载失败\r\n" + "(" + photoUrl + ")\r\n");
Expand Down Expand Up @@ -167,24 +168,24 @@ private void DownloadFun(object obj)



public bool HttpDownloadFile(string url, string path, string fileName)
public bool HttpDownloadImage(string url, string path, string fileName)
{
return HttpDownloadFile(url, path, fileName, 0);
return HttpDownloadImage(url, path, fileName, 0);
}

public bool HttpDownloadFile(string url, string path,string fileName,int deep)
public bool HttpDownloadImage(string url, string path,string fileName,int deep)
{
string filePath = path + fileName + Utils.getPhotoExt(url);
if (deep == 0 && !Directory.Exists(path))
Directory.CreateDirectory(path);

if (File.Exists(filePath))
if (new FileInfo(filePath).Length > 1024)
if (new FileInfo(filePath).Length > 1024 && IsCompletedImage(filePath))
return true;

try
{
using (System.Net.WebClient wc = new System.Net.WebClient())
using (WebClient wc = new WebClient())
{
if (!string.IsNullOrWhiteSpace(this._ProxyStr)) {
string[] proxys = this._ProxyStr.Split(':');
Expand All @@ -193,8 +194,8 @@ public bool HttpDownloadFile(string url, string path,string fileName,int deep)
wc.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.136 YaBrowser/20.2.4.141 Yowser/2.5 Safari/537.36");
wc.DownloadFile(url, filePath);//保存到本地的文件名和路径,请自行更改
}
FileInfo fileInfo = new System.IO.FileInfo(filePath);
if (!fileInfo.Exists || fileInfo.Length <= 100)
FileInfo fileInfo = new FileInfo(filePath);
if (!fileInfo.Exists || fileInfo.Length <= 100 || !IsCompletedImage(filePath))
{
return false;
}
Expand Down Expand Up @@ -222,53 +223,101 @@ public bool HttpDownloadFile(string url, string path,string fileName,int deep)
else
{
Thread.Sleep(1000);
return HttpDownloadFile(url, path, fileName, ++deep);
return HttpDownloadImage(url, path, fileName, ++deep);
}
}
/*
try
{
// 设置参数
HttpWebRequest request = Http.GetWebRequest(url);
//发送请求并获取相应回应数据
HttpWebResponse response = request.GetResponse() as HttpWebResponse;
request.Timeout = 30 * 1000;
double dataLengthToRead = response.ContentLength;
//直到request.GetResponse()程序才开始向目标网页发送Post请求
Stream responseStream = response.GetResponseStream();
//创建本地文件写入流
Stream stream = new FileStream(path+ fileName + ".temp.wnacg", FileMode.Create);
byte[] bArr = new byte[1024 * 512];
int size = responseStream.Read(bArr, 0, (int)bArr.Length);
while (size > 0)
{
stream.Write(bArr, 0, size);
size = responseStream.Read(bArr, 0, (int)bArr.Length);
}
stream.Close();
responseStream.Close();

File.Move(path + fileName + ".temp.wnacg", filePath);
}//method

// Leading magic bytes of the supported image formats.
private static readonly byte[] PngSignature = { 137, 80, 78, 71, 13, 10, 26, 10 };
private static readonly byte[] BmpSignature = { 66, 77 };            // "BM"
private static readonly byte[] GifSignature = { 71, 73, 70, 56 };    // "GIF8"
private static readonly byte[] JpegSignature = { 255, 216 };         // FF D8

// Byte sequences that are expected at (or near) the end of a complete file.
private static readonly byte[] PngEndMarker = { 68, 174, 66, 96, 130 }; // last byte of "IEND" followed by its CRC
private static readonly byte[] GifEndMarker = { 0, 59 };                 // 00 3B
private static readonly byte[] JpegEndMarker = { 255, 217 };             // FF D9

/// <summary>
/// Checks whether the image file at <paramref name="strFileName"/> looks completely downloaded.
/// </summary>
/// <remarks>
/// PNG, GIF and JPEG files are recognised by their leading magic bytes and are accepted
/// when their end marker is found at the very end of the file or, because some files carry
/// trailing padding, anywhere in the second half of the file. BMP files are accepted when
/// the size declared in the header does not exceed the number of bytes actually present.
/// </remarks>
/// <param name="strFileName">Path of the file to inspect.</param>
/// <returns>
/// <c>true</c> when the file is a recognised image that appears complete; <c>false</c> when
/// the format is unknown, the data is truncated, or the file cannot be read.
/// </returns>
private static bool IsCompletedImage(string strFileName)
{
    byte[] data;
    try
    {
        // ReadAllBytes opens the file read-only and always releases the handle.
        data = File.ReadAllBytes(strFileName);
    }
    catch (Exception)
    {
        // Deliberate best effort: a file that cannot be read is reported as
        // incomplete so that the caller downloads it again.
        return false;
    }

    if (StartsWith(data, PngSignature))
    {
        return HasMarkerInSecondHalf(data, PngEndMarker);
    }

    if (StartsWith(data, BmpSignature))
    {
        return IsBmpLongEnough(data);
    }

    if (StartsWith(data, GifSignature))
    {
        return HasMarkerInSecondHalf(data, GifEndMarker);
    }

    if (StartsWith(data, JpegSignature))
    {
        return HasMarkerInSecondHalf(data, JpegEndMarker);
    }

    return false;
}

// Returns true when the BMP header's little-endian size field (offset 2, 4 bytes)
// does not exceed the actual length; files longer than declared are tolerated.
private static bool IsBmpLongEnough(byte[] data)
{
    if (data.Length < 6)
    {
        return false;
    }

    // Assemble the value arithmetically; widening to long keeps sizes >= 2 GiB positive.
    long declaredLength = data[2]
        | ((long)data[3] << 8)
        | ((long)data[4] << 16)
        | ((long)data[5] << 24);
    return declaredLength <= data.Length;
}

// Returns true when data begins with the given signature.
private static bool StartsWith(byte[] data, byte[] signature)
{
    return data.Length >= signature.Length && MatchesAt(data, 0, signature);
}

// Returns true when marker occurs in data starting at or after the midpoint.
// The search runs backwards from the end so the common "marker is the last bytes" case is hit first.
private static bool HasMarkerInSecondHalf(byte[] data, byte[] marker)
{
    int lowestStart = data.Length / 2;
    for (int start = data.Length - marker.Length; start >= lowestStart; --start)
    {
        if (MatchesAt(data, start, marker))
        {
            return true;
        }
    }
    return false;
}

// Compares marker against data at the given offset; the caller guarantees the range is in bounds.
private static bool MatchesAt(byte[] data, int offset, byte[] marker)
{
    for (int j = 0; j < marker.Length; ++j)
    {
        if (data[offset + j] != marker[j])
        {
            return false;
        }
    }
    return true;
}
}//class
}//namespace

0 comments on commit 6a7422e

Please sign in to comment.