From 605757ce0b0fbb0b67055e3f260afc523f656fa5 Mon Sep 17 00:00:00 2001 From: sisong Date: Fri, 15 Sep 2023 11:05:18 +0800 Subject: [PATCH 1/4] saved the actual dict size; --- compress/tuz_enc.cpp | 30 ++++++++++++++++++++--- compress/tuz_enc_private/tuz_enc_code.cpp | 1 + compress/tuz_enc_private/tuz_enc_code.h | 6 +++-- decompress/tuz_types.h | 3 +++ tinyuz_demo.cpp | 1 + 5 files changed, 35 insertions(+), 6 deletions(-) diff --git a/compress/tuz_enc.cpp b/compress/tuz_enc.cpp index b5bd47a..8b2130b 100644 --- a/compress/tuz_enc.cpp +++ b/compress/tuz_enc.cpp @@ -5,6 +5,7 @@ */ #include "tuz_enc.h" #include "tuz_enc_private/tuz_enc_clip.h" +#include //std::max using namespace _tuz_private; #define tuz_kDefaultDictSize (1<<24) @@ -20,6 +21,12 @@ hpatch_StreamPos_t tuz_maxCompressedSize(hpatch_StreamPos_t data_size){ return data_size + u_size + 1+kMaxPackedPosByteSize + 4*c_count + 4; } +inline void _flush_code(const hpatch_TStreamOutput* out_code,hpatch_StreamPos_t& cur_out_pos,std::vector& code){ + checkv(out_code->write(out_code,cur_out_pos,code.data(),code.data()+code.size())); + cur_out_pos+=code.size(); + code.clear(); +} + hpatch_StreamPos_t tuz_compress(const hpatch_TStreamOutput* out_code,const hpatch_TStreamInput* data, const tuz_TCompressProps* props){ checkv(out_code&&(out_code->write)); @@ -41,11 +48,14 @@ hpatch_StreamPos_t tuz_compress(const hpatch_TStreamOutput* out_code,const hpatc hpatch_StreamPos_t cur_out_pos=0; std::vector code; {//head + assert(code.empty()); TTuzCode coder(code,selfProps.isNeedLiteralLine); checkv(selfProps.dictSize==(tuz_size_t)selfProps.dictSize); checkv(selfProps.maxSaveLength==(tuz_length_t)selfProps.maxSaveLength); coder.outDictSize(selfProps.dictSize); + _flush_code(out_code,cur_out_pos,code); } + size_t curDictSizeMax=tuz_kMinOfDictSize; { hpatch_StreamPos_t clipSize; { @@ -62,6 +72,7 @@ hpatch_StreamPos_t tuz_compress(const hpatch_TStreamOutput* out_code,const hpatc bool isToStreamEnd=(clipEnd>=data->streamSize); if (isToStreamEnd) clipEnd=data->streamSize; + assert(code.empty()); TTuzCode coder(code,selfProps.isNeedLiteralLine); if (clipBeginwrite(out_code,cur_out_pos,code.data(),code.data()+code.size())); - cur_out_pos+=code.size(); - code.clear(); + + curDictSizeMax=std::max(curDictSizeMax,coder.getCurDictSizeMax()); + _flush_code(out_code,cur_out_pos,code); if (isToStreamEnd) break; } } + + {//update dictSize + checkv(curDictSizeMax<=selfProps.dictSize); + if (curDictSizeMax_dict_size_max) _dict_size_max=saved_dict_pos; const size_t isSamePos=(_dictPos_back==saved_dict_pos)?1:0; const size_t isSavedSamePos=(isSamePos&&_isHaveData_back)?1:0; size_t len=match_len-tuz_kMinDictMatchLen; diff --git a/compress/tuz_enc_private/tuz_enc_code.h b/compress/tuz_enc_private/tuz_enc_code.h index feaf316..f6e30d2 100644 --- a/compress/tuz_enc_private/tuz_enc_code.h +++ b/compress/tuz_enc_private/tuz_enc_code.h @@ -11,15 +11,15 @@ namespace _tuz_private{ struct TTuzCode{ explicit TTuzCode(std::vector& out_code,bool _isNeedLiteralLine) :code(out_code),isNeedLiteralLine(_isNeedLiteralLine), - type_count(0),_dictPos_back(1),_isHaveData_back(false){ _init_2bit(); } + type_count(0),_dictPos_back(1),_dict_size_max(tuz_kMinOfDictSize),_isHaveData_back(false){ _init_2bit(); } - void outDictPos(size_t pos); void outDictSize(size_t dict_size); void outData(const tuz_byte* data,const tuz_byte* data_end); void outDict(size_t match_len,size_t dict_pos); void outCtrl_typesEnd(); void outCtrl_streamEnd(); void outCtrl_clipEnd(); + const size_t getCurDictSizeMax() { return _dict_size_max; } inline size_t getSavedDataBit(size_t data_len)const{ if (isNeedLiteralLine&&(data_len>=tuz_kMinLiteralLen)){ @@ -42,6 +42,7 @@ namespace _tuz_private{ else return _getSavedDictPosBit(pos)+isHaveData; } private: + void outDictPos(size_t pos); enum { _len2bit_count=1024*8, _pos2bit_count=1024*32 }; tuz_byte _pos2bit[_pos2bit_count]; tuz_byte _len2bit[_len2bit_count]; @@ -53,6 +54,7 @@ namespace _tuz_private{ size_t types_index; size_t type_count; size_t _dictPos_back; + size_t _dict_size_max; bool _isHaveData_back; void outType(size_t bit1v); void outCtrl(tuz_TCtrlType ctrl); diff --git a/decompress/tuz_types.h b/decompress/tuz_types.h index 37b1f16..dec30bd 100644 --- a/decompress/tuz_types.h +++ b/decompress/tuz_types.h @@ -117,6 +117,9 @@ extern "C" { //# define tuz_kMaxOfDictSize ((1<<24)-1) //3 bytes //# define tuz_kMaxOfDictSize ((1<<16)-1) //2 bytes #endif +#ifndef tuz_kMinOfDictSize +# define tuz_kMinOfDictSize 1 +#endif //save dictSize at the beginning of the compressed code stream, little-endian order, tuz_kDictSizeSavedBytes bytes #define __tuz_kMaxOfDictSize_MAX (1<<30) //now limit for uint32 diff --git a/tinyuz_demo.cpp b/tinyuz_demo.cpp index 4cdc648..f2f9a0a 100644 --- a/tinyuz_demo.cpp +++ b/tinyuz_demo.cpp @@ -204,6 +204,7 @@ int tinyuz_by_file(const char* inputFile,const char* outputFile,bool isCompress, check(hpatch_TFileStreamInput_open(&inputData,inputFile),TINYUZ_OPENREAD_ERROR,"open inputFile"); check(hpatch_TFileStreamOutput_open(&outputData,outputFile,~(hpatch_StreamPos_t)0), TINYUZ_OPENWRITE_ERROR,"open outputFile"); + hpatch_TFileStreamOutput_setRandomOut(&outputData,hpatch_TRUE); inputSize=inputData.base.streamSize; printf("inputSize : %" PRIu64 " Bytes\n",inputSize); if (isCompress){ From 4c90b536e3173a2557e392383e7470aecccbdaee Mon Sep 17 00:00:00 2001 From: sisong Date: Fri, 15 Sep 2023 20:32:02 +0800 Subject: [PATCH 2/4] compress support muti-thread; --- builds/vc/libtinyuz.vcxproj | 10 +- compress/tuz_enc.cpp | 174 ++++++++++++++++++++-- compress/tuz_enc_private/tuz_enc_clip.cpp | 50 ++++--- compress/tuz_enc_private/tuz_enc_clip.h | 9 +- speed_test.cpp | 11 +- tinyuz_demo.cpp | 40 ++++- 6 files changed, 251 insertions(+), 43 deletions(-) diff --git a/builds/vc/libtinyuz.vcxproj b/builds/vc/libtinyuz.vcxproj index e23d17e..5285ca5 100644 --- a/builds/vc/libtinyuz.vcxproj +++ b/builds/vc/libtinyuz.vcxproj @@ -21,6 +21,8 @@ + + @@ -110,7 +112,7 @@ Level3 Disabled - WIN32;_IS_USED_MULTITHREAD=0;_DEBUG;%(PreprocessorDefinitions) + WIN32;_DEBUG;%(PreprocessorDefinitions) NotUsing MultiThreadedDebug @@ -126,7 +128,7 @@ Level3 Disabled - WIN32;_IS_USED_MULTITHREAD=0;_DEBUG;%(PreprocessorDefinitions) + WIN32;_DEBUG;%(PreprocessorDefinitions) NotUsing MultiThreadedDebug @@ -141,7 +143,7 @@ Level3 - WIN32;_IS_USED_MULTITHREAD=0;NDEBUG;%(PreprocessorDefinitions) + WIN32;NDEBUG;%(PreprocessorDefinitions) NotUsing true MultiThreaded @@ -157,7 +159,7 @@ Level3 - WIN32;_IS_USED_MULTITHREAD=0;NDEBUG;%(PreprocessorDefinitions) + WIN32;NDEBUG;%(PreprocessorDefinitions) NotUsing true MultiThreaded diff --git a/compress/tuz_enc.cpp b/compress/tuz_enc.cpp index 8b2130b..8dc19d9 100644 --- a/compress/tuz_enc.cpp +++ b/compress/tuz_enc.cpp @@ -6,6 +6,7 @@ #include "tuz_enc.h" #include "tuz_enc_private/tuz_enc_clip.h" #include //std::max +#include "../../HDiffPatch/libParallel/parallel_channel.h" using namespace _tuz_private; #define tuz_kDefaultDictSize (1<<24) @@ -27,6 +28,135 @@ inline void _flush_code(const hpatch_TStreamOutput* out_code,hpatch_StreamPos_t& code.clear(); } +#if (_IS_USED_MULTITHREAD) + +struct _stream_mtsafe_t { + hpatch_TStreamOutput base; + const hpatch_TStreamOutput* _stream; + CHLocker _locker; + hpatch_StreamPos_t _sumWrited; + inline explicit _stream_mtsafe_t(const hpatch_TStreamOutput* stream) + :_stream(stream),_sumWrited(0) { + base.streamImport = this; + base.streamSize = stream->streamSize; + base.read_writed = (stream->read_writed) ? _read_writed : 0; + base.write = (stream->write) ? _write : 0; + } + static hpatch_BOOL _read_writed(const struct hpatch_TStreamOutput* stream, hpatch_StreamPos_t readFromPos, + unsigned char* out_data, unsigned char* out_data_end) { + _stream_mtsafe_t& self = *(_stream_mtsafe_t*)stream->streamImport; + CAutoLocker _auto_locker(self._locker.locker); + return self._stream->read_writed(self._stream, readFromPos, out_data, out_data_end); + } + static hpatch_BOOL _write(const struct hpatch_TStreamOutput* stream, hpatch_StreamPos_t writeToPos, + const unsigned char* data, const unsigned char* data_end) { + _stream_mtsafe_t& self = *(_stream_mtsafe_t*)stream->streamImport; + CAutoLocker _auto_locker(self._locker.locker); + self._sumWrited+=(size_t)(data_end-data); + return self._stream->write(self._stream,writeToPos,data,data_end); + } +}; + + struct TWorkBuf{ + hpatch_StreamPos_t clipBegin; + hpatch_StreamPos_t clipEnd; + struct TWorkBuf* next; + std::vector code; + }; + +struct TMt:public TMtByChannel{ + tuz_TCompressProps selfProps; + + _stream_mtsafe_t out_code; + _stream_mtsafe_t data; + TMt(const hpatch_TStreamOutput* _out_code,const hpatch_TStreamInput* _data) + :out_code(_out_code),data((hpatch_TStreamOutput*)_data){} + + hpatch_StreamPos_t clipSize; + hpatch_StreamPos_t curWorkClipPos; + hpatch_StreamPos_t curOutedClipPos; + hpatch_StreamPos_t curWritePos; + size_t curDictSizeMax; + TWorkBuf* workBufList; + + inline TWorkBuf* getWork(){ + if (is_on_error()||(curWorkClipPos==data.base.streamSize)) return 0; + TWorkBuf* work=(TWorkBuf*)work_chan.accept(true); + if (work==0) return 0; + + CAutoLocker _auto_locker(_locker.locker); + work->clipBegin=curWorkClipPos; + work->clipEnd=curWorkClipPos+clipSize; + if (work->clipEnd>data.base.streamSize) work->clipEnd=data.base.streamSize; + curWorkClipPos=work->clipEnd; + if (work->clipBegin>=work->clipEnd) return 0; + return work; + } + void finishWork(TWorkBuf* work,size_t _curDictSize){ + hpatch_StreamPos_t cur_out_pos; + while (true){ + { + CAutoLocker _auto_locker(_locker.locker); + curDictSizeMax=std::max(curDictSizeMax,_curDictSize); + if (work){ + if (work->clipBegin!=curOutedClipPos){ //push + TWorkBuf** insertBuf=&workBufList; + while ((*insertBuf)&&((*insertBuf)->clipBeginclipBegin)) + insertBuf=&((*insertBuf)->next); + work->next=*insertBuf; + *insertBuf=work; + work=0; + } + }else if (workBufList&&(workBufList->clipBegin==curOutedClipPos)){ //pop + work=workBufList; + workBufList=workBufList->next; + } + if (work){ + cur_out_pos=curWritePos; + curWritePos+=work->code.size(); + curOutedClipPos=work->clipEnd; + } + } + if (work==0) break; + + _flush_code(&out_code.base,cur_out_pos,work->code); + checkv(work_chan.send(work,true)); + work=0; + } + } +}; + +static void _tuz_compress_mt(int threadIndex,void* workData){ + TMt& mt=*(TMt*)workData; + TMt::TAutoThreadEnd __auto_end(mt); + const hpatch_TStreamInput* data=(hpatch_TStreamInput*)&mt.data.base; + + TDictBuf dict_buf; + try{ + while(TWorkBuf* work=mt.getWork()) { + hpatch_StreamPos_t clipBegin=work->clipBegin; + hpatch_StreamPos_t clipEnd=work->clipEnd; + std::vector& code=work->code; + bool isToStreamEnd=(clipEnd>=data->streamSize); + + assert(code.empty()); + TTuzCode coder(code,mt.selfProps.isNeedLiteralLine); + if (clipBeginwrite)); @@ -55,18 +185,44 @@ hpatch_StreamPos_t tuz_compress(const hpatch_TStreamOutput* out_code,const hpatc coder.outDictSize(selfProps.dictSize); _flush_code(out_code,cur_out_pos,code); } + size_t curDictSizeMax=tuz_kMinOfDictSize; + hpatch_StreamPos_t clipSize; + size_t threadNum=props->threadNum; { - hpatch_StreamPos_t clipSize; - { - clipSize=((hpatch_StreamPos_t)selfProps.dictSize+1)/3; - if (clipSizekMaxBestClipSize) clipSize=kMaxBestClipSize; - hpatch_StreamPos_t clipCount=(data->streamSize+clipSize)/clipSize; - clipSize=(data->streamSize+clipCount-1)/clipCount; - } + clipSize=((hpatch_StreamPos_t)selfProps.dictSize+1)/3; + if (clipSizekMaxBestClipSize) clipSize=kMaxBestClipSize; + hpatch_StreamPos_t clipCount=(data->streamSize+clipSize)/clipSize; + clipSize=(data->streamSize+clipCount-1)/clipCount; + if (threadNum>clipCount) threadNum=(size_t)clipCount; + } - hdiff_private::TAutoMem dict_buf; +#if (_IS_USED_MULTITHREAD) + if (threadNum>1){ + TMt mt(out_code,data); + mt.selfProps=selfProps; + mt.clipSize=clipSize; + mt.curWorkClipPos=0; + mt.curOutedClipPos=0; + mt.curWritePos=cur_out_pos; + mt.curDictSizeMax=curDictSizeMax; + mt.workBufList=0; + std::vector _codeList; + _codeList.resize(threadNum+1+threadNum/2); + for (size_t i=0;i<_codeList.size();++i) + checkv(mt.work_chan.send(&_codeList[i],true)); + mt.start_threads(threadNum,_tuz_compress_mt,&mt,true); + + mt.wait_all_thread_end(); + checkv(!mt.is_on_error()); + checkv(mt.curOutedClipPos==data->streamSize); + curDictSizeMax=mt.curDictSizeMax; + cur_out_pos=mt.curWritePos; + }else +#endif + { + TDictBuf dict_buf; for (hpatch_StreamPos_t clipBegin=0;true;clipBegin+=clipSize) { hpatch_StreamPos_t clipEnd=clipBegin+clipSize; bool isToStreamEnd=(clipEnd>=data->streamSize); diff --git a/compress/tuz_enc_private/tuz_enc_clip.cpp b/compress/tuz_enc_private/tuz_enc_clip.cpp index 414da24..3dde648 100644 --- a/compress/tuz_enc_private/tuz_enc_clip.cpp +++ b/compress/tuz_enc_private/tuz_enc_clip.cpp @@ -19,32 +19,36 @@ namespace _tuz_private{ } void compress_clip(TTuzCode& coder,const hpatch_TStreamInput* data,hpatch_StreamPos_t clipBegin, - hpatch_StreamPos_t clipEnd,const tuz_TCompressProps& props, - hdiff_private::TAutoMem& dict_buf){ + hpatch_StreamPos_t clipEnd,const tuz_TCompressProps& props,TDictBuf& dict_buf){ // [ |clipBegin clipEnd| ] // | dict | // | new dict | - size_t mem_size; + std::vector& data_buf(dict_buf.dictBuf); + hpatch_StreamPos_t dictBeginPos; + const size_t dictSizeBack=data_buf.size(); { - hpatch_StreamPos_t _mem_size; - assert(clipBegin>=dict_buf.size()); - _mem_size=dict_buf.size()+(clipEnd-clipBegin); - mem_size=(size_t)_mem_size; - checkv(mem_size==_mem_size); + dictBeginPos=(clipBegin<=props.dictSize)?0:(clipBegin-props.dictSize); + hpatch_StreamPos_t _mem_size=clipEnd-dictBeginPos; + checkv(_mem_size==(size_t)_mem_size); + checkv(_mem_size>=data_buf.size()); + data_buf.resize((size_t)_mem_size); } - TAutoMem data_buf(mem_size); - { - memcpy(data_buf.data(),dict_buf.data(),dict_buf.size()); - checkv(data->read(data,clipBegin,data_buf.data()+dict_buf.size(),data_buf.data_end())); - //update dict - size_t newDictSize=(props.dictSize<=clipBegin)?props.dictSize:(size_t)clipBegin; - dict_buf.realloc(newDictSize); - memcpy(dict_buf.data(),data_buf.data_end()-newDictSize,newDictSize); + {//read data + hpatch_StreamPos_t readPos=dictBeginPos; + if (dict_buf.dictEndPos>readPos){ + checkv(dict_buf.dictEndPos<=clipBegin); + size_t movLen=(size_t)(dict_buf.dictEndPos-readPos); + checkv(dictSizeBack>=movLen); + if (dictSizeBack-movLen>0) + memmove(data_buf.data(),data_buf.data()+dictSizeBack-movLen,movLen); + readPos=dict_buf.dictEndPos; + } + checkv(data->read(data,readPos,data_buf.data()+(size_t)(readPos-dictBeginPos),data_buf.data()+data_buf.size())); } - TMatch matcher(data_buf.data(),data_buf.data_end(),coder,props); + TMatch matcher(data_buf.data(),data_buf.data()+data_buf.size(),coder,props); {//match loop - const tuz_byte* end=data_buf.data_end(); + const tuz_byte* end=data_buf.data()+data_buf.size(); const tuz_byte* cur=end-(clipEnd-clipBegin); const tuz_byte* back=cur; while (cur!=end){ @@ -72,6 +76,16 @@ void compress_clip(TTuzCode& coder,const hpatch_TStreamInput* data,hpatch_Stream if (unmatched_len>0) _outData(back,unmatched_len,coder,props); } + + { //update dict + size_t newDictSize=(props.dictSize<=clipEnd)?props.dictSize:(size_t)clipEnd; + checkv(data_buf.size()>=newDictSize); + dict_buf.dictEndPos=clipEnd; + if (data_buf.size()>newDictSize){ + memmove(data_buf.data(),data_buf.data()+(data_buf.size()-newDictSize),newDictSize); + data_buf.resize(newDictSize); + } + } } } diff --git a/compress/tuz_enc_private/tuz_enc_clip.h b/compress/tuz_enc_private/tuz_enc_clip.h index 916594f..644f069 100644 --- a/compress/tuz_enc_private/tuz_enc_clip.h +++ b/compress/tuz_enc_private/tuz_enc_clip.h @@ -8,9 +8,14 @@ #include "tuz_enc_code.h" namespace _tuz_private{ + struct TDictBuf{ + hpatch_StreamPos_t dictEndPos; + std::vector dictBuf; + inline TDictBuf():dictEndPos(0){} + }; + void compress_clip(TTuzCode& out_code,const hpatch_TStreamInput* data,hpatch_StreamPos_t clipBegin, - hpatch_StreamPos_t clipEnd,const tuz_TCompressProps& props, - hdiff_private::TAutoMem& dict_buf); + hpatch_StreamPos_t clipEnd,const tuz_TCompressProps& props,TDictBuf& dict_buf); } #endif //_tuz_enc_clip_h diff --git a/speed_test.cpp b/speed_test.cpp index 64672d3..7e3d0e2 100644 --- a/speed_test.cpp +++ b/speed_test.cpp @@ -248,6 +248,7 @@ int _test_tuz_compress(unsigned char* out_data,unsigned char* out_data_end, mem_as_hStreamInput(&in_stream,src,src_end); tuz_TCompressProps props=tuz_kDefaultCompressProps; props.dictSize=_tuz_kDictSize; + props.threadNum=8; //props.maxSaveLength=255; hpatch_StreamPos_t codeSize=tuz_compress(&out_stream,&in_stream,&props); return (int)codeSize; @@ -317,12 +318,12 @@ bool _test_tuz_decompress_mem(unsigned char* out_data,unsigned char* out_data_en } static void testFile(const char* srcFileName){ - zlib_level=6; - outResult(testProc(srcFileName, zlib_compress, "", zlib_decompress, " zlib -6")); + //zlib_level=6; + //outResult(testProc(srcFileName, zlib_compress, "", zlib_decompress, " zlib -6")); + zlib_level=9; - /* if (!isDictSizeTest) { - outResult(testProc(srcFileName,zlib_compress ,"",zlib_decompress ," zlib -9")); + outResult(testProc(srcFileName,zlib_compress ,"",zlib_decompress ," zlib -9")); outResult(testProc(srcFileName,_test_tuz_compress,"",_test_tuz_decompress_stream,"tinyuz_stream")); outResult(testProc(srcFileName,_test_tuz_compress,"",_test_tuz_decompress_mem ," tinyuz_mem")); }else{ @@ -333,7 +334,7 @@ static void testFile(const char* srcFileName){ _tuz_kDictSize=tDictSizes[i]; outResult(testProc(srcFileName,_test_tuz_compress,"",_test_tuz_decompress_stream,tag.c_str())); } - }*/ + } } int main(int argc, const char * argv[]){ diff --git a/tinyuz_demo.cpp b/tinyuz_demo.cpp index f2f9a0a..c01f97d 100644 --- a/tinyuz_demo.cpp +++ b/tinyuz_demo.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "../HDiffPatch/libParallel/parallel_import.h" #include "../HDiffPatch/_clock_for_demo.h" //in HDiffPatch #include "../HDiffPatch/_atosize.h" #include "../HDiffPatch/file_for_patch.h" @@ -35,7 +36,8 @@ typedef enum TTinyuzResult { } TTinyuzResult; int tinyuz_cmd_line(int argc, const char * argv[]); -int tinyuz_by_file(const char* inputFile,const char* out_inputFile,bool isCompress,size_t runWithSize,bool isNeedLiteralLine); +int tinyuz_by_file(const char* inputFile,const char* out_inputFile,bool isCompress, + bool isNeedLiteralLine,size_t runWithSize,size_t threadNum); static void printVersion(){ printf("tinyuz v" TINYUZ_VERSION_STRING "\n"); @@ -46,7 +48,13 @@ static void printHelpInfo(){ "deccompress: tinyuz -d[-cacheSize[k|m]] inputFile outputFile\n" " Note: -c is default compressor;\n" " But if your compile deccompressor source code, set tuz_isNeedLiteralLine=0,\n" - " then must used -ci compressor."); + " then must used -ci compressor.\n" +#if (_IS_USED_MULTITHREAD) + " -p-parallelThreadNumber\n" + " if parallelThreadNumber>1 then open multi-thread Parallel compress mode;\n" + " DEFAULT -p-4; multi-thread requires more memory!\n" +#endif + ); } #if (_IS_NEED_MAIN) @@ -73,9 +81,14 @@ int main(int argc,char* argv[]){ #define _kNULL_VALUE ((tuz_BOOL)(-1)) #define _kNULL_SIZE (~(size_t)0) +#define _THREAD_NUMBER_NULL 0 +#define _THREAD_NUMBER_MIN 1 +#define _THREAD_NUMBER_DEFUALT 4 +#define _THREAD_NUMBER_MAX (1<<8) + int tinyuz_cmd_line(int argc, const char * argv[]){ printVersion(); - if (argc!=4){ + if (argc<4) { printHelpInfo(); return TINYUZ_OPTIONS_ERROR; } @@ -83,6 +96,7 @@ int tinyuz_cmd_line(int argc, const char * argv[]){ tuz_BOOL isCi=tuz_FALSE; tuz_BOOL isCompress=_kNULL_VALUE; size_t runWithSize=_kNULL_SIZE; + size_t threadNum=_THREAD_NUMBER_NULL; std::vector arg_values; for (int i=1; i=_THREAD_NUMBER_MIN,"-p-?"); + } break; +#endif default: { _options_check(tuz_FALSE,"-?"); } break; @@ -121,6 +143,11 @@ int tinyuz_cmd_line(int argc, const char * argv[]){ _options_check(arg_values.size()==2,"must input two files"); _options_check(isCompress!=_kNULL_VALUE,"must run with -c or -d"); + if (threadNum==_THREAD_NUMBER_NULL) + threadNum=_THREAD_NUMBER_DEFUALT; + else if (threadNum>_THREAD_NUMBER_MAX) + threadNum=_THREAD_NUMBER_MAX; + if (runWithSize!=_kNULL_SIZE){ const size_t minSize=isCompress?1:2; const size_t maxSize=tuz_kMaxOfDictSize; @@ -132,7 +159,8 @@ int tinyuz_cmd_line(int argc, const char * argv[]){ } bool isNeedLiteralLine=(!isCi); - return tinyuz_by_file(arg_values[0],arg_values[1],isCompress?true:false,runWithSize,isNeedLiteralLine); + return tinyuz_by_file(arg_values[0],arg_values[1],isCompress?true:false, + isNeedLiteralLine,runWithSize,threadNum); } struct TTuzListener{ @@ -189,7 +217,8 @@ TTinyuzResult _tuz_decompress_stream(const hpatch_TStreamOutput* out_code, std::string erri=std::string()+errorInfo+" ERROR!\n"; \ if (!(value)){ hpatch_printStdErrPath_utf8(erri.c_str()); _check_on_error(errorType); } } -int tinyuz_by_file(const char* inputFile,const char* outputFile,bool isCompress,size_t runWithSize,bool isNeedLiteralLine){ +int tinyuz_by_file(const char* inputFile,const char* outputFile,bool isCompress, + bool isNeedLiteralLine,size_t runWithSize,size_t threadNum){ int _isInClear=tuz_FALSE; TTinyuzResult result=TINYUZ_SUCCESS; tuz_byte* temp_cache=0; @@ -214,6 +243,7 @@ int tinyuz_by_file(const char* inputFile,const char* outputFile,bool isCompress, tuz_TCompressProps props=tuz_kDefaultCompressProps; props.dictSize=runWithSize; props.isNeedLiteralLine=isNeedLiteralLine; + props.threadNum=threadNum; outputSize=tuz_compress(&outputData.base,&inputData.base,&props); assert(outputSize==outputData.out_length); }catch(const std::exception& e){ From eb7ba8dcd609ca43beca7f9579eb0bfd4323cb01 Mon Sep 17 00:00:00 2001 From: sisong Date: Sat, 16 Sep 2023 09:01:31 +0800 Subject: [PATCH 3/4] uopdate other builders: compress support muti-thread; --- Makefile | 26 ++++++++++++++++--- builds/codeblocks/tinyuz.cbp | 6 ++++- .../xcode/libtinyuz.xcodeproj/project.pbxproj | 12 +++++++-- .../xcode/speedTest.xcodeproj/project.pbxproj | 6 ++--- builds/xcode/tinyuz.xcodeproj/project.pbxproj | 7 +++-- .../xcode/unitTest.xcodeproj/project.pbxproj | 6 ++--- compress/tuz_enc.cpp | 2 +- 7 files changed, 46 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 4d039c2..f194494 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,14 @@ # args +MT := 1 STATIC_CPP := 0 # used clang? -CL := 0 +CL := 0 # build with -m32? M32 := 0 # build for out min size MINS := 0 ifeq ($(OS),Windows_NT) # mingw? - CC := gcc + CC := gcc endif @@ -17,6 +18,13 @@ HDP_OBJ := \ $(HDP_PATH)/libHDiffPatch/HPatch/patch.o \ $(HDP_PATH)/file_for_patch.o +ifeq ($(MT),0) +else + HDP_OBJ += \ + $(HDP_PATH)/libParallel/parallel_import.o \ + $(HDP_PATH)/libParallel/parallel_channel.o +endif + TINY_OBJ := \ decompress/tuz_dec.o \ compress/tuz_enc.o \ @@ -27,7 +35,15 @@ TINY_OBJ := \ $(HDP_OBJ) DEF_FLAGS := \ - -O3 -DNDEBUG -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -D_IS_USED_MULTITHREAD=0 + -O3 -DNDEBUG -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 + +ifeq ($(MT),0) + DEF_FLAGS += -D_IS_USED_MULTITHREAD=0 +else + DEF_FLAGS += \ + -D_IS_USED_MULTITHREAD=1 \ + -D_IS_USED_CPP11THREAD=1 +endif ifeq ($(M32),0) else @@ -45,6 +61,10 @@ else endif TINY_LINK := +ifeq ($(MT),0) +else + TINY_LINK += -lpthread # link pthread +endif ifeq ($(M32),0) else TINY_LINK += -m32 diff --git a/builds/codeblocks/tinyuz.cbp b/builds/codeblocks/tinyuz.cbp index 8023ba0..9f53211 100644 --- a/builds/codeblocks/tinyuz.cbp +++ b/builds/codeblocks/tinyuz.cbp @@ -39,8 +39,10 @@ - + + + @@ -48,6 +50,8 @@ + + diff --git a/builds/xcode/libtinyuz.xcodeproj/project.pbxproj b/builds/xcode/libtinyuz.xcodeproj/project.pbxproj index e17aa6a..0f30da6 100644 --- a/builds/xcode/libtinyuz.xcodeproj/project.pbxproj +++ b/builds/xcode/libtinyuz.xcodeproj/project.pbxproj @@ -9,6 +9,8 @@ /* Begin PBXBuildFile section */ 0D7FEA82282A676D0029772D /* patch.c in Sources */ = {isa = PBXBuildFile; fileRef = 0D7FEA81282A676D0029772D /* patch.c */; }; 0D7FEA84282A67830029772D /* divsufsort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0D7FEA83282A67830029772D /* divsufsort.cpp */; }; + 0D7FEA86282A67850029772D /* parallel_channel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0D7FEA85282A67850029772D /* parallel_channel.cpp */; }; + 0D7FEA88282A67870029772D /* parallel_import.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0D7FEA87282A67870029772D /* parallel_import.cpp */; }; D6F3B7C82427B30E0066B989 /* tuz_enc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D6F3B7BB2427B30E0066B989 /* tuz_enc.cpp */; }; D6F3B7C92427B30E0066B989 /* tuz_enc_types.h in Headers */ = {isa = PBXBuildFile; fileRef = D6F3B7BC2427B30E0066B989 /* tuz_enc_types.h */; }; D6F3B7CA2427B30E0066B989 /* tuz_enc.h in Headers */ = {isa = PBXBuildFile; fileRef = D6F3B7BD2427B30E0066B989 /* tuz_enc.h */; }; @@ -30,6 +32,8 @@ /* Begin PBXFileReference section */ 0D7FEA81282A676D0029772D /* patch.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = patch.c; path = ../../../HDiffPatch/libHDiffPatch/HPatch/patch.c; sourceTree = ""; }; 0D7FEA83282A67830029772D /* divsufsort.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = divsufsort.cpp; path = ../../../HDiffPatch/libHDiffPatch/HDiff/private_diff/libdivsufsort/divsufsort.cpp; sourceTree = ""; }; + 0D7FEA85282A67850029772D /* parallel_channel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parallel_channel.cpp; path = ../../../HDiffPatch/libParallel/parallel_channel.cpp; sourceTree = ""; }; + 0D7FEA87282A67870029772D /* parallel_import.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parallel_import.cpp; path = ../../../HDiffPatch/libParallel/parallel_import.cpp; sourceTree = ""; }; 0D7FEA9C282A6BC60029772D /* tinyuz.xcworkspace */ = {isa = PBXFileReference; lastKnownFileType = wrapper.workspace; path = tinyuz.xcworkspace; sourceTree = ""; }; D6F3B7B32427B2D20066B989 /* liblibtinyuz.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblibtinyuz.a; sourceTree = BUILT_PRODUCTS_DIR; }; D6F3B7BB2427B30E0066B989 /* tuz_enc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tuz_enc.cpp; sourceTree = ""; }; @@ -124,6 +128,8 @@ isa = PBXGroup; children = ( 0D7FEA83282A67830029772D /* divsufsort.cpp */, + 0D7FEA85282A67850029772D /* parallel_channel.cpp */, + 0D7FEA87282A67870029772D /* parallel_import.cpp */, 0D7FEA81282A676D0029772D /* patch.c */, ); name = HDiffPatch; @@ -213,6 +219,8 @@ D6F3B7D12427B30E0066B989 /* tuz_enc_match.cpp in Sources */, D6F3B7CD2427B30E0066B989 /* tuz_enc_code.cpp in Sources */, 0D7FEA84282A67830029772D /* divsufsort.cpp in Sources */, + 0D7FEA86282A67850029772D /* parallel_channel.cpp in Sources */, + 0D7FEA88282A67870029772D /* parallel_import.cpp in Sources */, D6F3B7C82427B30E0066B989 /* tuz_enc.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -226,6 +234,7 @@ ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -264,7 +273,6 @@ "DEBUG=1", "_LARGEFILE_SOURCE", "_FILE_OFFSET_BITS=64", - "_IS_USED_MULTITHREAD=0", "$(inherited)", ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; @@ -288,6 +296,7 @@ ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -324,7 +333,6 @@ "NDEBUG", "_LARGEFILE_SOURCE", "_FILE_OFFSET_BITS=64", - "_IS_USED_MULTITHREAD=0", "$(inherited)", ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; diff --git a/builds/xcode/speedTest.xcodeproj/project.pbxproj b/builds/xcode/speedTest.xcodeproj/project.pbxproj index 468d796..697e5cc 100644 --- a/builds/xcode/speedTest.xcodeproj/project.pbxproj +++ b/builds/xcode/speedTest.xcodeproj/project.pbxproj @@ -142,7 +142,7 @@ ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -174,7 +174,6 @@ DEBUG_INFORMATION_FORMAT = dwarf; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; - GCC_C_LANGUAGE_STANDARD = gnu11; GCC_DYNAMIC_NO_PIC = NO; GCC_NO_COMMON_BLOCKS = YES; GCC_OPTIMIZATION_LEVEL = 0; @@ -203,7 +202,7 @@ ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -235,7 +234,6 @@ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; - GCC_C_LANGUAGE_STANDARD = gnu11; GCC_NO_COMMON_BLOCKS = YES; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; diff --git a/builds/xcode/tinyuz.xcodeproj/project.pbxproj b/builds/xcode/tinyuz.xcodeproj/project.pbxproj index c2648de..6d411f2 100644 --- a/builds/xcode/tinyuz.xcodeproj/project.pbxproj +++ b/builds/xcode/tinyuz.xcodeproj/project.pbxproj @@ -140,7 +140,7 @@ ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++17"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -171,7 +171,6 @@ DEBUG_INFORMATION_FORMAT = dwarf; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; - GCC_C_LANGUAGE_STANDARD = gnu11; GCC_DYNAMIC_NO_PIC = NO; GCC_NO_COMMON_BLOCKS = YES; GCC_OPTIMIZATION_LEVEL = 0; @@ -199,7 +198,7 @@ ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++17"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -232,7 +231,6 @@ DEPLOYMENT_POSTPROCESSING = YES; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; - GCC_C_LANGUAGE_STANDARD = gnu11; GCC_NO_COMMON_BLOCKS = YES; GCC_PREPROCESSOR_DEFINITIONS = NDEBUG; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; @@ -241,6 +239,7 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; + LLVM_LTO = YES; MACOSX_DEPLOYMENT_TARGET = 10.9; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; diff --git a/builds/xcode/unitTest.xcodeproj/project.pbxproj b/builds/xcode/unitTest.xcodeproj/project.pbxproj index 9eb6d1b..6c16081 100644 --- a/builds/xcode/unitTest.xcodeproj/project.pbxproj +++ b/builds/xcode/unitTest.xcodeproj/project.pbxproj @@ -138,7 +138,7 @@ ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -170,7 +170,6 @@ DEBUG_INFORMATION_FORMAT = dwarf; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; - GCC_C_LANGUAGE_STANDARD = gnu11; GCC_DYNAMIC_NO_PIC = NO; GCC_NO_COMMON_BLOCKS = YES; GCC_OPTIMIZATION_LEVEL = 0; @@ -199,7 +198,7 @@ ALWAYS_SEARCH_USER_PATHS = NO; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; - CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; CLANG_CXX_LIBRARY = "libc++"; CLANG_ENABLE_MODULES = YES; CLANG_ENABLE_OBJC_ARC = YES; @@ -231,7 +230,6 @@ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; - GCC_C_LANGUAGE_STANDARD = gnu11; GCC_NO_COMMON_BLOCKS = YES; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; diff --git a/compress/tuz_enc.cpp b/compress/tuz_enc.cpp index 8dc19d9..d7d5131 100644 --- a/compress/tuz_enc.cpp +++ b/compress/tuz_enc.cpp @@ -212,7 +212,7 @@ hpatch_StreamPos_t tuz_compress(const hpatch_TStreamOutput* out_code,const hpatc _codeList.resize(threadNum+1+threadNum/2); for (size_t i=0;i<_codeList.size();++i) checkv(mt.work_chan.send(&_codeList[i],true)); - mt.start_threads(threadNum,_tuz_compress_mt,&mt,true); + mt.start_threads((int)threadNum,_tuz_compress_mt,&mt,true); mt.wait_all_thread_end(); checkv(!mt.is_on_error()); From 959061a1983287b888b5c2d8504b7ba5994fcd05 Mon Sep 17 00:00:00 2001 From: sisong Date: Sat, 16 Sep 2023 09:07:40 +0800 Subject: [PATCH 4/4] update version; fix CI make on macos; --- Makefile | 2 +- README.md | 2 +- README_cn.md | 2 +- decompress/tuz_types.h | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index f194494..7a6d461 100644 --- a/Makefile +++ b/Makefile @@ -84,7 +84,7 @@ else endif CFLAGS += $(DEF_FLAGS) -CXXFLAGS += $(DEF_FLAGS) +CXXFLAGS += $(DEF_FLAGS) -std=c++11 .PHONY: all install clean diff --git a/README.md b/README.md index ecac814..9b00438 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # [tinyuz](https://github.com/sisong/tinyuz) -[![release](https://img.shields.io/badge/release-v0.9.4-blue.svg)](https://github.com/sisong/tinyuz/releases) +[![release](https://img.shields.io/badge/release-v1.0.0-blue.svg)](https://github.com/sisong/tinyuz/releases) [![license](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/sisong/tinyuz/blob/master/LICENSE) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-blue.svg)](https://github.com/sisong/tinyuz/pulls) [![+issue Welcome](https://img.shields.io/github/issues-raw/sisong/tinyuz?color=green&label=%2Bissue%20welcome)](https://github.com/sisong/tinyuz/issues) diff --git a/README_cn.md b/README_cn.md index d3a843b..49e4008 100644 --- a/README_cn.md +++ b/README_cn.md @@ -1,5 +1,5 @@ # [tinyuz](https://github.com/sisong/tinyuz) -[![release](https://img.shields.io/badge/release-v0.9.4-blue.svg)](https://github.com/sisong/tinyuz/releases) +[![release](https://img.shields.io/badge/release-v1.0.0-blue.svg)](https://github.com/sisong/tinyuz/releases) [![license](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/sisong/tinyuz/blob/master/LICENSE) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-blue.svg)](https://github.com/sisong/tinyuz/pulls) [![+issue Welcome](https://img.shields.io/github/issues-raw/sisong/tinyuz?color=green&label=%2Bissue%20welcome)](https://github.com/sisong/tinyuz/issues) diff --git a/decompress/tuz_types.h b/decompress/tuz_types.h index dec30bd..1c069c9 100644 --- a/decompress/tuz_types.h +++ b/decompress/tuz_types.h @@ -37,9 +37,9 @@ extern "C" { #endif -#define TINYUZ_VERSION_MAJOR 0 -#define TINYUZ_VERSION_MINOR 9 -#define TINYUZ_VERSION_RELEASE 4 +#define TINYUZ_VERSION_MAJOR 1 +#define TINYUZ_VERSION_MINOR 0 +#define TINYUZ_VERSION_RELEASE 0 #define _TINYUZ_VERSION TINYUZ_VERSION_MAJOR.TINYUZ_VERSION_MINOR.TINYUZ_VERSION_RELEASE #define _TINYUZ_QUOTE(str) #str