Skip to content

Commit

Permalink
fixed copy of the external table at import table for index from the d…
Browse files Browse the repository at this point in the history
…ata directory; fixed #2154
  • Loading branch information
tomatolog committed May 10, 2024
1 parent ed79ea0 commit b5a3b7f
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 121 deletions.
93 changes: 93 additions & 0 deletions src/indexfiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "fileio.h"
#include "fileutils.h"
#include "sphinxint.h"
#include "tokenizer/tokenizer.h"

static IndexFileExt_t g_dIndexFilesExts[SPH_EXT_TOTAL] =
{
Expand Down Expand Up @@ -315,3 +316,95 @@ bool IndexFiles_c::ReadKlistTargets ( StrVec_t & dTargets, const char * szType )

return true;
}


bool FixupCopyExternalFiles ( const CSphString & sDstPath, const CSphString & sSrcPath, int iPostfix, ISphTokenizer * pTokenizer, CSphDict * pDict, StrVec_t & dCopied, CSphString & sError )
{
struct Rename_t
{
CSphString m_sSrc;
CSphString m_sDst;
};

CSphVector<Rename_t> dExtFiles;
if ( pTokenizer && !pTokenizer->GetSettings().m_sSynonymsFile.IsEmpty() )
{
const char * szRenameTo = "exceptions.txt";
auto & tItem = dExtFiles.Add();
tItem.m_sSrc = pTokenizer->GetSettings().m_sSynonymsFile;
tItem.m_sDst = szRenameTo;

CSphTokenizerSettings tSettings = pTokenizer->GetSettings();
tSettings.m_sSynonymsFile = szRenameTo;
pTokenizer->Setup ( tSettings );
}

if ( pDict )
{
const CSphString & sStopwords = pDict->GetSettings().m_sStopwords;
if ( !sStopwords.IsEmpty() )
{
StringBuilder_c sNewStopwords(" ");
StrVec_t dStops = sphSplit ( sStopwords.cstr(), sStopwords.Length(), " \t," );
ARRAY_FOREACH ( i, dStops )
{
auto & tItem = dExtFiles.Add();
tItem.m_sSrc = dStops[i];
if ( iPostfix!=-1 )
tItem.m_sDst.SetSprintf ( "stopwords_chunk%d_%d.txt", iPostfix, i );
else
tItem.m_sDst.SetSprintf ( "stopwords_%d.txt", i );

sNewStopwords << tItem.m_sDst;
}

CSphDictSettings tSettings = pDict->GetSettings();
tSettings.m_sStopwords = sNewStopwords.cstr();
pDict->Setup ( tSettings );
}

if ( pDict->GetSettings().m_dWordforms.GetLength() )
{
StrVec_t dNewWordforms;
ARRAY_FOREACH ( i, pDict->GetSettings().m_dWordforms )
{
auto & tItem = dExtFiles.Add();
tItem.m_sSrc = pDict->GetSettings().m_dWordforms[i];
if ( iPostfix!=-1 )
tItem.m_sDst.SetSprintf ( "wordforms_chunk%d_%d.txt", iPostfix, i );
else
tItem.m_sDst.SetSprintf ( "wordforms_%d.txt", i );

dNewWordforms.Add ( tItem.m_sDst );
}

CSphDictSettings tSettings = pDict->GetSettings();
tSettings.m_dWordforms = dNewWordforms;
pDict->Setup ( tSettings );
}
}

StringBuilder_c sDst;
StringBuilder_c sSrc;
for ( const auto & tItem : dExtFiles )
{
sDst.Clear();
if ( !IsPathAbsolute ( tItem.m_sDst ) )
sDst.Appendf ( "%s%s", sDstPath.cstr(), tItem.m_sDst.cstr() );
else
sDst << tItem.m_sDst.cstr();

sSrc.Clear();
if ( !IsPathAbsolute ( tItem.m_sSrc ) )
sSrc.Appendf ( "%s%s", sSrcPath.cstr(), tItem.m_sSrc.cstr() );
else
sSrc << tItem.m_sSrc;

if ( !CopyFile ( sSrc.cstr(), sDst.cstr(), sError ) )
return false;

dCopied.Add ( CSphString ( sDst ) );
}

return true;
}
2 changes: 2 additions & 0 deletions src/indexfiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,5 @@ class IndexFiles_c : public IndexFileBase_c
// if prev op fails with fatal error - log the message and terminate
CSphString FatalMsg(const char * szMsg=nullptr);
};

bool FixupCopyExternalFiles ( const CSphString & sDstPath, const CSphString & sSrcPath, int iPostfix, ISphTokenizer * pTokenizer, CSphDict * pDict, StrVec_t & dCopied, CSphString & sError );
10 changes: 4 additions & 6 deletions src/searchdconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ static std::unique_ptr<CSphIndex> TryToPreallocPq ( const CSphString & sIndex, c
}


static bool CopyExternalFiles ( const CSphString & sIndex, const CSphString & sNewIndexPath, StrVec_t & dCopied, bool & bPQ, StrVec_t & dWarnings, CSphString & sError )
static bool CopyExternalFiles ( const CSphString & sIndex, const CSphString & sNewIndexPath, const CSphString & sFromPath, StrVec_t & dCopied, bool & bPQ, StrVec_t & dWarnings, CSphString & sError )
{
bPQ = false;

Expand All @@ -875,11 +875,8 @@ static bool CopyExternalFiles ( const CSphString & sIndex, const CSphString & sN
bPQ = true;
}

if ( !pIndex->CopyExternalFiles ( 0, dCopied ) )
{
sError = pIndex->GetLastError();
if ( !pIndex->CopyExternalFiles ( -1, sFromPath, dCopied, sError ) )
return false;
}

return true;
}
Expand Down Expand Up @@ -925,7 +922,8 @@ bool CopyIndexFiles ( const CSphString & sIndex, const CSphString & sPathToIndex
dWipe.Add(sDest);
}

if ( !CopyExternalFiles ( sIndex, sNewIndexPath, dWipe, bPQ, dWarnings, sError ) )
CSphString sFromPath = GetPathOnly ( sPathToIndex );
if ( !CopyExternalFiles ( sIndex, sNewIndexPath, sFromPath, dWipe, bPQ, dWarnings, sError ) )
return false;

dWipe.Reset();
Expand Down
59 changes: 6 additions & 53 deletions src/sphinx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1232,7 +1232,7 @@ class CSphIndex_VLN : public CSphIndex, public IndexAlterHelper_c, public DebugC
int GetFieldId ( const CSphString & sName, DocstoreDataType_e eType ) const final;
Bson_t ExplainQuery ( const CSphString & sQuery ) const final;

bool CopyExternalFiles ( int iPostfix, StrVec_t & dCopied ) final;
bool CopyExternalFiles ( int iPostfix, const CSphString & sFromPath, StrVec_t & dCopied, CSphString & sError ) final;

HistogramContainer_c * Debug_GetHistograms() const override { return m_pHistograms; }
SI::Index_i * Debug_GetSI() const override { return m_pSIdx.get(); }
Expand Down Expand Up @@ -11668,58 +11668,11 @@ Bson_t CSphIndex_VLN::ExplainQuery ( const CSphString & sQuery ) const
}


bool CSphIndex_VLN::CopyExternalFiles ( int iPostfix, StrVec_t & dCopied )
bool CSphIndex_VLN::CopyExternalFiles ( int iPostfix, const CSphString & sFromPath, StrVec_t & dCopied, CSphString & sError )
{
CSphVector<std::pair<CSphString,CSphString>> dExtFiles;
if ( m_pTokenizer && !m_pTokenizer->GetSettings().m_sSynonymsFile.IsEmpty() )
{
CSphString sRenameTo;
sRenameTo.SetSprintf ( "exceptions_chunk%d.txt", iPostfix );
dExtFiles.Add ( { m_pTokenizer->GetSettings().m_sSynonymsFile, sRenameTo } );
const_cast<CSphTokenizerSettings &>(m_pTokenizer->GetSettings()).m_sSynonymsFile = sRenameTo;
}

if ( m_pDict )
{
const CSphString & sStopwords = m_pDict->GetSettings().m_sStopwords;
if ( !sStopwords.IsEmpty() )
{
StringBuilder_c sNewStopwords(" ");
StrVec_t dStops = sphSplit ( sStopwords.cstr(), sStopwords.Length(), " \t," );
ARRAY_FOREACH ( i, dStops )
{
CSphString sTmp;
sTmp.SetSprintf ( "stopwords_chunk%d_%d.txt", iPostfix, i );
dExtFiles.Add ( { dStops[i], sTmp } );

sNewStopwords << sTmp;
}

const_cast<CSphDictSettings &>(m_pDict->GetSettings()).m_sStopwords = sNewStopwords.cstr();
}

StrVec_t dNewWordforms;
ARRAY_FOREACH ( i, m_pDict->GetSettings().m_dWordforms )
{
CSphString sTmp;
sTmp.SetSprintf ( "wordforms_chunk%d_%d.txt", iPostfix, i );
dExtFiles.Add( { m_pDict->GetSettings().m_dWordforms[i], sTmp } );
dNewWordforms.Add(sTmp);
}

const_cast<CSphDictSettings &>(m_pDict->GetSettings()).m_dWordforms = dNewWordforms;
}

CSphString sPathOnly = GetPathOnly ( GetFilebase() );
for ( const auto & i : dExtFiles )
{
CSphString sDest;
sDest.SetSprintf ( "%s%s", sPathOnly.cstr(), i.second.cstr() );
if ( !CopyFile ( i.first, sDest, m_sLastError ) )
return false;

dCopied.Add(sDest);
}
CSphString sDstPath = GetPathOnly ( GetFilebase() );
if ( !FixupCopyExternalFiles ( sDstPath, sFromPath, iPostfix, m_pTokenizer.Ptr(), m_pDict.Ptr(), dCopied, sError ) )
return false;

BuildHeader_t tBuildHeader(m_tStats);
*(DictHeader_t*)&tBuildHeader = *(DictHeader_t*)&m_tWordlist;
Expand All @@ -11736,7 +11689,7 @@ bool CSphIndex_VLN::CopyExternalFiles ( int iPostfix, StrVec_t & dCopied )
tWriteHeader.m_pFieldLens = m_dFieldLens.Begin();

// save the header
return IndexBuildDone ( tBuildHeader, tWriteHeader, GetFilename(SPH_EXT_SPH), m_sLastError );
return IndexBuildDone ( tBuildHeader, tWriteHeader, GetFilename(SPH_EXT_SPH), sError );
}

bool CSphIndex_VLN::AlterSI ( CSphString & sError )
Expand Down
2 changes: 1 addition & 1 deletion src/sphinx.h
Original file line number Diff line number Diff line change
Expand Up @@ -1283,7 +1283,7 @@ class CSphIndex : public ISphKeywordsStat, public IndexSegment_c, public Docstor

// put external files (if any) into index folder
// copy the rest of the external files to index folder
virtual bool CopyExternalFiles ( int iPostfix, StrVec_t & dCopied ) { return true; }
virtual bool CopyExternalFiles ( int iPostfix, const CSphString & sFromPath, StrVec_t & dCopied, CSphString & sError ) { return true; }

// used for query optimizer calibration
virtual HistogramContainer_c * Debug_GetHistograms() const { return nullptr; }
Expand Down
69 changes: 8 additions & 61 deletions src/sphinxrt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1332,7 +1332,7 @@ class RtIndex_c final : public RtIndex_i, public ISphNoncopyable, public ISphWor
bool Reconfigure ( CSphReconfigureSetup & tSetup ) final;
int64_t GetLastFlushTimestamp() const final;
void IndexDeleted() final { m_bIndexDeleted = true; }
bool CopyExternalFiles ( int iPostfix, StrVec_t & dCopied ) final;
bool CopyExternalFiles ( int iPostfix, const CSphString & sFromPath, StrVec_t & dCopied, CSphString & sError ) final;
void ProhibitSave() final;
void EnableSave() final;
void LockFileState ( CSphVector<CSphString> & dFiles ) final;
Expand Down Expand Up @@ -10300,74 +10300,21 @@ void RtIndex_c::GetIndexFiles ( StrVec_t& dFiles, StrVec_t& dExt, const Filename
dExt.Uniq(); // might be duplicates of tok \ dict files from disk chunks
}

bool RtIndex_c::CopyExternalFiles ( int /*iPostfix*/, StrVec_t & dCopied )
bool RtIndex_c::CopyExternalFiles ( int /*iPostfix*/, const CSphString & sFromPath, StrVec_t & dCopied, CSphString & sError )
{
struct Rename_t
{
CSphString m_sSrc;
CSphString m_sDst;
};

CSphVector<Rename_t> dExtFiles;
if ( m_pTokenizer && !m_pTokenizer->GetSettings().m_sSynonymsFile.IsEmpty() )
{
const char * szRenameTo = "exceptions.txt";
dExtFiles.Add ( { m_pTokenizer->GetSettings().m_sSynonymsFile, szRenameTo } );
const_cast<CSphTokenizerSettings &>(m_pTokenizer->GetSettings()).m_sSynonymsFile = szRenameTo;
}

if ( m_pDict )
{
const CSphString & sStopwords = m_pDict->GetSettings().m_sStopwords;
if ( !sStopwords.IsEmpty() )
{
StringBuilder_c sNewStopwords(" ");
StrVec_t dStops = sphSplit ( sStopwords.cstr(), sStopwords.Length(), " \t," );
ARRAY_FOREACH ( i, dStops )
{
CSphString sTmp;
sTmp.SetSprintf ( "stopwords_%d.txt", i );
dExtFiles.Add ( { dStops[i], sTmp } );

sNewStopwords << sTmp;
}

const_cast<CSphDictSettings &>(m_pDict->GetSettings()).m_sStopwords = sNewStopwords.cstr();
}

StrVec_t dNewWordforms;
ARRAY_FOREACH ( i, m_pDict->GetSettings().m_dWordforms )
{
CSphString sTmp;
sTmp.SetSprintf ( "wordforms_%d.txt", i );
dExtFiles.Add( { m_pDict->GetSettings().m_dWordforms[i], sTmp } );
dNewWordforms.Add(sTmp);
}

const_cast<CSphDictSettings &>(m_pDict->GetSettings()).m_dWordforms = dNewWordforms;
}

CSphString sPathOnly = GetPathOnly ( GetFilebase() );
for ( const auto & i : dExtFiles )
{
CSphString sDest;
sDest.SetSprintf ( "%s%s", sPathOnly.cstr(), i.m_sDst.cstr() );
if ( !CopyFile ( i.m_sSrc, sDest, m_sLastError ) )
return false;

dCopied.Add(sDest);
}
CSphString sDstPath = GetPathOnly ( GetFilebase() );
if ( !FixupCopyExternalFiles ( sDstPath, sFromPath, -1, m_pTokenizer.Ptr(), m_pDict.Ptr(), dCopied, sError ) )
return false;

SaveMeta();

auto pDiskChunks = m_tRtChunks.DiskChunks();
auto& dDiskChunks = *pDiskChunks;
ARRAY_FOREACH ( i, dDiskChunks )
if ( !dDiskChunks[i]->CastIdx().CopyExternalFiles ( i, dCopied ) )
{
m_sLastError = dDiskChunks[i]->Cidx().GetLastError();
{
if ( !dDiskChunks[i]->CastIdx().CopyExternalFiles ( i, sFromPath, dCopied, sError ) )
return false;
}
}

return true;
}
Expand Down

0 comments on commit b5a3b7f

Please sign in to comment.