Skip to content

Commit

Permalink
Refs #10247. Finish indexing algorithm.
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Wedel committed Dec 5, 2014
1 parent 5a0b0f2 commit 78d559c
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 84 deletions.
Expand Up @@ -24,25 +24,19 @@ struct MANTID_SINQ_DLL IndexCandidatePair
observed(),
candidate(),
positionMatch(0.0),
intensityMatch(0.0),
candidateCollectionIndex(0) { }

IndexCandidatePair(const PoldiPeak_sptr &measuredPeak, const PoldiPeak_sptr &candidatePeak, size_t index);

/// Comparison operator, scores are compared.
/// Comparison operator, position matches are compared.
bool operator <(const IndexCandidatePair &other) const
{
if(fabs(positionMatch - other.positionMatch) > 0.01) {
return positionMatch < other.positionMatch;
}

return intensityMatch < other.intensityMatch;
return positionMatch < other.positionMatch;
}

PoldiPeak_sptr observed;
PoldiPeak_sptr candidate;
double positionMatch;
double intensityMatch;
size_t candidateCollectionIndex;
};

Expand Down Expand Up @@ -122,9 +116,6 @@ class MANTID_SINQ_DLL PoldiIndexKnownCompounds : public API::Algorithm
void assignFwhmEstimates(const std::vector<PoldiPeakCollection_sptr> &peakCollections, const std::vector<double> &tolerances) const;
void assignFwhmEstimates(const PoldiPeakCollection_sptr &peakCollection, double tolerance) const;

// Integration
PoldiPeakCollection_sptr getIntegratedPeaks(const PoldiPeakCollection_sptr &rawPeaks);

// Indexing algorithm
void indexPeaks(const PoldiPeakCollection_sptr &measured, const std::vector<PoldiPeakCollection_sptr> &knownCompoundPeaks);

Expand All @@ -137,6 +128,8 @@ class MANTID_SINQ_DLL PoldiIndexKnownCompounds : public API::Algorithm
bool inPeakSet(const std::set<PoldiPeak_sptr> &peakSet, const PoldiPeak_sptr &peak) const;
void assignPeakIndex(const IndexCandidatePair &candidate);

// Finalization
PoldiPeakCollection_sptr getIntensitySortedPeakCollection(const PoldiPeakCollection_sptr &peaks) const;

PoldiPeakCollection_sptr m_measuredPeaks;
std::vector<PoldiPeakCollection_sptr> m_expectedPhases;
Expand Down
70 changes: 21 additions & 49 deletions Code/Mantid/Framework/SINQ/src/PoldiIndexKnownCompounds.cpp
Expand Up @@ -50,19 +50,9 @@ IndexCandidatePair::IndexCandidatePair(const PoldiPeak_sptr &measuredPeak, const

boost::math::normal positionDistribution(0.0, sigmaD);

positionMatch = (1.0 - boost::math::cdf(positionDistribution, differenceD)) * 2.0;
double candidateIntensity = candidatePeak->intensity();

double peakI = observed->intensity();
double sigmaI = 0.25 * fabs(peakI);
double differenceI = fabs(peakI - candidate->intensity());

boost::math::normal intensityDistribution(0.0, sigmaI);

intensityMatch = (1.0 - boost::math::cdf(intensityDistribution, differenceI)) * 2.0;

std::cout << MillerIndicesIO::toString(candidate->hkl()) << std::endl;
std::cout << candidate->intensity() << " " << peakI << " " << positionMatch << std::endl;
std::cout << candidate->d() << " " << peakD << " " << intensityMatch << std::endl << std::endl;
positionMatch = (1.0 - boost::math::cdf(positionDistribution, differenceD)) * 2.0 * candidateIntensity;
}

/// Default constructor
Expand Down Expand Up @@ -354,8 +344,6 @@ void PoldiIndexKnownCompounds::scaleToExperimentalValues(const std::vector<Poldi
maximumCalculatedIntensity = std::max(getMaximumIntensity(*it), maximumCalculatedIntensity);
}

std::cout << "MAX: " << maximumExperimentalIntensity << " " << maximumCalculatedIntensity << std::endl;

scaleIntensityEstimates(peakCollections, std::vector<double>(peakCollections.size(), maximumExperimentalIntensity / maximumCalculatedIntensity));
}

Expand Down Expand Up @@ -417,33 +405,6 @@ void PoldiIndexKnownCompounds::assignFwhmEstimates(const PoldiPeakCollection_spt
}
}

PoldiPeakCollection_sptr PoldiIndexKnownCompounds::getIntegratedPeaks(const PoldiPeakCollection_sptr &rawPeaks)
{
if(rawPeaks->intensityType() == PoldiPeakCollection::Integral) {
return rawPeaks;
}

PeakFunctionIntegrator integrator;

PoldiPeakCollection_sptr peaks = boost::make_shared<PoldiPeakCollection>(PoldiPeakCollection::Integral);
for(size_t i = 0; i < rawPeaks->peakCount(); ++i) {
PoldiPeak_sptr peak = rawPeaks->peak(i)->clone();

IPeakFunction_sptr peakFunction = boost::dynamic_pointer_cast<IPeakFunction>(FunctionFactory::Instance().createFunction(rawPeaks->getProfileFunctionName()));
peakFunction->setCentre(0.0);
peakFunction->setHeight(peak->intensity());
peakFunction->setFwhm(peak->fwhm(PoldiPeak::AbsoluteD));

IntegrationResult result = integrator.integrateInfinity(peakFunction);

peak->setIntensity(UncertainValue(result.result));

peaks->addPeak(peak);
}

return peaks;
}

/** Tries to index the supplied measured peaks
*
* The method tries to index the supplied measured peaks by finding peaks with similar d-spacings
Expand Down Expand Up @@ -589,7 +550,7 @@ void PoldiIndexKnownCompounds::assignCandidates(const std::vector<IndexCandidate

g_log.information() << " Candidate d=" << static_cast<double>(measuredPeak->d()) << " -> "
<< "Phase: " << currentCandidate.candidateCollectionIndex << " [" << MillerIndicesIO::toString(expectedPeak->hkl()) << "] (d=" << static_cast<double>(expectedPeak->d()) << "), "
<< "Score=(" << currentCandidate.positionMatch << ", " << currentCandidate.intensityMatch << "): ";
<< "Score=(" << currentCandidate.positionMatch << "): ";

/* If the peak has not been indexed yet, it is not stored in the set
* that holds measured peaks that are already indexed, so the candidate
Expand Down Expand Up @@ -646,6 +607,20 @@ void PoldiIndexKnownCompounds::assignPeakIndex(const IndexCandidatePair &candida
m_indexedPeaks[candidate.candidateCollectionIndex]->addPeak(candidate.observed);
}

PoldiPeakCollection_sptr PoldiIndexKnownCompounds::getIntensitySortedPeakCollection(const PoldiPeakCollection_sptr &peaks) const
{
std::vector<PoldiPeak_sptr> peakVector(peaks->peaks());

std::sort(peakVector.begin(), peakVector.end(), boost::bind<bool>(&PoldiPeak::greaterThan, _1, _2, &PoldiPeak::intensity));

PoldiPeakCollection_sptr sortedPeaks = boost::make_shared<PoldiPeakCollection>(peaks->intensityType());
for(size_t i = 0; i < peakVector.size(); ++i) {
sortedPeaks->addPeak(peakVector[i]->clone());
}

return sortedPeaks;
}

/** Initialize the algorithm's properties.
*/
void PoldiIndexKnownCompounds::init()
Expand Down Expand Up @@ -676,11 +651,8 @@ void PoldiIndexKnownCompounds::exec()

DataObjects::TableWorkspace_sptr peakTableWorkspace = getProperty("InputWorkspace");

PoldiPeakCollection_sptr unindexedRawPeaks = boost::make_shared<PoldiPeakCollection>(peakTableWorkspace);
g_log.information() << " Number of peaks: " << unindexedRawPeaks->peakCount() << std::endl;

g_log.information() << " Integrating peaks..." << std::endl;
PoldiPeakCollection_sptr unindexedPeaks = getIntegratedPeaks(unindexedRawPeaks);
PoldiPeakCollection_sptr unindexedPeaks = boost::make_shared<PoldiPeakCollection>(peakTableWorkspace);
g_log.information() << " Number of peaks: " << unindexedPeaks->peakCount() << std::endl;

std::vector<Workspace_sptr> workspaces = getWorkspaces(getProperty("CompoundWorkspaces"));
std::vector<PoldiPeakCollection_sptr> peakCollections = getPeakCollections(workspaces);
Expand Down Expand Up @@ -721,13 +693,13 @@ void PoldiIndexKnownCompounds::exec()
WorkspaceGroup_sptr outputWorkspaces = boost::make_shared<WorkspaceGroup>();

for(size_t i = 0; i < m_indexedPeaks.size(); ++i) {
ITableWorkspace_sptr tableWs = m_indexedPeaks[i]->asTableWorkspace();
PoldiPeakCollection_sptr intensitySorted = getIntensitySortedPeakCollection(m_indexedPeaks[i]);
ITableWorkspace_sptr tableWs = intensitySorted->asTableWorkspace();
AnalysisDataService::Instance().addOrReplace("Indexed_" + m_phaseNames[i], tableWs);

outputWorkspaces->addWorkspace(tableWs);
}


ITableWorkspace_sptr unindexedTableWs = m_unindexedPeaks->asTableWorkspace();

std::string inputWorkspaceName = getPropertyValue("InputWorkspace");
Expand Down
29 changes: 12 additions & 17 deletions Code/Mantid/Framework/SINQ/test/PoldiIndexKnownCompoundsTest.h
Expand Up @@ -334,8 +334,10 @@ class PoldiIndexKnownCompoundsTest : public CxxTest::TestSuite
PoldiPeakCollection_sptr indexedSilicon = PoldiPeakCollectionHelpers::createTheoreticalPeakCollectionSilicon();
TS_ASSERT_THROWS_NOTHING(alg.scaleIntensityEstimates(indexedSilicon, 2.0));

PoldiPeakCollection_sptr reference = PoldiPeakCollectionHelpers::createTheoreticalPeakCollectionSilicon();

for(size_t i = 0; i < indexedSilicon->peakCount(); ++i) {
TS_ASSERT_EQUALS(indexedSilicon->peak(i)->intensity(), 2.0);
TS_ASSERT_EQUALS(indexedSilicon->peak(i)->intensity(), 2.0 * reference->peak(i)->intensity());
}

// test override with vectors
Expand Down Expand Up @@ -498,7 +500,7 @@ class PoldiIndexKnownCompoundsTest : public CxxTest::TestSuite
IndexCandidatePair defaultConstructed;
TS_ASSERT(!defaultConstructed.observed);
TS_ASSERT(!defaultConstructed.observed);
TS_ASSERT_EQUALS(defaultConstructed.score, 0.0);
TS_ASSERT_EQUALS(defaultConstructed.positionMatch, 0.0);
TS_ASSERT_EQUALS(defaultConstructed.candidateCollectionIndex, 0);
}

Expand All @@ -511,9 +513,7 @@ class PoldiIndexKnownCompoundsTest : public CxxTest::TestSuite
PoldiPeak_sptr peak = PoldiPeak::create(Conversions::dToQ(2.0));

/* Two candidates for indexing with different
* distances from the peak position.
* They both have a contribution of 1.0 and equal , so score is only
* determined by |d(peak)-d(candidate)|.
* distances from the peak position, but equal tolerances.
*/
PoldiPeak_sptr candidate1 = PoldiPeak::create(Conversions::dToQ(2.04), 1.0);
candidate1->setFwhm(UncertainValue(alg.sigmaToFwhm(0.1)), PoldiPeak::Relative);
Expand All @@ -533,16 +533,10 @@ class PoldiIndexKnownCompoundsTest : public CxxTest::TestSuite
TS_ASSERT_EQUALS(peakCandidate1.candidate, candidate1);
TS_ASSERT_EQUALS(peakCandidate1.candidateCollectionIndex, 1);

/* Score is a gaussian with A = I(candidate), sigma from FWHM(candidate)
* and x0 = d(candidate). x is d(peak).
*/
TS_ASSERT_DELTA(peakCandidate1.score, 0.98096021516673242965, 1e-12);

IndexCandidatePair peakCandidate2(peak, candidate2, 1);
TS_ASSERT_EQUALS(peakCandidate2.observed, peak);
TS_ASSERT_EQUALS(peakCandidate2.candidate, candidate2);
TS_ASSERT_EQUALS(peakCandidate2.candidateCollectionIndex, 1);
TS_ASSERT_DELTA(peakCandidate2.score, 0.91685535573202892876, 1e-12);

// Test comparison operator
TS_ASSERT(peakCandidate2 < peakCandidate1);
Expand Down Expand Up @@ -574,15 +568,16 @@ class PoldiIndexKnownCompoundsTest : public CxxTest::TestSuite
TS_ASSERT_THROWS_NOTHING(alg.indexPeaks(measured, expected));
TS_ASSERT_EQUALS(alg.m_unindexedPeaks->peakCount(), 2);

PoldiPeakCollection_sptr indexedSi = alg.m_indexedPeaks[0];
PoldiPeakCollection_sptr indexedSi = alg.getIntensitySortedPeakCollection(alg.m_indexedPeaks[0]);
TS_ASSERT_EQUALS(indexedSi->peakCount(), 4);

TS_ASSERT_EQUALS(indexedSi->peak(0)->hkl(), MillerIndices(3, 3, 1));
// Peaks are ordered by intensity
TS_ASSERT_EQUALS(indexedSi->peak(3)->hkl(), MillerIndices(3, 3, 1));
// Make sure this is the correct peak (not the one introduced above)
TS_ASSERT_EQUALS(indexedSi->peak(1)->hkl(), MillerIndices(3, 1, 1));
TS_ASSERT_EQUALS(indexedSi->peak(2)->hkl(), MillerIndices(2, 2, 0));
TS_ASSERT_EQUALS(indexedSi->peak(3)->hkl(), MillerIndices(4, 2, 2));
TS_ASSERT_EQUALS(indexedSi->peak(3)->d(), 1.108644);
TS_ASSERT_EQUALS(indexedSi->peak(2)->hkl(), MillerIndices(3, 1, 1));
TS_ASSERT_EQUALS(indexedSi->peak(1)->hkl(), MillerIndices(2, 2, 0));
TS_ASSERT_EQUALS(indexedSi->peak(0)->hkl(), MillerIndices(4, 2, 2));
TS_ASSERT_EQUALS(indexedSi->peak(0)->d(), 1.108644);
}

private:
Expand Down
Expand Up @@ -14,16 +14,13 @@ In many experiments at POLDI it is well known which compounds are present in a s

The major difficulty arises from the fact that typical POLDI experiments involve lattice deformations, which causes a shift in peak positions. For a single phase material with few peaks, this can be handled by making the tolerance larger - but only to a certain degree. If cell parameters are too large and consequently lattice plane spacings at low d-values too narrow, the indexing becomes ambiguous. Of course, introducing additional phases makes this situation even worse, leading to even more ambiguity.

To resolve these ambiguities, this algorithm assumes that a peak must occur somewhere in the vicinity of the theoretical position. As the distance between measured and calculated position grows, it becomes less and less likely that the measured peak corresponds to the theoretical one. That behavior can be modeled by assuming a Gaussian distribution around the position :math:`d_0` of the theoretical peak:
To resolve these ambiguities, this algorithm assumes that a peak must occur somewhere in the vicinity of the theoretical position. As the distance between measured and calculated position grows, it becomes less and less likely that the measured peak corresponds to the theoretical one. By assuming a normal distribution centered at 0 and a standard deviation corresponding to an acceptable tolerance, the probability of finding the peak within this distance can be calculated.

.. math::
f(d) = I\cdot\exp\left[-\frac{1}{2}\left(\frac{d - d_0}{\sigma}\right)^2\right]
The standard deviation :math:`\sigma` of the distribution is simply :math:`\delta\cdot d_0`. To determine whether a peak can be indexed by a reflection, the algorithm checks if observed and calculated position are closer than :math:`3\sigma`. If :math:`n` reflections are closer to an observed peak at position :math:`d`, all are considered a candidate for correct assignment. In order to decide which reflection is most likely to be correct, the probability is calculated for each combination of measured and calculated position and the candidate with highest value is used to indexed the measured peak.

The standard deviation :math:`\sigma` of the distribution is simply :math:`\delta\cdot d_0`. To determine whether a peak can be indexed by a reflection, the algorithm checks if observed and calculated position are closer than :math:`3\sigma`. If :math:`n` reflections are closer to an observed peak at position :math:`d`, all are considered a candidate for correct assignment. In order to decide which reflection is most likely to be correct, the value of :math:`f_j(d)` is calculated for each combination of measured and calculated position and the candidate with highest value is used to indexed the measured peak.
In the previous example, all calculated reflections get the same weight, that's why the closest peak is always selected. This works for a single phase material, but it becomes error prone when more phases are present. A common example is the presence of a minority phase which makes up only a few percent of the sample, so that only very strong reflections of that compound can be detected. In that case a reflection of the minority compound may be very close the measured peak position, but in fact it may not even be detectable because of the low scattering contribution.

In the previous example, the parameter :math:`I` is identical for all reflections, that's why the closest peak is always selected. This works for a single phase material, but it becomes error prone when more phases are present. A common example is the presence of a minority phase which makes up only a few percent of the sample, so that only very strong reflections of that compound can be detected. In that case a reflection of the minority compound may be very close the measured peak position, but in fact it may not even be detectable because of the low scattering contribution.

For that reason it's possible to provide estimates for scattering contributions of each phase, which are used to calculate :math:`I`. Picking up the numbers from above, :math:`I_{major}` would be 0.95 and :math:`I_{minor}` 0.05. This means that reflections of the majority phase are assigned with higher priority, because it is much more likely to detect them.
For that reason it's possible to provide estimates for scattering contributions of each phase, which are used to calculate :math:`I`. Picking up the numbers from above, :math:`I_{major}` would be 0.95 and :math:`I_{minor}` 0.05. This means that reflections of the majority phase are assigned with higher priority, because it is much more likely to detect them. Since :math:`I` is proportional to :math:`F^2`, the structure factors are also taken into account (including multiplicity related to the Laue class).

The output of the algorithm is a WorkspaceGroup, with one TableWorkspace for each input component containing the indexed peaks for that compound. Additionally there is a TableWorkspace with peaks that could not be indexed.

Expand Down

0 comments on commit 78d559c

Please sign in to comment.