Refs #10247. Finish indexing algorithm.

mantidproject · Dec 5, 2014 · 78d559c · 78d559c
1 parent 5a0b0f2
commit 78d559c
Show file tree

Hide file tree

Showing 4 changed files with 41 additions and 84 deletions.
diff --git a/Code/Mantid/Framework/SINQ/inc/MantidSINQ/PoldiIndexKnownCompounds.h b/Code/Mantid/Framework/SINQ/inc/MantidSINQ/PoldiIndexKnownCompounds.h
@@ -24,25 +24,19 @@ struct MANTID_SINQ_DLL IndexCandidatePair
         observed(),
         candidate(),
         positionMatch(0.0),
-        intensityMatch(0.0),
         candidateCollectionIndex(0) { }
 
     IndexCandidatePair(const PoldiPeak_sptr &measuredPeak, const PoldiPeak_sptr &candidatePeak, size_t index);
 
-    /// Comparison operator, scores are compared.
+    /// Comparison operator, position matches are compared.
     bool operator <(const IndexCandidatePair &other) const
     {
-        if(fabs(positionMatch - other.positionMatch) > 0.01) {
-            return positionMatch < other.positionMatch;
-        }
-
-        return intensityMatch < other.intensityMatch;
+        return positionMatch < other.positionMatch;
     }
 
     PoldiPeak_sptr observed;
     PoldiPeak_sptr candidate;
     double positionMatch;
-    double intensityMatch;
     size_t candidateCollectionIndex;
 };
 
@@ -122,9 +116,6 @@ class MANTID_SINQ_DLL PoldiIndexKnownCompounds  : public API::Algorithm
     void assignFwhmEstimates(const std::vector<PoldiPeakCollection_sptr> &peakCollections, const std::vector<double> &tolerances) const;
     void assignFwhmEstimates(const PoldiPeakCollection_sptr &peakCollection, double tolerance) const;
 
-    // Integration
-    PoldiPeakCollection_sptr getIntegratedPeaks(const PoldiPeakCollection_sptr &rawPeaks);
-
     // Indexing algorithm
     void indexPeaks(const PoldiPeakCollection_sptr &measured, const std::vector<PoldiPeakCollection_sptr> &knownCompoundPeaks);
 
@@ -137,6 +128,8 @@ class MANTID_SINQ_DLL PoldiIndexKnownCompounds  : public API::Algorithm
     bool inPeakSet(const std::set<PoldiPeak_sptr> &peakSet, const PoldiPeak_sptr &peak) const;
     void assignPeakIndex(const IndexCandidatePair &candidate);
 
+    // Finalization
+    PoldiPeakCollection_sptr getIntensitySortedPeakCollection(const PoldiPeakCollection_sptr &peaks) const;
 
     PoldiPeakCollection_sptr m_measuredPeaks;
     std::vector<PoldiPeakCollection_sptr> m_expectedPhases;

diff --git a/Code/Mantid/Framework/SINQ/src/PoldiIndexKnownCompounds.cpp b/Code/Mantid/Framework/SINQ/src/PoldiIndexKnownCompounds.cpp
@@ -50,19 +50,9 @@ IndexCandidatePair::IndexCandidatePair(const PoldiPeak_sptr &measuredPeak, const
 
     boost::math::normal positionDistribution(0.0, sigmaD);
 
-    positionMatch = (1.0 - boost::math::cdf(positionDistribution, differenceD)) * 2.0;
+    double candidateIntensity = candidatePeak->intensity();
 
-    double peakI = observed->intensity();
-    double sigmaI = 0.25 * fabs(peakI);
-    double differenceI = fabs(peakI - candidate->intensity());
-
-    boost::math::normal intensityDistribution(0.0, sigmaI);
-
-    intensityMatch = (1.0 - boost::math::cdf(intensityDistribution, differenceI)) * 2.0;
-
-    std::cout << MillerIndicesIO::toString(candidate->hkl()) << std::endl;
-    std::cout << candidate->intensity() << " " << peakI << " " << positionMatch << std::endl;
-    std::cout << candidate->d() << " " << peakD << " " << intensityMatch << std::endl << std::endl;
+    positionMatch = (1.0 - boost::math::cdf(positionDistribution, differenceD)) * 2.0 * candidateIntensity;
 }
 
 /// Default constructor
@@ -354,8 +344,6 @@ void PoldiIndexKnownCompounds::scaleToExperimentalValues(const std::vector<Poldi
         maximumCalculatedIntensity = std::max(getMaximumIntensity(*it), maximumCalculatedIntensity);
     }
 
-    std::cout << "MAX: " << maximumExperimentalIntensity << " " << maximumCalculatedIntensity << std::endl;
-
     scaleIntensityEstimates(peakCollections, std::vector<double>(peakCollections.size(), maximumExperimentalIntensity / maximumCalculatedIntensity));
 }
 
@@ -417,33 +405,6 @@ void PoldiIndexKnownCompounds::assignFwhmEstimates(const PoldiPeakCollection_spt
     }
 }
 
-PoldiPeakCollection_sptr PoldiIndexKnownCompounds::getIntegratedPeaks(const PoldiPeakCollection_sptr &rawPeaks)
-{
-    if(rawPeaks->intensityType() == PoldiPeakCollection::Integral) {
-        return rawPeaks;
-    }
-
-    PeakFunctionIntegrator integrator;
-
-    PoldiPeakCollection_sptr peaks = boost::make_shared<PoldiPeakCollection>(PoldiPeakCollection::Integral);
-    for(size_t i = 0; i < rawPeaks->peakCount(); ++i) {
-        PoldiPeak_sptr peak = rawPeaks->peak(i)->clone();
-
-        IPeakFunction_sptr peakFunction = boost::dynamic_pointer_cast<IPeakFunction>(FunctionFactory::Instance().createFunction(rawPeaks->getProfileFunctionName()));
-        peakFunction->setCentre(0.0);
-        peakFunction->setHeight(peak->intensity());
-        peakFunction->setFwhm(peak->fwhm(PoldiPeak::AbsoluteD));
-
-        IntegrationResult result = integrator.integrateInfinity(peakFunction);
-
-        peak->setIntensity(UncertainValue(result.result));
-
-        peaks->addPeak(peak);
-    }
-
-    return peaks;
-}
-
 /** Tries to index the supplied measured peaks
  *
  *  The method tries to index the supplied measured peaks by finding peaks with similar d-spacings
@@ -589,7 +550,7 @@ void PoldiIndexKnownCompounds::assignCandidates(const std::vector<IndexCandidate
 
         g_log.information() << "    Candidate d=" << static_cast<double>(measuredPeak->d()) << " -> "
                             << "Phase: " << currentCandidate.candidateCollectionIndex << " [" << MillerIndicesIO::toString(expectedPeak->hkl()) << "] (d=" << static_cast<double>(expectedPeak->d()) << "), "
-                            << "Score=(" << currentCandidate.positionMatch << ", " << currentCandidate.intensityMatch << "): ";
+                            << "Score=(" << currentCandidate.positionMatch << "): ";
 
         /* If the peak has not been indexed yet, it is not stored in the set
          * that holds measured peaks that are already indexed, so the candidate
@@ -646,6 +607,20 @@ void PoldiIndexKnownCompounds::assignPeakIndex(const IndexCandidatePair &candida
     m_indexedPeaks[candidate.candidateCollectionIndex]->addPeak(candidate.observed);
 }
 
+PoldiPeakCollection_sptr PoldiIndexKnownCompounds::getIntensitySortedPeakCollection(const PoldiPeakCollection_sptr &peaks) const
+{
+    std::vector<PoldiPeak_sptr> peakVector(peaks->peaks());
+
+    std::sort(peakVector.begin(), peakVector.end(), boost::bind<bool>(&PoldiPeak::greaterThan, _1, _2, &PoldiPeak::intensity));
+
+    PoldiPeakCollection_sptr sortedPeaks = boost::make_shared<PoldiPeakCollection>(peaks->intensityType());
+    for(size_t i = 0; i < peakVector.size(); ++i) {
+        sortedPeaks->addPeak(peakVector[i]->clone());
+    }
+
+    return sortedPeaks;
+}
+
 /** Initialize the algorithm's properties.
    */
 void PoldiIndexKnownCompounds::init()
@@ -676,11 +651,8 @@ void PoldiIndexKnownCompounds::exec()
 
     DataObjects::TableWorkspace_sptr peakTableWorkspace = getProperty("InputWorkspace");
 
-    PoldiPeakCollection_sptr unindexedRawPeaks = boost::make_shared<PoldiPeakCollection>(peakTableWorkspace);
-    g_log.information() << "  Number of peaks: " << unindexedRawPeaks->peakCount() << std::endl;
-
-    g_log.information() << "  Integrating peaks..." << std::endl;
-    PoldiPeakCollection_sptr unindexedPeaks = getIntegratedPeaks(unindexedRawPeaks);
+    PoldiPeakCollection_sptr unindexedPeaks = boost::make_shared<PoldiPeakCollection>(peakTableWorkspace);
+    g_log.information() << "  Number of peaks: " << unindexedPeaks->peakCount() << std::endl;
 
     std::vector<Workspace_sptr> workspaces = getWorkspaces(getProperty("CompoundWorkspaces"));
     std::vector<PoldiPeakCollection_sptr> peakCollections = getPeakCollections(workspaces);
@@ -721,13 +693,13 @@ void PoldiIndexKnownCompounds::exec()
     WorkspaceGroup_sptr outputWorkspaces = boost::make_shared<WorkspaceGroup>();
 
     for(size_t i = 0; i < m_indexedPeaks.size(); ++i) {
-        ITableWorkspace_sptr tableWs = m_indexedPeaks[i]->asTableWorkspace();
+        PoldiPeakCollection_sptr intensitySorted = getIntensitySortedPeakCollection(m_indexedPeaks[i]);
+        ITableWorkspace_sptr tableWs = intensitySorted->asTableWorkspace();
         AnalysisDataService::Instance().addOrReplace("Indexed_" + m_phaseNames[i], tableWs);
 
         outputWorkspaces->addWorkspace(tableWs);
     }
 
-
     ITableWorkspace_sptr unindexedTableWs = m_unindexedPeaks->asTableWorkspace();
 
     std::string inputWorkspaceName = getPropertyValue("InputWorkspace");

diff --git a/Code/Mantid/Framework/SINQ/test/PoldiIndexKnownCompoundsTest.h b/Code/Mantid/Framework/SINQ/test/PoldiIndexKnownCompoundsTest.h
@@ -334,8 +334,10 @@ class PoldiIndexKnownCompoundsTest : public CxxTest::TestSuite
         PoldiPeakCollection_sptr indexedSilicon = PoldiPeakCollectionHelpers::createTheoreticalPeakCollectionSilicon();
         TS_ASSERT_THROWS_NOTHING(alg.scaleIntensityEstimates(indexedSilicon, 2.0));
 
+        PoldiPeakCollection_sptr reference = PoldiPeakCollectionHelpers::createTheoreticalPeakCollectionSilicon();
+
         for(size_t i = 0; i < indexedSilicon->peakCount(); ++i) {
-            TS_ASSERT_EQUALS(indexedSilicon->peak(i)->intensity(), 2.0);
+            TS_ASSERT_EQUALS(indexedSilicon->peak(i)->intensity(), 2.0 * reference->peak(i)->intensity());
         }
 
         // test override with vectors
@@ -498,7 +500,7 @@ class PoldiIndexKnownCompoundsTest : public CxxTest::TestSuite
         IndexCandidatePair defaultConstructed;
         TS_ASSERT(!defaultConstructed.observed);
         TS_ASSERT(!defaultConstructed.observed);
-        TS_ASSERT_EQUALS(defaultConstructed.score, 0.0);
+        TS_ASSERT_EQUALS(defaultConstructed.positionMatch, 0.0);
         TS_ASSERT_EQUALS(defaultConstructed.candidateCollectionIndex, 0);
     }
 
@@ -511,9 +513,7 @@ class PoldiIndexKnownCompoundsTest : public CxxTest::TestSuite
         PoldiPeak_sptr peak = PoldiPeak::create(Conversions::dToQ(2.0));
 
         /* Two candidates for indexing with different
-         * distances from the peak position.
-         * They both have a contribution of 1.0 and equal , so score is only
-         * determined by |d(peak)-d(candidate)|.
+         * distances from the peak position, but equal tolerances.
          */
         PoldiPeak_sptr candidate1 = PoldiPeak::create(Conversions::dToQ(2.04), 1.0);
         candidate1->setFwhm(UncertainValue(alg.sigmaToFwhm(0.1)), PoldiPeak::Relative);
@@ -533,16 +533,10 @@ class PoldiIndexKnownCompoundsTest : public CxxTest::TestSuite
         TS_ASSERT_EQUALS(peakCandidate1.candidate, candidate1);
         TS_ASSERT_EQUALS(peakCandidate1.candidateCollectionIndex, 1);
 
-        /* Score is a gaussian with A = I(candidate), sigma from FWHM(candidate)
-         * and x0 = d(candidate). x is d(peak).
-         */
-        TS_ASSERT_DELTA(peakCandidate1.score, 0.98096021516673242965, 1e-12);
-
         IndexCandidatePair peakCandidate2(peak, candidate2, 1);
         TS_ASSERT_EQUALS(peakCandidate2.observed, peak);
         TS_ASSERT_EQUALS(peakCandidate2.candidate, candidate2);
         TS_ASSERT_EQUALS(peakCandidate2.candidateCollectionIndex, 1);
-        TS_ASSERT_DELTA(peakCandidate2.score, 0.91685535573202892876, 1e-12);
 
         // Test comparison operator
         TS_ASSERT(peakCandidate2 < peakCandidate1);
@@ -574,15 +568,16 @@ class PoldiIndexKnownCompoundsTest : public CxxTest::TestSuite
         TS_ASSERT_THROWS_NOTHING(alg.indexPeaks(measured, expected));
         TS_ASSERT_EQUALS(alg.m_unindexedPeaks->peakCount(), 2);
 
-        PoldiPeakCollection_sptr indexedSi = alg.m_indexedPeaks[0];
+        PoldiPeakCollection_sptr indexedSi = alg.getIntensitySortedPeakCollection(alg.m_indexedPeaks[0]);
         TS_ASSERT_EQUALS(indexedSi->peakCount(), 4);
 
-        TS_ASSERT_EQUALS(indexedSi->peak(0)->hkl(), MillerIndices(3, 3, 1));
+        // Peaks are ordered by intensity
+        TS_ASSERT_EQUALS(indexedSi->peak(3)->hkl(), MillerIndices(3, 3, 1));
         // Make sure this is the correct peak (not the one introduced above)
-        TS_ASSERT_EQUALS(indexedSi->peak(1)->hkl(), MillerIndices(3, 1, 1));
-        TS_ASSERT_EQUALS(indexedSi->peak(2)->hkl(), MillerIndices(2, 2, 0));
-        TS_ASSERT_EQUALS(indexedSi->peak(3)->hkl(), MillerIndices(4, 2, 2));
-        TS_ASSERT_EQUALS(indexedSi->peak(3)->d(), 1.108644);
+        TS_ASSERT_EQUALS(indexedSi->peak(2)->hkl(), MillerIndices(3, 1, 1));
+        TS_ASSERT_EQUALS(indexedSi->peak(1)->hkl(), MillerIndices(2, 2, 0));
+        TS_ASSERT_EQUALS(indexedSi->peak(0)->hkl(), MillerIndices(4, 2, 2));
+        TS_ASSERT_EQUALS(indexedSi->peak(0)->d(), 1.108644);
     }
 
 private:

diff --git a/Code/Mantid/docs/source/algorithms/PoldiIndexKnownCompounds-v1.rst b/Code/Mantid/docs/source/algorithms/PoldiIndexKnownCompounds-v1.rst
@@ -14,16 +14,13 @@ In many experiments at POLDI it is well known which compounds are present in a s
 
 The major difficulty arises from the fact that typical POLDI experiments involve lattice deformations, which causes a shift in peak positions. For a single phase material with few peaks, this can be handled by making the tolerance larger - but only to a certain degree. If cell parameters are too large and consequently lattice plane spacings at low d-values too narrow, the indexing becomes ambiguous. Of course, introducing additional phases makes this situation even worse, leading to even more ambiguity.
 
-To resolve these ambiguities, this algorithm assumes that a peak must occur somewhere in the vicinity of the theoretical position. As the distance between measured and calculated position grows, it becomes less and less likely that the measured peak corresponds to the theoretical one. That behavior can be modeled by assuming a Gaussian distribution around the position :math:`d_0` of the theoretical peak:
+To resolve these ambiguities, this algorithm assumes that a peak must occur somewhere in the vicinity of the theoretical position. As the distance between measured and calculated position grows, it becomes less and less likely that the measured peak corresponds to the theoretical one. By assuming a normal distribution centered at 0 and a standard deviation corresponding to an acceptable tolerance, the probability of finding the peak within this distance can be calculated.
 
-.. math::
-    f(d) = I\cdot\exp\left[-\frac{1}{2}\left(\frac{d - d_0}{\sigma}\right)^2\right]
+The standard deviation :math:`\sigma` of the distribution is simply :math:`\delta\cdot d_0`. To determine whether a peak can be indexed by a reflection, the algorithm checks if observed and calculated position are closer than :math:`3\sigma`. If :math:`n` reflections are closer to an observed peak at position :math:`d`, all are considered a candidate for correct assignment. In order to decide which reflection is most likely to be correct, the probability is calculated for each combination of measured and calculated position and the candidate with highest value is used to indexed the measured peak.
 
-The standard deviation :math:`\sigma` of the distribution is simply :math:`\delta\cdot d_0`. To determine whether a peak can be indexed by a reflection, the algorithm checks if observed and calculated position are closer than :math:`3\sigma`. If :math:`n` reflections are closer to an observed peak at position :math:`d`, all are considered a candidate for correct assignment. In order to decide which reflection is most likely to be correct, the value of :math:`f_j(d)` is calculated for each combination of measured and calculated position and the candidate with highest value is used to indexed the measured peak.
+In the previous example, all calculated reflections get the same weight, that's why the closest peak is always selected. This works for a single phase material, but it becomes error prone when more phases are present. A common example is the presence of a minority phase which makes up only a few percent of the sample, so that only very strong reflections of that compound can be detected. In that case a reflection of the minority compound may be very close the measured peak position, but in fact it may not even be detectable because of the low scattering contribution.
 
-In the previous example, the parameter :math:`I` is identical for all reflections, that's why the closest peak is always selected. This works for a single phase material, but it becomes error prone when more phases are present. A common example is the presence of a minority phase which makes up only a few percent of the sample, so that only very strong reflections of that compound can be detected. In that case a reflection of the minority compound may be very close the measured peak position, but in fact it may not even be detectable because of the low scattering contribution.
-
-For that reason it's possible to provide estimates for scattering contributions of each phase, which are used to calculate :math:`I`. Picking up the numbers from above, :math:`I_{major}` would be 0.95 and :math:`I_{minor}` 0.05. This means that reflections of the majority phase are assigned with higher priority, because it is much more likely to detect them.
+For that reason it's possible to provide estimates for scattering contributions of each phase, which are used to calculate :math:`I`. Picking up the numbers from above, :math:`I_{major}` would be 0.95 and :math:`I_{minor}` 0.05. This means that reflections of the majority phase are assigned with higher priority, because it is much more likely to detect them. Since :math:`I` is proportional to :math:`F^2`, the structure factors are also taken into account (including multiplicity related to the Laue class).
 
 The output of the algorithm is a WorkspaceGroup, with one TableWorkspace for each input component containing the indexed peaks for that compound. Additionally there is a TableWorkspace with peaks that could not be indexed.