Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Permutation Testing #559

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,14 @@ public class ProteinQuantificationEngine
throw new MzLibException("Conditions must be defined to run the Bayesian protein quantification");
}

this.MaxThreads = maxThreads;
this.Results = results;
this.UseSharedPeptides = useSharedPeptides;
this.ControlCondition = controlCondition;
this.FoldChangeCutoff = foldChangeCutoff;
this.BurnInSteps = mcmcBurninSteps;
this.McmcSteps = mcmcSteps;
this.PairedSamples = pairedSamples;
MaxThreads = maxThreads;
Results = results;
UseSharedPeptides = useSharedPeptides;
ControlCondition = controlCondition;
FoldChangeCutoff = foldChangeCutoff;
BurnInSteps = mcmcBurninSteps;
McmcSteps = mcmcSteps;
PairedSamples = pairedSamples;

if (randomSeed == null)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ public abstract class ProteinQuantificationEngineResult

protected ProteinQuantificationEngineResult(ProteinGroup protein, List<Peptide> peptides, string controlCondition, string treatmentCondition)
{
this.Protein = protein;
this.Peptides = peptides;
this.ControlCondition = controlCondition;
this.TreatmentCondition = treatmentCondition;
Protein = protein;
Peptides = peptides;
ControlCondition = controlCondition;
TreatmentCondition = treatmentCondition;
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ public class UnpairedProteinQuantResult : ProteinQuantificationEngineResult
bool useSharedPeptides, FlashLfqResults flashLfqResults, Dictionary<(Peptide, string, int), (double, DetectionType)> peptideToSampleQuantity)
: base(protein, peptides, controlCondition, treatmentCondition)
{
this.PeptideToSampleQuantity = peptideToSampleQuantity;
this.UseSharedPeptides = useSharedPeptides;
this.FlashLfqResults = flashLfqResults;
PeptideToSampleQuantity = peptideToSampleQuantity;
UseSharedPeptides = useSharedPeptides;
FlashLfqResults = flashLfqResults;

ConditionsWithPeptideSampleQuantities = new Dictionary<string, List<Datum>>();
GetPeptideSampleQuantities();
Expand Down Expand Up @@ -81,7 +81,7 @@ public class UnpairedProteinQuantResult : ProteinQuantificationEngineResult

if (!skepticalPrior)
{
this.FoldChangePointEstimate = diffs.Median();
FoldChangePointEstimate = diffs.Median();

double uncertaintyInControl = (hdi95Control.hdi_end - hdi95Control.hdi_start) / 2;
double uncertaintyInTreatment = (hdi95Treatment.hdi_end - hdi95Treatment.hdi_start) / 2;
Expand All @@ -106,12 +106,12 @@ public class UnpairedProteinQuantResult : ProteinQuantificationEngineResult
}

double propagatedSd = propagatedSds.Median();
this.StandardDeviationPointEstimate = propagatedSd;
StandardDeviationPointEstimate = propagatedSd;
}
else
{
this.NullHypothesisInterval = Math.Sqrt(Math.Pow(controlNullHypothesisInterval, 2) + Math.Pow(treatmentNullHypothesisInterval, 2));
this.CalculatePosteriorErrorProbability(diffs);
NullHypothesisInterval = Math.Sqrt(Math.Pow(controlNullHypothesisInterval, 2) + Math.Pow(treatmentNullHypothesisInterval, 2));
CalculatePosteriorErrorProbability(diffs);
}
}

Expand Down Expand Up @@ -177,8 +177,8 @@ public override string ToString()
var controlMmtsString = controlMmtsStringBuilder.ToString();
var treatmentMmtsString = treatmentMmtsStringBuilder.ToString();

NMeasurementsControl = this.ConditionsWithPeptideSampleQuantities[ControlCondition].Count;
NMeasurementsTreatment = this.ConditionsWithPeptideSampleQuantities[TreatmentCondition].Count;
NMeasurementsControl = ConditionsWithPeptideSampleQuantities[ControlCondition].Count;
NMeasurementsTreatment = ConditionsWithPeptideSampleQuantities[TreatmentCondition].Count;

if (controlMmtsString.Length > 32000)
{
Expand Down
1 change: 1 addition & 0 deletions mzLib/FlashLFQ/FlashLFQ.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Accord.Statistics" Version="3.8.0" />
<PackageReference Include="MathNet.Numerics" Version="4.8.1" />
<PackageReference Include="NetSerializer" Version="4.1.1" />
<PackageReference Include="SharpLearning.Optimization" Version="[0.28.0]" />
Expand Down
12 changes: 6 additions & 6 deletions mzLib/FlashLFQ/FlashLFQResults.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public void MergeResultsWith(FlashLfqResults mergeFrom)

foreach (var pep in mergeFrom.PeptideModifiedSequences)
{
if (this.PeptideModifiedSequences.TryGetValue(pep.Key, out var peptide))
if (PeptideModifiedSequences.TryGetValue(pep.Key, out var peptide))
{
Peptide mergeFromPep = pep.Value;
Peptide mergeToPep = peptide;
Expand All @@ -69,13 +69,13 @@ public void MergeResultsWith(FlashLfqResults mergeFrom)
}
else
{
this.PeptideModifiedSequences.Add(pep.Key, pep.Value);
PeptideModifiedSequences.Add(pep.Key, pep.Value);
}
}

foreach (var pg in mergeFrom.ProteinGroups)
{
if (this.ProteinGroups.TryGetValue(pg.Key, out var proteinGroup))
if (ProteinGroups.TryGetValue(pg.Key, out var proteinGroup))
{
ProteinGroup mergeFromPg = pg.Value;
ProteinGroup mergeToPg = proteinGroup;
Expand All @@ -87,19 +87,19 @@ public void MergeResultsWith(FlashLfqResults mergeFrom)
}
else
{
this.ProteinGroups.Add(pg.Key, pg.Value);
ProteinGroups.Add(pg.Key, pg.Value);
}
}

foreach (var fromPeaks in mergeFrom.Peaks)
{
if (this.Peaks.TryGetValue(fromPeaks.Key, out var toPeaks))
if (Peaks.TryGetValue(fromPeaks.Key, out var toPeaks))
{
toPeaks.AddRange(fromPeaks.Value);
}
else
{
this.Peaks.Add(fromPeaks.Key, fromPeaks.Value);
Peaks.Add(fromPeaks.Key, fromPeaks.Value);
}
}
}
Expand Down
27 changes: 25 additions & 2 deletions mzLib/FlashLFQ/FlashLfqEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public class FlashLfqEngine

// settings for the Bayesian protein quantification engine
public readonly bool BayesianProteinQuant;

public readonly bool PermutationProteinQuant;
public readonly string ProteinQuantBaseCondition;
public readonly double ProteinQuantFoldChangeCutoff;
public readonly int McmcSteps;
Expand Down Expand Up @@ -74,6 +74,8 @@ public class FlashLfqEngine
double maxMbrWindow = 2.5,
bool requireMsmsIdInCondition = false,

bool permutationProteinQuant = false,

// settings for the Bayesian protein quantification engine
bool bayesianProteinQuant = false,
string proteinQuantBaseCondition = null,
Expand Down Expand Up @@ -110,6 +112,7 @@ public class FlashLfqEngine
Normalize = normalize;
MaxThreads = maxThreads;
BayesianProteinQuant = bayesianProteinQuant;
PermutationProteinQuant = permutationProteinQuant;
PairedSamples = pairedSamples;
ProteinQuantBaseCondition = proteinQuantBaseCondition;
ProteinQuantFoldChangeCutoff = proteinQuantFoldChangeCutoff;
Expand Down Expand Up @@ -228,6 +231,26 @@ public FlashLfqResults Run()
ProteinQuantFoldChangeCutoff, RandomSeed, McmcBurninSteps, McmcSteps, PairedSamples).Run();
}
}
// do permutation/t-test protein fold-change analysis
else if(PermutationProteinQuant) //TODO bayesian changes protein quant values, might want to use those values. Test with and without
{
if (_spectraFileInfo.Count == 1 || _spectraFileInfo.Select(p => p.Condition).Distinct().Count() == 1)
{
if (!Silent)
{
Console.WriteLine("Can't do permutation testing with only one spectra file or condition. FlashLFQ will still do a top3 protein quant");
}
}
else
{
if (!Silent)
{
Console.WriteLine("Running permutation testing to determine significant proteins");
}

new PermutationTestingEngine(_results).Run();
}
}

// done
if (!Silent)
Expand Down Expand Up @@ -1348,4 +1371,4 @@ private void CutPeak(ChromatographicPeak peak, double identificationTime)
}
}
}
}
}
87 changes: 87 additions & 0 deletions mzLib/FlashLFQ/PermutationTesting/ImputationProcess.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
using MathNet.Numerics.Distributions;
using System;
using System.Collections.Generic;
using System.Linq;

namespace FlashLFQ
{
/// <summary>
/// Class used for imputing missing intensity values for each protein
/// </summary>
public class ImputationProcess
{
/// <summary>
/// Imputes missing intensity values
/// </summary>
public List<ProteinGroup> RunImputationProcess(List<ProteinGroup> proteins, List<SpectraFileInfo> sampleFileNames, double meanFraction)
{
//foreach sample, determine the distribution of protein intensities
//create a function to impute data for the given sample
//impute missing values

//create array to store all intensities for each file
List<double>[] intensitiesForEachSample = new List<double>[sampleFileNames.Count];
for(int i=0; i<sampleFileNames.Count; i++)
{
intensitiesForEachSample[i] = new List<double>();
}

//populate intensities for each file
foreach(ProteinGroup protein in proteins)
{
for(int i=0; i<sampleFileNames.Count; i++)
{
intensitiesForEachSample[i].Add(protein.GetIntensity(sampleFileNames[i]));
}
}

//create sample-specific functions for imputation
Normal[] imputationFunctions = new Normal[sampleFileNames.Count];
for(int i=0; i<sampleFileNames.Count; i++)
{
List<double> intensitiesForThisSample = intensitiesForEachSample[i];
double mean = intensitiesForThisSample.Average();
double stddev = Math.Sqrt((intensitiesForThisSample.Sum(x => Math.Pow(x - mean, 2))) / intensitiesForThisSample.Count);
double fractionOfMissingValues = (proteins.Count - intensitiesForThisSample.Count) * 1d / proteins.Count;
imputationFunctions[i] = CreateImputationFunction(mean, stddev, fractionOfMissingValues, meanFraction);
}

//impute values for each protein
List<ProteinGroup> proteinsWithImputedValues = new List<ProteinGroup>(proteins.Count);
foreach(ProteinGroup protein in proteins)
{
//create new protein to store the imputed values
ProteinGroup imputedProtein = new ProteinGroup(protein.ProteinGroupName, protein.GeneName, protein.Organism);
for(int i=0; i<sampleFileNames.Count; i++)
{
SpectraFileInfo file = sampleFileNames[i];
double intensity = protein.GetIntensity(file);
intensity = intensity > 0 ? intensity : imputationFunctions[i].Sample(); //impute if needed
imputedProtein.SetIntensity(file, intensity);
}
proteinsWithImputedValues.Add(imputedProtein);
}

return proteinsWithImputedValues;
}

/// <summary>
/// Imputes missing intensity value for each protein
/// </summary>
public Normal CreateImputationFunction(double mean, double stddev, double fractionOfMissingValues, double meanFraction)
{
double imputedProbability = Math.Min(fractionOfMissingValues / (1 - fractionOfMissingValues), 1); //TODO: Explanation needed
double standardDeviationFraction = Math.Max(2 * fractionOfMissingValues, 0.3); //TODO: Explanation needed
double stdDevFraction = 0.6 * (1 - (fractionOfMissingValues * fractionOfMissingValues));//TODO: Explanation needed
Normal probabilityDist = new Normal(mean, standardDeviationFraction);
double probabilitySetPoint = probabilityDist.Density(mean + stdDevFraction * standardDeviationFraction);
double yCoordinate = imputedProbability * probabilitySetPoint;
double deltaX = standardDeviationFraction * stdDevFraction;
Normal xCoord = new Normal(mean, stddev);
double deltaMu = xCoord.InverseCumulativeDistribution(yCoordinate);
double meanDownshift = (deltaMu - deltaX * meanFraction);

return new Normal(meanDownshift, standardDeviationFraction, new Random(2));
}
}
}
Loading