Skip to content

Commit

Permalink
updated reports to use sample masking function; updated some plots to…
Browse files Browse the repository at this point in the history
… use SampleClass and colour scheme
  • Loading branch information
carolinesands committed Feb 20, 2024
1 parent 5f585c9 commit f525e26
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 91 deletions.
4 changes: 3 additions & 1 deletion nPYc/objects/_msDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1460,7 +1460,9 @@ def _inferBatches(self, gapLength=24):

# Check `Acquired Time` or `Run Order` information available for all samples
if numpy.any(sampleMetadata[usefield].isnull()):
raise npycToolboxError("Unable to run batch and run order correction without `sampleMetadata[`" + usefield + "`]` info for ALL samples")
missingData = sampleMetadata.loc[sampleMetadata[usefield].isnull(),['Sample File Name', usefield]]
raise npycToolboxError("Unable to run batch and run order correction without `sampleMetadata[`" + usefield + "`]` info for ALL samples, info missing for:",
table=missingData)

# Generate sampleMetadata sorted by run order
sortedSampleMetadata = sampleMetadata.sort_values(by='Run Order')
Expand Down
2 changes: 0 additions & 2 deletions nPYc/plotting/_plotBatchAndROCorrection.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ def plotBatchAndROCorrection(dataset, datasetcorrected, featureList, addViolin=T
# Check that dimensions are the same
try:
# Attempting to add arrays ar1 and ar2
print(msData.intensityData.shape)
print(msDatacorrected.intensityData.shape)
msData.intensityData + msDatacorrected.intensityData
except ValueError as ve:
print(ve)
Expand Down
2 changes: 2 additions & 0 deletions nPYc/reports/_generateBasicPCAReport.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ def generateBasicPCAReport(pcaModel, dataset, figureCounter=1, destinationPath=N
classes=dataset.sampleMetadata['SampleClass'],
colourType='categorical',
title='SampleClass',
colourDict=dataset.Attributes['sampleTypeColours'],
markerDict=dataset.Attributes['sampleTypeMarkers'],
savePath=saveAs,
figures=figuresQCscores,
figureFormat=dataset.Attributes['figureFormat'],
Expand Down
146 changes: 61 additions & 85 deletions nPYc/reports/_generateReportMS.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,32 +159,24 @@ def _finalReport(dataset, destinationPath=None, pcaModel=None, reportType='final
figureSize=dataset.Attributes['figureSize']

# Define sample masks
# TODO - CAZ this should use the function
SSmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudySample) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.Assay)
SPmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudyPool) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)
ERmask = (dataset.sampleMetadata['SampleType'].values == SampleType.ExternalReference) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)
LRmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudyPool) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.LinearityReference)
acquiredMasks = generateTypeRoleMasks(dataset.sampleMetadata)

# Set up template item and save required info
item = dict()
item['Name'] = dataset.name
item['ReportType'] = 'feature summary'
item['Nfeatures'] = dataset.intensityData.shape[1]
item['Nsamples'] = dataset.intensityData.shape[0]
item['SScount'] = str(sum(SSmask))
item['SPcount'] = str(sum(SPmask))
item['ERcount'] = str(sum(ERmask))
item['LRcount'] = str(sum(LRmask))
item['SScount'] = str(sum(acquiredMasks['SSmask']))
item['SPcount'] = str(sum(acquiredMasks['SPmask']))
item['ERcount'] = str(sum(acquiredMasks['ERmask']))
item['LRcount'] = str(sum(acquiredMasks['SRDmask']))
item['corrMethod'] = dataset.Attributes['corrMethod']
figNo = 1

# Mean intensities of Study Pool samples (for future plotting segmented by intensity)
meanIntensitiesSP = numpy.log(numpy.nanmean(dataset.intensityData[SPmask, :], axis=0))
meanIntensitiesSP[numpy.mean(dataset.intensityData[SPmask, :], axis=0) == 0] = numpy.nan
meanIntensitiesSP = numpy.log(numpy.nanmean(dataset.intensityData[acquiredMasks['SPmask'], :], axis=0))
meanIntensitiesSP[numpy.mean(dataset.intensityData[acquiredMasks['SPmask'], :], axis=0) == 0] = numpy.nan
meanIntensitiesSP[numpy.isinf(meanIntensitiesSP)] = numpy.nan

# Table 1: Sample summary
Expand Down Expand Up @@ -381,8 +373,12 @@ def _finalReport(dataset, destinationPath=None, pcaModel=None, reportType='final
print('Figure ' + str(figNo) + ': Feature intensity histogram for all samples and all features in final dataset (by sample type)')
figNo = figNo+1

_plotAbundanceBySampleType(dataset.intensityData, SSmask, SPmask, ERmask, saveAs, dataset)

_plotAbundanceBySampleType(dataset.intensityData,
acquiredMasks['SSmask'],
acquiredMasks['SPmask'],
acquiredMasks['ERmask'],
saveAs,
dataset)

# Figure: Ion map
if 'm/z' in dataset.featureMetadata.columns and 'Retention Time' in dataset.featureMetadata.columns:
Expand Down Expand Up @@ -510,24 +506,12 @@ def _featureReport(dataset, destinationPath=None):
item['Nsamples'] = dataset.intensityData.shape[0]

# Define sample masks
SSmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudySample) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.Assay)
SPmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudyPool) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)
ERmask = (dataset.sampleMetadata['SampleType'].values == SampleType.ExternalReference) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)

try:
LRmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudyPool) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.LinearityReference)
item['LRcount'] = str(sum(LRmask))
except KeyError:
pass
acquiredMasks = generateTypeRoleMasks(dataset.sampleMetadata)

# Set up template item and save required info
item['SScount'] = str(sum(SSmask))
item['SPcount'] = str(sum(SPmask))
item['ERcount'] = str(sum(ERmask))
item['SScount'] = str(sum(acquiredMasks['SSmask']))
item['SPcount'] = str(sum(acquiredMasks['SPmask']))
item['ERcount'] = str(sum(acquiredMasks['ERmask']))
item['corrMethod'] = dataset.Attributes['corrMethod']

##
Expand All @@ -549,8 +533,8 @@ def _featureReport(dataset, destinationPath=None):
# Generate correlation to dilution for each batch subset - plot TIC and histogram of correlation to dilution

# Mean intensities of Study Pool samples (for future plotting segmented by intensity)
meanIntensitiesSP = numpy.log(numpy.nanmean(dataset.intensityData[SPmask, :], axis=0))
meanIntensitiesSP[numpy.mean(dataset.intensityData[SPmask, :], axis=0) == 0] = numpy.nan
meanIntensitiesSP = numpy.log(numpy.nanmean(dataset.intensityData[acquiredMasks['SPmask'], :], axis=0))
meanIntensitiesSP[numpy.mean(dataset.intensityData[acquiredMasks['SPmask'], :], axis=0) == 0] = numpy.nan
meanIntensitiesSP[numpy.isinf(meanIntensitiesSP)] = numpy.nan

# Figure 1: Histogram of log mean abundance by sample type
Expand All @@ -562,7 +546,12 @@ def _featureReport(dataset, destinationPath=None):
else:
print('Figure 1: Feature intensity histogram for all samples and all features in dataset (by sample type).')

_plotAbundanceBySampleType(dataset.intensityData, SSmask, SPmask, ERmask, saveAs, dataset)
_plotAbundanceBySampleType(dataset.intensityData,
acquiredMasks['SSmask'],
acquiredMasks['SPmask'],
acquiredMasks['ERmask'],
saveAs,
dataset)

if ('Acquired Time' in dataset.sampleMetadata.columns) or ('Run Order' in dataset.sampleMetadata.columns):

Expand Down Expand Up @@ -629,7 +618,7 @@ def _featureReport(dataset, destinationPath=None):
print('\x1b[31;1m Acquired Time/Run Order data not available to plot\n\033[0;0m')

# Correlation to dilution figures:
if sum(LRmask) != 0:
if sum(acquiredMasks['SRDmask']) != 0:

# Figure 4: Histogram of correlation to dilution by abundance percentiles
if destinationPath:
Expand Down Expand Up @@ -703,7 +692,7 @@ def _featureReport(dataset, destinationPath=None):
figureSize=dataset.Attributes['figureSize'])

# Figure 7: Scatterplot of RSD vs correlation to dilution
if sum(LRmask) != 0:
if sum(acquiredMasks['SRDmask']) != 0:
if destinationPath:
item['RsdVsCorrelationFigure'] = os.path.join(graphicsPath,
item['Name'] + '_rsdVsCorrelation.' + dataset.Attributes[
Expand Down Expand Up @@ -817,15 +806,9 @@ def _featureSelectionReport(dataset, destinationPath=None, withArtifactualFilter
"""

# Define sample masks
SSmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudySample) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.Assay)
SRmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudyPool) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)
SRDmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudyPool) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.LinearityReference)
Blankmask = dataset.sampleMetadata['SampleType'] == SampleType.ProceduralBlank

if (sum(SRDmask) <= 2) | (sum(SRmask) <= 1):
acquiredMasks = generateTypeRoleMasks(dataset.sampleMetadata)

if (sum(acquiredMasks['SRDmask']) <= 2) | (sum(acquiredMasks['SPmask']) <= 1):
raise ValueError('Cannot generate report - No linearity reference or '
'precision reference samples available')

Expand All @@ -844,7 +827,6 @@ def _featureSelectionReport(dataset, destinationPath=None, withArtifactualFilter
else:
graphicsPath = None


# Feature selection parameters and numbers passing
item = dict()
item['Name'] = dataset.name
Expand All @@ -860,7 +842,7 @@ def _featureSelectionReport(dataset, destinationPath=None, withArtifactualFilter
else:
item['corrExclusions'] = 'none'

if sum(SRDmask) > 0:
if sum(acquiredMasks['SRDmask']) > 0:
item['corrPassed'] = str(sum(dataset.correlationToDilution >= item['corrThreshold'])) + ' passed selection.'
passMask = numpy.logical_and(passMask, dataset.correlationToDilution >= item['corrThreshold'])
else:
Expand All @@ -869,9 +851,9 @@ def _featureSelectionReport(dataset, destinationPath=None, withArtifactualFilter
# RSD in SR samples, and RSD in SS samples > RSD in SR samples
item['rsdThreshold'] = dataset.Attributes['filterParameters']['rsdThreshold'] if dataset.Attributes['filterParameters']['rsdThreshold'] is not None else dataset.Attributes['rsdThreshold']
item['rsdSPvsSSvarianceRatio'] = dataset.Attributes['filterParameters']['varianceRatio'] if dataset.Attributes['filterParameters']['varianceRatio'] is not None else dataset.Attributes['varianceRatio']
rsdSS = rsd(dataset.intensityData[SSmask, :])
rsdSS = rsd(dataset.intensityData[acquiredMasks['SSmask'], :])

if sum(SRmask) > 0:
if sum(acquiredMasks['SPmask']) > 0:
item['rsdPassed'] = str(sum(dataset.rsdSP <= item['rsdThreshold'])) + ' passed selection.'
item['rsdSPvsSSPassed'] = str(sum(dataset.rsdSP * item['rsdSPvsSSvarianceRatio'] <= rsdSS)) + ' passed selection.'
passMask = numpy.logical_and(passMask, dataset.rsdSP <= item['rsdThreshold'])
Expand All @@ -881,7 +863,7 @@ def _featureSelectionReport(dataset, destinationPath=None, withArtifactualFilter
item['rsdSPvsSSPassed'] = 'Not applied (no SR samples present).'

# Blank mask
if (dataset.Attributes['featureFilters']['blankFilter'] is True) & (sum(Blankmask) >= 2):
if (dataset.Attributes['featureFilters']['blankFilter'] is True) & (sum(acquiredMasks['Blankmask']) >= 2):
item['BlankThreshold'] = dataset.Attributes['filterParameters']['blankThreshold'] if dataset.Attributes['filterParameters']['blankThreshold'] is not None else dataset.Attributes['blankThreshold']

blankMask = blankFilter(dataset, item['BlankThreshold'])
Expand All @@ -905,7 +887,7 @@ def _featureSelectionReport(dataset, destinationPath=None, withArtifactualFilter
featureNos = numpy.zeros(rValsRep.shape, dtype=numpy.int)
if withArtifactualFiltering:
# with blankThreshold in heatmap
if (dataset.Attributes['featureFilters']['blankFilter'] is True) & (sum(Blankmask) >= 2):
if (dataset.Attributes['featureFilters']['blankFilter'] is True) & (sum(acquiredMasks['Blankmask']) >= 2):
for rsdNo in range(rValsRep.shape[1]):
featureNos[0, rsdNo] = sum(dataset.artifactualFilter(featMask=(
(dataset.correlationToDilution >= rValsRep[0, rsdNo]) & (
Expand All @@ -922,7 +904,7 @@ def _featureSelectionReport(dataset, destinationPath=None, withArtifactualFilter
dataset.featureMask == True))))
else:
# with blankThreshold in heatmap
if (dataset.Attributes['featureFilters']['blankFilter'] is True) & (sum(Blankmask) >= 2):
if (dataset.Attributes['featureFilters']['blankFilter'] is True) & (sum(acquiredMasks['Blankmask']) >= 2):
for rsdNo in range(rValsRep.shape[1]):
featureNos[0, rsdNo] = sum(
(dataset.correlationToDilution >= rValsRep[0, rsdNo]) & (dataset.rsdSP <= rsdValsRep[0, rsdNo]) & (
Expand Down Expand Up @@ -1025,7 +1007,7 @@ def _batchCorrectionAssessmentReport(dataset, destinationPath=None, batch_correc
item['SScount'] = str(sum(acquiredMasks['SSmask']))
item['SPcount'] = str(sum(acquiredMasks['SPmask']))
item['ERcount'] = str(sum(acquiredMasks['ERmask']))
item['LRcount'] = str(sum(acquiredMasks['LRmask']))
item['LRcount'] = str(sum(acquiredMasks['SRDmask']))
item['corrMethod'] = dataset.Attributes['corrMethod']

##
Expand Down Expand Up @@ -1151,27 +1133,19 @@ def _batchCorrectionSummaryReport(dataset, correctedDataset, destinationPath=Non
featName=False
figureSize=dataset.Attributes['figureSize']


# Define sample masks
SSmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudySample) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.Assay)
SPmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudyPool) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)
ERmask = (dataset.sampleMetadata['SampleType'].values == SampleType.ExternalReference) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)
LRmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudyPool) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.LinearityReference)
acquiredMasks = generateTypeRoleMasks(dataset.sampleMetadata)

# Set up template item and save required info
item = dict()
item['Name'] = dataset.name
item['ReportType'] = 'feature summary'
item['Nfeatures'] = dataset.intensityData.shape[1]
item['Nsamples'] = dataset.intensityData.shape[0]
item['SScount'] = str(sum(SSmask))
item['SPcount'] = str(sum(SPmask))
item['ERcount'] = str(sum(ERmask))
item['LRcount'] = str(sum(LRmask))
item['SScount'] = str(sum(acquiredMasks['SSmask']))
item['SPcount'] = str(sum(acquiredMasks['SPmask']))
item['ERcount'] = str(sum(acquiredMasks['ERmask']))
item['LRcount'] = str(sum(acquiredMasks['SRDmask']))
item['corrMethod'] = dataset.Attributes['corrMethod']

##
Expand All @@ -1191,8 +1165,8 @@ def _batchCorrectionSummaryReport(dataset, correctedDataset, destinationPath=Non


# Mean intensities of Study Pool samples (for future plotting segmented by intensity)
meanIntensitiesSP = numpy.log(numpy.nanmean(dataset.intensityData[SPmask, :], axis=0))
meanIntensitiesSP[numpy.mean(dataset.intensityData[SPmask, :], axis=0) == 0] = numpy.nan
meanIntensitiesSP = numpy.log(numpy.nanmean(dataset.intensityData[acquiredMasks['SPmask'], :], axis=0))
meanIntensitiesSP[numpy.mean(dataset.intensityData[acquiredMasks['SPmask'], :], axis=0) == 0] = numpy.nan
meanIntensitiesSP[numpy.isinf(meanIntensitiesSP)] = numpy.nan

# Figure 1: Feature intensity histogram for all samples and all features in dataset (by sample type).
Expand All @@ -1206,7 +1180,12 @@ def _batchCorrectionSummaryReport(dataset, correctedDataset, destinationPath=Non
print('Figure 1: Feature intensity histogram for all samples and all features in dataset (by sample type).')
print('Pre-correction.')

_plotAbundanceBySampleType(dataset.intensityData, SSmask, SPmask, ERmask, saveAs, dataset)
_plotAbundanceBySampleType(dataset.intensityData,
acquiredMasks['SSmask'],
acquiredMasks['SPmask'],
acquiredMasks['ERmask'],
saveAs,
dataset)

# Post-correction
if destinationPath:
Expand All @@ -1216,7 +1195,12 @@ def _batchCorrectionSummaryReport(dataset, correctedDataset, destinationPath=Non
else:
print('Post-correction.')

_plotAbundanceBySampleType(correctedDataset.intensityData, SSmask, SPmask, ERmask, saveAs, correctedDataset)
_plotAbundanceBySampleType(correctedDataset.intensityData,
acquiredMasks['SSmask'],
acquiredMasks['SPmask'],
acquiredMasks['ERmask'],
saveAs,
correctedDataset)

# Figure 2: TIC for all samples and features.
if ('Acquired Time' in dataset.sampleMetadata.columns) or ('Run Order' in dataset.sampleMetadata.columns):
Expand Down Expand Up @@ -1389,27 +1373,19 @@ def _featureCorrelationToDilutionReport(dataset, destinationPath=None):
if not hasattr(dataset.sampleMetadata, 'Correction Batch'):
raise ValueError("Correction Batch information missing, run addSampleInfo(descriptionFormat=\'Batches\')")


# Define sample masks
SSmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudySample) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.Assay)
SPmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudyPool) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)
ERmask = (dataset.sampleMetadata['SampleType'].values == SampleType.ExternalReference) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.PrecisionReference)
LRmask = (dataset.sampleMetadata['SampleType'].values == SampleType.StudyPool) & \
(dataset.sampleMetadata['AssayRole'].values == AssayRole.LinearityReference)
acquiredMasks = generateTypeRoleMasks(dataset.sampleMetadata)

# Set up template item and save required info
item = dict()
item['Name'] = dataset.name
item['ReportType'] = 'feature summary'
item['Nfeatures'] = dataset.intensityData.shape[1]
item['Nsamples'] = dataset.intensityData.shape[0]
item['SScount'] = str(sum(SSmask))
item['SPcount'] = str(sum(SPmask))
item['ERcount'] = str(sum(ERmask))
item['LRcount'] = str(sum(LRmask))
item['SScount'] = str(sum(acquiredMasks['SSmask']))
item['SPcount'] = str(sum(acquiredMasks['SPmask']))
item['ERcount'] = str(sum(acquiredMasks['ERmask']))
item['LRcount'] = str(sum(acquiredMasks['SRDmask']))
item['corrMethod'] = dataset.Attributes['corrMethod']

##
Expand Down

0 comments on commit f525e26

Please sign in to comment.