Skip to content

Commit

Permalink
Fixes to the PLS-DA implementation to work with filtered class groups >2
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fitzpatrick committed Oct 24, 2014
1 parent f26be50 commit b673ea1
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 28 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Pathomx

*Latest stable release v3.0.1 (22nd October 2014).*
*Latest stable release v3.0.2 (24th October 2014).*

Pathomx is an interactive tool for the analysis and visualisation of scientific data.
Built on IPython it allows rapid, workflow-based exploration of complex datasets through
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.0.1
3.0.2
2 changes: 1 addition & 1 deletion pathomx/figures.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ def scatterplot(data, figure=None, ax=None, styles=None, lines=[], label_index=N
df = data

s = ls.markersize ** 2 if ls.markersize != None else 20 #default
plots[c] = ax.scatter(df.iloc[:, 0], df.iloc[:, 1], color=ls.markerfacecolor, marker=ls.marker, s=s)
plots[c] = ax.scatter(df.iloc[:, 0].values, df.iloc[:, 1].values, color=ls.markerfacecolor, marker=ls.marker, s=s)


# Calculate 95% confidence interval for data but only if points >1
Expand Down
30 changes: 20 additions & 10 deletions pathomx/plugins/multivariate/pls_da.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,29 @@
class_idx = input_data.index.names.index('Class')
classes = list( input_data.index.levels[ class_idx ] )

Y = input_data.index.labels[ class_idx ]
Y = []
sample_filter_idx = []
for n, cv in enumerate(input_data.index.values):
c = cv[class_idx]
if c == _experiment_control:
Y.append(0)
sample_filter_idx.append(n)
elif c == _experiment_test or _experiment_test == '*':
Y.append(1)
sample_filter_idx.append(n)

plsr.fit(input_data.values, Y)

plsr.fit(input_data.values[sample_filter_idx,:], Y)

# Build scores into a dso no_of_samples x no_of_principal_components
scores = pd.DataFrame(plsr.x_scores_)
scores.index = input_data.index
scores.index = pd.MultiIndex.from_tuples([v for n, v in enumerate(input_data.index.values) if n in sample_filter_idx], names=list(input_data.index.names))

scoresl =[]
for n,s in enumerate(plsr.x_scores_.T):
scoresl.append( 'Latent Variable %d' % (n+1) ) #, plsr.y_weights_[0][n])
scoresl.append( 'Latent Variable %d' % (n+1) ) #, plsr.y_weights_[0][n])
scores.columns = scoresl


weights = pd.DataFrame( plsr.x_weights_.T )
weights.columns = input_data.columns
Expand All @@ -39,9 +49,9 @@
for n in range(0, plsr.x_weights_.shape[1] ):
lvd = pd.DataFrame( plsr.x_weights_[:,n:n+1].T )
lvd.columns = input_data.columns

vars()['LV%d' % (n+1)] = spectra(lvd, styles=styles)

#weightsdl.append("Weights on LV %s" % (n+1))
weightsdc.append("LV %s" % (n+1))

Expand All @@ -56,7 +66,7 @@
label_index = None

for sc in score_combinations:
vars()['Scores %dv%d' % (sc[0]+1, sc[1]+1)] = scatterplot(scores.iloc[:,sc], styles=styles, label_index=label_index)
vars()['Scores %dv%d' % (sc[0]+1, sc[1]+1)] = scatterplot(scores.iloc[:,sc], styles=styles, label_index=label_index)

weightsd = None; # Clean up
lvd = None; # Clean up
weightsd = None; # Clean up
lvd = None; # Clean up
78 changes: 64 additions & 14 deletions pathomx/plugins/nmr/bruker_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,18 @@ def autophase_PeakMinima(x, s):
else:
sample_id_regexp = None

if config['class_regexp']:
class_regexp = re.compile(config['class_regexp'])
else:
class_regexp = None

# We should have a folder name; so find all files named fid underneath it (together with path)
# Extract the path, and the parent folder name (for sample label)
nmr_data = []
nmr_dic = []
sample_labels = []
sample_classes = []

_ppm_real_scan_folder = False
fids = []
for r, d, files in os.walk(config['filename']): # filename contains a folder for Bruker data
Expand Down Expand Up @@ -173,31 +179,75 @@ def autophase_PeakMinima(x, s):
if config['sample_id_from'] == 'Scan number':
label = os.path.basename(fid)

elif config['sample_id_from'] == 'Experiment name':
label = dic['acqus']['EXP']

elif config['sample_id_from'] == 'Experiment (regexp)' and sample_id_regexp is not None:
m = sample_id_regexp.search(dic['acqus']['EXP'])
if m:
label = m.group(0) if m.lastindex is None else m.group(m.lastindex)
elif config['sample_id_from'] == 'Sequential':
label = str(n+1)

else: # Fallback
elif config['sample_id_from'] == 'Experiment (regexp)':
if sample_id_regexp is None:
label = dic['acqus']['EXP']

elif config['sample_id_from'] == 'Path (regexp)' and sample_id_regexp is not None:
m = sample_id_regexp.search(fid)
if m:
label = m.group(0) if m.lastindex is None else m.group(m.lastindex)
else:
m = sample_id_regexp.search(dic['acqus']['EXP'])
if m:
label = m.group(0) if m.lastindex is None else m.group(m.lastindex)

else: # Fallback
else: # Fallback
label = dic['acqus']['EXP']

elif config['sample_id_from'] == 'Path (regexp)':
if sample_id_regexp is None:
label = os.path.basename(fid)

else:
m = sample_id_regexp.search(fid)
if m:
label = m.group(0) if m.lastindex is None else m.group(m.lastindex)

else: # Fallback
label = fid

else:
label = os.path.basename(fid)


# Generate class label for this spectrum
# ['None', 'Experiment (regexp)', 'Path (regexp)']
if config['class_from'] == 'None':
classn = ''

elif config['class_from'] == 'Experiment (regexp)':
if class_regexp is None:
classn = dic['acqus']['EXP']

else:
m = class_regexp.search(dic['acqus']['EXP'])
if m:
classn = m.group(0) if m.lastindex is None else m.group(m.lastindex)

else: # Fallback
classn = dic['acqus']['EXP']

elif config['class_from'] == 'Path (regexp)':
if class_regexp is None:
classn = os.path.basename(fid)

else:
m = class_regexp.search(fid)
if m:
classn = m.group(0) if m.lastindex is None else m.group(m.lastindex)

else: # Fallback
classn = fid

else:
classn = ''

#if 'AUTOPOS' in dic['acqus']:
# label = label + " %s" % dic['acqus']['AUTOPOS']

sample_labels.append(label)
sample_classes.append(classn)

nmr_data.append(data)
nmr_dic.append(dic)
_ppm_real_scan_folder = fid
Expand Down Expand Up @@ -233,7 +283,7 @@ def autophase_PeakMinima(x, s):

print("Processing spectra to Pandas DataFrame...")
output_data = pd.DataFrame(nmr_data)
output_data.index = pd.MultiIndex.from_tuples([(l, '') for l in sample_labels], names=['Sample', 'Class'])
output_data.index = pd.MultiIndex.from_tuples([(l, c) for l, c in zip(sample_labels, sample_classes)], names=['Sample', 'Class'])
output_data.columns = pd.MultiIndex.from_tuples([(s, ) for s in nmr_ppms], names=['Scale'])

# Export the dictionary parameters for all sets
Expand Down
16 changes: 15 additions & 1 deletion pathomx/plugins/nmr/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def __init__(self, parent, *args, **kwargs):
self.config.add_handler('path_filter_regexp', pathfreg_le)

cb_sampleidfrom = QComboBox()
cb_sampleidfrom.addItems(['Scan number', 'Experiment name', 'Experiment (regexp)', 'Path (regexp)'])
cb_sampleidfrom.addItems(['Scan number', 'Experiment (regexp)', 'Path (regexp)'])
grid.addWidget(QLabel('Sample ID from'), 2, 0)
grid.addWidget(cb_sampleidfrom, 2, 1)
self.config.add_handler('sample_id_from', cb_sampleidfrom)
Expand All @@ -76,6 +76,17 @@ def __init__(self, parent, *args, **kwargs):
grid.addWidget(sample_regexp_le, 3, 1)
self.config.add_handler('sample_id_regexp', sample_regexp_le)

cb_classfrom = QComboBox()
cb_classfrom.addItems(['None', 'Experiment (regexp)', 'Path (regexp)'])
grid.addWidget(QLabel('Class from'), 4, 0)
grid.addWidget(cb_classfrom, 4, 1)
self.config.add_handler('class_from', cb_classfrom)

class_regexp_le = QLineEdit()
grid.addWidget(QLabel('Class regexp'), 5, 0)
grid.addWidget(class_regexp_le, 5, 1)
self.config.add_handler('class_regexp', class_regexp_le)

gb.setLayout(grid)
self.layout.addWidget(gb)

Expand Down Expand Up @@ -138,6 +149,9 @@ def __init__(self, *args, **kwargs):
'path_filter_regexp': '',
'sample_id_from': 'Scan number', # Experiment name, Path regexp,
'sample_id_regexp': '',

'class_from': 'None', # Experiment name, Path regexp,
'class_regexp': '',
})

self.addConfigPanel(BrukerImportConfigPanel, 'Settings')
Expand Down

0 comments on commit b673ea1

Please sign in to comment.