In [2]:
import matchms
import numpy as np
import json
from collections import namedtuple
from typing import List, Tuple
from compMetabolomics.map_to_size import transform_log2_fold_change_to_node_size
from compMetabolomics.spectrum import Spectrum, load_json_spectra, parse_json_spectrum, get_min_max, get_spectrum_ids
from compMetabolomics import tsne_embedding
from compMetabolomics.spectral_comparison import convert_similarity_to_distance

In [3]:
spectra = load_json_spectra("data/spectra.json")

In [4]:
# list of feature_id, list of fold changes, list of node_size mappings, get fold change data per feature
import pandas as pd
quant_table = pd.read_csv("data/quant_table.csv")
treat_table = pd.read_csv("data/treat_table.csv")
joined_data = pd.merge(quant_table, treat_table, on='sample_id', how="inner")
joined_data = joined_data.drop('sample_id', axis=1)
numeric_data_only = joined_data.drop('treatment', axis=1)
mean_condition_0 = numeric_data_only[joined_data['treatment'] == 'PleurotusOMSW0'].mean()
mean_condition_80 = numeric_data_only[joined_data['treatment'] == 'PleurotusOMSW80'].mean()
meansdf = pd.concat([mean_condition_0, mean_condition_80], axis=1)
meansdf.columns = ["omsw0", "omsw80"]
meansdf['ratio'] = meansdf['omsw80'] / meansdf['omsw0']
meansdf['log2ratio'] = np.log2(meansdf['ratio'])
meansdf = meansdf.reset_index().rename(columns={'index': 'feature_id'})
meansdf['node_size'] = [transform_log2_fold_change_to_node_size(val) for val in meansdf['log2ratio']]
meansdf = meansdf.reset_index().rename(columns={'omsw0': 'mean_omsw0', 'omsw80': 'mean_omsw80', 'ratio': 'ratio (omsw80 / omsw0)'})
meansdf


  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,index,feature_id,mean_omsw0,mean_omsw80,ratio (omsw80 / omsw0),log2ratio,node_size
0,0,14957,0.000102,0.000138,1.363459,0.447271,11.3762
1,1,9690,0.000010,0.000013,1.361695,0.445404,11.3705
2,2,17689,0.000035,0.000000,0.000000,-inf,50.0000
3,3,14990,0.000231,0.000327,1.413709,0.499485,11.5369
4,4,8507,0.000001,0.000000,0.000000,-inf,50.0000
...,...,...,...,...,...,...,...
1970,1970,17255,0.000036,0.000101,2.801901,1.486406,14.5736
1971,1971,8934,0.000003,0.000013,4.533655,2.180675,16.7098
1972,1972,17235,0.000022,0.000008,0.349582,-1.516298,14.6655
1973,1973,15093,0.000022,0.000011,0.524921,-0.929827,12.8610


In [15]:
def align_with_spectral_feature_id(datadf : pd.DataFrame, spectra: List[Spectrum]):
  """Aligns the datadf dataframe which contains a featur_id column with the ordering of feature_id in a list of spectra."""
  out = pd.DataFrame({'feature_id' : [spec.feature_id for spec in spectra]})
  out = out.merge(datadf, how="inner")
  return out
aligned_meansdf = align_with_spectral_feature_id(meansdf, spectra)
aligned_meansdf

Unnamed: 0,feature_id,index,mean_omsw0,mean_omsw80,ratio (omsw80 / omsw0),log2ratio,node_size
0,32,195,0.000081,2.458133e-06,0.030389,-5.040310,25.5086
1,112,1916,0.000509,5.733487e-04,1.125561,0.170644,10.5251
2,129,1668,0.001074,4.931320e-05,0.045921,-4.444715,23.6760
3,132,442,0.000016,3.342157e-05,2.030210,1.021629,13.1435
4,138,45,0.011806,4.122425e-04,0.034917,-4.839913,24.8920
...,...,...,...,...,...,...,...
1970,18513,1346,0.000000,3.272545e-04,inf,inf,50.0000
1971,18527,1483,0.000000,9.891311e-07,inf,inf,50.0000
1972,18533,673,0.000000,1.133804e-06,inf,inf,50.0000
1973,18548,27,0.000000,3.183553e-05,inf,inf,50.0000


Dummy run with known output:

works as expected with numbers checking out

In [26]:
# list of feature_id, list of fold changes, list of node_size mappings, get fold change data per feature
import pandas as pd
quant_table = pd.DataFrame({"sample_id" : [1,2,3,4,5,6], "feature_1" : [1,1,1,4,5,6], "feature_2" : [0,0,0,4,5,6], "feature_3" : [1,1,1,100,1000,10000]})
treat_table = pd.DataFrame({"sample_id" : [1,2,3,4,5,6], "treatment" : ['PleurotusOMSW0','PleurotusOMSW0','PleurotusOMSW0','PleurotusOMSW80','PleurotusOMSW80','PleurotusOMSW80']})
joined_data = pd.merge(quant_table, treat_table, on='sample_id', how="inner")
joined_data = joined_data.drop('sample_id', axis=1)
joined_data

Unnamed: 0,feature_1,feature_2,feature_3,treatment
0,1,0,1,PleurotusOMSW0
1,1,0,1,PleurotusOMSW0
2,1,0,1,PleurotusOMSW0
3,4,4,100,PleurotusOMSW80
4,5,5,1000,PleurotusOMSW80
5,6,6,10000,PleurotusOMSW80


In [27]:

numeric_data_only = joined_data.drop('treatment', axis=1)
mean_condition_0 = numeric_data_only[joined_data['treatment'] == 'PleurotusOMSW0'].mean()
mean_condition_80 = numeric_data_only[joined_data['treatment'] == 'PleurotusOMSW80'].mean()
meansdf = pd.concat([mean_condition_0, mean_condition_80], axis=1)
meansdf.columns = ["omsw0", "omsw80"]
meansdf['ratio'] = meansdf['omsw80'] / meansdf['omsw0']
meansdf

Unnamed: 0,omsw0,omsw80,ratio
feature_1,1.0,5.0,5.0
feature_2,0.0,5.0,inf
feature_3,1.0,3700.0,3700.0


In [28]:
meansdf['log2ratio'] = np.log2(meansdf['ratio'])
meansdf

Unnamed: 0,omsw0,omsw80,ratio,log2ratio
feature_1,1.0,5.0,5.0,2.321928
feature_2,0.0,5.0,inf,inf
feature_3,1.0,3700.0,3700.0,11.85331


In [29]:
meansdf = meansdf.reset_index().rename(columns={'index': 'feature_id'})
meansdf['node_size'] = [transform_log2_fold_change_to_node_size(val) for val in meansdf['log2ratio']]
meansdf = meansdf.reset_index().rename(columns={'omsw0': 'mean_omsw0', 'omsw80': 'mean_omsw80', 'ratio': 'ratio (omsw80 / omsw0)'})
meansdf


Unnamed: 0,index,feature_id,mean_omsw0,mean_omsw80,ratio (omsw80 / omsw0),log2ratio,node_size
0,0,feature_1,1.0,5.0,5.0,2.321928,17.1444
1,1,feature_2,0.0,5.0,inf,inf,50.0
2,2,feature_3,1.0,3700.0,3700.0,11.85331,46.4717
