In [16]:
import pandas as pd
import numpy as np
import os

import M2module
import M2dataproccess

from sklearn.exceptions import ConvergenceWarning
import warnings
# Silence scikit-learn's ConvergenceWarning for the cells that follow.
# The filter is installed directly (not inside `warnings.catch_warnings()`):
# that context manager restores the previous filter list when its block exits,
# so a filter added inside an otherwise empty `with` block suppresses nothing.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [20]:
# Make sure the output tree exists: twinsuk/, twinsuk/data/ and twinsuk/results/.
# `exist_ok=True` keeps this cell safe to re-run. The earlier approach called
# os.rmdir() on an existing folder, which raises OSError whenever the folder is
# not empty (i.e. as soon as the sub-folders below have been created once).
# Existing files are left in place; later cells overwrite the ones they write.
folder_name = 'twinsuk'

# Folder for the processed data files.
# os.makedirs() returns None, so keep the path itself rather than the call's result.
data_folder = os.path.join(folder_name, 'data')
os.makedirs(data_folder, exist_ok=True)

# Folder for the result files.
results_folder = os.path.join(folder_name, 'results')
os.makedirs(results_folder, exist_ok=True)

In [21]:
# Ask interactively where the microbiome table lives, then echo the answer
# so the chosen path is recorded in the cell output.
microbiome_prompt = "Enter the file name and path for the microbiome file: "
file_name = input(microbiome_prompt)
print(file_name)

biocrust/microbiome.csv


In [28]:

# Load the raw microbiome table from the path entered in the prompt cell.
mic, mic_header = M2dataproccess.parse_raw_data(file_name)

# Transpose the table (presumably so samples become rows -- TODO confirm in M2dataproccess).
mic_t = M2dataproccess.transpose_csv(mic)
# Drop rare microbes. The original note says "present in less than 20% of the
# samples"; the threshold itself lives inside the helper -- verify there.
microbiome = M2dataproccess.drop_rare_features(mic_t)

# Coerce every column to numeric (unparseable cells become NaN) and fill NaN with 0.
microbiome_data = microbiome.apply(pd.to_numeric, errors='coerce').fillna(0)
# Save the cleaned numeric table. This used to write `microbiome` (the
# pre-coercion frame), so the file named microbiome_data.csv did not contain
# the data that every later step actually consumes.
microbiome_data.to_csv('twinsuk/data/microbiome_data.csv', index=True)

# Build and save one table per (composition, scaling) combination.
# No CLR, no power transform.
raw = M2dataproccess.make_compositional(microbiome_data, 'none', 'none')
raw.to_csv('twinsuk/data/raw.csv', index=True)
# CLR only.
microbiome_clr = M2dataproccess.make_compositional(microbiome_data, 'clr', 'none')
microbiome_clr.to_csv('twinsuk/data/microbiome_clr.csv', index=True)
# Power transform only.
microbiome_pt = M2dataproccess.make_compositional(microbiome_data, 'none', 'power')
microbiome_pt.to_csv('twinsuk/data/microbiome_pt.csv', index=True)
# CLR combined with the power transform.
microbiome_pt_clr = M2dataproccess.make_compositional(microbiome_data, 'clr', 'power')
microbiome_pt_clr.to_csv('twinsuk/data/microbiome_pt_clr.csv', index=True)

Number of microbes before dropping: 466
Number of microbes after dropping: 424
[[0.14218385 0.09773019 0.         ... 0.         0.00640977 0.        ]
 [0.04999016 0.00093486 0.01136587 ... 0.         0.00998819 0.        ]
 [0.06691807 0.         0.01193007 ... 0.00767912 0.         0.00671923]
 ...
 [0.09543282 0.17607058 0.         ... 0.         0.00133248 0.        ]
 [0.07272472 0.16518967 0.         ... 0.         0.00117222 0.00178178]
 [0.0922465  0.20711628 0.         ... 0.         0.00213831 0.0029081 ]]
[[0.14218385 0.09773019 0.         ... 0.         0.00640977 0.        ]
 [0.04999016 0.00093486 0.01136587 ... 0.         0.00998819 0.        ]
 [0.06691807 0.         0.01193007 ... 0.00767912 0.         0.00671923]
 ...
 [0.09543282 0.17607058 0.         ... 0.         0.00133248 0.        ]
 [0.07272472 0.16518967 0.         ... 0.         0.00117222 0.00178178]
 [0.0922465  0.20711628 0.         ... 0.         0.00213831 0.0029081 ]]
[[0.14218385 0.09773019 0.       

In [23]:
# Prompt user for the metabolome file name and path.
# The prompt text names the metabolome file explicitly so it cannot be confused
# with the microbiome prompt shown earlier in the notebook.
file_name_2 = input("Enter the file name and path for the metabolome file: ")
print(file_name_2)

biocrust/metabolome.csv


In [29]:
# Read the raw metabolome table from the path entered in the prompt cell.
met, met_header = M2dataproccess.parse_raw_data(file_name_2)

# Transpose it and keep a copy of the transposed (unfiltered) table on disk.
met_t = M2dataproccess.transpose_csv(met)
metabolome_csv = 'twinsuk/data/metabolome_data.csv'
met_t.to_csv(metabolome_csv, index=True)

# Filter rare metabolites with the same helper that is used for the microbes
# (its progress messages therefore still say "microbes").
metabolome = M2dataproccess.drop_rare_features(met_t)

# Transform the filtered table and save the result. The helper's printed
# output reports a log2 transformation followed by kNN imputation.
metabolome_data_log = M2dataproccess.make_metabolomics(metabolome)
metabolome_log_csv = 'twinsuk/data/metabolome_data_log.csv'
metabolome_data_log.to_csv(metabolome_log_csv, index=True)

Number of microbes before dropping: 85
Number of microbes after dropping: 85
After log2 transformation: (19, 85)
After kNN imputation: (19, 85)
After creating DataFrame from imputed data: (19, 85)


  result = func(self.values, **kwargs)


In [25]:
# The four microbiome versions to analyse, keyed by the label that ends up in
# the result file names.
# NOTE(review): 'raw' points at `microbiome_data` (the cleaned numeric table),
# not at the `raw` frame produced by make_compositional(..., 'none', 'none')
# -- confirm this is intended.
microbiome_versions = {
    'raw': microbiome_data,          # cleaned numeric table, no transformation
    'clr': microbiome_clr,           # CLR-transformed
    'pt': microbiome_pt,             # power-transformed
    'pt_clr': microbiome_pt_clr,     # CLR combined with the power transform
}

# Run the neighbourhood selection once per microbiome version against the
# transformed metabolome. The loop variable is deliberately NOT called
# `microbiome`, so the filtered DataFrame of that name built in the
# processing cell is not silently overwritten.
for version_name, microbiome_version in microbiome_versions.items():

    # Shapes before alignment.
    print(f"Microbiome shape: {microbiome_version.shape}")
    print(f"Metabolome shape: {metabolome_data_log.shape}")

    # Align the two tables (presumably on shared samples -- TODO confirm in M2dataproccess).
    microbe, metabolite = M2dataproccess.align_microbiome_metabolite(microbiome_version, metabolome_data_log)
    # Shapes after alignment.
    print(f"Microbiome shape: {microbe.shape}")
    print(f"Metabolome shape: {metabolite.shape}")

    # Run the MB neighbourhood selection in both directions; ConvergenceWarning
    # is suppressed only for the duration of these two calls.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        microbiome_neighborhoods = M2module.mb_neighborhood_selection(microbe, metabolite)
        metabolites_neighborhoods = M2module.mb_neighborhood_selection(metabolite, microbe)

    # Label rows and columns with the feature names for readability.
    microbiome_neighborhoods.index = microbe.columns
    microbiome_neighborhoods.columns = metabolite.columns

    metabolites_neighborhoods.index = metabolite.columns
    metabolites_neighborhoods.columns = microbe.columns

    # Combine both directions: "min" gives the AND rule, "max" the OR rule.
    bidirectional_relationships_and = M2module.find_bipartite_neighborhood(microbiome_neighborhoods, metabolites_neighborhoods, "min")
    bidirectional_relationships_or = M2module.find_bipartite_neighborhood(microbiome_neighborhoods, metabolites_neighborhoods, "max")

    # One AND file and one OR file per microbiome version.
    and_csv_path = f'twinsuk/results/micromet2_{version_name}_min.csv'
    or_csv_path = f'twinsuk/results/micromet2_{version_name}_max.csv'

    bidirectional_relationships_and.to_csv(and_csv_path)
    bidirectional_relationships_or.to_csv(or_csv_path)

    print(f"Saved AND results to {and_csv_path}")
    print(f"Saved OR results to {or_csv_path}")

Microbiome shape: (19, 424)
Metabolome shape: (19, 85)
Microbiome shape: (19, 424)
Metabolome shape: (19, 85)
Saved AND results to twinsuk/results/micromet2_raw_min.csv
Saved OR results to twinsuk/results/micromet2_raw_max.csv
Microbiome shape: (19, 424)
Metabolome shape: (19, 85)
Microbiome shape: (19, 424)
Metabolome shape: (19, 85)
Saved AND results to twinsuk/results/micromet2_clr_min.csv
Saved OR results to twinsuk/results/micromet2_clr_max.csv
Microbiome shape: (19, 424)
Metabolome shape: (19, 85)
Microbiome shape: (19, 424)
Metabolome shape: (19, 85)
Saved AND results to twinsuk/results/micromet2_pt_min.csv
Saved OR results to twinsuk/results/micromet2_pt_max.csv
Microbiome shape: (19, 424)
Metabolome shape: (19, 85)
Microbiome shape: (19, 424)
Metabolome shape: (19, 85)
Saved AND results to twinsuk/results/micromet2_pt_clr_min.csv
Saved OR results to twinsuk/results/micromet2_pt_clr_max.csv
