In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os

In [19]:
# Rank every cell of each heatmap correlation matrix by absolute value and
# write one sorted text list per input file into a sub-directory of input_dir.
input_dir = '../../../out_results/out_correlation/correlation_bio_env'
output_subdir = 'sorted_corr_list'
output_dir = os.path.join(input_dir, output_subdir)


def parse_heatmap_filename(filename):
    """Extract ``(matrix_type, subsample)`` from a heatmap file name.

    The name is split on underscores after removing the extension; the
    matrix type is field 4 and the subsample is everything from field 5
    onwards, e.g. ``corr_bio_env_heatmap_M0_all.tsv`` -> ``('M0', 'all')``.

    Parameters
    ----------
    filename : str
        Base name of the file (no directory component).

    Returns
    -------
    tuple of (str, str) or None
        ``(matrix_type, subsample)``, or ``None`` when the name has fewer
        than six underscore-separated fields.
    """
    stem, _extension = os.path.splitext(filename)
    # maxsplit=5 keeps a subsample that itself contains '_' in one piece
    # instead of truncating it at the first underscore.
    parts = stem.split('_', 5)
    if len(parts) < 6:
        return None
    return parts[4], parts[5]


def rank_abs_correlations(df):
    """List every cell of ``df`` ordered by decreasing absolute value.

    Parameters
    ----------
    df : pandas.DataFrame
        Matrix of numeric values.

    Returns
    -------
    list of (value, row_name, col_name)
        ``value`` is ``abs()`` of the cell. Cells with equal magnitude keep
        column-major order (column by column, rows top to bottom); NaN cells
        are placed at the end.
    """
    # Positional access (iat) always yields a scalar, even if the matrix
    # happens to contain duplicated row or column labels.
    cells = [
        (abs(df.iat[row_pos, col_pos]), row_name, col_name)
        for col_pos, col_name in enumerate(df.columns)
        for row_pos, row_name in enumerate(df.index)
    ]

    def magnitude_key(cell):
        magnitude = cell[0]
        # NaN compares False against everything, which would make the sort
        # order undefined; give it the lowest rank explicitly.
        return (0, 0.0) if pd.isna(magnitude) else (1, magnitude)

    # sorted() is stable, also with reverse=True, so ties keep their order.
    return sorted(cells, key=magnitude_key, reverse=True)


if not os.path.isdir(input_dir):
    # output_dir lives inside input_dir, so calling makedirs first would
    # silently create a mistyped input path and then process nothing.
    print(f"Input directory not found, nothing to do: {input_dir}")
else:
    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir order is arbitrary; sort it so runs are reproducible
    for filename in sorted(os.listdir(input_dir)):
        if not (filename.endswith(".tsv") and 'heatmap' in filename):
            continue

        parsed_name = parse_heatmap_filename(filename)
        if parsed_name is None:
            print(f"Skipping {filename}: cannot extract matrix type and subsample from its name")
            continue
        matrix_type, subsample = parsed_name

        filepath = os.path.join(input_dir, filename)
        df = pd.read_csv(filepath, sep='\t', index_col=0)  # Read the TSV file into a DataFrame

        cells_sorted = rank_abs_correlations(df)

        # Define output file name based on the input file name
        output_filename = f'sorted_corr_list_{matrix_type}_{subsample}.txt'
        output_file = os.path.join(output_dir, output_filename)

        # Write the results to a file; explicit UTF-8 so labels with
        # non-ASCII characters do not depend on the platform default encoding.
        with open(output_file, 'w', encoding='utf-8') as f:
            for value, row_name, col_name in cells_sorted:
                f.write(f'({row_name} (TF), {col_name} (Env)): {value}\n')

        print(f"Results written to {output_file}")

Results written to ../../../out_results/out_correlation/correlation_bio_env/sorted_corr_list/sorted_corr_list_M0_epi.txt
Results written to ../../../out_results/out_correlation/correlation_bio_env/sorted_corr_list/sorted_corr_list_M0_srf-nonpolar.txt
Results written to ../../../out_results/out_correlation/correlation_bio_env/sorted_corr_list/sorted_corr_list_M1_polar.txt
Results written to ../../../out_results/out_correlation/correlation_bio_env/sorted_corr_list/sorted_corr_list_MX_polar.txt
Results written to ../../../out_results/out_correlation/correlation_bio_env/sorted_corr_list/sorted_corr_list_salazar_epi.txt
Results written to ../../../out_results/out_correlation/correlation_bio_env/sorted_corr_list/sorted_corr_list_salazar_epi-nonpolar.txt
Results written to ../../../out_results/out_correlation/correlation_bio_env/sorted_corr_list/sorted_corr_list_salazar_polar.txt
Results written to ../../../out_results/out_correlation/correlation_bio_env/sorted_corr_list/sorted_corr_list_sala

In [20]:
# Preview one heatmap matrix; the first TSV column is used as the row index.
heatmap_m0_all_path = '../../../out_results/out_correlation/correlation_bio_env/corr_bio_env_heatmap_M0_all.tsv'
pd.read_csv(heatmap_m0_all_path, sep='\t', index_col=0)

Unnamed: 0,Latitude,Longitude,Temperature,Oxygen,ChlorophyllA,Salinity,Carbon.total,NO2,NO2NO3,NO3,...,Residence.time,PAR.PC,Gradient.Surface.temp(SST),Fluorescence,Density,Depth.Min.O2,Depth.Max.O2,Mean Flux at 150m,FluxAttenuation,NPP 8d VGPM (mgC/m2/day)
AcnR,0.034122,-0.023914,-0.430914,-0.134447,0.241701,-0.352423,0.385227,0.221081,0.458572,0.502100,...,-0.072368,0.100429,0.070830,0.572354,0.225118,-0.011296,-0.207779,0.244975,-0.308712,0.322417
AgaR,-0.365365,-0.086127,0.771257,-0.347627,-0.433789,0.471448,-0.540209,-0.053155,-0.260748,-0.330689,...,0.115466,-0.445508,-0.254267,-0.698098,-0.453695,0.079570,0.181728,-0.091527,0.162605,-0.320469
AmtR,0.160961,-0.033398,-0.152801,-0.084618,-0.069572,-0.031987,0.179317,0.020637,-0.023093,0.185137,...,0.035374,-0.051476,0.189127,0.115241,0.157843,0.024401,0.125329,-0.088949,-0.091787,-0.010186
AraR,-0.255208,-0.127140,0.428420,-0.025130,-0.219997,0.349915,-0.371094,0.021562,-0.255190,-0.297795,...,0.133459,-0.207991,-0.114797,-0.510183,-0.202541,0.001628,0.119500,-0.120055,0.332696,-0.214700
ArgR,-0.178004,-0.060164,0.475629,0.105520,-0.201918,0.248321,-0.359486,-0.040225,-0.272238,-0.475514,...,0.087704,-0.089233,-0.104280,-0.579869,-0.352139,0.030287,0.168704,-0.238013,0.331788,-0.305335
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TyrR,0.231666,0.230077,-0.139690,0.454185,0.259015,-0.106410,0.274984,-0.095395,-0.190345,-0.406173,...,-0.097773,0.111185,0.228037,-0.041489,0.002000,-0.232290,0.000723,-0.041233,0.234556,-0.049322
VanR,-0.107556,-0.052757,0.356993,-0.082712,-0.155339,0.172754,-0.198775,-0.095325,-0.294198,-0.231826,...,0.109175,-0.129405,-0.028092,-0.249239,-0.344146,-0.104110,0.059906,0.232023,0.079925,0.118157
XylR,-0.129225,-0.090897,0.392707,0.126393,-0.028622,0.187991,-0.211028,0.015480,-0.234286,-0.438261,...,0.083516,0.108551,-0.058458,-0.449970,-0.302347,0.020126,0.174404,-0.162883,0.217379,-0.139940
ZntR,0.201195,-0.030478,-0.449140,0.055706,0.072848,-0.311462,0.394331,0.043606,0.240309,0.309035,...,-0.044687,-0.131380,0.141895,0.467994,0.262436,0.081152,-0.122273,-0.024696,-0.032638,0.194736


In [18]:
# Sanity check of the filename convention: after splitting on underscores,
# field 4 is the matrix type and field 5 is the subsample plus extension.
filename = 'corr_bio_env_heatmap_M0_all.tsv'
parts = filename.split('_')
matrix_type, subsample_field = parts[4:6]
subsample = subsample_field.replace('.tsv', '')  # drop the file extension

print(matrix_type, subsample)

M0 all
