# Comparing 2 Moran's Strategies

## Imports

In [None]:
import metab_iso_processing as processing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Get Valid Metabolites

In [None]:
# Processor for the metabolite-level comparison; morans_path names the
# valid-metabolites file.
metab_processor = processing.MIProcessing(
    absolute_data_path="/Users/goldfei/Documents/generated-data",
    relative_data_path="brain-m0-no-log",
    morans_path="valid-metabs-brain-glucose.txt",
)

In [None]:
# Metabolite names treated as "valid" throughout the rest of the notebook.
# NOTE(review): presumably derived from the morans_path file handed to
# metab_processor — confirm in metab_iso_processing.
valid_metabolites = metab_processor.generate_valid_metabs()

## Get Valid Isotopologues

In [None]:
# Processor for the isotopologue-level comparison; same data directories as
# the metabolite processor, but morans_path names the valid-isos file.
iso_processor = processing.MIProcessing(
    absolute_data_path="/Users/goldfei/Documents/generated-data",
    relative_data_path="brain-m0-no-log",
    morans_path="valid-isos-brain-glucose.txt",
)

In [None]:
# Isotopologue names treated as "valid". The method is the same
# generate_valid_metabs() used for metabolites; only the processor differs.
# NOTE(review): presumably derived from the morans_path file handed to
# iso_processor — confirm in metab_iso_processing.
valid_isos = iso_processor.generate_valid_metabs()

## How many good and bad isotopologues are we keeping per metabolite?

In [None]:
# For every valid metabolite, count the valid isotopologues whose name starts
# with "<metabolite> m+".
good_iso_count_kept = {
    metabolite: len(
        [iso for iso in valid_isos if iso.startswith(metabolite + " m+")]
    )
    for metabolite in valid_metabolites
}

In [None]:
# Bare expression: shows the valid-isotopologue counts as the cell output.
good_iso_count_kept

In [None]:
# Full return value of get_all_metabs(); the counting cell below indexes it
# with [0] and iterates that element as isotopologue names.
# NOTE(review): a later cell rebinds `all_isos` to `get_all_metabs()[0]`, so
# the `all_isos[0]` lookup below is only correct if this cell ran last.
all_isos = iso_processor.get_all_metabs()

In [None]:
# Count every isotopologue (valid or not) that belongs to each valid
# metabolite, matching names of the form "<metabolite> m+...".
#
# The names are fetched here instead of being read from `all_isos[0]`:
# `all_isos` is rebound further down the notebook to `get_all_metabs()[0]`,
# after which `all_isos[0]` is a single name. Re-running this cell would then
# iterate that name's characters and silently produce wrong counts.
all_iso_names = iso_processor.get_all_metabs()[0]

all_iso_count_kept = {}

for metabolite in valid_metabolites:
    # Build the prefix once per metabolite rather than once per comparison.
    prefix = metabolite + " m+"
    count = sum(1 for iso in all_iso_names if iso.startswith(prefix))
    all_iso_count_kept[metabolite] = count

In [None]:
# Bare expression: shows the total isotopologue counts as the cell output.
all_iso_count_kept

In [None]:
# "Bad" isotopologues of a kept metabolite = all of its isotopologues minus
# the ones that are themselves valid.
bad_iso_count_kept = {}
for metabolite, total in all_iso_count_kept.items():
    bad_iso_count_kept[metabolite] = total - good_iso_count_kept[metabolite]

In [None]:
# Bare expression: shows the non-valid isotopologue counts as the cell output.
bad_iso_count_kept

## Plotting

In [None]:
# Stacked horizontal bars, one row per valid metabolite: the good count first,
# the bad count stacked to its right, and a "good / bad" annotation at the end.
good_counts = list(good_iso_count_kept.values())
bad_counts = list(bad_iso_count_kept.values())

bar_width = 0.6  # passed as barh's `height`, i.e. the thickness of each row
index = range(len(all_iso_count_kept))

fig, ax = plt.subplots(figsize=(20, 15))

bar1 = ax.barh(
    index,
    good_counts,
    height=bar_width,
    label='Good Isotopologues',
    color='#A6C48A',
)
bar2 = ax.barh(
    index,
    bad_counts,
    height=bar_width,
    left=good_counts,
    label='Bad Isotopologues',
    color='#DD9787',
)

# Place each annotation just past the right edge of its stacked bar.
for i, (good, bad) in enumerate(zip(good_counts, bad_counts)):
    ax.text(good + bad + 0.5, i, f"{good} / {bad}", va='center')

ax.set(
    ylabel='Metabolite',
    xlabel='# of Isotopologues',
    title='Good vs Bad Isotopologues Kept per Metabolite',
)
ax.set_yticks(index)
ax.set_yticklabels(list(all_iso_count_kept))
ax.legend()

plt.tight_layout()
plt.show()

## How many good and bad isotopologues are we throwing out per metabolite?

In [None]:
# Element [0] of get_all_metabs(); the cells below iterate it as the full
# collection of metabolite names.
all_metabs = metab_processor.get_all_metabs()[0]

In [None]:
# Bare expression: shows how many metabolites there are in total.
len(all_metabs)

In [None]:
# Metabolites that are not in the valid list, kept in their original order.
# Membership is tested against a set built once, so valid_metabolites is not
# rescanned for every candidate (names are strings, hence hashable).
valid_metabolite_set = set(valid_metabolites)
invalid_metabs = [
    metab for metab in all_metabs if metab not in valid_metabolite_set
]


In [None]:
# Bare expression: shows how many metabolites were dropped.
len(invalid_metabs)

In [None]:
# Element [0] of get_all_metabs(); the cells below iterate it as the full
# collection of isotopologue names.
# NOTE(review): this rebinds `all_isos`, which an earlier cell assigned to the
# un-indexed return value and then indexed with [0]. Re-running that earlier
# counting cell after this one will give wrong results.
all_isos = iso_processor.get_all_metabs()[0]

In [None]:
# Bare expression: shows how many isotopologues there are in total.
len(all_isos)

In [None]:
# Isotopologues that are not in the valid list, kept in their original order.
# Membership is tested against a set built once, so valid_isos is not
# rescanned for every candidate (names are strings, hence hashable).
valid_iso_set = set(valid_isos)
invalid_isos = [iso for iso in all_isos if iso not in valid_iso_set]

In [None]:
# Bare expression: shows how many isotopologues were dropped.
len(invalid_isos)

In [None]:
# For every dropped metabolite, count the dropped isotopologues whose name
# starts with "<metabolite> m+".
bad_iso_count_dropped = {
    metabolite: len(
        [iso for iso in invalid_isos if iso.startswith(metabolite + " m+")]
    )
    for metabolite in invalid_metabs
}

In [None]:
# For every dropped metabolite, count all of its isotopologues (valid or not),
# again matching on the "<metabolite> m+" name prefix.
all_iso_count_dropped = {
    metabolite: len(
        [iso for iso in all_isos if iso.startswith(metabolite + " m+")]
    )
    for metabolite in invalid_metabs
}

In [None]:
# "Good" isotopologues of a dropped metabolite = all of its isotopologues
# minus the ones that were themselves dropped.
good_iso_count_dropped = {}
for metabolite, total in all_iso_count_dropped.items():
    good_iso_count_dropped[metabolite] = total - bad_iso_count_dropped[metabolite]

In [None]:
# Bare expression: shows, per dropped metabolite, how many valid
# isotopologues are lost with it.
good_iso_count_dropped

In [None]:
# Stacked horizontal bars, one row per dropped metabolite: the good count
# first, the bad count stacked to its right, and a "good / bad" annotation at
# the end. The figure is tall because there is one row per dropped metabolite.
good_counts = list(good_iso_count_dropped.values())
bad_counts = list(bad_iso_count_dropped.values())

bar_width = 0.7  # passed as barh's `height`, i.e. the thickness of each row
index = range(len(all_iso_count_dropped))

fig, ax = plt.subplots(figsize=(20, 40))

bar1 = ax.barh(
    index,
    good_counts,
    height=bar_width,
    label='Good Isotopologues',
    color='#A6C48A',
)
bar2 = ax.barh(
    index,
    bad_counts,
    height=bar_width,
    left=good_counts,
    label='Bad Isotopologues',
    color='#DD9787',
)

# Place each annotation just past the right edge of its stacked bar.
for i, (good, bad) in enumerate(zip(good_counts, bad_counts)):
    ax.text(good + bad + 0.5, i, f"{good} / {bad}", va='center')

ax.set(
    ylabel='Metabolite',
    xlabel='# of Isotopologues',
    title='Good vs Bad Isotopologues Dropped per Metabolite',
)
ax.set_yticks(index)
ax.set_yticklabels(list(all_iso_count_dropped))
ax.legend()

plt.tight_layout()
plt.show()