# MINES - Results

## MongoDB (Queries)

### Starting Compounds --> Predicted

To analyse the results, it is more practical to build tables of interest. The queries can be prototyped in Compass (the MongoDB GUI) and then adapted here.

In [1]:
from pymongo import MongoClient
import polars as pl

# Polars display settings: print up to 100 rows and do not truncate long string cells.
pl.Config.set_tbl_rows(100)
pl.Config.set_fmt_str_lengths(550)


def _second_item_of_each(array_field, var):
    """Return a $map expression that keeps element 1 of every entry in ``array_field``."""
    # presumably each entry is a [stoichiometry, compound_id] pair - confirm against the schema
    return {
        '$map': {
            'input': array_field,
            'as': var,
            'in': {'$arrayElemAt': ['$$' + var, 1]},
        }
    }


def _compound_lookup(local_field, output_field):
    """Return a $lookup stage joining ``local_field`` against ``compounds._id``."""
    return {
        '$lookup': {
            'from': 'compounds',
            'localField': local_field,
            'foreignField': '_id',
            'as': output_field,
        }
    }


def _id_and_type(details_field):
    """Return a $map expression reducing each joined compound to its id and Type."""
    return {
        '$map': {
            'input': details_field,
            'as': 'detail',
            'in': {'id': '$$detail._id', 'type': '$$detail.Type'},
        }
    }


def _only_type(array_field, var, compound_type):
    """Return a $filter expression keeping entries whose ``type`` equals ``compound_type``."""
    return {
        '$filter': {
            'input': array_field,
            'as': var,
            'cond': {'$eq': ['$$' + var + '.type', compound_type]},
        }
    }


pipeline = [
    # 1. Keep the reaction id and the element-1 values of its reactants / products.
    {
        '$project': {
            '_id': 1,
            'reactants': _second_item_of_each('$Reactants', 'reactant'),
            'products': _second_item_of_each('$Products', 'product'),
        }
    },
    # 2.-3. Join both value lists against the compounds collection.
    _compound_lookup('reactants', 'reactant_details'),
    _compound_lookup('products', 'product_details'),
    # 4. Replace the plain value lists by {id, type} records from the joined documents.
    {
        '$addFields': {
            'reactants': _id_and_type('$reactant_details'),
            'products': _id_and_type('$product_details'),
        }
    },
    # 5. Drop the joined documents; only the {id, type} records are needed from here on.
    {
        '$project': {
            'reactant_details': 0,
            'product_details': 0,
        }
    },
    # 6. Starting compounds among the reactants, predicted compounds among the products.
    {
        '$addFields': {
            'starting_compounds': _only_type('$reactants', 'reactant', 'Starting Compound'),
            'predicted_compounds': _only_type('$products', 'product', 'Predicted'),
        }
    },
    # 7. Final shape: reaction id plus the two lists of compound ids.
    {
        '$project': {
            '_id': 1,
            'starting_compounds': '$starting_compounds.id',
            'predicted_compounds': '$predicted_compounds.id',
        }
    },
]

client = MongoClient('mongodb://localhost:27017/test-mines')
reactions = client['lotus_mines_enzymatic']['reactions']

# `result` is a cursor; the documents are only fetched when it is iterated.
result = reactions.aggregate(pipeline)

In [None]:
%%time

# Collect all results into a list
result_list = list(result)

# Transform the result into a Polars DataFrame
df = pl.DataFrame({
    'reaction_id': [doc['_id'] for doc in result_list],
    'starting_compounds': [doc.get('starting_compounds', []) for doc in result_list],
    'predicted_compounds': [doc.get('predicted_compounds', []) for doc in result_list],
})

# Print the DataFrame
print(df)

In [None]:
# Unnest both list columns, predicted compounds first, so that every row
# holds a single predicted compound and a single starting compound.
df_predicted_rows = df.explode("predicted_compounds")
df_exploded = df_predicted_rows.explode("starting_compounds")

df_exploded

In [None]:
# Per-reaction list lengths, added as extra columns.
n_starting = pl.col("starting_compounds").list.len().alias("counts_starting_compounds")
n_predicted = pl.col("predicted_compounds").list.len().alias("counts_predicted_compounds")

df_starting_compounds = df.with_columns(n_starting)
df_predicted_compounds = df.with_columns(n_predicted)

# Distinct list lengths that occur in each column.
df_starting_compounds_unique = df_starting_compounds.select(
    pl.col("counts_starting_compounds").unique()
)
df_predicted_compounds_unique = df_predicted_compounds.select(
    pl.col("counts_predicted_compounds").unique()
)

# Largest number of starting / predicted compounds found in a single reaction.
(df_starting_compounds_unique.max(), df_predicted_compounds_unique.max())

## Analysis

In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import polars as pl

# Polars display settings for the analysis: short tables (7 rows), untruncated strings.
pl.Config.set_tbl_rows(7)
pl.Config.set_fmt_str_lengths(550)

# NOTE(review): no cell visible in this notebook writes this file; presumably it is an
# export of the reaction table built in the MongoDB section - confirm its provenance.
reactions_parquet = "../data/MINES/reactions_compounds_list.parquet"
df = pl.read_parquet(reactions_parquet)

In [None]:
# Keep only reactions that have at least one starting AND one predicted compound.
has_starting = pl.col("starting_compounds").list.len() > 0
has_predicted = pl.col("predicted_compounds").list.len() > 0
df_filtered = df.filter(has_starting & has_predicted)

# Unnest one column after the other: each reaction contributes one row for every
# (starting compound, predicted compound) combination of its two lists.
df_exploded = (
    df_filtered
    .explode("starting_compounds")
    .explode("predicted_compounds")
)

df_exploded

In [None]:
# Summary statistics of the exploded table.
print(df_exploded.describe())

# Number of distinct non-null values per column.
for label, column in (
    ("reaction_id:", "reaction_id"),
    ("starting compounds: ", "starting_compounds"),
    ("predicted_compounds:", "predicted_compounds"),
):
    print(label, df_exploded.get_column(column).unique().count())



In [None]:
# Directed graph with one edge from each starting compound to every compound
# predicted from it (duplicate pairs collapse into a single edge in a DiGraph).
G = nx.DiGraph()

# Stream the (source, target) pairs straight out of the two columns instead of
# materialising every row as a dict first.
edge_columns = ["starting_compounds", "predicted_compounds"]
G.add_edges_from(df_exploded.select(edge_columns).iter_rows())

# The spring layout starts from random positions; a fixed seed makes the figure
# reproducible across kernel restarts.
LAYOUT_SEED = 42
pos = nx.spring_layout(G, seed=LAYOUT_SEED)

# Draw on an explicit Axes rather than relying on pyplot's implicit current figure.
fig, ax = plt.subplots(figsize=(12, 8))
nx.draw(
    G,
    pos,
    ax=ax,
    with_labels=True,
    node_size=3000,
    node_color="skyblue",
    font_size=10,
    font_color="black",
    font_weight="bold",
    edge_color="gray",
)
ax.set_title('Compound Prediction Network')
plt.show()