In [382]:
%matplotlib inline
import numpy as np
import networkx as nx
import pandas as pd
import re
from matplotlib import pyplot as plt
from matplotlib import cm
from sklearn.preprocessing import MinMaxScaler
from bokeh.io import output_file, show
from bokeh.plotting import figure, from_networkx

In [4]:
!ls ../../data/external/kida.uva.2014/

gas_species.dat  kida.uva.2014.bib  kida.uva.2014.dat  README.rtf


In [63]:
!head ../../data/external/kida.uva.2014/kida.uva.2014.dat

!    Reactants                    Products                                                  A          B          C      xxxxxxxxxxxxxxxxxxxxx ITYPE Tmin   Tmax formula ID xxxxx
N2         CR                     N          N                                             5.000e+00  0.000e+00  0.000e+00 1.25e+00 0.00e+00 logn  1  -9999   9999  1     1 1  1
H          CR                     H+         e-                                            4.600e-01  0.000e+00  0.000e+00 2.00e+00 0.00e+00 logn  1  -9999   9999  1     2 1  1
He         CR                     He+        e-                                            5.000e-01  0.000e+00  0.000e+00 2.00e+00 0.00e+00 logn  1  -9999   9999  1     3 1  1
N          CR                     N+         e-                                            2.100e+00  0.000e+00  0.000e+00 2.00e+00 0.00e+00 logn  1  -9999   9999  1     4 1  1
O          CR                     O+         e-                                            2.800e+00  0.000e+00  0

In [548]:
def parse_kida_format(path: str):
    data = list()
    with open(path) as read_file:
        for line in read_file.readlines():
            if not line.startswith("!"):
                split_line = line.split()
                react1, react2 = split_line[:2]
                # this regex is complicated because sometimes people
                # provide inconsistent formatting on the exponent
                values = re.findall(r"\d.\d{3,4}[eE][\+\-]\d{1,2}", line)
                alpha, beta, gamma = [float(value) for value in values]
                # grab the second integer specification, which is the reaction type
                integers = re.findall(r"\s+\d{1,2}\s+", line)
                try:
                    react_index = int(integers[1])
                except IndexError:
                    print(integers)
                data.append([react1, react2, alpha, beta, gamma, react_index])
    return data


def kida_to_dataframe(data, ignore=["CRP", "CR", "Photon", "GRAIN0", "GRAIN-", "XH"], react_class=[3, 4, 5]) -> pd.DataFrame:
    df = pd.DataFrame(data, columns=["A", "B", "alpha", "beta", "gamma", "react_class"])
    # ignore reactions with reactants that are in the ignore list, and ensure that
    # we have a mapping for the reaction class
    filtered_df = df.loc[
        (~df["B"].str.contains(rf"\b(?:{'|'.join(ignore)})\b")) & (~df["A"].str.contains(rf"\b(?:{'|'.join(ignore)})\b")) & (df["react_class"].isin(react_class))
    ]
    filtered_df.reset_index(drop=True, inplace=True)
    return filtered_df

In [494]:
data = parse_kida_format("../../data/external/kida.uva.2014/kida.uva.2014.dat")

In [103]:
def arrhenius_rate(T: float, alpha: float, beta: float, gamma: float) -> float:
    return alpha * (T / 300.)**beta * np.exp(-gamma/T)

def ionpol1(T: float, alpha: float, beta: float, gamma: float) -> float:
    return alpha * beta * (0.62 + 0.4767 * gamma * (300. / T)**0.5)

def ionpol2(T: float, alpha: float, beta: float, gamma: float) -> float:
    return alpha * beta * (1. + 0.0967 * gamma * (300 / T)**0.5 + (gamma**2. / 10.526) * (300. / T))

In [210]:
def compute_rate(react_class: int, T: float, alpha: float, beta: float, gamma: float) -> float:
    if react_class == 3:
        func = arrhenius_rate
    elif react_class == 4:
        func = ionpol1
    elif react_class == 5:
        func = ionpol2
    else:
        raise NotImplementedError("Reaction class not recgonized.")
    return func(T, alpha, beta, gamma)

In [80]:
df = pd.DataFrame(data, columns=["A", "B", "alpha", "beta", "gamma", "react_class"])

In [258]:
ignore = ["CRP", "CR", "Photon", "GRAIN0", "GRAIN-", "XH"]

In [352]:
filtered_df = df.loc[(~df["B"].str.contains(rf"\b(?:{'|'.join(ignore)})\b")) & (~df["A"].str.contains(rf"\b(?:{'|'.join(ignore)})\b")) & (df["react_class"].isin([3, 4, 5]))]

In [353]:
filtered_df.reset_index(drop=True, inplace=True)

In [356]:
rates = list()
for index, row in filtered_df.iterrows():
    rates.append(np.log10(compute_rate(row["react_class"], 5., row["alpha"], row["beta"], row["gamma"])))

  This is separate from the ipykernel package so we can avoid doing imports until


In [357]:
filtered_df["Rate"] = rates

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [358]:
scaler = MinMaxScaler()

In [359]:
filtered_df["Rate"].replace(-np.inf, 0., inplace=True)
filtered_df["Rate"].replace(np.inf, 0., inplace=True)
bottom_10 = np.percentile(filtered_df["Rate"], [5.])[0]
filtered_df.loc[filtered_df["Rate"] < bottom_10, "Rate"] = 0.

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [360]:
filtered_df["NormRate"] = scaler.fit_transform(filtered_df["Rate"].values[:,None])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [506]:
filtered_df.loc[:,"react_colors"] = filtered_df["react_class"].map(
    {3: "#e41a1c", 4: "#377eb8", 5: "#4daf4a", 6: "#984ea3"}
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [507]:
def generate_kida_graph(dataframe):
    unique_molecules = list(dataframe["A"].unique()) + list(dataframe["B"].unique())
    unique_molecules = list(set(unique_molecules))
    # networkx Graph object manipulations
    graph = nx.Graph()
    graph.add_nodes_from(unique_molecules)
    # loop over each reaction, adding data about the reaction as edge attributes
    for index, row in dataframe.iterrows():
        graph.add_edge(row["A"], row["B"], weight=row["NormRate"], A=row["A"], B=row["B"], rate=row["Rate"], react_color=row["react_colors"])
    return graph

In [508]:
graph = generate_kida_graph(filtered_df)

In [509]:
weights = nx.get_edge_attributes(graph, 'weight')

In [510]:
# matplotlib version for static plots
# fig, ax = plt.subplots(figsize=(10,10))

# pos = nx.spring_layout(graph, scale=4., seed=42, center=(0., 0.), iterations=100)
# nx.draw_networkx_nodes(graph, pos, ax=ax,
#                        nodelist=graph.nodes(),
#                        node_size=10.,
#                        node_color='black',
#                        alpha=0.7)
# nx.draw_networkx_edges(graph,pos, ax=ax,
#                        width = 1.,
#                        edgelist = weights.keys(),
#                        edge_color=cm.viridis_r([float(value) for value in weights.values()]),
#                        alpha=0.2)
# nx.draw_networkx_labels(G, pos=pos,
#                         labels=dict(zip(nodelist,nodelist)),
#                         font_color='white')

In [517]:
from bokeh.io import output_file, show
from bokeh.models import Ellipse, Circle, GraphRenderer, StaticLayoutProvider, MultiLine, HoverTool, BoxZoomTool, ResetTool, EdgesAndLinkedNodes, BoxSelectTool, TapTool
from bokeh.palettes import Spectral4
from bokeh.plotting import figure

In [524]:
def bokehify_graph(nx_graph, title: str, path: str, show_plot: bool = True):
    plot = figure(title=title, x_range=(-1.1,1.1), y_range=(-1.1,1.1), plot_width=1000, plot_height=700)

    node_hover_tool = HoverTool(tooltips=[("A", "@A"), ("B", "@B"), ("Log10 rate", "@rate")])
    plot.add_tools(node_hover_tool, TapTool(), BoxZoomTool(), ResetTool(), BoxSelectTool())

    graph_renderer = from_networkx(nx_graph, nx.spring_layout, scale=2, center=(0,0), seed=42)

    graph_renderer.node_renderer.glyph = Circle(size=5, fill_color=Spectral4[0])
    graph_renderer.node_renderer.selection_glyph = Circle(size=15., fill_color=Spectral4[2])
    graph_renderer.edge_renderer.glyph = MultiLine(line_color="react_color", line_alpha=0.3, line_width=0.5)
    graph_renderer.edge_renderer.hover_glyph = MultiLine(line_color='#abdda4', line_width=3, line_alpha=0.6)
    graph_renderer.edge_renderer.selection_glyph = MultiLine(line_color=Spectral4[2], line_width=3, line_alpha=0.8)

    graph_renderer.inspection_policy = EdgesAndLinkedNodes()
    graph_renderer.selection_policy = NodesAndLinkedEdges()
    plot.renderers.append(graph_renderer)

    output_file(f"{path}.html")
    if show_plot:
        show(plot)
    return plot

In [525]:
_ = bokehify_graph(graph, "KIDA Ion and neutral reactions at 5 K", "kida_reactions", True)

In [526]:
cyanides = filtered_df.loc[(filtered_df["A"].str.contains("CN")) | (filtered_df["B"].str.contains("CN"))]

In [527]:
cyanide_graph = generate_kida_graph(cyanides)

In [528]:
_ = bokehify_graph(cyanide_graph, "KIDA cyanide-bearing reactions at 5 K", "kida_cyanides")

## GOTHAM BATMAN network

In [549]:
batman = parse_kida_format("../../data/external/kida.uva.2014/thebatman.dat")

In [554]:
batman_df = kida_to_dataframe(batman, ignore=[" "])

In [555]:
batman_df

Unnamed: 0,A,B,alpha,beta,gamma,react_class
0,CH2OH,CRP,3000.0,0.0,0.0,3
1,H+,HNCO,1.48e-08,0.5,0.0,3
2,He+,HNCO,7.66e-09,0.5,0.0,3
3,C+,CH2OH,7.5e-10,0.5,0.0,3
4,C+,CH2OH,7.5e-10,0.5,0.0,3
5,C+,CH2OH,7.5e-10,0.5,0.0,3
6,H3+,CH2OH,4e-09,0.5,0.0,3
7,HCO+,CH2OH,1.7e-09,0.5,0.0,3
8,He+,CH2OH,1.7e-09,0.5,0.0,3
9,He+,CH2OH,1.7e-09,0.5,0.0,3
