In [3]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import glob
import os

# Load Files Here: (extension name needed)
input_folder = 'test/'
output_folder = 'test/'
xlsx_files = glob.glob(os.path.join(input_folder, '*.xlsx'))

tolerance = 2E-4 # This value corresponds to 0.2 ppm at m/z 1000

# m/z spacings that link two peaks directly, mapped to the colour of the edge.
# NOTE(review): 49.99681 is presumably the CF2 repeat unit (the sheet read below
# is named 'CF2_series'); the meaning of the other two spacings is not stated
# in this notebook - confirm before relying on it.
SERIES_DELTAS = {49.99681: 'red', 44.02621: 'green', 14.01565: 'silver'}
# A spacing of 99.99362 is bridged through a dummy midpoint node using two
# dotted edges of weight 49.99681 each.
DOUBLE_DELTA = 99.99362
DUMMY_EDGE_WEIGHT = 49.99681


def load_peak_values(xlsx_path, sheet_name='CF2_series'):
    """Read the unique 'exper_mz' values of one workbook.

    Rows whose 'category' is 'E' are dropped before the values are
    de-duplicated (first occurrence wins, original order kept).

    Parameters
    ----------
    xlsx_path : str
        Path of the Excel workbook to read.
    sheet_name : str, optional
        Sheet to read; defaults to 'CF2_series'.

    Returns
    -------
    pandas.Series
        Unique 'exper_mz' values with a fresh 0..n-1 index.
    """
    df = pd.read_excel(xlsx_path, sheet_name=sheet_name)
    kept = df[~df['category'].isin(['E'])]
    # Chained, non-inplace calls: nothing is mutated through a column of `df`.
    return kept['exper_mz'].drop_duplicates().reset_index(drop=True)


def find_series_links(peaks, tolerance):
    """Find every pair of peaks whose spacing matches a known spacing.

    Parameters
    ----------
    peaks : sequence of float
        Peak values, compared pairwise.
    tolerance : float
        Largest accepted absolute deviation between a pair's spacing and a
        target spacing.

    Returns
    -------
    list of (int, int, float)
        ``(i, j, delta)`` with ``i < j``: positions of the two peaks in
        `peaks` and the matched spacing (a key of SERIES_DELTAS or
        DOUBLE_DELTA), in pair order.
    """
    targets = (*SERIES_DELTAS, DOUBLE_DELTA)
    links = []
    for i, first in enumerate(peaks):
        for j in range(i + 1, len(peaks)):
            gap = abs(first - peaks[j])
            links.extend((i, j, delta) for delta in targets
                         if abs(gap - delta) <= tolerance)
    return links


def build_series_graph(peaks, tolerance):
    """Build the peak network for one list of peak values.

    Every peak becomes a node. Pairs matching a SERIES_DELTAS spacing get a
    solid edge. Pairs matching DOUBLE_DELTA get a dummy midpoint node (node
    attribute ``is_dummy=True``) joined to both peaks by dotted red edges,
    unless a real peak already lies within `tolerance` of that midpoint.
    Nodes left without any edge are removed.

    Parameters
    ----------
    peaks : list of float
        Unique peak values.
    tolerance : float
        Matching tolerance, see find_series_links.

    Returns
    -------
    networkx.Graph
    """
    G = nx.Graph()
    G.add_nodes_from(peaks)

    for i, j, delta in find_series_links(peaks, tolerance):
        if delta != DOUBLE_DELTA:
            G.add_edge(peaks[i], peaks[j], weight=delta, color=SERIES_DELTAS[delta])
            continue
        dummy_node = round((peaks[i] + peaks[j]) / 2, 5)
        # A real peak at the midpoint already carries the two direct edges.
        if any(abs(dummy_node - peak) <= tolerance for peak in peaks):
            continue
        G.add_node(dummy_node, is_dummy=True)
        G.add_edge(peaks[i], dummy_node, weight=DUMMY_EDGE_WEIGHT, color='red', style='dotted')
        G.add_edge(dummy_node, peaks[j], weight=DUMMY_EDGE_WEIGHT, color='red', style='dotted')

    # Materialise the isolates first: the graph must not change while iterated.
    G.remove_nodes_from(list(nx.isolates(G)))
    return G


def draw_series_graph(G, output_file_path):
    """Draw `G` with a graphviz 'neato' layout and save it as an image.

    Parameters
    ----------
    G : networkx.Graph
        Non-empty graph as returned by build_series_graph.
    output_file_path : str
        Destination of the saved figure.
    """
    pos = nx.nx_agraph.graphviz_layout(G, prog='neato')
    dummy_nodes = [n for n, is_dummy in G.nodes(data='is_dummy') if is_dummy]
    real_nodes = [n for n, is_dummy in G.nodes(data='is_dummy') if not is_dummy]

    # Edges sharing colour and line style are drawn with a single call.
    edges_by_look = {}
    for u, v, attrs in G.edges(data=True):
        look = (attrs['color'], attrs.get('style', 'solid'))
        edges_by_look.setdefault(look, []).append((u, v))

    fig, ax = plt.subplots()
    try:
        for (color, style), edgelist in edges_by_look.items():
            nx.draw_networkx_edges(G, pos, edgelist=edgelist, width=2, alpha=0.8,
                                   edge_color=color, style=style, ax=ax)

        nx.draw_networkx_labels(G, pos, font_size=4, font_color='black',
                                font_family='arial', ax=ax)

        # Real nodes are shaded by their own value on the Blues colormap.
        nx.draw_networkx_nodes(G, pos, nodelist=real_nodes, node_size=15,
                               node_color=real_nodes, cmap=plt.cm.Blues,
                               vmin=min(real_nodes), vmax=max(real_nodes),
                               alpha=1, ax=ax)
        # Dummy nodes use an empty marker, so only their label and edges show.
        nx.draw_networkx_nodes(G, pos, nodelist=dummy_nodes, alpha=1,
                               node_shape='', ax=ax)

        ax.set_axis_off()
        fig.savefig(output_file_path, dpi=600, bbox_inches='tight')
    finally:
        # Close even when drawing or saving fails, so figures do not pile up.
        plt.close(fig)


# Loop through each file in the folder:
for input_file_path in xlsx_files:

    peak_values = load_peak_values(input_file_path)
    print(len(peak_values))

    G = build_series_graph(peak_values.tolist(), tolerance)
    dummy_nodes = [n for n, is_dummy in G.nodes(data='is_dummy') if is_dummy]

    print(f'{input_file_path}:')
    print(G)
    print(f'including {len(dummy_nodes)} dummy nodes.')
    print(nx.number_connected_components(G), 'connected components')

    # Optional: remove the two most populated components before plotting
    # components = sorted(nx.connected_components(G), key=len, reverse=True)
    # G.remove_nodes_from(components[0])
    # G.remove_nodes_from(components[1])

    if G.number_of_nodes() == 0:
        # No pair of peaks matched any spacing: nothing to draw for this file.
        print('no linked peaks found; skipping plot.')
        continue

    # Save the plot
    input_file_name = os.path.splitext(os.path.basename(input_file_path))[0]
    output_file_name = input_file_name.replace('_series', '') + '_network.png'
    output_file_path = os.path.join(output_folder, output_file_name)

    os.makedirs(output_folder, exist_ok=True)
    draw_series_graph(G, output_file_path)

60
test/AFFF_PetersonBucket_3sigma+_recal_rev_series.xlsx:
Graph with 69 nodes and 63 edges
including 9 dummy nodes.
11 connected components


The analysis cell above could also be wrapped in a reusable function, as sketched below:

In [4]:
# Sketch only: this cell holds a single string literal, so nothing inside it is
# executed; the notebook simply echoes the text as the cell's output. It outlines
# how the cell above could be wrapped in plot_network(input_folder,
# output_folder, extension) and shows an example call.
"""
def plot_network(input_folder, output_folder, extension):
    xlsx_files = glob.glob(os.path.join(input_folder, f'*.{extension}'))
    # ...insert the code above...
    
    
Example:
input_folder = 'negative_series_0p2ppm/'
output_folder = 'negative_plots_CF2series_0p2ppm/'
extension = 'xlsx'
plot_network(input_folder, output_folder, extension)

"""

"\ndef plot_network(input_folder, output_folder, extension):\n    xlsx_files = glob.glob(os.path.join(input_folder, f'*.{extension}'))\n    # ...insert the code above...\n    \n    \nExample:\ninput_folder = 'negative_series_0p2ppm/'\noutput_folder = 'negative_plots_CF2series_0p2ppm/'\nextension = 'xlsx'\nplot_network(input_folder, output_folder, extension)\n\n"