Regular Graph Outputs

In [26]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)
    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "regular_graphs"

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):
        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)

        file_path = os.path.join(input_folder, fname)

        with open(file_path, "r") as f:
            lines = f.readlines()
        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])

        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
            if con == 0: 
                continue
            if amplification > 500: 
                continue
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
        except KeyError:
            print(graph_name)
            continue

        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans,
            "degree_assortativity": ass 
        })

    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)


process_files("simulation_results/regular_graphs_output", "params.csv", "results/regular_graphs")


d61_6
d61_6
d61_6
d61_6
d61_6
d61_6
d61_6
d61_6
d61_6
d61_6


Regular Graphs 4 

In [27]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)
    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "regular_graphs_4"

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        _, frac = graph_name.rsplit("_", 1)
        frac = frac.replace("f", "")

        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])
        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue
        
        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans,
            "frac_triangle": frac,
            "degree_assortativity": ass 
        })

    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)


process_files("simulation_results/regular_graphs_4_output", "params.csv", "results/regular_graphs_4")

d4_f0.1
d4_f0.16
d4_f0.14
d4_f0.14
d4_f0.21
d4_f0.1
d4_f0.16
d4_f0.1
d4_f0.21
d4_f0.21
d4_f0.16
d4_f0.1
d4_f0.14
d4_f0.21
d4_f0.14
d4_f0.16
d4_f0.1
d4_f0.14
d4_f0.16
d4_f0.21
d4_f0.1
d4_f0.1
d4_f0.16
d4_f0.16
d4_f0.14
d4_f0.14
d4_f0.21
d4_f0.14
d4_f0.16
d4_f0.21
d4_f0.21
d4_f0.1
d4_f0.14
d4_f0.21
d4_f0.21
d4_f0.14
d4_f0.1
d4_f0.1
d4_f0.16
d4_f0.16


Regular Graphs 10

In [28]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)
    
    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "regular_graphs_10"
    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        _, frac = graph_name.rsplit("_", 1)
        frac = frac.replace("f", "")

        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])

        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue
        
        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans,
            "frac_triangle": frac,
            "degree_assortativity": ass 
        })

    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)


process_files("simulation_results/regular_graphs_10_output", "params.csv", "results/regular_graphs_10")

Geometric Random 1 

In [29]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)

    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "random_geometric_1"

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        _ , radius, _ = graph_name.rsplit("_", 2)

        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])


        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue
        
        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans,
            "radius": radius,
            "degree_assortativity": ass 
        })
    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)


process_files("simulation_results/random_geometric_1_output", "params.csv", "results/random_geometric_1")

random_geometric_0.96_8
random_geometric_0.96_8
random_geometric_0.96_8
random_geometric_0.95_10
random_geometric_0.95_10
random_geometric_0.96_8
random_geometric_0.95_10
random_geometric_0.95_10
random_geometric_0.96_8
random_geometric_0.95_10
random_geometric_0.96_8
random_geometric_0.96_8
random_geometric_0.95_10
random_geometric_0.95_10
random_geometric_0.95_10
random_geometric_0.96_8
random_geometric_0.95_10
random_geometric_0.96_8
random_geometric_0.96_8
random_geometric_0.95_10


Geometric Random 2 by jump kernel

In [30]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)
    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "random_geometric_2"

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        _ , radius, _ = graph_name.rsplit("_", 2)

        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        try:
            avg_softsweep_prob = float(lines[0].split(":")[1])
            avg_num_clones = float(lines[1].split(":")[1])
        except IndexError:
            continue


        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue

        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans,
            "jump_kernel": radius,
            "degree_assortativity": ass 
        })
    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)


process_files("simulation_results/random_geometric_2_output", "params.csv", "results/random_geometric_2")

random_geometric_0.2_22
random_geometric_0.2_36
random_geometric_0.5_50
random_geometric_0.5_50
random_geometric_2.5_9
random_geometric_3.0_2
random_geometric_3.0_39
random_geometric_2.0_15
random_geometric_4.5_16
random_geometric_4.5_16
random_geometric_3.0_2
random_geometric_0.2_36
random_geometric_0.2_21
random_geometric_0.2_21
random_geometric_0.2_22
random_geometric_2.5_9
random_geometric_1.0_47
random_geometric_1.25_24
random_geometric_3.0_39
random_geometric_1.0_47
random_geometric_1.25_24
random_geometric_2.0_15
random_geometric_4.0_23
random_geometric_3.0_2
random_geometric_4.0_23
random_geometric_2.0_15
random_geometric_2.5_9
random_geometric_0.2_21
random_geometric_0.2_22
random_geometric_0.2_36
random_geometric_0.5_50
random_geometric_0.2_21
random_geometric_2.0_15
random_geometric_2.0_15
random_geometric_4.0_23
random_geometric_1.0_47
random_geometric_3.0_2
random_geometric_3.0_39
random_geometric_1.25_24
random_geometric_0.2_36
random_geometric_1.0_47
random_geometric_1.2

Grid

In [31]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)
    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "grids"

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        _, i = graph_name.rsplit("_", 1)
        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])
        
        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue
        
        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans, 
            "height": i,
            "degree_assortativity": ass 
        })
    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)


process_files("simulation_results/grids_output", "params.csv", "results/grids")

Lines 

In [32]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)
    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "lines_2"

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        _, i = graph_name.rsplit("_", 1)
        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])
        
        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue

        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans, 
            "connections": i,
            "degree_assortativity": ass 
        })
    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)


process_files("simulation_results/lines_2_output", "params.csv", "results/lines_2")

line_374
line_402
line_450
line_374
line_402
line_450
line_157
line_308
line_487
line_336
line_309
line_136
line_415
line_364
line_361
line_38
line_362
line_361
line_308
line_452
line_157
line_38
line_353
line_34
line_160
line_464
line_134
line_312
line_347
line_134
line_461
line_73
line_461
line_385
line_35
line_48
line_136
line_335
line_217
line_336
line_157
line_335
line_217
line_415
line_364
line_309
line_38
line_487
line_385
line_487
line_309
line_404
line_48
line_452
line_404
line_308
line_362
line_464
line_48
line_347
line_312
line_136
line_160
line_385
line_353
line_34
line_427
line_134
line_35
line_427
line_73
line_160
line_38
line_487
line_362
line_309
line_353
line_404
line_34
line_487
line_309
line_374
line_335
line_450
line_217
line_336
line_402
line_415
line_364
line_217
line_335
line_73
line_452
line_404
line_361
line_73
line_427
line_464
line_160
line_347
line_312
line_136
line_160
line_34
line_353
line_362
line_461
line_385
line_35
line_48
line_427
line_464
line_452
li

Fingers 

In [33]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)
    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "fingers"

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        _, i = graph_name.rsplit("_", 1)
        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        try:
            avg_softsweep_prob = float(lines[0].split(":")[1])
            avg_num_clones = float(lines[1].split(":")[1])
        except IndexError:
            continue
        
        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue

        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans, 
            "connections": i,
            "degree_assortativity": ass 
        })
    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)


process_files("simulation_results/fingers_output", "params.csv", "results/fingers")

Bottlenecks 2

In [34]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)
    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "bottlenecks_2"

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        _, i = graph_name.rsplit("_", 1)


        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])

        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue

        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans, 
            "connections": i,
            "degree_assortativity": ass 
        })
    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)

process_files("simulation_results/bottlenecks_2_output", "params.csv", "results/bottlenecks_2")

Bottlenecks 4

In [35]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)
    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "bottlenecks_4"

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        _, i = graph_name.rsplit("_", 1)


        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])

        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue

        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans, 
            "connections": i,
            "degree_assortativity": ass 
        })
    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)

process_files("simulation_results/bottlenecks_4_output", "params.csv", "results/bottlenecks_4")

Bottlenecks Regular

In [36]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)
    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "bottlenecks_regular"

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        _, i = graph_name.rsplit("_", 1)


        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])

        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue

        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans, 
            "degree_assortativity": ass 
        })
    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)

process_files("simulation_results/bottlenecks_regular_output", "params.csv", "results/bottlenecks_regular")

bottleneck_1
bottleneck_1
bottleneck_1
bottleneck_1
bottleneck_1


PA 

In [2]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file)
    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "PA"

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        _ , beta = graph_name.rsplit("_", 1)

        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])


        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue
        
        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans,
            "beta": beta,
            "degree_assortativity": ass 
        })
    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)

process_files("simulation_results/PA_output", "params.csv", "results/PA")

PA Assortative

In [21]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file, dtype={"graph_name": str})
    stats_df["graph_name"] = stats_df["graph_name"].str.strip()

    stats_df = stats_df.set_index(["graph_name", "graph_type"])
    graph_type = "PA_assortative"

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)

    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")

        graph_name, mu_rate = base.rsplit("_", 1)
        #_ , beta = graph_name.rsplit("_", 1)

        file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        with open(file_path, "r") as f:
            lines = f.readlines()

        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])


        try: 
            degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
            degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
            amplification = stats_df.loc[(graph_name, graph_type), "amp"]
            acc = stats_df.loc[(graph_name, graph_type), "acc"]
            con = stats_df.loc[(graph_name, graph_type), "connectivity"]
            trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
            ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]
        except KeyError:
            print(graph_name)
            continue
        
        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans,
            "degree_assortativity": ass 
        })
    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)

process_files("simulation_results/PA_assortative_output", "params.csv", "results/PA_assortative")

PA Regular 2 Regular 10 

In [25]:
import pandas as pd
import os
from collections import defaultdict

def process_files(input_folder, stats_file, output_folder="results"):
    # Load graph statistics (graph_name is the index)
    stats_df = pd.read_csv(stats_file, dtype={"graph_name": str})
    stats_df["graph_name"] = stats_df["graph_name"].str.strip()

    stats_df = stats_df.set_index(["graph_name", "graph_type"]) 
    graph_type = "regular_4_regular_10"

    # Store results separately by mu_rate
    results_by_mu = defaultdict(list)
    mu_rate = 0
    # Loop through all files in the folder
    for fname in os.listdir(input_folder):

        # Extract graph_name and mu_rate
        base = fname.replace(".txt", "")
        #graph_name, mu_rate = base.rsplit("_", 1)
        graph_name = base
        #_ , beta = graph_name.rsplit("_", 1)

        #file_path = os.path.join(input_folder, fname)

        # Read file and compute averages
        #with open(file_path, "r") as f:
            #lines = f.readlines()
        """
        avg_softsweep_prob = float(lines[0].split(":")[1])
        avg_num_clones  = float(lines[1].split(":")[1])
        """
        avg_softsweep_prob = 0
        avg_num_clones  = 0


        degree = stats_df.loc[(graph_name, graph_type), "degree_mean"]
        degree_variance = stats_df.loc[(graph_name, graph_type), "degree_var"]
        amplification = stats_df.loc[(graph_name, graph_type), "amp"]
        acc = stats_df.loc[(graph_name, graph_type), "acc"]
        con = stats_df.loc[(graph_name, graph_type), "connectivity"]
        trans =  stats_df.loc[(graph_name, graph_type), "transitivity"]
        ass = stats_df.loc[(graph_name, graph_type), "degree_assortativity"]

        
        results_by_mu[mu_rate].append({
            "file_name": graph_name,
            "avg_softsweep_prob": avg_softsweep_prob,
            "avg_num_clones": avg_num_clones,
            "degree": degree,
            "degree_variance": degree_variance,
            "amplification": amplification,
            "acc": acc,
            "connectivity": con, 
            "transitivity": trans,
            "degree_assortativity": ass 
        })
    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Write one CSV per mu_rate
    for mu_rate, rows in results_by_mu.items():
        out_path = os.path.join(output_folder, f"mu{mu_rate}.csv")
        df = pd.DataFrame(rows)
        df.to_csv(out_path, index=False)

process_files("graphs/regular_4_regular_10", "assortative.csv", "results/regular_4_regular_10")