# Interactive examples of data presentation

In [None]:
import pandas as pd
import seaborn.objects as so
import matplotlib as plt
import numpy as np
import math
import statistics

## Data structure

In [None]:
# FIXME: set identifier prefix and simulator
# `prefix` is the leading part of every input file name: gen_raw_dfs() reads
# data/{prefix}-{batch}-{backend}.csv.
prefix = "pow9pow21"
# Number of batch files read per backend; batch numbers in file names start at 1.
batches = 3

def gen_raw_dfs(backend: str) -> list[pd.DataFrame]:
    """Read every batch CSV recorded for one backend.

    Files are looked up as ``data/{prefix}-{batch}-{backend}.csv`` with
    ``batch`` running from 1 to ``batches`` (both are module-level settings).

    Args:
        backend: Backend label used as the file-name suffix, e.g. ``"lammps"``.

    Returns:
        One raw DataFrame per batch, in batch order.
    """
    return [
        pd.read_csv(f"data/{prefix}-{batch_no}-{backend}.csv")
        for batch_no in range(1, batches + 1)
    ]

# Load the raw per-batch frames for each of the four backends.
gamdpy_dfs, gamdpy_at_dfs, lammps_dfs, lammps_gpu_dfs = (
    gen_raw_dfs(backend)
    for backend in ("gamdpy", "gamdpy-at", "lammps", "lammps-gpu")
)

In [None]:
# TPS
def gen_tps_dfs(dfs: list[pd.DataFrame]) -> list[pd.DataFrame]:
    """Reduce each raw batch frame to its distinct throughput readings.

    For every input frame only the ``n_atoms`` and ``tps`` columns are kept,
    rows with a missing value in either column are dropped, and of the rows
    sharing a ``tps`` value only the first is retained. The original row
    index is preserved.

    Args:
        dfs: Raw per-batch frames containing ``n_atoms`` and ``tps`` columns.

    Returns:
        One reduced frame per input frame, in the same order.
    """
    kept_columns = ("n_atoms", "tps")
    return [
        pd.DataFrame({col: raw[col] for col in kept_columns})
        .dropna()
        .drop_duplicates("tps")
        for raw in dfs
    ]

def get_avg_tps(dfs: list[pd.DataFrame]) -> list[float]:
    """Average the ``tps`` column position-by-position across batches.

    Row ``i`` of every frame is treated as the same measurement point; the
    number of positions is taken from the first frame.

    Args:
        dfs: Per-batch frames with a ``tps`` column and matching row order.

    Returns:
        The mean ``tps`` for each row position, or an empty list when no
        frames are given.
    """
    if not dfs:
        return []

    # Divide by the real number of batches instead of a hard-coded 3, so the
    # mean stays correct when the batch count changes.
    n_batches = len(dfs)
    return [
        sum(df["tps"].iloc[row] for df in dfs) / n_batches
        for row in range(len(dfs[0]))
    ]

def get_sd_tps(dfs: list[pd.DataFrame]) -> list[float]:
    """Compute the across-batch sample standard deviation of ``tps``.

    For each row position of the first frame, the ``tps`` value at that
    position is collected from every frame and passed to
    ``statistics.stdev``.

    Args:
        dfs: Per-batch frames with a ``tps`` column and matching row order.

    Returns:
        One standard deviation per row position.
    """
    row_count = len(dfs[0])
    return [
        statistics.stdev([batch.iloc[row]["tps"] for batch in dfs])
        for row in range(row_count)
    ]

# Reduced (n_atoms, tps) frames per backend.
gamdpy_tps_dfs, gamdpy_at_tps_dfs, lammps_tps_dfs, lammps_gpu_tps_dfs = map(
    gen_tps_dfs,
    (gamdpy_dfs, gamdpy_at_dfs, lammps_dfs, lammps_gpu_dfs),
)

# Across-batch mean throughput per backend.
gamdpy_avg_tps, gamdpy_at_avg_tps, lammps_avg_tps, lammps_gpu_avg_tps = map(
    get_avg_tps,
    (gamdpy_tps_dfs, gamdpy_at_tps_dfs, lammps_tps_dfs, lammps_gpu_tps_dfs),
)

# Across-batch throughput standard deviation per backend.
gamdpy_sd_tps, gamdpy_at_sd_tps, lammps_sd_tps, lammps_gpu_sd_tps = map(
    get_sd_tps,
    (gamdpy_tps_dfs, gamdpy_at_tps_dfs, lammps_tps_dfs, lammps_gpu_tps_dfs),
)

In [None]:
# POWER
def get_avg_power(dfs: list[pd.DataFrame]) -> list[float]:
    """Average the running-phase ``total`` power per system size across batches.

    The system sizes are the distinct ``n_atoms`` values of the first frame
    (after dropping rows that contain any missing value). For each size, the
    mean of ``total`` over rows with ``is_running == True`` is computed in
    every batch, and those per-batch means are averaged.

    Args:
        dfs: Raw per-batch frames with ``n_atoms``, ``is_running`` and
            ``total`` columns.

    Returns:
        One averaged power value per system size, in order of first
        appearance, or an empty list when no frames are given.
    """
    if not dfs:
        return []

    # Use the real batch count rather than a hard-coded 3.
    n_batches = len(dfs)
    system_sizes = dfs[0].dropna().drop_duplicates("n_atoms")["n_atoms"]

    avg_power = []
    for system_size in system_sizes:
        batch_means = [
            df.query(f"is_running == True and n_atoms == {system_size}")["total"].mean()
            for df in dfs
        ]
        avg_power.append(sum(batch_means) / n_batches)
    return avg_power

def get_sd_power(dfs: list[pd.DataFrame]) -> list[float]:
    """Average the per-batch standard deviation of running-phase ``total`` power.

    The system sizes are the distinct ``n_atoms`` values of the first frame
    (after dropping rows that contain any missing value). For each size, the
    standard deviation of ``total`` over rows with ``is_running == True`` is
    computed within every batch (``Series.std``), and those per-batch values
    are averaged. The result is a mean of within-batch spreads, not the
    spread of the batch means.

    Args:
        dfs: Raw per-batch frames with ``n_atoms``, ``is_running`` and
            ``total`` columns.

    Returns:
        One averaged standard deviation per system size, in order of first
        appearance, or an empty list when no frames are given.
    """
    if not dfs:
        return []

    # Use the real batch count rather than a hard-coded 3.
    n_batches = len(dfs)
    system_sizes = dfs[0].dropna().drop_duplicates("n_atoms")["n_atoms"]

    sd_power = []
    for system_size in system_sizes:
        batch_sds = [
            df.query(f"is_running == True and n_atoms == {system_size}")["total"].std()
            for df in dfs
        ]
        sd_power.append(sum(batch_sds) / n_batches)

    return sd_power

# Mean running-phase power per system size, per backend.
gamdpy_avg_power, gamdpy_at_avg_power, lammps_avg_power, lammps_gpu_avg_power = map(
    get_avg_power,
    (gamdpy_dfs, gamdpy_at_dfs, lammps_dfs, lammps_gpu_dfs),
)

# Averaged within-batch power standard deviation, per backend.
gamdpy_sd_power, gamdpy_at_sd_power, lammps_sd_power, lammps_gpu_sd_power = map(
    get_sd_power,
    (gamdpy_dfs, gamdpy_at_dfs, lammps_dfs, lammps_gpu_dfs),
)

In [None]:
# COMPLETE DATAFRAME
def gen_complete_df(
    n_tps_dfs: list[pd.DataFrame],
    avg_tps: list[float],
    sd_tps: list[float],
    avg_power: list[float],
    sd_power: list[float]
) -> pd.DataFrame:
    """Assemble the per-system-size summary table for one backend.

    Args:
        n_tps_dfs: Reduced per-batch frames; only the ``n_atoms`` column of
            the first frame is used (it also supplies the row index).
        avg_tps: Mean timesteps per second, one value per row.
        sd_tps: Standard deviation of timesteps per second, one per row.
        avg_power: Mean power draw, one value per row.
        sd_power: Standard deviation of power draw, one value per row.

    Returns:
        A frame with columns ``n_atoms``, ``tps``, ``tps_sd``, ``power``,
        ``power_sd``, ``power_min`` and ``power_max``, where the last two are
        ``power`` minus/plus ``power_sd``.
    """
    summary = pd.DataFrame({
        "n_atoms": n_tps_dfs[0]["n_atoms"],
        "tps": avg_tps,
        "tps_sd": sd_tps,
        "power": avg_power,
        "power_sd": sd_power,
    })
    # One-sigma bounds used as the error-bar range in the plots.
    return summary.assign(
        power_min=summary["power"] - summary["power_sd"],
        power_max=summary["power"] + summary["power_sd"],
    )

# Build one summary table per backend from its TPS and power statistics.
gamdpy_complete_df = gen_complete_df(
    n_tps_dfs=gamdpy_tps_dfs,
    avg_tps=gamdpy_avg_tps,
    sd_tps=gamdpy_sd_tps,
    avg_power=gamdpy_avg_power,
    sd_power=gamdpy_sd_power,
)
gamdpy_at_complete_df = gen_complete_df(
    n_tps_dfs=gamdpy_at_tps_dfs,
    avg_tps=gamdpy_at_avg_tps,
    sd_tps=gamdpy_at_sd_tps,
    avg_power=gamdpy_at_avg_power,
    sd_power=gamdpy_at_sd_power,
)
lammps_complete_df = gen_complete_df(
    n_tps_dfs=lammps_tps_dfs,
    avg_tps=lammps_avg_tps,
    sd_tps=lammps_sd_tps,
    avg_power=lammps_avg_power,
    sd_power=lammps_sd_power,
)
lammps_gpu_complete_df = gen_complete_df(
    n_tps_dfs=lammps_gpu_tps_dfs,
    avg_tps=lammps_gpu_avg_tps,
    sd_tps=lammps_gpu_sd_tps,
    avg_power=lammps_gpu_avg_power,
    sd_power=lammps_gpu_sd_power,
)

In [None]:
def tps_power_plot(
    df: pd.DataFrame,
    title: str
) -> so.Plot:
    """Plot power draw against timesteps per second for one backend.

    Draws a dot per row, a vertical range from ``power_min`` to
    ``power_max``, and a first-order polynomial fit line, with a log-scaled
    x axis.

    Args:
        df: Summary frame with ``tps``, ``power``, ``power_min`` and
            ``power_max`` columns.
        title: Plot title.

    Returns:
        The configured (not yet rendered) seaborn Plot.
    """
    base = so.Plot(data=df, x=df["tps"], y=df["power"])

    # Measured points with their error ranges.
    marked = (
        base
        .add(so.Dot())
        .add(so.Range(), ymin="power_min", ymax="power_max")
    )

    # Log x axis, then the linear trend line on top.
    trended = marked.scale(x="log").add(so.Line(), so.PolyFit(order=1))

    return trended.label(
        x="Timesteps per second",
        y="Power draw (W)",
        title=title,
    )

# Power-vs-throughput plots for the three backends plotted so far.
gamdpy_tps_power_plot = tps_power_plot(df=gamdpy_complete_df, title="gamdpy default")
gamdpy_at_tps_power_plot = tps_power_plot(df=gamdpy_at_complete_df, title="gamdpy at")
lammps_tps_power_plot = tps_power_plot(df=lammps_complete_df, title="lammps default")
def watts_tps(
    gamdpy_df: pd.DataFrame,
    gamdpy_at_df: pd.DataFrame,
    lammps_df: pd.DataFrame,
    lammps_gpu_df: pd.DataFrame
) -> so.Plot:
    """Plot energy per timestep (``power / tps``) against system size.

    Only the gamdpy layer is currently drawn. The layers for the other three
    frames are commented out below, so those arguments are accepted but
    unused.

    Args:
        gamdpy_df: Summary frame with ``n_atoms``, ``power`` and ``tps``.
        gamdpy_at_df: Summary frame for the autotuned gamdpy run (unused).
        lammps_df: Summary frame for lammps (unused).
        lammps_gpu_df: Summary frame for lammps-gpu (unused).

    Returns:
        The configured seaborn Plot with a log-scaled x axis.
    """
    joules_per_step = gamdpy_df.power / gamdpy_df.tps
    layered = (
        so.Plot()
        .add(so.Dot(color="red"), data=gamdpy_df, x=gamdpy_df.n_atoms, y=joules_per_step, label="gamdpy")
        # Disabled layers for the remaining backends:
        # .add(so.Dot(color="green"), data=gamdpy_at_df, x=gamdpy_at_df.n_atoms, y=(gamdpy_at_df.power/gamdpy_at_df.tps), label="gamdpy w/\nautotuned parameters")
        # .add(so.Dot(color='blue'), data=lammps_df, x=lammps_df.n_atoms, y=(lammps_df.power/lammps_df.tps), label="lammps")
        # .add(so.Dot(color='purple'), data=lammps_gpu_df, x=lammps_gpu_df.n_atoms, y=(lammps_gpu_df.power/lammps_gpu_df.tps), label="lammps-gpu")
    )
    log_scaled = layered.scale(x="log")
    return log_scaled.label(
        x="System size (no. of atoms)",
        y="Energy per timestep (J/Timestep)",
        legend="Backend",
        title="gamdpy",
    )

def watts_tps_normalized(
    gamdpy_df: pd.DataFrame,
    gamdpy_at_df: pd.DataFrame,
    lammps_df: pd.DataFrame,
    lammps_gpu_df: pd.DataFrame
) -> so.Plot:
    """Plot energy per timestep divided by ``n_atoms`` against system size.

    Only the gamdpy layer is currently drawn. The layers for the other three
    frames are commented out below, so those arguments are accepted but
    unused.

    Args:
        gamdpy_df: Summary frame with ``n_atoms``, ``power`` and ``tps``.
        gamdpy_at_df: Summary frame for the autotuned gamdpy run (unused).
        lammps_df: Summary frame for lammps (unused).
        lammps_gpu_df: Summary frame for lammps-gpu (unused).

    Returns:
        The configured seaborn Plot with a log-scaled x axis.
    """
    joules_per_step = gamdpy_df.power / gamdpy_df.tps
    joules_per_step_per_atom = joules_per_step / gamdpy_df.n_atoms
    layered = (
        so.Plot()
        .add(so.Dot(color="red"), data=gamdpy_df, x=gamdpy_df.n_atoms, y=joules_per_step_per_atom, label="gamdpy")
        # Disabled layers for the remaining backends:
        # .add(so.Dot(color="green"), data=gamdpy_at_df, x=gamdpy_at_df.n_atoms, y=(gamdpy_at_df.power/gamdpy_at_df.tps)/gamdpy_at_df.n_atoms, label="gamdpy w/\nautotuned parameters")
        # .add(so.Dot(color='blue'), data=lammps_df, x=lammps_df.n_atoms, y=(lammps_df.power/lammps_df.tps)/lammps_df.n_atoms, label="lammps")
        # .add(so.Dot(color='purple'), data=lammps_gpu_df, x=lammps_gpu_df.n_atoms, y=(lammps_gpu_df.power/lammps_gpu_df.tps)/lammps_gpu_df.n_atoms, label="lammps-gpu")
    )
    log_scaled = layered.scale(x="log")
    # NOTE(review): the y label says "no. of molecules" while the value is
    # divided by n_atoms and the x label says atoms — confirm the wording.
    return log_scaled.label(
        x="System size (no. of atoms)",
        y="Energy per timestep (J/Timestep/no. of molecules)",
        legend="Backend",
        title="gamdpy",
    )

# watts_tps(gamdpy_complete_df, "gamdpy").show()
# Write the energy-per-timestep figures (raw and per-atom) to fig/.
watts_tps(
    gamdpy_df=gamdpy_complete_df,
    gamdpy_at_df=gamdpy_at_complete_df,
    lammps_df=lammps_complete_df,
    lammps_gpu_df=lammps_gpu_complete_df,
).save("fig/stacked_gamdpy")
watts_tps_normalized(
    gamdpy_df=gamdpy_complete_df,
    gamdpy_at_df=gamdpy_at_complete_df,
    lammps_df=lammps_complete_df,
    lammps_gpu_df=lammps_gpu_complete_df,
).save("fig/stacked_normalized_gamdpy")

# gamdpy_tps_power_plot.show()
# gamdpy_at_tps_power_plot.show()
# lammps_tps_power_plot.show()

# gamdpy_tps_power_plot.save("fig/gamdpy_tps_power")
# gamdpy_at_tps_power_plot.save("fig/gamdpy_at_tps_power")
# lammps_tps_power_plot.save("fig/lammps_tps_power")

In [None]:
# Show each backend's summary table, preceded by its caption.
display(
    "gamdpy default", gamdpy_complete_df,
    "gamdpy at", gamdpy_at_complete_df,
    "lammps default", lammps_complete_df,
    "lammps gpu", lammps_gpu_complete_df,
)

## Presentation & analysis

### Stacked graphs

Overlays the total and GPU power draw as filled areas on a single set of axes. Easy to read.

In [None]:
# Overlay total and GPU power draw over time on one set of axes: a filled
# area plus an outline for each of the "total" and "gpu" columns.
# NOTE(review): `df`, `sim` and `identifier` are not assigned in any cell
# visible above; unless they are defined elsewhere this cell raises NameError
# on a fresh kernel. `df` needs "time", "total" and "gpu" columns.
p = (
    so.Plot(data=df, x=df.time)
    .add(so.Area(edgewidth=0), y="total")
    .add(so.Line(linewidth=1), y="total", label="Total")
    .add(so.Area(edgewidth=0, color="green"), y="gpu")
    .add(so.Line(linewidth=1, color="green"), y="gpu", label="GPU")
    .label(
        x="Time (s)",
        y="Power draw (W)",
        title=f"{sim} {identifier}",
        legend="Hardware measured"
    )
)
#p.save(f"fig/{identifier}-{sim}-stacked")  # uncomment to save
p

### Paired graph

Displays the GPU and total power draw in two separate subplots that share the time axis. Might be useful in the report later on.

In [None]:
# Pair the "gpu" and "total" columns as separate y variables against time;
# each subplot gets a filled area and an outline.
# NOTE(review): `df`, `sim` and `identifier` are not assigned in any cell
# visible above; unless they are defined elsewhere this cell raises NameError
# on a fresh kernel.
p = (
    so.Plot(data=df, x=df.time)
    .pair(y=["gpu", "total"])
    .add(so.Area(edgewidth=0)).add(so.Line(linewidth=1))
    .label(
        x="Time (s)",
        y0="GPU power draw (W)",
        y1="Total power draw (W)",
        title=f"{sim} {identifier}"
    )
)
#p.save(f"fig/{identifier}-{sim}-paired")
p

### Band graph

Displays the band between two y-values (here the GPU and total power draw). The result does not look polished yet.

In [None]:
# Draw a band over time whose lower edge is the "gpu" column and whose upper
# edge is the "total" column.
# NOTE(review): `df`, `sim` and `identifier` are not assigned in any cell
# visible above; unless they are defined elsewhere this cell raises NameError
# on a fresh kernel.
p = (
    so.Plot(df, x=df.time, ymin="gpu", ymax="total")
    .add(so.Band(edgewidth=1))
    .label(
        x="Time (sec)",
        y="Power draw (W)",
        title=f"Power draw - {sim} {identifier}"
    )
)
#p.save(f"fig/{identifier}-{sim}-band")
p