In [None]:
import yaml
import pandas as pd

In [None]:
def get_energy_cpu(row, cluster_info):
    """Return the CPU energy of a single job record in kWh.

    Args:
        row: Mapping-like job record providing "Constraints",
            "Run Time (sec)" and "Number of cores".
        cluster_info: Nested mapping whose "partitions" entry holds one
            mapping per partition name, each with a "TDP_CPU" value.

    Returns:
        Run time in hours * number of cores * TDP_CPU / 1000.

    Raises:
        KeyError: If the selected partition (or its "TDP_CPU" entry) is
            absent from ``cluster_info``.
    """
    constraint = row["Constraints"]
    # Records without a constraint use the catch-all partition entry.
    partition = "baskerville-all" if pd.isna(constraint) else f"baskerville-{constraint}"
    # Presumably TDP_CPU is stored in watts; dividing by 1000 gives kW.
    tdp_kw = float(cluster_info["partitions"][partition]["TDP_CPU"]) / 1000
    hours = row["Run Time (sec)"] / 3600
    return hours * row["Number of cores"] * tdp_kw

def get_energy_gpu(row, cluster_info):
    """Return the GPU energy of a single job record in kWh.

    Args:
        row: Mapping-like job record providing "Constraints",
            "Run Time (sec)" and "Number of GPU".
        cluster_info: Nested mapping whose "partitions" entry holds one
            mapping per partition name, each with a "TDP" value.

    Returns:
        Run time in hours * number of GPUs * TDP / 1000.

    Raises:
        KeyError: If the selected partition (or its "TDP" entry) is
            absent from ``cluster_info``.
    """
    constraint = row["Constraints"]
    # Records without a constraint use the catch-all partition entry.
    partition = "baskerville-all" if pd.isna(constraint) else f"baskerville-{constraint}"
    # Presumably TDP is stored in watts; dividing by 1000 gives kW.
    tdp_kw = float(cluster_info["partitions"][partition]["TDP"]) / 1000
    hours = row["Run Time (sec)"] / 3600
    return hours * row["Number of GPU"] * tdp_kw

def get_energy_mem(row, cluster_info):
    """Return the memory energy of a single job record in kWh.

    Args:
        row: Mapping-like job record providing "Run Time (sec)" and
            "Memory (GB)".
        cluster_info: Accepted so the signature matches the other energy
            helpers; this function does not read it.

    Returns:
        Run time in hours * memory in GB * 0.3725 / 1000.
    """
    watts_per_gb = 0.3725  # W/GB
    hours = row["Run Time (sec)"] / 3600
    return hours * row["Memory (GB)"] * (watts_per_gb / 1000)

In [None]:
# Load the job table and convert the "End Time" column to datetimes so that
# later cells can derive month/year labels from it.
data = pd.read_csv("./data/Baskerville_total_commas.csv")
data["End Time"] = pd.to_datetime(data["End Time"])

# Read the cluster description (partition TDP values, PUE, CI). The context
# manager closes the file handle once parsing is done instead of leaking it.
with open("./data/cluster_info.yaml") as cluster_info_file:
    cluster_info = yaml.safe_load(cluster_info_file)

In [None]:
# Component energies per job (kWh), evaluated row by row.
data["E_cpu"] = data.apply(get_energy_cpu, axis=1, cluster_info=cluster_info)
data["E_gpu"] = data.apply(get_energy_gpu, axis=1, cluster_info=cluster_info)
data["E_mem"] = data.apply(get_energy_mem, axis=1, cluster_info=cluster_info)

# Compute-only energy is the sum of the three components.
data["E_total_compute"] = data["E_cpu"].add(data["E_gpu"]).add(data["E_mem"])

# PUE is a dimensionless ratio, so the scaled total is still in kWh.
data["E_total"] = data["E_total_compute"].mul(cluster_info["PUE"])
# CI is the carbon intensity in gCO₂/kWh (noted as 125), so the product is gCO₂.
data["CO2_total_g"] = data["E_total"].mul(cluster_info["CI"])

In [None]:
# Overall emissions: grams summed over all jobs, then converted to tonnes
# (1 tonne = 1e6 g).
data["CO2_total_g"].sum() / 1_000_000

In [None]:
# Label each job with the month ("YYYY-MM") and year ("YYYY") of its end time.
# The vectorised .dt accessor replaces a per-row lambda and yields a missing
# label for a missing end time instead of raising.
data["YYYY-MM"] = data["End Time"].dt.strftime("%Y-%m")
data["YYYY"] = data["End Time"].dt.strftime("%Y")

In [None]:
# Bar chart of summed emissions (gCO2e) for each "YYYY-MM" label.
(
    data.groupby("YYYY-MM")["CO2_total_g"]
    .sum()
    .plot.bar(
        title="Baskerville Monthly Emissions",
        xlabel="Month",
        ylabel="gCO2e",
        figsize=(10, 5),
    )
)

In [None]:
# Summed emissions in grams for each "YYYY" label.
(
    data
    .groupby("YYYY")["CO2_total_g"]
    .sum()
)

In [None]:
# Bar chart of summed emissions (gCO2e) for each "YYYY" label.
(
    data.groupby("YYYY")["CO2_total_g"]
    .sum()
    .plot.bar(
        title="Baskerville Yearly Emissions",
        xlabel="Year",
        ylabel="gCO2e",
        figsize=(10, 5),
    )
)