In [1]:
from pathlib import Path
import numpy as np
from scipy.io import loadmat
import pandas as pd
import pickle

In [2]:
# Input locations, resolved relative to the directory the kernel was started in.
path_folder_unprocessed = Path.cwd() / "data" / "input" / "unprocessed"  # raw source files, only read by this notebook
path_folder_processed = Path.cwd() / "data" / "input" / "processed"  # destination of every np.save / pickle.dump below
# np.save and open(..., "wb") do not create parent directories, so make sure the
# output folder exists; otherwise the first save fails on a fresh checkout.
path_folder_processed.mkdir(parents=True, exist_ok=True)

In [3]:
# Case struct stored under the "mpc" key of the KPG193 .mat file; the [0, 0] index
# unwraps the 1x1 struct array returned by loadmat so fields can be read as raw["gen"], etc.
path_kpg193_mat = path_folder_unprocessed / "KPG193_ver1_2" / "network" / "mat" / "KPG193_ver1_2.mat"
mat_contents = loadmat(path_kpg193_mat)
raw = mat_contents["mpc"][0, 0]

In [4]:
# Hard-coded dataset dimensions used to size and reshape the arrays built below.
num_units, num_buses = 122, 197
# Multiplier applied to every cost column taken from the case file (gencost and startup costs).
fx_rate = 1000
# Permutation ordering the units by ascending value of the second-to-last gencost column
# (the column later stored as cost_lin); every per-unit array is re-indexed with it.
idx_unit_sorted_by_cost_lin = raw["gencost"][:, -2].argsort()

---
---
---

**GENERATION UNITS**

In [5]:
# [KPG193] 0: Nuclear, 1: Coal, 2: LNG
# The literal lists one code per unit in the case file's original row order.
unit_type_case_order = np.array([
    1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
    0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
], dtype=np.uint64)
# Bring the codes into the same cost-sorted unit order as every other per-unit array.
unit_type = unit_type_case_order[idx_unit_sorted_by_cost_lin]

# Positions (in the cost-sorted order) of the units belonging to each fuel type.
unit_positions = np.arange(num_units)
idx_nuclear = unit_positions[unit_type == 0]
idx_coal = unit_positions[unit_type == 1]
idx_lng = unit_positions[unit_type == 2]

In [6]:
# Reorder each case-file table once so every per-unit array below shares the cost-sorted unit order.
# Integer-array indexing returns copies, so the in-place edits further down never touch `raw`.
gencost_by_cost = raw["gencost"][idx_unit_sorted_by_cost_lin]
gen_by_cost = raw["gen"][idx_unit_sorted_by_cost_lin]
genthermal_by_cost = raw["genthermal"][idx_unit_sorted_by_cost_lin]

# Cost coefficients, scaled by fx_rate: last gencost column -> constant term,
# second-to-last gencost column -> linear term.
cost_const = gencost_by_cost[:, -1] * fx_rate
cost_lin = gencost_by_cost[:, -2] * fx_rate

# Output limits from "gen" columns 8 and 9.
p_max = gen_by_cost[:, 8]
p_min = gen_by_cost[:, 9]

# Ramping limits from "genthermal" columns 5-8 (copied because they are overwritten below).
ramp_up = genthermal_by_cost[:, 5].copy()
ramp_down = genthermal_by_cost[:, 6].copy()
startup_ramp = genthermal_by_cost[:, 7].copy()
shutdown_ramp = genthermal_by_cost[:, 8].copy()

# Minimum up / down times from "genthermal" columns 1 and 2, stored as integers.
min_up = genthermal_by_cost[:, 1].astype(np.int64)
min_down = genthermal_by_cost[:, 2].astype(np.int64)

# [KPG193] error fix
# Every ramp-type limit of the nuclear units is replaced by that unit's p_min.
for ramp_limit in (ramp_up, ramp_down, startup_ramp, shutdown_ramp):
    ramp_limit[idx_nuclear] = p_min[idx_nuclear]

In [7]:
# Persist every per-unit array as "<name>.npy" in the processed folder.
# An explicit name -> array mapping replaces eval(var_str): the same files are written,
# but nothing is evaluated from strings, and a missing variable fails here, before any file is written.
arrays_to_save = {
    "unit_type": unit_type,
    "idx_nuclear": idx_nuclear,
    "idx_coal": idx_coal,
    "idx_lng": idx_lng,
    "cost_const": cost_const,
    "cost_lin": cost_lin,
    "p_max": p_max,
    "p_min": p_min,
    "ramp_up": ramp_up,
    "ramp_down": ramp_down,
    "startup_ramp": startup_ramp,
    "shutdown_ramp": shutdown_ramp,
    "min_up": min_up,
    "min_down": min_down,
}
for var_str, array in arrays_to_save.items():
    np.save(path_folder_processed / f"{var_str}.npy", array)

---
---
---

**STARTUP COST**

In [8]:
# Cost-sorted copy of the genthermal table; its last six columns hold the startup data.
genthermal_sorted = raw["genthermal"][idx_unit_sorted_by_cost_lin]
# Columns -6..-4: the three startup cost values per unit, scaled by fx_rate.
startup_costs_raw = genthermal_sorted[:, -6:-3] * fx_rate
# Columns -3..-1: the three tier lengths per unit, stored as integers.
tier_len_raw = genthermal_sorted[:, -3:].astype(np.int64)

In [9]:
def _rows_as_lists(per_unit_table):
    """Return the first ``num_units`` rows of ``per_unit_table`` as plain Python lists."""
    return [per_unit_table[unit_id].tolist() for unit_id in range(num_units)]


# Per-unit three-tier startup costs and the matching tier lengths, as nested lists.
cost_startup_step = _rows_as_lists(startup_costs_raw)
step_length = _rows_as_lists(tier_len_raw)

np.save(path_folder_processed / "cost_startup_step.npy", cost_startup_step)
np.save(path_folder_processed / "step_length.npy", step_length)

In [10]:
# Legacy ("old") expansion of the three-tier startup cost into one cost value per step.
# For each unit, tier 1's cost is repeated up to the first tier value, tier 2's cost from
# there up to the second tier value, and tier 3's cost from there up to the third, so the
# tier values are used as cumulative end points and the list length equals the third one
# (when the three values are non-decreasing).
cost_startup_step_old = []
for tier_costs, tier_ends in zip(startup_costs_raw, tier_len_raw):
    # Plain Python ints make the list repetition counts explicit instead of relying on
    # numpy integer scalars being accepted by list multiplication.
    first_end, second_end, third_end = (int(tier_end) for tier_end in tier_ends)
    cost_startup_step_old.append(
        [float(tier_costs[0])] * first_end
        + [float(tier_costs[1])] * (second_end - first_end)
        + [float(tier_costs[2])] * (third_end - second_end)
    )
# Number of steps in each unit's expanded list.
num_cooling_steps_old = np.array([len(css_i) for css_i in cost_startup_step_old])

# The expanded lists are ragged (one length per unit), hence pickle rather than .npy.
with open(path_folder_processed / "cost_startup_step_old.pkl", "wb") as f:
    pickle.dump(cost_startup_step_old, f)
np.save(path_folder_processed / "num_cooling_steps_old.npy", num_cooling_steps_old)

---
---
---

**2022 HOURLY TIMESTAMP**

In [7]:
# Hourly timestamps covering 2022: start inclusive, end exclusive, one-hour step
# implied by the hour-resolution datetime64 bounds.
first_hour_2022 = np.datetime64("2022-01-01T00")
first_hour_2023 = np.datetime64("2023-01-01T00")
timestamp_2022 = np.arange(first_hour_2022, first_hour_2023)
np.save(path_folder_processed / "timestamp_2022.npy", timestamp_2022)

---
---
---

**2022 HOURLY DEMAND** (KPX == KPG)

In [6]:
def get_demand_2022():
    """Build the 2022 hourly system demand from the daily per-bus KPG193 CSV files.

    Reads ``daily_demand_1.csv`` ... ``daily_demand_365.csv``, orders each file by
    (hour, bus_id), and keeps the ``demandP`` column (``demandQ`` is dropped).

    Returns
    -------
    numpy.ndarray
        Length-8760 vector: ``demandP`` summed over all ``num_buses`` buses for each hour.
    """
    demand_dir = path_folder_unprocessed / "KPG193_ver1_2" / "profile" / "demand"
    per_bus_demand = np.empty((365, 24, num_buses))

    for day_idx in range(365):
        # Files are numbered from 1, the array from 0.
        daily = pd.read_csv(demand_dir / f"daily_demand_{day_idx + 1}.csv")
        daily = daily.drop(columns="demandQ").sort_values(by=["hour", "bus_id"])
        # After sorting, rows run hour-major / bus-minor, which matches the (24, num_buses) layout.
        per_bus_demand[day_idx] = daily[["demandP"]].values.reshape((24, num_buses))

    hourly_per_bus = per_bus_demand.reshape((8760, num_buses))
    return hourly_per_bus.sum(axis=1)

demand_2022 = get_demand_2022()
np.save(path_folder_processed / "demand_2022.npy", demand_2022)

---
---
---

**2022 HOURLY STATUS** (KPG SPECIFIC TO KPG RENEWABLE) (NOT USED)

In [13]:
def get_status_2022():
    """Load the 2022 hourly commitment status of every unit from the daily KPG193 CSV files.

    Reads ``commitment_decision_1.csv`` ... ``commitment_decision_365.csv`` and orders
    each file by (hour, generator_id) before taking the ``status`` column.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(num_units, 8760)`` whose rows follow the cost-sorted
        unit order (``idx_unit_sorted_by_cost_lin``).
    """
    status_dir = path_folder_unprocessed / "KPG193_ver1_2" / "profile" / "commitment_decision"
    daily_status = np.empty((365, 24, num_units), dtype=np.int64)

    for day_idx in range(365):
        # Files are numbered from 1, the array from 0.
        decisions = pd.read_csv(status_dir / f"commitment_decision_{day_idx + 1}.csv")
        decisions = decisions.sort_values(by=["hour", "generator_id"])
        daily_status[day_idx] = decisions[["status"]].values.reshape((24, num_units))

    # (hour, unit) -> (unit, hour), then reorder the units by cost.
    status_by_unit = daily_status.reshape((8760, num_units)).transpose()
    return status_by_unit[idx_unit_sorted_by_cost_lin]

status_2022 = get_status_2022()
# np.save(path_folder_processed / "status_2022.npy", status_2022)

---
---
---

**2022 HOURLY MUST-OFFS** (KPG SPECIFIC TO KPG RENEWABLE) (NOT USED)

In [14]:
def get_must_off_2022():
    """Convert the nuclear must-off table into 0-based (unit, start hour, end hour) rows.

    The CSV is read positionally: column 0 is kept as the unit identifier, columns 1-2
    are combined as ``day * 24 + hour`` into the must-off start, and columns 3-4 into
    the must-off end, after every column has been shifted from 1-based to 0-based.

    Returns
    -------
    numpy.ndarray
        Array with three columns: unit; mustoff_start; mustoff_end.
    """
    table = pd.read_csv(path_folder_unprocessed / "KPG193_ver1_2" / "mustoff" / "nuclear_mustoff.csv").to_numpy()

    # [KPG193] mustoff 'off_end_day' exceeding 365: clamp to the last hour of day 365.
    ends_after_year = table[:, 3] > 365
    table[ends_after_year, 3] = 365
    table[ends_after_year, 4] = 24

    # Shift every column (unit id, days and hours) to 0-based indexing.
    table -= 1
    mustoff_start = table[:, 1] * 24 + table[:, 2]
    mustoff_end = table[:, 3] * 24 + table[:, 4]

    return np.column_stack((table[:, 0], mustoff_start, mustoff_end))

mustoff_2022 = get_must_off_2022()
# np.save(path_folder_processed / "mustoff_2022.npy", mustoff_2022)

---
---
---

**2022 HOURLY RENEWABLE** (KPG SPECIFIC) (NOT USED)

In [None]:
def get_renewable_ratio_2022():
    """Load the daily KPG193 renewable profile CSVs into hourly per-bus ratio arrays.

    Each daily file is cleaned (NaNs replaced by 0, bus ids repaired), ordered by
    (hour, bus_id) and reshaped to ``(24, num_buses)``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(solar, wind, hydro)`` ratio arrays, each of shape ``(8760, num_buses)``, taken
        from the ``pv_profile_ratio``, ``wind_profile_ratio`` and ``hydro_profile_ratio`` columns.
    """
    profile_dir = path_folder_unprocessed / "KPG193_ver1_2" / "profile" / "renewables"
    daily_shape = (365, 24, num_buses)
    solar_ratio_2022 = np.empty(daily_shape)
    wind_ratio_2022 = np.empty(daily_shape)
    hydro_ratio_2022 = np.empty(daily_shape)

    for day in range(365):
        # [KPG193] random CSV nans
        daily = pd.read_csv(profile_dir / f"renewables_{day + 1}.csv").fillna(0)

        # [KPG193] missing bus 151, duplicate 153 lines for 24 hours:
        # rows labelled 152 are relabelled 151, then bus 153 is reduced to one row per
        # hour and that row is emitted twice, once as bus 152 and once as bus 153.
        daily = daily.assign(bus_id=daily.bus_id.where(daily.bus_id != 152, 151))
        is_bus_153 = daily.bus_id == 153
        bus_153_hourly = daily[is_bus_153].drop_duplicates("hour")
        repaired = pd.concat(
            [
                daily[~is_bus_153],
                bus_153_hourly.assign(bus_id=152),
                bus_153_hourly.assign(bus_id=153),
            ],
            ignore_index=True,
        )
        df = repaired.sort_values(["hour", "bus_id"])

        solar_ratio_2022[day] = df.pv_profile_ratio.values.reshape(24, num_buses)
        wind_ratio_2022[day] = df.wind_profile_ratio.values.reshape(24, num_buses)
        hydro_ratio_2022[day] = df.hydro_profile_ratio.values.reshape(24, num_buses)

    hourly_shape = (8760, num_buses)
    return (
        solar_ratio_2022.reshape(hourly_shape),
        wind_ratio_2022.reshape(hourly_shape),
        hydro_ratio_2022.reshape(hourly_shape),
    )


def get_renewable_capacity_2022():
    """Read the 2022 per-bus installed capacity of solar, wind and hydro generators.

    Returns
    -------
    tuple of numpy.ndarray
        ``(solar, wind, hydro)`` capacity vectors of length ``num_buses``; buses that do
        not appear in a source's CSV keep a capacity of 0.
    """
    capacity_dir = path_folder_unprocessed / "KPG193_ver1_2" / "renewables_capacity"
    capacity_by_source = {}

    for reg_str in ("solar", "wind", "hydro"):
        capacity_table = (
            pd.read_csv(capacity_dir / f"{reg_str}_generators_2022.csv")
            # [KPG193] random CSV nans
            .fillna(0)
            [["bus_ID", "Pmax [MW]"]]
        )
        # [KPG193] 4 missing buses in solar and hydro: start from zeros so absent buses stay at 0.
        per_bus_capacity = np.zeros(num_buses)
        # bus_ID is 1-based in the file. NOTE(review): if a bus appears on several rows,
        # only the last row's Pmax is kept (plain assignment, no accumulation) - confirm intended.
        per_bus_capacity[capacity_table["bus_ID"].values - 1] = capacity_table["Pmax [MW]"].values
        capacity_by_source[reg_str] = per_bus_capacity

    return capacity_by_source["solar"], capacity_by_source["wind"], capacity_by_source["hydro"]


# Hourly per-bus profile ratios, shape (8760, num_buses) each, and per-bus capacities,
# length num_buses each, from the KPG193 renewable data. Nothing is saved from them here:
# every statement that follows in this cell is commented out.
solar_ratio_2022_kpg, wind_ratio_2022_kpg, hydro_ratio_2022_kpg = get_renewable_ratio_2022()
solar_cap_2022_kpg, wind_cap_2022_kpg, hydro_cap_2022_kpg = get_renewable_capacity_2022()

# # # all renewable sources 
# solar_gen_2022_kpg, wind_gen_2022_kpg, hydro_gen_2022_kpg = solar_ratio_2022_kpg * solar_cap_2022_kpg, wind_ratio_2022_kpg * wind_cap_2022_kpg, hydro_ratio_2022_kpg * hydro_cap_2022_kpg
# renewable_gen_2022_kpg = (solar_gen_2022_kpg + wind_gen_2022_kpg + hydro_gen_2022_kpg).sum(axis=1)
# np.save(path_folder_processed / "renewable_gen_2022_kpg.npy", renewable_gen_2022_kpg)

# renewable_cap_2022_kpg = solar_cap_2022_kpg.sum() + wind_cap_2022_kpg.sum() + hydro_cap_2022_kpg.sum() # each arr is per-bus cap
# renewable_cap_2022_kpg = np.full((8760), renewable_cap_2022_kpg) # its just same for the whole year # 32.588 GW
# np.save(path_folder_processed / "renewable_cap_2022_kpg.npy", renewable_cap_2022_kpg)

# renewable_ratio_2022_kpg = renewable_gen_2022_kpg / renewable_cap_2022_kpg
# np.save(path_folder_processed / "renewable_ratio_2022_kpg.npy", renewable_ratio_2022_kpg)

# only solar (maybe for comparison with KPX)
# solar_gen_2022_kpg = (solar_ratio_2022_kpg * solar_cap_2022_kpg).sum(axis=1)
# np.save(path_folder_processed / "solar_gen_2022_kpg.npy", solar_gen_2022_kpg)

# solar_cap_2022_kpg = np.full((8760), solar_cap_2022_kpg.sum())
# np.save(path_folder_processed / "solar_cap_2022_kpg.npy", solar_cap_2022_kpg)

# solar_ratio_2022_kpg = solar_gen_2022_kpg / solar_cap_2022_kpg
# np.save(path_folder_processed / "solar_ratio_2022_kpg.npy", solar_ratio_2022_kpg)

---
---
---

**2022 HOURLY RENEWABLE** (KPX SPECIFIC) (USED)

In [32]:
# KPX file with renewable generation and installed capacity for 2022 (cp949-encoded).
kpx_renewable_2022 = pd.read_csv(
    path_folder_unprocessed / "historical_renewable" / "한국전력거래소_신재생 발전량 및 설비용량_20221231.csv",
    encoding="cp949",
)

# Alternative kept for reference: aggregate over ALL renewable sources instead of solar only.
# df = (
#     df.groupby(["거래일자", "거래시간"], as_index=False)
#       [["설비용량(MW)", "전력거래량(MWh)"]]
#       .sum()
# )
# renewable_cap_2022 = df["설비용량(MW)"].values
# renewable_gen_2022 = df["전력거래량(MWh)"].values

# Only solar ("태양광") is considered. Modelling the demand distribution together with
# solar, wind, ... as several joint distributions is out of scope for this project, so
# the analysis is limited to two distributions; a three-way distribution was judged
# infeasible within one semester.
is_solar = kpx_renewable_2022["원료원"] == "태양광"
# One row per (trade date "거래일자", trade hour "거래시간"): capacity and traded energy summed over the solar rows.
df = (
    kpx_renewable_2022[is_solar]
    .groupby(["거래일자", "거래시간"], as_index=False)[["설비용량(MW)", "전력거래량(MWh)"]]
    .sum()
)

renewable_cap_2022 = df["설비용량(MW)"].values
renewable_gen_2022 = df["전력거래량(MWh)"].values
# Hourly utilisation: traded energy divided by installed capacity.
renewable_ratio_2022 = renewable_gen_2022 / renewable_cap_2022

np.save(path_folder_processed / "renewable_cap_2022.npy", renewable_cap_2022)
np.save(path_folder_processed / "renewable_gen_2022.npy", renewable_gen_2022)
np.save(path_folder_processed / "renewable_ratio_2022.npy", renewable_ratio_2022)

# Discussion of the modelling issue with using the KPG renewable + status data:
# - The deviation from reality caused by this choice (or what a result with more than two
#   distributions might look like) probably needs a brief discussion, although SMP here is
#   just the linear cost rather than the "average fuel cost" KPX uses, and closing the
#   reality gap is not the goal of this project.
# - Either way, with KPG renewable + KPG status, or with KPX renewable + a self-computed
#   status, the KPG variant was much further from reality because KPG has too much renewable.
# - With KPG renewable it does not make sense to leave out curtailment, which is not the
#   goal of this project.
# - Last and most important: historical renewable data should be used for the question 3 analysis.
# - It is unclear how other teams handled this, but it is basic enough that it cannot be
#   ignored if the analysis is to be accurate.

---
---
---

**2019 ~ 2022 HISTORICAL RENEWABLE AND DEMAND** (KPX) (USED FOR Q3) (LEAP DAY IN 2020 = 2020-02-29 REMOVED)

In [33]:
# 2022 KPX file restricted to solar ("태양광") rows.
# NOTE(review): df_2022 is not referenced again in the visible cells; the 2022 part of the
# history below comes from renewable_cap_2022 / renewable_gen_2022 computed in an earlier cell.
df_2022_all_sources = pd.read_csv(
    path_folder_unprocessed / "historical_renewable" / "한국전력거래소_신재생 발전량 및 설비용량_20221231.csv",
    encoding="cp949",
)
df_2022 = df_2022_all_sources[df_2022_all_sources["원료원"] == "태양광"]

# 2019-2021 KPX file: keep solar rows and sum the file's last two columns per (trade date, trade hour).
hist_all_sources = pd.read_csv(
    path_folder_unprocessed / "historical_renewable" / "2019_2021년 신재생에너지 시간대별 연료원별 거래량 및 설비용량.csv",
    encoding="cp949",
)
hist_solar = hist_all_sources[hist_all_sources["연료원구분"] == "태양광"]
value_columns = list(hist_solar.columns[-2:])
df = hist_solar.groupby(["거래일자", "거래시간"], as_index=False)[value_columns].sum()

# Simplification: the leap day of 2020 (2020-02-29) is removed so that every year has 8760 hours.
# Trade hours are 1-based, hence the one-hour shift when building the timestamps.
trade_dates = df["거래일자"].values.astype("datetime64")
hour_offsets = df["거래시간"].values.astype("timedelta64[h]") - np.timedelta64(1, "h")
timestamp_temp = trade_dates + hour_offsets

# Position of the first hour of the leap day; the 24 entries starting there are skipped.
idx_leap_date_start = np.where(timestamp_temp == np.datetime64("2020-02-29T00"))[0][0]
idx_leap_date_end = idx_leap_date_start + 24


def _without_leap_day(hourly_values):
    """Return ``hourly_values`` with the 24 leap-day entries cut out."""
    return np.concatenate((hourly_values[:idx_leap_date_start], hourly_values[idx_leap_date_end:]))


# 2019-2021 timestamps (leap day removed) followed by the hourly timestamps of 2022.
timestamp_hist = np.concatenate((
    _without_leap_day(timestamp_temp),
    np.arange(np.datetime64("2022-01-01T00"), np.datetime64("2023-01-01T00")),
))

# Capacity is read by column name, generation as the last column of the grouped frame.
renewable_cap_temp = df["설비용량(MW)"].values
renewable_gen_temp = df[df.columns[-1]].values
renewable_cap_hist = np.concatenate((_without_leap_day(renewable_cap_temp), renewable_cap_2022))
renewable_gen_hist = np.concatenate((_without_leap_day(renewable_gen_temp), renewable_gen_2022))
# Hourly utilisation over 2019-2022: generation divided by installed capacity.
renewable_ratio_hist = renewable_gen_hist / renewable_cap_hist

In [34]:
def _flatten_hourly_columns(demand_frame):
    """Drop the first (date) column and flatten the remaining values row by row into a float vector."""
    return demand_frame.to_numpy()[:, 1:].reshape(-1).astype(float)


# 2013-2020 file: keep 2019 onwards and skip the 2020 leap day, matching the renewable history.
df = pd.read_csv(path_folder_unprocessed / "historical_demand" / "2013~2020 수요관리후 발전단 전력수요실적.csv", encoding="cp949")
keep_rows = (df["날짜"] >= "2019-01-01") & (df["날짜"] != "2020-02-29")
demand_2019_2020 = _flatten_hourly_columns(df[keep_rows])

# 2021 file: used in full.
df = pd.read_csv(path_folder_unprocessed / "historical_demand" / "2021년 1_12월 수요관리후 발전단 수요실적.csv", encoding="cp949")
demand_2021 = _flatten_hourly_columns(df)

# 2019-2021 history followed by the 2022 demand built earlier in this notebook.
demand_hist = np.concatenate((demand_2019_2020, demand_2021, demand_2022))

In [35]:
# Persist the 2019-2022 history arrays.
np.save(path_folder_processed / "timestamp_hist.npy", timestamp_hist)
# Filename typo fixed ("renewsable" -> "renewable") so it matches the other renewable_* outputs.
# NOTE(review): any consumer that still loads "renewsable_cap_hist.npy" must be updated.
np.save(path_folder_processed / "renewable_cap_hist.npy", renewable_cap_hist)
np.save(path_folder_processed / "renewable_gen_hist.npy", renewable_gen_hist)
np.save(path_folder_processed / "renewable_ratio_hist.npy", renewable_ratio_hist)
np.save(path_folder_processed / "demand_hist.npy", demand_hist)