In [1]:
import os
import pandas as pd
import numpy as np
import glob
import re

os.chdir("../..")
from utils.utils import *
os.chdir("..")

pd.options.mode.chained_assignment = None

In [2]:
# parameter values of pv systems considered

# commissioning date must lie in [t_start, t_end]
# t_start = 0 -> accumulate over all t until t_end
t_start = 0
t_end = 2021
# net nominal capacity must be smaller or equal to max_nom_cap
max_nom_cap = 10

In [3]:
def add_pv_m_over_t(
    t_start, t_end, pow, col_id_m, col_name_m, col_count_pv, col_power_accum_pv
):
    """
    Adds up annually installed power/net nominal capacity of PV systems in municipalities. Returns a dataframe containing number and accumulated net nominal capacity of pv systems on municipality-level with a commissioning date between year "t_start" and "t_end" (i.e. [t_start,t_end]) and a maximum net nominal capacity of "pow".
    @param t_start: beginning of time interval for considered commissioning dates
    @param t_end: end of time interval for considered commissioning dates
    @param pow: maximum net nominal capacity considered
    @param col_id_m: ags of municipalities
    @param col_name_m: names of municipalities
    @param col_count_pv: number of pv systems of a municipality
    @param col_power_accum_pv: accumulated net nominal capacity of all systems within a municipality
    @return: dataframe with number of pv system and accum. net nominal capacity for all municipalities. Systems considered: Comissioning date in [t_start, t_end], net nominal capacity smaller or equal to pow. Municipalities as rows.
    """
    if t_start == t_end:
        return pd.read_csv(
            f"data/intermediate_data/PV_in_municipalities/pv_m_{t_end}_max_{pow}.csv",
            sep=";",
            usecols=[col_id_m, col_name_m, col_count_pv, col_power_accum_pv],
        )
    else:
        csv_list = glob.glob(
            f"data/intermediate_data/PV_in_municipalities/pv_m_*_max_{pow}.csv"
        )
        list_years = np.array(
            [int(re.findall("[0-9]{4}", file_name)[0]) for file_name in csv_list]
        )
        df_pv_m_add = pd.DataFrame(
            columns=[col_id_m, col_name_m, col_count_pv, col_power_accum_pv]
        )
        for year in list_years[(list_years >= t_start) & (list_years <= t_end)]:
            df_temp = pd.read_csv(
                        f"data/intermediate_data/PV_in_municipalities/pv_m_{year}_max_{pow}.csv",
                        sep=";",
                        usecols=[
                            col_id_m,
                            col_name_m,
                            col_count_pv,
                            col_power_accum_pv,
                        ],
                    )
            if not df_temp.empty:
                df_pv_m_add = pd.concat(
                    [df_pv_m_add, df_temp],
                    ignore_index=True,
                )
        df_pv_m_accum = pd.DataFrame(
            df_pv_m_add.groupby(by=col_id_m, as_index=False).agg(
                {col_name_m: "unique", col_count_pv: "sum", col_power_accum_pv: "sum"}
            )
        )
        df_pv_m_accum[col_name_m] = df_pv_m_accum[col_name_m].apply(
            lambda elem: elem[0]
        )
        return df_pv_m_accum

In [4]:
def aggregate_pv_ma(
    df_pv_m,
    df_map_m_to_ma,
    col_id_m,
    col_id_ma,
    col_name_ma,
    col_count_pv,
    col_power_accum,
):
    """
    Transfer data on pv systems (count and accum. nominal capacity) from municipality-level to municipality association-level.
    @param df_pv_m: Dataframe with pv data on municipality-level
    @param df_map_m_to_ma: Dataframe giving mapping of m to ma
    @param col_id_m: ags of municipalities
    @param col_id_ma: rs of municipality associations
    @param col_name_ma: names of municipality associations
    @param col_count_pv: number of pv systems considered
    @param col_power_accum: accumulated net nominal capacity of pv systems
    @return: Dataframe with ma's as rows and data on pv systems as columns (count and accum. nominal capacity).
    """
    df_pv_m_merged = df_pv_m.merge(
        df_map_m_to_ma[[col_id_m, col_id_ma, col_name_ma]], on=col_id_m, how="left"
    )
    assert (
        df_pv_m_merged[col_id_ma].isna().sum() == 0
    ), "Some municipalities are not assigned to any municipal association."
    df_pv_ma = pd.DataFrame(
        df_pv_m_merged[[col_id_ma, col_name_ma, col_count_pv, col_power_accum]]
        .groupby(by=col_id_ma, as_index=False)
        .agg({col_name_ma: "unique", col_count_pv: "sum", col_power_accum: "sum"})
    )
    df_pv_ma[col_name_ma] = df_pv_ma[col_name_ma].apply(lambda elem: elem[0])
    return df_pv_ma

In [5]:
def get_pv_ma(
    t_start,
    t_end,
    pow_max,
    col_id_m=col_id_m,
    col_name_m=col_name_m,
    col_id_ma=col_id_ma,
    col_name_ma=col_name_ma,
    col_count_pv=col_count_pv,
    col_power_accum_pv=col_power_accum_pv,
):
    """
    On the input of a time interval for the commissioning date and a maximum net nominal capacity, the function returns a dataframe with the number and accum. net nominal capacity of pv systems installed within municipality associations. It uses the mapping of m to ma given by "mapping_m_to_ma_extended_2000_2019.csv" which includes all changes in ags between 2000 and 2019.
    @param t_start: Earliest year of commissioning dates for pv systems to be considered
    @param t_end: Latest year of commissioning dates for pv systems to be considered
    @param pow_max: Maximum net nominal capacity for pv systems to be considered
    @return: Dataframe with count and accum. net nominal capacity of pv systems on level of municipality associations.
    """
    df_accum_m = add_pv_m_over_t(
        t_start=t_start,
        t_end=t_end,
        pow=pow_max,
        col_id_m=col_id_m,
        col_name_m=col_name_m,
        col_count_pv=col_count_pv,
        col_power_accum_pv=col_power_accum_pv,
    )
    df_map_m_to_ma = pd.read_csv(
        "data/intermediate_data/mapping_municipalities_2000_2019.csv", sep=";"
    )
    df_accum_ma = aggregate_pv_ma(
        df_accum_m,
        df_map_m_to_ma,
        col_id_m=col_id_m,
        col_id_ma=col_id_ma,
        col_name_ma=col_name_ma,
        col_count_pv=col_count_pv,
        col_power_accum=col_power_accum_pv,
    )
    return df_accum_ma

In [6]:
df_pv_accum_ma = get_pv_ma(t_start=t_start, t_end=t_end, pow_max=max_nom_cap)
df_pv_accum_ma.to_csv(
    f"data/intermediate_data/pv_ma_{t_start}_{t_end}_max_{max_nom_cap}.csv",
    sep=";",
    index=False,
)

  df_pv_m_add = pd.concat(
