In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy.stats import norm
import filtering
import tool

In [2]:
# For a given time slot, if charging is correctly identified, return 1, otherwise return 0.
def cnt(row, Rate_EST):
    if row["estimation"] == Rate_EST:
        return 1
    else:
        return 0

In [3]:
# Calculate the average temperature among the suspicious charging periods (time slots that are identified as charging periods by the algorithm).
def ave_tem(data):
    cnt = 1
    total = 0
    for tem, k in zip(data["temperature"], data["estimation"]):
        if k > 2:
            cnt += 1
            total += tem
    return total / cnt

In [4]:
customer_iid = [
    1642,
    4373,
    6139,
    7719,
    8156,
    2335,
    2361,
    2818,
    3039,
    3456,
    3538,
    4031,
    5746,
    7536,
    7800,
    7901,
    7951,
    8386,
    8565,
    9019,
    9160,
    9278,
    9922,
]

for i in range(23):
    epochs = 3
    customer_id = customer_iid[i]

    Rate_EST = 3.0
    print_condition = 1

    if i == 0:
        print("The first 5 users are EV users")
    if i == 5:
        print("The following 17 users are NonEV users")

    data = pd.read_csv(f"data/user_{customer_id}.csv")

    if customer_id == 6139:
        data = data.loc[~data["Month"].isin([9, 10, 11])]
        data.reset_index(drop=True, inplace=True)

    # Filter out AC load
    acfilter = filtering.ACfilter()
    helper = tool.DisaggregationTool()
    data = acfilter.acfiltering(customer_id, data)

    # Find out base load of customers in each month
    data_base = data[["Month", "consumption"]]
    base_load = data_base.groupby(["Month"], as_index=False).min()
    base_load.rename(columns={"consumption": "base_load"}, inplace=True)
    data = pd.merge(data, base_load, on=["Month"])

    # Initialize the non-charging periods
    dic = dict()
    for i in range(len(base_load)):
        dic[(base_load.iloc[i]["Month"])] = base_load.iloc[i]["base_load"]

    data["under"] = data.apply(
        lambda x: 1
        if (
            x["consumption"] < x["base_load"] + Rate_EST
            or x["forward"] == 0
            or x["backward"] == 0
        )
        else 0,
        axis=1,
    )

    # Initialize the regular load profile
    data_NEV = data[data["under"] == 1]
    data_group = data_NEV.groupby(by=["Month", "Hour", "Minute"], as_index=False).mean()

    data_group["regular_profile"] = data_group["consumption"]
    data_group = data_group[["Month", "Hour", "Minute", "regular_profile"]]
    data_total = pd.merge(data, data_group, on=["Month", "Hour", "Minute"])
    data_total = data_total.sort_values(by=["Month", "Day", "Hour", "Minute"])
    data_total["prev1"] = data_total["consumption"].shift(1)
    data_total["dif1"] = data_total["consumption"] - data_total["prev1"]
    data_total["prev2"] = data_total["consumption"].shift(2)
    data_total["dif2"] = data_total["consumption"] - data_total["prev2"]
    data_total.to_csv(f"data/user_{customer_id}_profile.csv", index=False)

    for epoch in range(epochs):

        # Read users' load profile from previous epoch
        data = pd.read_csv(f"data/user_{customer_id}_profile.csv")
        data = data.fillna(0)

        # Identify the potential start and end of charging periods
        data["higher"] = data.apply(
            lambda x: helper.charging_status(x, Rate_EST, 2 / 3), axis=1
        )
        data["start"] = data.apply(
            lambda x: helper.change_point_status_start(x, Rate_EST, 2 / 3), axis=1
        )
        data["end"] = data.apply(
            lambda x: helper.change_point_status_end(x, Rate_EST, 2 / 3), axis=1
        )
        start_candidate = []
        candidate = []

        for i in range(len(data)):
            if data.iloc[i]["start"] == 1 and data.iloc[i]["higher"] == 1:
                start_candidate.append(i)

        # Identify the (start,end) pair of a potential charging period
        visited = set()
        start_visited = set()
        for start in start_candidate:
            for end in range(start + 1, len(data)):
                if (
                    data.iloc[end]["end"] == -1 and data.iloc[end]["higher"] <= 0
                ) and start not in start_visited:
                    candidate.append([start, end])
                    start_visited.add(start)
                    for i in range(start, end + 1):
                        visited.add(i)
                    break
                elif end - start > 24:
                    break

        start_visited = set()
        for start in start_candidate:
            for end in range(start + 1, len(data)):
                if (
                    data.iloc[end]["end"] == -1
                    and data.iloc[end + 1]["end"] == -1
                    and data.iloc[end + 2]["end"] == -1
                    and start not in start_visited
                ):
                    candidate.append([start, end])
                    start_visited.add(start)
                    for i in range(start, end + 1):
                        visited.add(i)
                    break
                elif end - start > 24:
                    break

        # Delete detected consecutive periods (most likely they are not true charging periods)
        candidate = helper.consecutive_filter(candidate)

        if len(candidate) == 0:
            print(
                f"id:{customer_id}, No chanrging periods detected. customer {customer_id} does not have EV."
            )
            print_condition = 0
            break

        # If the (start, end) pair can pass the ChargingDetection test, then it will be detected as charging periods
        EVcharging = []
        for start, end in candidate:
            mean, std = helper.distribution(data, start, end, visited)
            if helper.ChargingDetection(data, start, end, mean, std, Rate_EST, dic):
                EVcharging.append([start, end - 1])

        Chargingrate = []
        cr1 = []
        for start, end in EVcharging:
            Chargingrate.append(max(data.iloc[start]["dif2"], data.iloc[start]["dif1"]))
            cr1.append(max(data.iloc[start]["dif2"], data.iloc[start]["dif1"]))

        # Estimate the charging rate
        lower = np.percentile(Chargingrate, 25)
        upper = np.percentile(Chargingrate, 75)
        Rate_EST = np.mean([i for i in Chargingrate if lower < i < upper])
        Rate_EST = max(Rate_EST, 3)

        dif = []
        visited = set()
        for start, end in EVcharging:
            for k in range(start, end + 1):
                if k not in visited:
                    dif.append(
                        data.iloc[k]["consumption"] - data.iloc[k]["regular_profile"]
                    )
                    visited.add(k)

        Charging_period = set()
        bound = set()
        for start, end in EVcharging:
            for k in range(start, end + 1):
                Charging_period.add(k)
            bound.add(start - 1)
            bound.add(end + 1)

        data["estimation"] = helper.estimation(
            Rate_EST, len(data), Charging_period, bound
        )
        avg_temperature = ave_tem(data)

        # Collect all periods that are detected as non-carging periods
        Non_EVPeriods = data[data["estimation"] < 1]
        Non_EVPeriods = Non_EVPeriods[
            [
                "dataid",
                "local_15min",
                "grid",
                "solar",
                "car",
                "Month",
                "Day",
                "Hour",
                "Minute",
                "temperature",
                "consumption",
            ]
        ]

        Non_EVPeriods.to_csv(f"data/user_{customer_id}_Updated.csv", index=False)

        ######################UPDATE#####################

        if epoch == epochs - 1:
            break

        # Update users' regular load profile
        data = pd.read_csv(f"data/user_{customer_id}_Updated.csv")
        user = pd.read_csv(f"data/user_{customer_id}.csv")

        if customer_id == 6139:
            user = user.loc[~user["Month"].isin([9, 10, 11])]
            user.reset_index(drop=True, inplace=True)

        base_load = data.groupby(["Month"], as_index=False).min()[
            ["Month", "consumption"]
        ]
        base_load.rename(columns={"consumption": "base_load"}, inplace=True)
        user = pd.merge(user, base_load, how="left", on=["Month"])
        data_group = data.groupby(by=["Month", "Hour", "Minute"], as_index=False).mean()
        data_group.rename(columns={"consumption": "regular_profile"}, inplace=True)

        data_group = data_group[["Month", "Hour", "Minute", "regular_profile"]]
        data_group = pd.merge(
            user, data_group, how="left", on=["Month", "Hour", "Minute"]
        )
        data_group = data_group.sort_values(by=["Month", "Day", "Hour", "Minute"])

        data_group["prev1"] = data_group["consumption"].shift(1)
        data_group["dif1"] = data_group["consumption"] - data_group["prev1"]
        data_group["prev2"] = data_group["consumption"].shift(2)
        data_group["dif2"] = data_group["consumption"] - data_group["prev2"]
        data_group = data_group.sort_values(by=["Month", "Day", "Hour", "Minute"])

        for_back = pd.read_csv(f"data/forward_{customer_id}.csv")
        for_back = for_back[["Month", "Day", "Hour", "Minute", "forward", "backward"]]
        data_group = pd.merge(
            data_group, for_back, on=["Month", "Day", "Hour", "Minute"]
        )

        data_group.to_csv(f"data/user_{customer_id}_profile.csv")

    if print_condition:
        data["cnt"] = data.apply(lambda x: cnt(x, Rate_EST), axis=1)

    if customer_id != 7719:
        print(
            f"customer_id:{customer_id}, mean: {np.round(np.mean(dif), 2)}, median: {np.round(np.median(dif), 2)}, Average Charging Hours/Week:{np.round(len(dif)/4/52, 2)}, Average Temperature:{np.round(avg_temperature,2)}"
        )
    else:
        print(
            f"customer_id:{customer_id}, mean: {np.round(np.mean(dif), 2)}, median: {np.round(np.median(dif), 2)}, Average Charging Hours/Week:{np.round(len(dif)/4/31, 2)}, Average Temperature:{np.round(avg_temperature,2)}"
        )

    os.remove(f"data/user_{customer_id}_profile.csv")
    os.remove(f"data/user_{customer_id}_Updated.csv")
    os.remove(f"data/forward_{customer_id}.csv")

The first 5 users are EV users
customer_id:1642, mean: 3.63, median: 3.47, Average Charging Hours/Week:7.87, Average Temperature:21.22
customer_id:4373, mean: 3.48, median: 3.3, Average Charging Hours/Week:11.91, Average Temperature:21.81
customer_id:6139, mean: 3.27, median: 3.1, Average Charging Hours/Week:5.87, Average Temperature:19.16
customer_id:7719, mean: 3.12, median: 3.02, Average Charging Hours/Week:2.7, Average Temperature:23.86
customer_id:8156, mean: 3.71, median: 3.49, Average Charging Hours/Week:7.41, Average Temperature:21.3
The following 17 users are NonEV users
customer_id:2335, mean: 3.49, median: 3.42, Average Charging Hours/Week:3.75, Average Temperature:25.57
customer_id:2361, mean: 3.13, median: 3.04, Average Charging Hours/Week:5.99, Average Temperature:30.35
customer_id:2818, mean: 3.55, median: 3.21, Average Charging Hours/Week:0.03, Average Temperature:18.1
customer_id:3039, mean: 3.33, median: 3.25, Average Charging Hours/Week:6.07, Average Temperature:27.1

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


id:3538, No chanrging periods detected. customer 3538 does not have EV.
customer_id:3538, mean: 2.28, median: 2.28, Average Charging Hours/Week:0.0, Average Temperature:0.0
customer_id:4031, mean: 3.82, median: 3.7, Average Charging Hours/Week:1.92, Average Temperature:20.81
customer_id:5746, mean: 2.08, median: 2.13, Average Charging Hours/Week:0.05, Average Temperature:25.68
customer_id:7536, mean: 3.33, median: 3.18, Average Charging Hours/Week:1.03, Average Temperature:30.01
customer_id:7800, mean: 2.41, median: 2.41, Average Charging Hours/Week:2.39, Average Temperature:28.8
customer_id:7901, mean: 3.78, median: 3.77, Average Charging Hours/Week:1.61, Average Temperature:22.0
customer_id:7951, mean: 4.26, median: 4.17, Average Charging Hours/Week:1.02, Average Temperature:18.14
customer_id:8386, mean: 2.8, median: 2.63, Average Charging Hours/Week:0.4, Average Temperature:28.06
customer_id:8565, mean: 2.89, median: 2.93, Average Charging Hours/Week:2.17, Average Temperature:19.72
