In [7]:
import numpy as np 
import gym
import random
import time
import math
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import pickle
import pandas as pd
import json
import os
from gym import Env, spaces
from scipy.stats import lognorm
from gym_env import RoadCharging, ConstrainAction
# from show_trajectory import show_trajectory

def base_policy(env, state):
    alpha = state["RideTime"]
    beta = state["ChargingStatus"]
    theta = state["SoC"]

    action = np.zeros(env.n, dtype=int)
    for i in range(env.n):
        if state["RideTime"][i] >= 2:  # if on a ride, must not charge
            action[i] = 0
        elif state["SoC"][i] >= 1.0:  # if full capacity, not charge
            action[i] = 0
        elif state["SoC"][i] <= env.low_SoC:  # if low capacity has to charge
            action[i] = 1

    total_start = sum(1 for a, s in zip(action, state["ChargingStatus"]) if s == 0 and a == 1)
    total_continue = sum(1 for a, s in zip(action, state["ChargingStatus"]) if s == 1 and a == 1)
    total_charging = sum(action)

    if total_charging > env.m:  # limit charging requests to available charging capacity
        print('Exceed charger capacity!')
        requesting_agents = [i for i, (a, s) in enumerate(zip(action, state["ChargingStatus"])) if s == 0 and a == 1]

        available_capacity = env.m - total_continue

        if available_capacity <= 0:
            print('No charger available now.')
            # flip all
            to_flip = requesting_agents
            action[to_flip] = 0

        elif available_capacity > 0:
            if np.any(action == 1):
                # Scheme #1:
                # Randomly select from the set of agents requesting charging and set their charging actions to 0
                to_flip = random.sample(requesting_agents, total_start - available_capacity)
                print('Agents requesting charging:', requesting_agents)
                print('Flip agents:', to_flip)

                action[to_flip] = 0

    return action

def main():

    n_EVs = 5
    n_chargers = 1
    avg_return = 0
    SoC_data_type = "polarized"
    data_folder = "test_cases"
    results_folder = "results"
    policy_name = "base_policy"

    tt = os.listdir(data_folder)
    print(tt)

    instance_count = 20
    for instance_num in range(1, 1 + instance_count):
        test_case = f"all_days_negativePrices_{SoC_data_type}InitSoC_{n_chargers}for{n_EVs}"
        test_cases_dir = os.path.join(data_folder, test_case)  
        data_file = os.path.join(test_cases_dir, f"config{instance_num}_{n_EVs}EVs_{n_chargers}chargers.json")
        env = ConstrainAction(RoadCharging(data_file))
        env.seed(42)

        # Number of agents, states, and actions
        n_steps = env.k
        n_agents = env.n
        n_states = 3  # 3 possible states per agent
        n_actions = 2  # 2 action options per agent

        print(f"Number of agents {env.n}")
        print(f"Number of time steps {env.k}")

        # Training loop
        n_episodes = 1
        ep_return = []
        for episode in range(n_episodes):
            state = env.reset()
            done = False

            while not done:

                action = base_policy(env, state)

                # Perform joint actions in the environment
                next_state, rewards, done, _ = env.step(action)

                print(f"return up to now is {env.ep_return}")
                ep_return.append(env.ep_return)

                state = next_state

        solution = {
            "actions": env.trajectory['actions'].tolist(),
            "RideTime": env.trajectory['RideTime'].tolist(),
            "ChargingStatus": env.trajectory['ChargingStatus'].tolist(),
            "SoC": env.trajectory['SoC'].tolist(),
            "final_return": env.ep_return
        }
        save_dir = os.path.join(results_folder, test_case, policy_name)
        os.makedirs(save_dir, exist_ok=True)    
        with open(os.path.join(save_dir, f"instance{instance_num}_solutoin.json"), "w") as f:
            json.dump(solution, f, indent=4)  # Use indent for readability

        # show_trajectory(env.n, env.k, env.trajectory, save_dir)

        avg_return += env.ep_return

        # Close the env
        env.close()
    avg_return /= instance_count
    print(f"average return over {instance_count} instances:", avg_return)

if __name__ == "__main__":
    main()


['all_days_negativePrices_polarizedInitSoC_1for20', 'all_days_negativePrices_polarizedInitSoC_1for8', 'all_days_negativePrices_polarizedInitSoC_1for5', 'all_days_negativePrices_polarizedInitSoC_1for10']


ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.

In [8]:
import pandas as pd

# 原始数据
ride_time_probs_data = {
    "bin_edges": [1.11, 1.368, 1.626, 1.885, 2.143, 2.401, 2.66, 2.918, 3.176, 3.435, 3.693, 3.952, 4.21, 4.468, 4.727, 4.985, 5.243, 5.502, 5.76, 6.018, 6.277],
    "probabilities": {
        "all_days": [0.0005, 0.0025, 0.0111, 0.0335, 0.0804, 0.1337, 0.1704, 0.1774, 0.1575, 0.1172, 0.069, 0.0314, 0.0111, 0.0035, 0.0007, 0.0001, 0.0, 0.0, 0.0, 0.0],
        "weekends": [0.0003, 0.0023, 0.0119, 0.0336, 0.0852, 0.1411, 0.1697, 0.1844, 0.1586, 0.1202, 0.0627, 0.0216, 0.007, 0.0009, 0.0004, 0.0, 0.0, 0.0001, 0.0, 0.0],
        "weekdays": [0.0006, 0.0023, 0.0107, 0.0335, 0.0781, 0.1302, 0.1707, 0.1741, 0.1569, 0.1158, 0.072, 0.0361, 0.0131, 0.0047, 0.0008, 0.0002, 0.0001, 0.0, 0.0, 0.0001],
        "holidays": [0.0, 0.0027, 0.0054, 0.0431, 0.1132, 0.1563, 0.1941, 0.1402, 0.0997, 0.1294, 0.0755, 0.0377, 0.0027, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        "nonholidays": [0.0005, 0.0027, 0.0112, 0.0333, 0.0798, 0.1333, 0.17, 0.178, 0.1584, 0.117, 0.0689, 0.0313, 0.0113, 0.0035, 0.0007, 0.0001, 0.0, 0.0, 0.0, 0.0]
    }
}

# 解构数据
bin_edges = ride_time_probs_data["bin_edges"]
probabilities = ride_time_probs_data["probabilities"]

# 构建 DataFrame
df = pd.DataFrame(probabilities)
df["bin_edges"] = bin_edges

# 显示结果
print(df)


ValueError: Length of values (21) does not match length of index (20)