In [1]:
import sys
!{sys.executable} -m pip install tqdm
!{sys.executable} -m pip install torch torchvision
!{sys.executable} -m pip install matplotlib

Collecting tqdm
  Using cached tqdm-4.65.0-py3-none-any.whl (77 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.65.0
Collecting torch
  Using cached torch-2.0.0-cp310-cp310-manylinux1_x86_64.whl (619.9 MB)
Collecting torchvision
  Using cached torchvision-0.15.1-cp310-cp310-manylinux1_x86_64.whl (6.0 MB)
Collecting nvidia-cusolver-cu11==11.4.0.1
  Using cached nvidia_cusolver_cu11-11.4.0.1-2-py3-none-manylinux1_x86_64.whl (102.6 MB)
Collecting filelock
  Using cached filelock-3.12.0-py3-none-any.whl (10 kB)
Collecting nvidia-nccl-cu11==2.14.3
  Using cached nvidia_nccl_cu11-2.14.3-py3-none-manylinux1_x86_64.whl (177.1 MB)
Collecting nvidia-cusparse-cu11==11.7.4.91
  Using cached nvidia_cusparse_cu11-11.7.4.91-py3-none-manylinux1_x86_64.whl (173.2 MB)
Collecting triton==2.0.0
  Using cached triton-2.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (63.3 MB)
Collecting nvidia-cuda-cupti-cu11==11.7.101
  Using cached nvidia_cuda_cupti_cu11-11.7.101-py

In [6]:
import pandas as pd
from tqdm import tqdm

import DQN
import DQN_env
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.cm as cm
import torch
import os
import json
import matplotlib.ticker as ticker
from datetime import datetime

MEMORY_CAPACITY = 2000
PATH = 'saved_nets'
N_DAYS = 2
N_STEPS = 7 * 24 * 4
BATTERY_SIZE = 100
MAX_BATTERY_ENERGY = 5.8 * 1 / 4 * 2

seed = 42
df = pd.read_csv('working_data_peak.csv')

median_market_price = np.median(np.array(df['SMP']))

def format_fn(tick_val, tick_pos):
    if int(tick_val) in steps:
        return labels[int(tick_val)]
    else:
        return ''

def just_date(timestamp):
    dates = []
    for stamp in timestamp:
        stamp = datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
        stamp = stamp.strftime('%-d.%-m.')
        dates.append(stamp)
    return dates

def run_episode(n_steps, env1, env2, dqn, epsilon, a, b, timestamp):
    seed = np.random.randint(0, 1000)
    # ------------------------------------------------------
    env1.reset(seed)
    state = env1.next_observation_normalized()
    cummulative_rewardRL = []
    actions = []
    battery_too_full = []
    rewards = []
    sRL = 0
    # ------------------------------------------------------
    env2.reset(seed)
    cummulative_reward_rand = []
    s_rand = 0
    for step in range(n_steps):

        action_rand = 1  # np.random.randint(0, 2)
        obs_rand, reward_rand, terminated_rand = env2.step(action_rand)
        # -------------------------------------------
        action = dqn.choose_action(state, epsilon)
        actions.append(action)
        obs, reward, terminated = env1.step(action)
        past_state = state
        state = env1.next_observation_normalized()

        capacity = obs[1, -1]
        past_capacity = obs[1, -2]
        market_price = obs[5, -1]
        median_market_price = np.median(obs[5,:])
        my_reward = battery_penalty_expand(capacity, env1.full_battery_capacity, 0.2, 0.8) \
                    + 10 * a * slope_market_price(capacity, past_capacity, market_price, median_market_price) \
                    + b * reward \
                    + 0 * action_price(action, market_price, median_market_price)

        rewards.append(my_reward)

        # the reinforcement learning tends to save some energy in the battery, therefore we
        # "sell" all the energy left in the battery and add it to the cumulative reward
        if step == n_steps - 1:
            left_in_battery = obs[1, -1]
            last_price = obs[5, -1]
            left_in_battery_sold = left_in_battery * last_price
            sRL += left_in_battery_sold

            left_in_battery_rand = obs_rand[1, -1]
            last_price_rand = obs_rand[5, -1]
            left_in_battery_sold_rand = left_in_battery_rand * last_price_rand
            s_rand += left_in_battery_sold_rand

        s_rand += reward_rand
        cummulative_reward_rand.append(s_rand)

        sRL += reward
        cummulative_rewardRL.append(sRL)
        
        dqn.store_transition(past_state, action, my_reward, state)

        if dqn.memory_counter > MEMORY_CAPACITY:
            dqn.learn()
        
        if terminated:
            break

    #if env1.test:
        #plot_results(env1, env2, cummulative_rewardRL, cummulative_reward_rand, actions, rewards, timestamp)

    return sRL, s_rand


def plot_results(env1, env2, cummulative_rewardRL, cummulative_reward_rand, actions, rewards, timestamp):
    history = np.array(env1.history)
    steps = history[:, 0]
    battery_capacity = history[:, 1]
    energy_consumption = history[:, 3]
    market_price = history[:, 5] * 100
    amount_paid = history[:, 6] * 100
    time_of_day = history[:, 7]
    # print(np.mean(energy_consumption))
    fig, ax = plt.subplots()
    labels = just_date(timestamp)
    ax.xaxis.set_major_formatter(format_fn)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.plot(steps, battery_capacity, label='DQN battery charge', c='blue', linewidth=0.7)
    # plt.plot(steps, amount_paid, label='amount paid', c='brown', linewidth=0.5)
    ax.plot(steps, market_price, label='market price', c='orange', linewidth=0.5)
    ax.plot(steps, np.array(cummulative_rewardRL) / 5, label='DQN cost', c='darkblue', linewidth=0.7)
    ax.plot(steps, np.array(actions) * 10, label='action', c='lightgreen', linewidth=0.5, alpha=0.4)
    ax.plot(steps, np.array(rewards) * 10, c='red', linewidth=0.3, alpha=0.5)
    '''
    cmap = cm.get_cmap('Purples')
    for i in range(n_steps - 1):
        h = ax.get_ylim()[1] - ax.get_ylim()[0]
        w = 1
        c1 = (steps[i], plt.ylim()[0])
        rect = patches.Rectangle(c1, w, h, color=cmap(time_of_day[i]))
        ax.add_patch(rect)
    '''
    history_rand = np.array(env2.history)
    ax.plot(steps, np.array(cummulative_reward_rand) / 5, label='compare cost', c='magenta', linewidth=0.7)
    battery_capacity_rand = history_rand[:, 1]
    ax.plot(steps, battery_capacity_rand, label='compare battery charge', c='magenta', linewidth=0.7, alpha=0.5)

    ax.legend(loc='lower left', prop={'size': 6})
    fig.show()

    
def print_episode_results(s1, s2, episode, epsilon):   
    response = "Episode:" + episode + " | Epsilon: " + str(round(epsilon, 2)) + " | DQN cost:" + str(round(s1, 2)) + " | compare cost:" + str(round(s2, 2)) + " | Gain over compare: " + str(round(s1 - s2, 2)) + "( " + str(round(100 * (s1 - s2) / abs(s2), 2)) + "% )"
    print(response)    
    return round(100 * (s1 - s2) / abs(s1), 2), response


def action_price(action, market_price, median_market_price):
    above_median = 1 if market_price > median_market_price else 0
    if (market_price > median_market_price) == (action == 1):
        return 1
    return -1


def battery_penalty(capacity, full_capacity):
    relative_c = capacity / full_capacity
    if relative_c < 0.5:
        f = - ((4 * relative_c - 2) ** 2) * 2
    else:
        f = - ((4 * relative_c - 2) ** 2) * 4
    return f + 2


def battery_penalty_expand(capacity, full_capacity, zero_low, zero_high):
    x = capacity / full_capacity
    if x < zero_low:
        f = - (2 / zero_low * x - 2) ** 2 / 2
    elif x > zero_high:
        f = - (2 / (1 - zero_high) * (x - zero_high)) ** 2 * 4
    else:
        f = 0
    return f + 1


def slope_market_price(capacity, past_capacity, market_price, avg_market_price):
    slope = capacity - past_capacity
    relative_market_price = market_price - avg_market_price
    return - (slope * relative_market_price)

def save_properties(path, df, epsilon, a, b):
    d = {
        "battery": BATTERY_SIZE,
        "power": MAX_BATTERY_ENERGY,
        "n_days": N_DAYS,
        "epsilon": epsilon,
        "a": a,
        "b": b
    }
    df.to_csv(path + '/df.csv')
    
    with open(path + "/properties.json", "w") as outfile:
            json.dump(d, outfile)

def load_properties(path):
    d = {}       
    with open(path + "/properties.json", "r") as openfile:
        d = json.load(openfile)
    return d

def test(data, netpath):
    print("--- TESTING ---")
    df = pd.read_csv(data)
    
    #properties = load_properties(netpath)
    #epsilon=properties['epsilon']
    
    epsilon = 0.02
    n = 30000
    test_n_steps = len(df) - n
    timestamp = df['Timestamp'][n:]
    envRL_test = DQN_env.Env(df, full_battery_capacity=BATTERY_SIZE, max_energy=MAX_BATTERY_ENERGY, n_days=N_DAYS, n_steps=test_n_steps, low=n, high=len(df), test=True)
    env_comp_test = DQN_env.Env(df, full_battery_capacity=BATTERY_SIZE, max_energy=MAX_BATTERY_ENERGY, n_days=N_DAYS, n_steps=test_n_steps, low=n, high=len(df), test=True)
    dqn = DQN.DQN(envRL_test.next_observation_normalized().shape[0], 4)
    dqn.load(netpath)
    cost_dqn, cost_comp = run_episode(test_n_steps, envRL_test, env_comp_test, dqn, epsilon, 3, 3, timestamp)
    gain, response = print_episode_results(cost_dqn, cost_comp, "Testing episode: "+ netpath, epsilon)
    return gain

def run(a, b, cost_to_beat, k, csv_path):
    
    # ENVIRONMENT AND AGENT INITIALIZATION
    envRL = DQN_env.Env(df, BATTERY_SIZE, MAX_BATTERY_ENERGY, N_DAYS, N_STEPS)
    envRL.reset(seed)
    
    dqn = DQN.DQN(envRL.next_observation_normalized().shape[0], 4)
    #dqn.load('saved_nets/420percent_peak')
    env_rand = DQN_env.Env(df, BATTERY_SIZE, MAX_BATTERY_ENERGY, N_DAYS, N_STEPS)
    env_rand.reset(seed)
    # TRAINING
    n_episodes = 200
    epsilon = 1
    for episode in tqdm(range(n_episodes)):
        epsilon = epsilon * 0.98
        cost_dqn, cost_comp = run_episode(N_STEPS, envRL, env_rand, dqn, epsilon, a, b, None)
    
    path = 'saved_nets/it_' + str(k)
    os.mkdir(path)
    dqn.save(path)
    save_properties(path, df, epsilon, a, b)
    dqn.load(path)
    # TESTING
    print("--- TESTING ---")
    n = 30000
    test_n_steps = len(df) - n
    timestamp = df['Timestamp'][n:]
    envRL_test = DQN_env.Env(df, full_battery_capacity=BATTERY_SIZE, max_energy=MAX_BATTERY_ENERGY, n_days=N_DAYS, n_steps=test_n_steps, low=n, high=len(df), test=True)
    env_comp_test = DQN_env.Env(df, full_battery_capacity=BATTERY_SIZE, max_energy=MAX_BATTERY_ENERGY, n_days=N_DAYS, n_steps=test_n_steps, low=n, high=len(df), test=True)
    
    cost_dqn, cost_comp = run_episode(test_n_steps, envRL_test, env_comp_test, dqn, epsilon, a, b, timestamp)
    gain, response = print_episode_results(cost_dqn, cost_comp, "Testing episode, iteration: " + str(k), epsilon)    
    if cost_dqn > cost_to_beat:
        dqn.load(path)
        dqn.save('saved_nets')
        print("--- SAVED ---")
        cost_to_beat = cost_dqn
        
    #test('working_data_peak.csv', 'saved_nets')
    return cost_to_beat


In [30]:
import shutil
import os

BATTERY_SIZE = 100
MAX_BATTERY_ENERGY = 5.8 * 1 / 4 * 2

cost = -600
k = 1
l = 1


for i in range(0, k):
    if os.path.exists('saved_nets/it_' + str(i)):
        shutil.rmtree('saved_nets/it_' + str(i))
    cost = run(3,3,-600, i, 'working_data.csv')
    with open('results.txt', 'a') as f:
        f.write(str(i) + ' ' + str(cost) + '\n')
        

for i in range(k, l):
    if os.path.exists('saved_nets/it_' + str(i)):
        shutil.rmtree('saved_nets/it_' + str(i))
    cost = run(3,3,-600, i, 'working_data.csv')
    with open('results.txt', 'a') as f:
        f.write(str(i) + ' ' + str(cost) + '\n')
        

100%|██████████| 1/1 [00:00<00:00,  1.90it/s]


KeyboardInterrupt: 

In [29]:
BATTERY_SIZE = 60
MAX_BATTERY_ENERGY = 5.8 * 1 / 4 * 4
test('working_data.csv', 'saved_nets/24percent_real_price')

--- TESTING ---
Episode:Testing episode: saved_nets/24percent_real_price | Epsilon: 0.02 | DQN cost:-399.67 | compare cost:-459.56 | Gain over compare: 59.89( 13.03% )


14.98

Kaj če ne bi imeli generation? Kakšen bi bil zaslužek?