In [None]:
import tensorflow as tf
import tensorflow_quantum as tfq
from collections import deque, Counter
import matplotlib.pyplot as plt
from args_config import args
import copy
import numpy as np
import time
import csv
import os
from datetime import datetime
from env import SchedulingEnv
from cirq.contrib.svg import SVGCircuit
import cirq, sympy
from tensorflow.keras.utils import plot_model
from tensorflow.python.client import device_lib
from QDQN_model import generate_model_Qlearning, generate_memory, QDQN_test, get_gpu_info
# Restrict TensorFlow's Python logger to messages of level ERROR and above.
tf.get_logger().setLevel('ERROR')

In [None]:
# Build the scheduling environment from the imported `args` configuration object.
env = SchedulingEnv(args)
# Result of get_gpu_info(); it is written verbatim into the results CSV at the end of the notebook.
gpu_info = get_gpu_info()

In [3]:
# Hyper-parameters, optimizers and variable indices shared by the training cells below.

# Multiplier applied to the max next-state Q-value when building the training target.
gamma = 0.89 # Q-decay
# Second positional argument of generate_model_Qlearning.
# NOTE(review): presumably the number of circuit layers -- confirm in QDQN_model.
layers_num = 11
# Bound of the per-episode step counter in the training loop.
iteration = 2000
# The target network is re-synchronised every `target_update` update steps.
target_update = 3 # steps for update target-network
# Number of transitions sampled from the replay memory for each update.
batch_size = 128
# Learning rates of the three Adam optimizers created below.
opt_in = 0.1
opt_var = 0.01
opt_out = 0.1
# One AMSGrad-enabled Adam optimizer per variable group; Q_learning_update applies
# each of them to exactly one entry of model.trainable_variables.
optimizer_in = tf.keras.optimizers.Adam(learning_rate=opt_in, amsgrad=True)
optimizer_var = tf.keras.optimizers.Adam(learning_rate=opt_var, amsgrad=True)
optimizer_out = tf.keras.optimizers.Adam(learning_rate=opt_out, amsgrad=True)
# Indices into model.trainable_variables used by optimizer_in, optimizer_var and
# optimizer_out respectively.
# NOTE(review): the index-to-variable mapping depends on how QDQN_model builds the model -- confirm.
w_in, w_var, w_out = 1, 0, 2


In [4]:
# Build two models with identical sizes; they differ only in the last positional
# flag passed to generate_model_Qlearning (False for `model`, True for `model_target`).
# NOTE(review): the flag presumably marks the target network -- confirm in QDQN_model.
model = generate_model_Qlearning(args.n_qubits, layers_num, args.n_actions, False)
model_target = generate_model_Qlearning(args.n_qubits, layers_num, args.n_actions, True)
# Start the target model from the same weights as the trained model.
model_target.set_weights(model.get_weights())

In [None]:
# Build the replay memory from three generate_memory() runs on the same
# environment: one initial run plus two additional runs appended to it.
TrainJobNum = 3000
TrainArriveRate = 20
TrainJobType = 0.5
replay_memory = generate_memory(env, TrainJobNum, TrainArriveRate, TrainJobType, args)

# Settings of the additional runs (kept under their original names).
TrainJobNum2 = 3000
TrainArriveRate2 = 20
ExtraMemoryRuns = 2  # number of extra generate_memory() runs merged into replay_memory

# The extra runs were previously two copy-pasted blocks; a loop keeps the call
# order and arguments identical while removing the duplication.
for _ in range(ExtraMemoryRuns):
    replay_memory2 = generate_memory(env, TrainJobNum2, TrainArriveRate2, TrainJobType, args)
    replay_memory.extend(replay_memory2)

print(len(replay_memory))

In [6]:
@tf.function
def Q_learning_update(states, actions, rewards, next_states, model, gamma, n_actions):
    """Perform one Q-learning gradient step on a batch and return the loss.

    The regression target for each transition is
    ``reward + gamma * max_a Q_target(next_state, a)``, where ``Q_target`` is
    produced by the notebook-level ``model_target``. No terminal-state flag is
    used, so the bootstrap term is always added. The Huber loss between that
    target and the Q-value of the action actually taken is differentiated
    with respect to ``model.trainable_variables``.

    Args:
        states: batch of states fed to ``model``.
        actions: batch of integer action indices (one-hot encoded internally).
        rewards: batch of rewards.
        next_states: batch of successor states fed to ``model_target``.
        model: the model being trained.
        gamma: multiplier of the bootstrapped next-state value.
        n_actions: depth of the one-hot action mask.

    Returns:
        The scalar Huber loss of this batch.

    Relies on notebook globals: ``model_target``, ``optimizer_in``,
    ``optimizer_var``, ``optimizer_out`` and the indices ``w_in``, ``w_var``,
    ``w_out`` into ``model.trainable_variables``.
    """
    state_batch = tf.convert_to_tensor(states)
    action_batch = tf.convert_to_tensor(actions)
    reward_batch = tf.convert_to_tensor(rewards)
    next_state_batch = tf.convert_to_tensor(next_states)

    # The target is evaluated outside the tape, so no gradient flows through it.
    next_q = model_target([next_state_batch])
    best_next_q = tf.reduce_max(next_q, axis=1)
    td_target = reward_batch + (gamma * best_next_q)
    action_mask = tf.one_hot(action_batch, n_actions)
    huber = tf.keras.losses.Huber()

    with tf.GradientTape() as tape:
        tape.watch(model.trainable_variables)
        predicted_q = model([state_batch])
        # Keep only the Q-value of the action that was taken in each transition.
        chosen_q = tf.reduce_sum(predicted_q * action_mask, axis=1)
        loss = huber(td_target, chosen_q)

    gradients = tape.gradient(loss, model.trainable_variables)

    # Each optimizer updates exactly one trainable variable, selected by its index.
    update_plan = (
        (optimizer_in, w_in),
        (optimizer_var, w_var),
        (optimizer_out, w_out),
    )
    for group_optimizer, var_index in update_plan:
        group_optimizer.apply_gradients(
            [(gradients[var_index], model.trainable_variables[var_index])]
        )

    return loss

In [None]:
# Train the model: for every episode run `iteration - 1` update steps on random
# mini-batches drawn (with replacement) from the replay memory.
TrainStartT = time.time()

# Materialise the replay memory once. Passing the container itself to
# np.random.choice re-converted all of its entries to an array on every step.
replay_samples = list(replay_memory)

loss_list = []
for episode in range( args.Epoch ):
    print("*******************TrainEpisode:",episode,"*******************")
    epoch = 1
    # A bounded range replaces the former `while True` / `epoch == iteration`
    # exit test, which never terminated when `iteration` was <= 1. The number
    # of updates per episode (iteration - 1) is unchanged.
    for step in range(1, iteration):
        # Drawing indices consumes the global NumPy RNG the same way as
        # sampling the elements directly.
        batch_indices = np.random.choice(len(replay_samples), size=batch_size)
        training_batch = [replay_samples[i] for i in batch_indices]
        loss = Q_learning_update(np.asarray([x['state'] for x in training_batch]),
                                np.asarray([x['action'] for x in training_batch]),
                                np.asarray([x['reward'] for x in training_batch], dtype=np.float32),
                                np.asarray([x['next_state'] for x in training_batch]),
                                model, gamma, args.n_actions)
        # Store a plain Python float rather than a TensorFlow tensor.
        loss_list.append(float(loss))

        # Update target model
        if step % target_update == 0:
            model_target.set_weights(model.get_weights())

        # `epoch` is the 1-based index of the next step, as in the original counter.
        epoch = step + 1
        if epoch % 20 == 0:
            recent_losses = loss_list[-20:]
            loss_mean = round(np.mean(recent_losses), 4)
            loss_var = round(np.var(recent_losses), 4)
            print("last[",epoch,"]iterations loss mean:",loss_mean,"var:",loss_var)

TrainEndT= time.time()
TrainT = TrainEndT - TrainStartT


In [None]:
# Mean of the last 20 recorded training losses, rounded to 4 decimals (saved with the results).
loss_mean_of_last_20 = round(np.mean(loss_list[-20:]), 4)

# Run QDQN_test on the trained model and measure its wall-clock duration.
TestStartT = time.time()

# Workload settings passed to QDQN_test.
TestJobNum = 8000
TestArriveRate = 25
TestJobType = 0.5
# QDQN_test returns four values.
# NOTE(review): presumably response time, success rate, utilisation rate and cost -- confirm in QDQN_model.
responseT, successRate, utiRate, cost = QDQN_test(model, env, TestJobNum, TestArriveRate, TestJobType)

TestEndT = time.time()
TestT = TestEndT - TestStartT

In [None]:
# Save the hyper-parameters, the evaluation results and the GPU information of
# this run into a single timestamped CSV file.
params = {
    'gamma': gamma,
    'layers_num': layers_num,
    'iteration': iteration,
    'target_update': target_update,
    'batch_size': batch_size,
    'opt_in': opt_in,
    'opt_var': opt_var,
    'opt_out': opt_out,
    'TrainJobNum': TrainJobNum,
    'TrainArriveRate': TrainArriveRate,
    'TrainJobType': TrainJobType,
    'TestJobNum': TestJobNum,
    'TestArriveRate': TestArriveRate,
    'TestJobType': TestJobType
}
results = {
    'responseT': responseT,
    'successRate': successRate,
    'utilizationRate': utiRate,
    'cost': cost,
    'TrainT': TrainT,
    'TestT': TestT,
    'Final 20 loss mean': loss_mean_of_last_20
}

# Output directory. It defaults to the original location but can be redirected
# without editing the notebook by setting the QDQN_RESULT_DIR environment variable.
folder_name = os.environ.get('QDQN_RESULT_DIR', '/home/user1/')
os.makedirs(folder_name, exist_ok=True)

# The file name carries the timestamp and the headline metrics of the run.
timestamp = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
file_name = f"{timestamp}_responseT_{responseT}_successRate_{successRate}_utiRate_{utiRate}_cost_{cost}.csv"
csv_file_path = os.path.join(folder_name, file_name)

# Layout: three two-column sections separated by an empty row.
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Parameters', 'Value'])
    writer.writerows(params.items())
    writer.writerow([])
    writer.writerow(['Results', 'Value'])
    writer.writerows(results.items())
    writer.writerow([])
    writer.writerow(['GPU Information', 'Value'])
    writer.writerow(['gpu_info', gpu_info])

print(f"Experiment data has been saved to {csv_file_path}")