In [6]:
import torch, json
from harmonization_env_package.harmonization_environment import *

device = 'cpu'

# Eight ascending pitch values (presumably MIDI note numbers — confirm),
# raised by 12 before being handed to the environment.
base_pitches = [60, 62, 64, 65, 67, 69, 71, 72]
melody = torch.tensor(base_pitches, dtype = torch.int32) + 12

In [None]:
# Environment built from the melody and device defined in the first cell.
env = HarmonizationEnv(melody, device)

# One network per architecture. `path` names each network's parameter file
# (NOTE(review): presumably the checkpoint used for loading/saving — confirm).
net_ac = NetAC(path = 'ac_params_low_entr.pth')
net_ae = NetAE(path = 'ae_params.pth')
net_m = NetM(path = 'm_params.pth')

agent_ac = Agent(env, net_ac, entropy_weight = 0.01)
agent_ae = Agent(env, net_ae, entropy_weight = 0.1)
# agent_m used to be commented out even though the NetM training cell further
# down passes it to train(); on a fresh kernel that cell failed with NameError.
# NOTE(review): restored with Agent's default entropy weight — confirm the
# value intended for the NetM run.
agent_m = Agent(env, net_m)

# Parameter-count summaries of the three networks.
print(net_ac.info)
print(' ')
print(net_ae.info)
print(' ')
print(net_m.info)

 shared: 13600 params 
 actor head: 2492 params 
 critic head: 1969 params 
 total: 18062
 
 encoder: 14829 params 
 decoder: 19806 params 
 actor head: 1852 params 
 critic head: 1561 params 
 total: 38049
 
 encoder: 11565 params 
 decoder: 13278 params 
 lstm encoder: 7192 params 
 lstm decoder: 6224 params 
 actor head: 1116 params 
 critic head: 393 params 
 total: 39769


In [8]:
def train(agent, num_iterations, iter_per_batch, blocks):
    """Run a multi-stage training schedule and merge the per-stage progress logs.

    Stage ``i`` calls ``agent.train`` with ``num_iterations[i]``,
    ``iter_per_batch[i]`` and ``blocks[i]`` (plus ``check_step = 10``,
    ``save = True`` and ``track_progress = True``). Each call is expected to
    return a dict of sequences, optionally with a nested ``'loss'`` dict of
    sequences.

    Args:
        agent: object exposing ``train(...)``, ``net.info`` and ``net.type_``.
        num_iterations: per-stage iteration counts.
        iter_per_batch: per-stage ``iter_per_batch`` values.
        blocks: per-stage ``blocks`` values.

    Returns:
        A dict with ``'info'`` and ``'type'`` taken from ``agent.net``, a
        ``'loss'`` dict, and one entry per remaining key of the first stage's
        log; every series is the concatenation of that series over all
        stages, in stage order. With an empty schedule only ``'info'``,
        ``'type'`` and an empty ``'loss'`` dict are returned.

    Raises:
        ValueError: if the three schedule sequences differ in length.
    """
    # A mismatched schedule is a caller error; fail before any training
    # (and checkpoint saving) starts instead of part-way through.
    if not (len(num_iterations) == len(iter_per_batch) == len(blocks)):
        raise ValueError(
            'num_iterations, iter_per_batch and blocks must have the same length'
        )

    stage_logs = [
        agent.train(
            num_iterations = n_iter,
            check_step = 10,
            iter_per_batch = per_batch,
            save = True,
            track_progress = True,
            blocks = stage_blocks
        )
        for n_iter, per_batch, stage_blocks in zip(num_iterations, iter_per_batch, blocks)
    ]

    progress = {'info' : agent.net.info, 'type' : agent.net.type_}
    progress['loss'] = {}

    # Nothing was trained: return the bare skeleton rather than indexing
    # into an empty list of logs.
    if not stage_logs:
        return progress

    # The first stage's log defines which series get merged.
    first_log = stage_logs[0]

    for key in first_log.keys():

        if key == "loss": continue

        merged = []
        for log in stage_logs: merged += log[key]
        progress[key] = merged

    # Tolerate logs that carry no 'loss' entry at all.
    for key in first_log.get('loss', {}).keys():

        merged = []
        for log in stage_logs: merged += log['loss'][key]
        progress['loss'][key] = merged

    return progress

In [9]:
# Single-stage schedule for the actor-critic agent: 8000 iterations with
# iter_per_batch = 2 on the 'full' block.
progress_ac = train(agent_ac, [8000], [2], [['full']])

# Persist the merged training statistics as indented JSON.
filename = 'stats_ac_low_entr.json'

with open(filename, mode = 'w') as f:
    json.dump(progress_ac, fp = f, indent = 4)

n_iter: 0/8000, grad norm: 0.007. Training block: full
actor: 0.099, critic: 0.243, entropy: -2.131, ae: 0.000, lstm: 0.000
Average reward: 0.305

n_iter: 10/8000, grad norm: 0.003. Training block: full
actor: 0.015, critic: 0.170, entropy: -2.115, ae: 0.000, lstm: 0.000
Average reward: 0.332

n_iter: 20/8000, grad norm: 0.004. Training block: full
actor: 0.009, critic: 0.165, entropy: -2.107, ae: 0.000, lstm: 0.000
Average reward: 0.336

n_iter: 30/8000, grad norm: 0.003. Training block: full
actor: -0.037, critic: 0.142, entropy: -2.077, ae: 0.000, lstm: 0.000
Average reward: 0.346

n_iter: 40/8000, grad norm: 0.003. Training block: full
actor: 0.034, critic: 0.143, entropy: -2.081, ae: 0.000, lstm: 0.000
Average reward: 0.319

n_iter: 50/8000, grad norm: 0.002. Training block: full
actor: -0.069, critic: 0.134, entropy: -2.064, ae: 0.000, lstm: 0.000
Average reward: 0.362

n_iter: 60/8000, grad norm: 0.002. Training block: full
actor: -0.080, critic: 0.130, entropy: -2.037, ae: 0.00

In [10]:
# Three-stage schedule for the autoencoder agent: the 'ae' block first,
# then the 'actor' and 'critic' blocks, then the 'full' block.
progress_ae = train(
    agent_ae,
    [2000, 2000, 6000],
    [1, 2, 2],
    [['ae'], ['actor', 'critic'], ['full']],
)

# Persist the merged training statistics as indented JSON.
filename = 'stats_ae.json'

with open(filename, mode = 'w') as f:
    json.dump(progress_ae, fp = f, indent = 4)

n_iter: 0/2000, grad norm: 0.016. Training block: ae
actor: -0.754, critic: 0.433, entropy: -2.170, ae: 1.335, lstm: 0.000
Average reward: 0.338

n_iter: 10/2000, grad norm: 0.010. Training block: ae
actor: -0.768, critic: 0.442, entropy: -2.194, ae: 1.213, lstm: 0.000
Average reward: 0.342

n_iter: 20/2000, grad norm: 0.012. Training block: ae
actor: -0.687, critic: 0.407, entropy: -2.207, ae: 1.123, lstm: 0.000
Average reward: 0.332

n_iter: 30/2000, grad norm: 0.008. Training block: ae
actor: -0.600, critic: 0.349, entropy: -2.206, ae: 1.009, lstm: 0.000
Average reward: 0.313

n_iter: 40/2000, grad norm: 0.007. Training block: ae
actor: -0.774, critic: 0.445, entropy: -2.193, ae: 0.956, lstm: 0.000
Average reward: 0.350

n_iter: 50/2000, grad norm: 0.005. Training block: ae
actor: -0.785, critic: 0.453, entropy: -2.196, ae: 0.884, lstm: 0.000
Average reward: 0.351

n_iter: 60/2000, grad norm: 0.007. Training block: ae
actor: -0.668, critic: 0.377, entropy: -2.188, ae: 0.838, lstm: 0

In [None]:
# Four-stage schedule for the NetM agent: the 'ae' block, then the 'lstm'
# block, then the 'actor' and 'critic' blocks, then the 'full' block.
progress_m = train(
    agent_m,
    [1000, 2000, 2000, 5000],
    [1, 1, 2, 2],
    [['ae'], ['lstm'], ['actor', 'critic'], ['full']],
)

# Persist the merged training statistics as indented JSON.
filename = 'stats_m_low_entr.json'

with open(filename, mode = 'w') as f:
    json.dump(progress_m, fp = f, indent = 4)

n_iter: 0/1000, grad norm: 0.012. Training block: ae
actor: -0.125, critic: 0.157, entropy: -2.049, ae: 1.270, lstm: 1.362
Average reward: 0.325

n_iter: 10/1000, grad norm: 0.010. Training block: ae
actor: -0.137, critic: 0.209, entropy: -2.077, ae: 1.107, lstm: 1.356
Average reward: 0.332

n_iter: 20/1000, grad norm: 0.008. Training block: ae
actor: -0.098, critic: 0.176, entropy: -2.077, ae: 1.003, lstm: 1.401
Average reward: 0.330

n_iter: 30/1000, grad norm: 0.006. Training block: ae
actor: -0.132, critic: 0.166, entropy: -2.066, ae: 0.925, lstm: 1.417
Average reward: 0.336

n_iter: 40/1000, grad norm: 0.005. Training block: ae
actor: -0.096, critic: 0.171, entropy: -2.061, ae: 0.870, lstm: 1.442
Average reward: 0.324

n_iter: 50/1000, grad norm: 0.006. Training block: ae
actor: -0.080, critic: 0.184, entropy: -2.063, ae: 0.826, lstm: 1.443
Average reward: 0.317

n_iter: 60/1000, grad norm: 0.005. Training block: ae
actor: -0.114, critic: 0.161, entropy: -2.066, ae: 0.783, lstm: 1

In [None]:
# Save the merged NetM training progress as indented JSON. The file name now
# carries the '.json' extension used by every other stats file this notebook
# writes. NOTE(review): update any reader that still opens 'stats_m'.
filename = 'stats_m.json'

with open(filename, 'w') as f:
    json.dump(progress_m, f, indent = 4)