In [2]:
import optuna
import torch
from torch import nn

import numpy as np
import aet_net
from itertools import combinations

import matplotlib.pyplot as plt

In [2]:
print(torch.__version__)

1.10.0


In [3]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model parameters

nn_dim_ = [28,68,3]   # [quadrant size, number of hidden nodes, number of output nodes]
eta_ = 0.1          # learning rate
mini_sz_ = 1          # mini batch size (1 = use SGD)
num_epo_ = 80
kl_reg_ = [0.1, 0.1]  # identified with optuna

sig_param = [1, 0] # sigmoid slope and shift in x direction

# loss function & final layer activation (for binary crossentropy use sigmoid)
lossfun = [nn.MSELoss(), nn.Softmax(dim=-1)]

params = nn_dim_,eta_,mini_sz_,num_epo_,kl_reg_,sig_param

# initialize model and weights
model = aet_net.net(params,lossfun)
model = aet_net.init_params(model,weight_init='uni')
optimizer = torch.optim.SGD(model.parameters(),lr=eta_)

model.to(DEVICE)
loss_hist = model.train(optimizer,noise=False,print_loss=False)


In [4]:
data,output = aet_net.aet_stim.mkstim()

In [5]:
 # "tangledness" of hidden represenations

idx = np.array((0,5,10))#,-1))
inp_combi = list(combinations(idx,2))           # possible input combinations
all_angle_sum = torch.zeros(len(inp_combi))

for i,c in enumerate(inp_combi):

    H1 = data[c[0]]
    H2 = data[c[1]]
    H3 = data[c[0]]+data[c[1]]

    # apply layer
    _,H1,_ = model.forw_conv(H1)
    _,H2,_ = model.forw_conv(H2)
    _,H3,_ = model.forw_conv(H3)

    H1_2 = H1+H2


    num_ = torch.matmul(H3,H1_2)
    denom_ = torch.linalg.vector_norm(H3)*torch.linalg.vector_norm(H1_2)
    all_angle_sum[i] = (torch.acos(num_/denom_)*180/torch.pi).cpu().detach()


In [6]:
def objective(trial):
    
    # to be optimized params
    eta__ = trial.suggest_float('ETA_',0.001,0.2)
    beta_ = trial.suggest_float('BETA_',0.001,0.01)
    p_ = trial.suggest_float('P_',1e-4,2e-2)
    
    kl_reg_ = [beta_, p_]  # identified with optuna

    params = nn_dim_,eta_,mini_sz_,num_epo_,kl_reg_,sig_param
    
    # initialize model and weights
    model = aet_net.net(params,lossfun)
    model = aet_net.init_params(model,weight_init='uni')
    optimizer = torch.optim.SGD(model.parameters(),lr=eta_)
    
    model.to(DEVICE)
    loss_hist = model.train(optimizer,noise=False,print_loss=False)
    
    # "tangledness" of hidden represenations

    idx = np.array((0,5,10))#,-1))
    inp_combi = list(combinations(idx,2))           # possible input combinations
    all_angle_sum = torch.zeros(len(inp_combi))

    for i,c in enumerate(inp_combi):

        H1 = data[c[0]]
        H2 = data[c[1]]
        H3 = data[c[0]]+data[c[1]]

        # apply layer
        _,H1,_ = model.forw_conv(H1)
        _,H2,_ = model.forw_conv(H2)
        _,H3,_ = model.forw_conv(H3)

        H1_2 = H1+H2


        num_ = torch.matmul(H3,H1_2)
        denom_ = torch.linalg.vector_norm(H3)*torch.linalg.vector_norm(H1_2)
        all_angle_sum[i] = (torch.acos(num_/denom_)*180/torch.pi).cpu().detach()

    
    return torch.mean(loss_hist[:-20]) + torch.mean(all_angle_sum)

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100, timeout=600)

# pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
# complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# print("Study statistics: ")
# print("  Number of finished trials: ", len(study.trials))
# print("  Number of pruned trials: ", len(pruned_trials))
# print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2023-03-16 18:15:56,453][0m A new study created in memory with name: no-name-116be064-37b5-4d0c-9a3e-1d47af5c93bc[0m
[32m[I 2023-03-16 18:16:02,581][0m Trial 0 finished with value: 19.360801696777344 and parameters: {'ETA_': 0.18087075359947438, 'BETA_': 0.009983022815511462, 'P_': 0.009983923285973797}. Best is trial 0 with value: 19.360801696777344.[0m
[32m[I 2023-03-16 18:16:08,677][0m Trial 1 finished with value: 17.280771255493164 and parameters: {'ETA_': 0.1429557025472215, 'BETA_': 0.008742070386435753, 'P_': 0.004930216032260411}. Best is trial 1 with value: 17.280771255493164.[0m
[32m[I 2023-03-16 18:16:14,803][0m Trial 2 finished with value: 19.084514617919922 and parameters: {'ETA_': 0.13440137488176088, 'BETA_': 0.008372394775758813, 'P_': 0.010251713762814934}. Best is trial 1 with value: 17.280771255493164.[0m
[32m[I 2023-03-16 18:16:20,937][0m Trial 3 finished with value: 31.733999252319336 and parameters: {'ETA_': 0.042376846612126, 'BETA_': 0.00218

Best trial:
  Value:  9.646111488342285
  Params: 
    ETA_: 0.12757746868719266
    BETA_: 0.009092935020869497
    P_: 0.0008926984462542586


In [7]:
import pickle

In [8]:
optuna_aet_result = {key: value for key, value in study.best_trial.params.items()}

In [9]:
optuna_aet_result = [{'loss': study.best_trial.value},optuna_aet_result]

In [10]:
optuna_aet_result

[{'loss': 9.646111488342285},
 {'ETA_': 0.12757746868719266,
  'BETA_': 0.009092935020869497,
  'P_': 0.0008926984462542586}]

In [11]:
with open('optuna_aet_trial.pkl', 'wb') as fp:
    pickle.dump(optuna_aet_result, fp)
    print('dictionary saved successfully to file')

dictionary saved successfully to file


In [12]:
del optuna_aet_result

In [13]:
with open('optuna_aet_trial.pkl', 'rb') as fp:
    optuna_aet_result = pickle.load(fp)
    print('optuna result')
    print(optuna_aet_result)

optuna result
[{'loss': 9.646111488342285}, {'ETA_': 0.12757746868719266, 'BETA_': 0.009092935020869497, 'P_': 0.0008926984462542586}]
