In [1]:
import numpy as np
import pandas as pd
import os
import json
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

# User defined files and classes
import sys
from read_data import inputs
import utils_dataset as utilsd
from environment import Environment
from qlearning import QNetwork
from predictor_models import predictor_models

In [2]:
## Main Function
# Rebuilds the evaluation setup: reads the run configuration, standardizes the
# dataset, holds out a test split, and restores one trained Q-network per agent
# from the checkpoints stored in the output directory.

# Reading the input json file with dataset filename and path information
with open('inputs.json', "r") as f:
    input_dict = json.load(f)

run_folder = input_dict['RunFolder']
input_type = input_dict['InputType']
input_path = input_dict['InputPath']
input_file = input_dict['InputFile']
output_dir = input_dict['OutputDirectory']

# Create a new output directory if it does not exist
if not os.path.exists(output_dir):
    # exist_ok avoids a crash if the directory appears between the check and the call
    os.makedirs(output_dir, exist_ok=True)
    print("The new directory is created!", output_dir)

input_data = inputs(input_type=input_type,
                    input_path=input_path,
                    input_file=input_file)

X_data, Y_data, descriptors = input_data.read_inputs()
X_stand, X_stand_df, scalerX = utilsd.standardize_data(X_data)
# Only the first target column of Y_data is used
Y_stand, Y_stand_df, scalerY = utilsd.standardize_data(pd.DataFrame({'target':Y_data[:,0]}))
# NOTE: X_stand / Y_stand are overwritten here and from now on hold the 90%
# training portion (as DataFrames); the remaining 10% is the test split.
# random_state is fixed so the split is reproducible across runs.
X_stand, X_test_stand_df, Y_stand, Y_test_stand_df = train_test_split(X_stand_df, Y_stand_df, test_size=0.1, random_state=0)

#---- Uncomment for synthetic dataset
# test_file = input_dict['TestFile']
# input_data2 = inputs(input_type=input_type,
#                            input_path=input_path,
#                            input_file=test_file)
# X_testdata, Y_testdata, descriptors_test = input_data2.read_inputs()
# X_test_stand = scalerX.transform(X_testdata)
# X_test_stand_df = pd.DataFrame(X_test_stand, columns=X_stand_df.columns)
# Y_test_stand_df = pd.DataFrame(scalerY.transform(pd.DataFrame({'target':Y_testdata[:,0]})), columns=Y_stand_df.columns)

# Dataset parameters
total_num_features = len(descriptors)

# Environment parameters
state_size = total_num_features  # Size of the state space
N_agents = total_num_features # Number of agents
action_size = 2  # Number of possible actions
N_steps = 100 # Number of steps to take per episode
predictor_model = predictor_models()

# Hyperparameters
# (epsilon, epsilon_decay and gamma are not referenced by the cells shown here;
# they are kept so any other cell relying on them keeps working)
epsilon = 1.0  # Exploration rate
epsilon_decay = 0.995  # Decay rate of exploration
gamma = 0.95  # Discount factor
learning_rate = 0.001

# Initialize environment and Q-networks for each agent
env = Environment(state_size,action_size,N_agents,N_steps)
agent_model = {}
agent_optimizer = {}
agent_qvalue = {}

for i_agent in range(N_agents):
    model_name = 'agent'+str(i_agent)+'_model'
    # Plain string concatenation: output_dir must already end with a path
    # separator for the checkpoint to be looked up inside that directory.
    saveModel_filename = output_dir+model_name+'.pt'
    optimizer_name = 'agent'+str(i_agent)+'_optimizer'
    agent_model[model_name] = QNetwork(env.state_size, env.action_size)
    agent_optimizer[optimizer_name] = optim.Adam(agent_model[model_name].parameters(), lr=learning_rate)
    # map_location lets checkpoints saved from a CUDA device load on CPU-only machines
    saved_state = torch.load(saveModel_filename, map_location='cpu')
    agent_model[model_name].load_state_dict(saved_state)
    # The networks are only used for inference below, so switch them to eval
    # mode (affects layers such as dropout / batch norm, if QNetwork has any)
    agent_model[model_name].eval()


Reading data for the input dataset type:  Gryffin
This class contains the predictor models to generate rewards for agents


In [3]:
# Testing the trained agents
# Every agent picks the action with the highest Q-value for the current state
# (greedy, no exploration) until the environment reports the episode is done.
state = env.reset()
total_rewards = np.zeros(N_agents)
done = False

while not done:
    # Inference only, so gradient tracking is switched off
    with torch.no_grad():
        actions_list = []
        for i_agent in range(N_agents):
            model_name = f'agent{i_agent}_model'
            state_tensor = torch.tensor(state, dtype=torch.float32)
            q_values = agent_model[model_name](state_tensor)
            greedy_action = torch.argmax(q_values).item()
            actions_list.append(greedy_action)
        actions = np.array(actions_list)

    next_state, done = env.step(actions)
    # These three values are overwritten on every step; after the loop they
    # hold whatever the final step returned
    rewards, feature_importance_dict_rl, mse_rl = env.get_rewards_test(
        predictor_model, X_test_stand_df, Y_test_stand_df
    )
    total_rewards = total_rewards + rewards
    state = next_state

# One row per key of the importance dictionary
importance_df_rl = pd.DataFrame.from_dict(data=feature_importance_dict_rl, orient='index')
# importance_df_rl.to_csv(output_dir+'rl_test.csv')
mse_test_rl = pd.DataFrame(data=[mse_rl], columns=['RL MSE'])
# mse_test_rl.to_csv(output_dir+'mse_rl_test.csv')
print(f"Test Total Rewards: {total_rewards}, state: {state}")


Test Total Rewards: [1362.03502408 1362.03502408 1362.03502408 1362.03502408 1362.03502408
 1362.03502408 1362.03502408 1362.03502408 1362.03502408 1362.03502408
 1362.03502408 1362.03502408 1362.03502408 1362.03502408], state: [0 0 0 1 1 0 1 1 1 1 1 1 1 0]


In [4]:
# Show the `mse_rl` value returned by the last env.get_rewards_test call of the test loop
mse_rl

0.07341955106308268

In [5]:
# Show the importance table built from the dictionary returned by the last
# env.get_rewards_test call (one row per dictionary key)
importance_df_rl

Unnamed: 0,0
organic-homo,0.0
organic-lumo,0.0
organic-dipole,0.0
organic-atomization,0.093922
organic-r_gyr,0.297415
organic-total_mass,0.0
anion-electron_affinity,0.228656
anion-ionization_energy,1.044831
anion-mass,0.0
anion-electronegativity,0.0
