In [None]:
# This file costructs surrogate models for the input datasets
import numpy as np     
import pandas as pd
import os
import random
import json
import math
import time

# Sklearn modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Torch specific module imports
import torch
import gpytorch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from torch.nn import functional as F

np.random.seed(0)
torch.manual_seed(0)    

# Plotting libraries
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext autoreload
%autoreload 2

# User defined files and classes
import sys
sys.path.append('../')
from src.ReLMM.read_data import Inputs
from expert_trainers import expert
import utils_dataset as utilsd


  Referenced from: /Users/maitreyeesharma/opt/anaconda3/envs/torch/lib/python3.11/site-packages/torchvision/image.so
  warn(


In [2]:
class MainNeuralNetwork(nn.Module):
    def __init__(self,in_features,out_variables,num_nodes):
        super(MainNeuralNetwork,self).__init__()

        self.in_features = in_features
        # self.layer1 = nn.Linear(in_features, num_nodes,bias=True)
        # self.layer2 = nn.Linear(num_nodes,out_variables,bias=True)
        self.layer1 = nn.Linear(in_features, num_nodes,bias=True)
        self.layer2 = nn.Linear(num_nodes, num_nodes,bias=True)
        self.layer3 = nn.Linear(num_nodes,out_variables,bias=True)
        
    def forward(self, x):
        layer1_out = F.relu(self.layer1(x))
        layer2_out = F.relu(self.layer2(layer1_out))
        output = self.layer3(layer2_out)
        
        # layer1_out = F.relu(self.layer1(x)) #F.linear(self.layer1(x))
        # output = self.layer2(layer1_out)
        
        return output
    
class TargetNeuralNetwork(nn.Module):
    def __init__(self,in_features,out_variables,num_nodes):
        super(TargetNeuralNetwork,self).__init__()

        self.in_features = in_features
        # self.layer1 = nn.Linear(in_features, num_nodes,bias=True)
        # self.layer2 = nn.Linear(num_nodes,out_variables,bias=True)
        self.layer1 = nn.Linear(in_features, num_nodes,bias=True)
        self.layer2 = nn.Linear(num_nodes, num_nodes,bias=True)
        self.layer3 = nn.Linear(num_nodes,out_variables,bias=True)
        
    def forward(self, x):
        layer1_out = F.relu(self.layer1(x))
        layer2_out = F.relu(self.layer2(layer1_out))
        output = self.layer3(layer2_out)
        
        # layer1_out = F.relu(self.layer1(x)) #F.linear(self.layer1(x))
        # output = self.layer2(layer1_out)
        
        return output
    
class Train_NN():
    
    def __init__(self):
        print('Starting training')
        
    def train_loop(self, dataloader, model, loss_fn, optimizer,lambda1,lambda2):
        
        size = len(dataloader.dataset)
        num_batches = len(dataloader)
        train_loss = 0.0
        l1_regularization, l2_regularization = 0.0, 0.0
        
        for batch, sample_batched in enumerate(dataloader):
            # Compute prediction and loss
            X = sample_batched['in_features']
            y = sample_batched['labels']
            var = sample_batched['variance']
            pred = model(X)
            train_loss += loss_fn(pred, y).item()
            pred_loss = loss_fn(pred, y)
            
            all_linear1_params = torch.cat([x.view(-1) for x in model.layer1.parameters()])
            all_linear2_params = torch.cat([x.view(-1) for x in model.layer2.parameters()])
            all_linear3_params = torch.cat([x.view(-1) for x in model.layer3.parameters()])
            l1_regularization = lambda1 * (torch.norm(all_linear1_params, 1)+torch.norm(all_linear2_params, 1)+torch.norm(all_linear3_params, 1))
            l2_regularization = lambda2 * (torch.norm(all_linear1_params, 2)+torch.norm(all_linear2_params, 2)+torch.norm(all_linear3_params, 2))

            # l1_regularization = lambda1 * (torch.norm(all_linear1_params, 1)+torch.norm(all_linear2_params, 1))
            # l2_regularization = lambda2 * (torch.norm(all_linear1_params, 2)+torch.norm(all_linear2_params, 2))

            loss = pred_loss + l1_regularization + l2_regularization 

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        train_loss /=num_batches
        return train_loss


    def test_loop(self, dataloader, model, loss_fn):
        size = len(dataloader.dataset)
        num_batches = len(dataloader)
        test_loss, correct = 0, 0

        with torch.no_grad():
            for sample_batched in dataloader:
                X = sample_batched['in_features']
                y = sample_batched['labels']  
                var = sample_batched['variance']
                pred = model(X)
                test_loss += loss_fn(pred, y).item()

        test_loss /= num_batches
        correct /= size
        # print(f"Test Error Avg loss: {test_loss:>8f} \n")
        return test_loss


In [3]:
def instantiate_agents(num_features,num_nodes,in_features,out_variables,learning_rate = 1e-3):
    
    model_main = {}
    model_target = {}
    optimizer_dict = {}
    
    # NN Policies for Q-Learning
    for iFeature in range(0,num_features):
        agent_name = 'agent_'+str(iFeature)
        model_main[agent_name] = MainNeuralNetwork(in_features,out_variables,num_nodes)
        model_target[agent_name] = TargetNeuralNetwork(in_features,out_variables,num_nodes)
        optimizer_dict[agent_name] = torch.optim.Adam(model_main[agent_name].parameters(), lr=learning_rate)
    
    # Initializing loss and optimizer
    loss_fn = nn.MSELoss()
    
    return loss_fn, optimizer_dict, model_main, model_target


In [36]:
def train_agents(replay_memory, agent_main, agent_target, loss_fn, optimizer, train_NN, predict_NN, saveModel_NN, saveModel_filename, test_size, epochs = 1000):

    min_replay_size = 5
    if len(replay_memory) < MIN_REPLAY_SIZE:
        return
    
    
    X_train, X_test, Y_train, Y_test = train_test_split(X_stand_training, Y_stand_training, test_size=test_size,random_state=40)
    Var_train = torch.ones(len(Y_train)) 
    Var_test = torch.ones(len(Y_test)) 
        
    # NN parameters
    batch_size = 1
    l1 = 1e-5
    l2 = 1e-5
 
    if (train_NN):
        # Dataloader for pytorch
        train_data = utilsd.InputDataset(X_train,Y_train,Var_train,descriptors)
        test_data = utilsd.InputDataset(X_test,Y_test,Var_test,descriptors)

        train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
        test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

        user_training = Train_NN()

        train_loss = []
        test_loss = []
        for t in range(epochs):
            train_loss_epoch = user_training.train_loop(train_dataloader, agent_main, loss_fn, optimizer, l1, l2)
            test_loss_epoch = user_training.test_loop(test_dataloader, agent_main, loss_fn)
            train_loss.append(train_loss_epoch)
            test_loss.append(test_loss_epoch)
            if ((t+1)%100 == 0):
                print(f"Epoch {t+1}---> training error: {train_loss_epoch:>7f}, val error: {test_loss_epoch:>7f}")
            if((t+1)%10 == 0):
                agent_target.load_state_dict(agent_main.state_dict())
                # model_target.eval()
                # output_target = model_target(X_train)
                
        fig, ax = plt.subplots(figsize=(6,4))
        ax.plot(range(epochs),train_loss, label=f'Training Error,{train_loss_epoch:>7f}')
        ax.plot(range(epochs),test_loss, label=f'Validation Error,{test_loss_epoch:>7f}')
        ax.set_xlabel('Num. of epochs')
        ax.set_ylabel('MSE Loss')
        plt.legend()
        print("NN training Done!")
        
        if saveModel_NN:
            torch.save(model.state_dict(), saveModel_filename)
        
    return

def predict_agents(X_predict,agent_main):
    X_predict = torch.tensor(X_predict).to(torch.float32)
    agent_main.eval()

    target_output = agent_main(X_predict)

    return target_output

class environment():
    
    def __init__(self):
        self.action_space = [0, 1]
        
    def step(self,action):
        state = torch.tensor(action).to(torch.int32)
        return state
    
    def compute_accuracy_reward_DT(self,state,X,Y):
    
        predAccuracy_reward = random.sample([1,2,3,4,5], 1)
        print(predAccuracy_reward)

        return predAccuracy_reward
        

In [40]:
if __name__=="__main__":
    
    run_folder = '/Users/maitreyeesharma/WORKSPACE/PostDoc/Chemistry/SPIRAL/codes/RL/RL_FS/'
    
    # Read and preprocess input data
    input_data=inputs()
    X_stand_DT, Y_stand_DT, descriptors = input_data.read_inputs()
    
    env = environment()
    
    # RL training inputs
    train_episodes = 10
    min_replay_size = 5
    num_batch = 10
    epsilon = 1 # Epsilon-greedy algorithm in initialized at 1 meaning every step is random at the start
    max_epsilon = 1 # You can't explore more than 100% of the time
    min_epsilon = 0.01 # At a minimum, we'll always explore 1% of the time
    decay = 0.01
    
    # Initialize and instantiate feature agents, each agent have 8 neurons in the hidden layer
    num_nodes = 8
    num_features = np.size(descriptors)
    in_features = np.size(descriptors) ## State vector size
    out_variables = 2
    loss_fn, optimizer_dict, model_main, model_target = instantiate_agents(num_features,num_nodes,in_features,out_variables,learning_rate = 1e-3)
    
    steps_to_update_target_model = 0
    observation = torch.ones(in_features,dtype=torch.int32)
    replay_memory = []
    
    for episode in range(train_episodes):
        total_training_rewards = 0
        # done = False
        # while not done:
        steps_to_update_target_model += 1
        random_number = np.random.rand()
        # 2. Explore using the Epsilon Greedy Exploration Strategy
        if random_number <= epsilon:
            # Explore
            action = random.sample(env.action_space, 1)[0]
        else:
            # Exploit best known action
            predicted_Qvalues = predict_agents(current_state,model_main['agent_0'])
            print(predicted_Qvalues)
            action = torch.argmax(predicted_Qvalues).item()
        
        new_observation_for_iFeature = env.step(action)
        # Collect all new observations to create new state
        new_state = torch.cat((new_observation_for_iFeature.reshape(1),current_state[1:]), dim=0)
        reward = env.compute_accuracy_reward_DT(new_state,X_stand_DT, Y_stand_DT)
        replay_memory.append([current_state, action, reward, new_state])

#             # 3. Update the Main Network using the Bellman Equation
#             if steps_to_update_target_model % 4 == 0 or done:
#                 train(env, replay_memory, model, target_model, done)
                # train_agents(state_input_training, qvalue_training, model_main[iAgent], model_target[iAgent], loss_fn, optimizer_dict[iAgent], train_NN, predict_NN, saveModel_NN, saveModel_filename, test_size)

        current_state = new_state

#                 if steps_to_update_target_model >= 100:
#                     print('Copying main network weights to the target network weights')
#                     target_model.set_weights(model.get_weights())
#                     steps_to_update_target_model = 0
#                 break

        epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay * episode)
    

Reading data for the input dataset type:  PerovAlloys
0
tensor([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
[2]
0
tensor([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
[1]
tensor([ 0.0446, -0.1929], grad_fn=<AddBackward0>)
0
tensor([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
[4]
0
tensor([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
[1]
0
tensor([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
[2]
1
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
[5]
1
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
[5]
tensor([ 0.0525, -0.1658], grad_fn=<AddBackward0>)
0
tensor([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
[3]
1
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
[5]
1
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
[4]


  X_predict = torch.tensor(X_predict).to(torch.float32)


In [None]:
if __name__=="__main__":
    
    run_folder = '/Users/maitreyeesharma/WORKSPACE/PostDoc/Chemistry/SPIRAL/codes/RL/RL_FS/'
    
    # Read and preprocess input data
    input_data=inputs()
    X_stand, Y_stand, descriptors = input_data.read_inputs()

    # Initialize feature agents
    num_nodes = 8
    num_features = np.size(descriptors)
    in_features = np.size(descriptors)
    out_variables = 2
    loss_fn, optimizer_dict, model_main, model_target = instantiate_agents(num_features,num_nodes,in_features,out_variables,learning_rate = 1e-3)
    
    # Initialize expert trainer
    expert_trainer = expert()
    
    # Game data to train policy networks
    num_game_instances = 50
    state = []
    reward = []
    for iGame in range(0,num_game_instances):
        sampled_descriptors = random.sample(list(descriptors), 5)
        X_stand_training = pd.DataFrame(X_stand, columns=sampled_descriptors)
        Y_stand_training = Y_stand

        # XGboost data for training policy
        feature_importance_dict, reward_game = expert_trainer.expert_xgboost(X_stand_training,Y_stand_training,sampled_descriptors,onlyTopChoices=False)

        state_game = []
        for idescriptor in range(0,len(list(descriptors))):
            if descriptors[idescriptor] in feature_importance_dict.keys():
                state_game.append(int(1))
            else:
                state_game.append(int(0))
        
        state.append(state_game)
        reward.append(reward_game)
    
    # Training agents using 50 feature subsets
    for iAgent in model_main.keys():
        absent_index = 0
        absent_reward = 0.0
        present_index = 0 
        present_reward = 0.0
        qvalue_training = np.zeros((len(reward),2))
            
        feature_index = int(iAgent.split('agent_')[1])
        train_NN = True
        saveModel_NN = False 
        predict_NN = False
        test_size = 0.1
        saveModel_filename = '../RL_FS_output/alloys_' + str(int(test_size*100)) + 'test_'+str(num_nodes)+'nodes_l1_1em3.pt'
        state_input_training = state
        
        # First game:
        if state[0][feature_index] == 0:
            absent_index += 1
            absent_reward = reward[0]
            qvalue_training[0][0] = reward[0]
        elif state[0][feature_index] == 1:
            present_index += 1
            present_reward = reward[0]
            qvalue_training[0][1] = reward[0]
        
        # Remaining games:
        for iGame in range(1,num_game_instances):
            if state[iGame][feature_index] == 0:
                absent_index += 1
                absent_reward = reward[iGame]
                qvalue_training[iGame][0] = absent_reward
            elif state[iGame][feature_index] == 1:
                present_index += 1
                present_reward = reward[iGame]
                qvalue_training[iGame][1] = present_reward
   
        
        train_agents(state_input_training, qvalue_training, model_main[iAgent], model_target[iAgent], loss_fn, optimizer_dict[iAgent], train_NN, predict_NN, saveModel_NN, saveModel_filename, test_size)
 
    # Predicting using agents   
    num_game_instances = 10
    state = []
    reward = []
    for iGame in range(0,num_game_instances):
        sampled_descriptors = random.sample(list(descriptors), 5)
        X_stand_training = pd.DataFrame(X_stand, columns=sampled_descriptors)
        Y_stand_training = Y_stand

        # XGboost data for training policy
        feature_importance_dict, reward_game = expert_trainer.expert_xgboost(X_stand_training,Y_stand_training,sampled_descriptors,onlyTopChoices=False)

        state_game = []
        for idescriptor in range(0,len(list(descriptors))):
            if descriptors[idescriptor] in feature_importance_dict.keys():
                state_game.append(int(1))
            else:
                state_game.append(int(0))
        
        state.append(state_game)
        reward.append(reward_game)
        print(state_game, reward_game)
        
    for iAgent in model_main.keys():
        feature_index = int(iAgent.split('agent_')[1])
        train_NN = True
        saveModel_NN = False 
        predict_NN = False
        test_size = 0.1
        saveModel_filename = '../RL_FS_output/alloys_' + str(int(test_size*100)) + 'test_'+str(num_nodes)+'nodes_l1_1em3.pt'
        state_input_training = state

        target_output = predict_agents(state,model_main[iAgent])
        print(target_output)
