In [1]:
# This file constructs surrogate models for the input datasets
import numpy as np   
import pandas as pd
import os
import shutil
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import math
import time
import warnings

# Torch specific module imports
import torch
import gpytorch 
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from torch.nn import functional as F

# botorch specific modules
from botorch.fit import fit_gpytorch_model
from botorch.models.gpytorch import GPyTorchModel
from botorch import fit_gpytorch_mll
from botorch.acquisition.monte_carlo import (
    qExpectedImprovement,
    qNoisyExpectedImprovement,
)
from botorch.sampling.normal import SobolQMCNormalSampler
from botorch.exceptions import BadInitialCandidatesWarning
from botorch.acquisition import UpperConfidenceBound, ExpectedImprovement

# Plotting libraries
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Global matplotlib styling, applied in one call so all settings sit together.
plt.rcParams.update({
    # Tick label font sizes
    'xtick.labelsize': 15,
    'ytick.labelsize': 15,
    # x-axis tick geometry (major and minor)
    'xtick.major.size': 5,
    'xtick.major.width': 1,
    'xtick.minor.size': 5,
    'xtick.minor.width': 1,
    # y-axis tick geometry (major and minor)
    'ytick.major.size': 5,
    'ytick.major.width': 1,
    'ytick.minor.size': 5,
    'ytick.minor.width': 1,
    # Axis label, title and legend font sizes
    'axes.labelsize': 15,
    'axes.titlesize': 15,
    'legend.fontsize': 15,
})

# User defined python classes and files
import sys

# Root of the MatDisc_ML checkout that holds the project-local modules.
# The default is the original author's location; set the MATDISC_ML_ROOT
# environment variable to run the notebook from a different checkout without
# editing this cell.
PROJECT_ROOT = os.environ.get(
    'MATDISC_ML_ROOT',
    '/Users/maitreyeesharma/WORKSPACE/PostDoc/EngChem/MatDisc_ML/',
)

# Each directory is pushed to the front of sys.path in turn, so the last one
# listed ('feature_engineering') ends up with the highest import priority.
# The trailing '' passed to os.path.join keeps the trailing path separator.
for _module_dir in ('python_notebook_bo', 'feature_engineering'):
    sys.path.insert(0, os.path.join(PROJECT_ROOT, _module_dir, ''))

import input_class 
import code_verification as verification
import surrogate_model_inputs as model_input
import utils_dataset as utilsd
import surrogate_models


  Referenced from: /Users/maitreyeesharma/opt/anaconda3/envs/torch/lib/python3.11/site-packages/torchvision/image.so
  warn(


Using cpu device


In [2]:
random_seed = 1

# Load the run configuration stored as JSON in the run folder.
input_json_path = model_input.run_folder + 'inputs_testing.json'
with open(input_json_path, "r") as f:
    input_dict = json.load(f)

# Pull the four settings this cell needs out of the configuration.
input_type, input_path, input_file, add_target_noise = (
    input_dict[key]
    for key in ('InputType', 'InputPath', 'InputFile', 'AddTargetNoise')
)

# NOTE(review): `input` shadows the Python builtin of the same name. The name
# is kept so that any other cell referring to it keeps working.
input = input_class.inputs(
    input_type=input_type,
    input_path=input_path,
    input_file=input_file,
    add_target_noise=add_target_noise,
    composition_MPEA=True,
)

# read_inputs() returns three objects; XX is later indexed with .loc, so it is
# presumably a DataFrame of features -- confirm against input_class.
XX, YY, descriptors = input.read_inputs(model_input.verbose)

# Reading the BO output files
def _read_bo_csv(filename):
    """Read one BO output CSV from ``model_input.output_folder``."""
    return pd.read_csv(model_input.output_folder + filename)


# Each surrogate model has its own on/off flag in model_input. The five tables
# for a model are only loaded (and the matching variables only defined) when
# its flag is truthy.
if model_input.GP_0_BO:
    best_observed_df_gp0 = _read_bo_csv('gp0_best.csv')
    newy_observed_df_gp0 = _read_bo_csv('gp0_newTarget.csv')
    newy_var_observed_df_gp0 = _read_bo_csv('gp0_newTarget_variance.csv')
    newx_observed_df_gp0 = _read_bo_csv('gp0_newRecommendation.csv')
    index_observed_df_gp0 = _read_bo_csv('gp0_IndexRecommendation.csv')

if model_input.GP_L_BO:
    best_observed_df_gpL = _read_bo_csv('gpL_best.csv')
    newy_observed_df_gpL = _read_bo_csv('gpL_newTarget.csv')
    newy_var_observed_df_gpL = _read_bo_csv('gpL_newTarget_variance.csv')
    newx_observed_df_gpL = _read_bo_csv('gpL_newRecommendation.csv')
    index_observed_df_gpL = _read_bo_csv('gpL_IndexRecommendation.csv')

if model_input.GP_NN_BO:
    best_observed_df_gpNN = _read_bo_csv('gpNN_best.csv')
    newy_observed_df_gpNN = _read_bo_csv('gpNN_newTarget.csv')
    newy_var_observed_df_gpNN = _read_bo_csv('gpNN_newTarget_variance.csv')
    newx_observed_df_gpNN = _read_bo_csv('gpNN_newRecommendation.csv')
    index_observed_df_gpNN = _read_bo_csv('gpNN_IndexRecommendation.csv')
    

Reading data for the input dataset type:  MPEA


In [3]:
# NOTE(review): this cell needs the gpL tables, which are only loaded when
# model_input.GP_L_BO is truthy.
target = newy_observed_df_gpL[:]

# Build the ranked recommendation table:
#   1. take the rows of XX whose labels are listed in the 'gpL_trial1' column,
#   2. reset_index() so the original XX labels become a regular column,
#   3. attach the row-wise mean of the target table as 'Hardness Value',
#   4. sort from highest to lowest 'Hardness Value'.
# NOTE(review): rows come from trial 1 only, while the mean runs over every
# column of the target table -- confirm that this pairing is intended.
comp = (
    XX.loc[index_observed_df_gpL['gpL_trial1']]
    .reset_index()
    .assign(**{'Hardness Value': target.mean(axis=1)})
    .sort_values(by=['Hardness Value'], ascending=False)
)

In [4]:
# Persist the ranked recommendations next to the other BO outputs. The
# DataFrame index is not written to the file.
recommendations_csv = model_input.output_folder + 'Recommendations.csv'
comp.to_csv(recommendations_csv, index=False)

In [None]:
# Recommendations file stored in a sibling folder of the current output
# folder; loaded for comparison with the table built in this notebook.
round1_recommendations_csv = (
    model_input.output_folder
    + '../mpea_hv_forEddie_0.01p_FirstPass_Aug/Recommendations.csv'
)
Round1_reco = pd.read_csv(round1_recommendations_csv)

In [None]:
# Keep the first 20 rows (by position) of each recommendation table.
comp_shortlist = comp.head(20)
Round1_reco_shortlist = Round1_reco.head(20)

In [8]:
# Find the rows that appear in both shortlists.
# Join keys are every column except the first one (added by reset_index()
# when `comp` was built) and 'Hardness Value'. Selecting them this way, rather
# than with a hard-coded positional slice, keeps the join correct if the
# number of feature columns changes.
composition_columns = [
    column
    for column in comp_shortlist.columns[1:]
    if column != 'Hardness Value'
]
# NOTE(review): the inner join requires exact equality on every key column;
# if these hold floats, values that differ only by rounding will not match.
comp_shortlist.merge(Round1_reco_shortlist, how='inner', on=composition_columns)

Unnamed: 0,index_x,Ag,Al,B,C,Ca,Co,Cr,Cu,Fe,...,Sn,Ta,Ti,V,W,Y,Zn,Hardness Value_x,index_y,Hardness Value_y
0,19,0.0,0.174,0.0,0.0,0.0,0.174,0.174,0.0,0.174,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,781.023966,26,689.99029
1,93,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,719.522634,114,686.433322
2,34,0.0,0.147,0.0,0.0,0.0,0.147,0.147,0.147,0.0,...,0.0,0.0,0.147,0.0,0.0,0.118,0.0,666.35011,46,686.464888


In [14]:
# Display the first 20 rows of the ranked recommendation table.
comp.head(20)

Unnamed: 0,index,Ag,Al,B,C,Ca,Co,Cr,Cu,Fe,...,Sc,Si,Sn,Ta,Ti,V,W,Y,Zn,Hardness Value
53,19,0.0,0.174,0.0,0.0,0.0,0.174,0.174,0.0,0.174,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,781.023966
21,194,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,753.927218
1,93,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,719.522634
5,228,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,691.219248
64,229,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,684.431618
4,149,0.0,0.0,0.0,0.0,0.0,0.0,0.222,0.0,0.222,...,0.0,0.0,0.0,0.0,0.0,0.111,0.222,0.0,0.0,684.382558
6,119,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.2,0.2,0.2,0.2,0.0,0.0,681.25126
30,53,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.25,0.25,0.0,0.0,0.0,680.704406
54,187,0.0,0.0,0.0,0.0,0.0,0.18,0.125,0.0,0.55,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,678.126926
39,201,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,676.659324


In [15]:
# Display the first 20 rows of the Round1_reco table.
Round1_reco.head(20)

Unnamed: 0,index,Ag,Al,B,C,Ca,Co,Cr,Cu,Fe,...,Sc,Si,Sn,Ta,Ti,V,W,Y,Zn,Hardness Value
0,21,0.0,0.167,0.0,0.167,0.0,0.167,0.167,0.0,0.167,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,916.03658
1,242,0.0,0.65,0.0,0.0,0.0,0.0,0.0,0.05,0.05,...,0.0,0.15,0.0,0.0,0.0,0.0,0.0,0.0,0.05,910.212578
2,20,0.0,0.154,0.0,0.231,0.0,0.154,0.154,0.0,0.154,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,908.023724
3,245,0.0,0.65,0.0,0.0,0.0,0.0,0.0,0.05,0.0,...,0.0,0.15,0.0,0.0,0.0,0.0,0.0,0.0,0.05,903.966934
4,244,0.0,0.65,0.0,0.0,0.0,0.0,0.05,0.05,0.0,...,0.0,0.15,0.0,0.0,0.0,0.0,0.0,0.0,0.05,896.490874
5,246,0.0,0.65,0.0,0.0,0.0,0.0,0.0,0.05,0.0,...,0.0,0.15,0.0,0.0,0.0,0.0,0.0,0.0,0.05,851.750286
6,243,0.0,0.65,0.0,0.0,0.0,0.0,0.0,0.05,0.0,...,0.0,0.15,0.0,0.0,0.0,0.0,0.0,0.0,0.05,818.205998
7,63,0.0,0.801,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.05,807.734198
8,254,0.0,0.0,0.0,0.0,0.0,0.111,0.0,0.111,0.111,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,763.680668
9,62,0.0,0.801,0.0,0.0,0.0,0.0,0.0,0.05,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,754.045896
