In [1]:
import os
import torch
import pandas as pd
from polymerlearn.utils import get_IV_add, GraphDataset

# Load data from local path:
data = pd.read_csv(os.path.join('/Users/owenqueen/Desktop/eastman_project-confidential/Eastman_Project/CombinedData', 
            'pub_data.csv'))

add = get_IV_add(data)

dataset = GraphDataset(
    data = data,
    structure_dir = '../Structures/AG/xyz',
    Y_target=['IV'],
    test_size = 0.2,
    add_features=add
)

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [2]:
from polymerlearn.models.gnn import PolymerGNN_IV
from polymerlearn.utils import CV_eval

model_kwargs = {
    'input_feat': 6,         # How many input features on each node; don't change this
    'hidden_channels': 32,   # How many intermediate dimensions to use in model
                            # Can change this ^^
    'num_additional': 4      # How many additional resin properties to include in the prediction
                            # Corresponds to the number in get_IV_add
}

model = PolymerGNN_IV(**model_kwargs)

optimizer_gen = torch.optim.AdamW
criterion = torch.nn.MSELoss()

all_predictions, all_y, all_inds, state_dicts = CV_eval(
    dataset = dataset,
    model_generator = PolymerGNN_IV,
    optimizer_generator = optimizer_gen,
    criterion = criterion,
    model_generator_kwargs = model_kwargs,
    optimizer_kwargs = {'lr': 0.0001, 'weight_decay':0.01},
    epochs = 800,
    batch_size = 64,
    verbose = 1,
    save_state_dicts=True
)

  return self.test_data, torch.tensor(self.Ytest).float(), self.add_test, test_inds


Fold: 1 	 Epoch: 0,                     	 Train r2: -22.1721 	 Train Loss: 44.6549
Fold: 1 	 Epoch: 50,                     	 Train r2: 0.3647 	 Train Loss: 2.7650
Fold: 1 	 Epoch: 100,                     	 Train r2: 0.4047 	 Train Loss: 1.2710
Fold: 1 	 Epoch: 150,                     	 Train r2: 0.6124 	 Train Loss: 1.1223
Fold: 1 	 Epoch: 200,                     	 Train r2: 0.6868 	 Train Loss: 1.4641
Fold: 1 	 Epoch: 250,                     	 Train r2: 0.6115 	 Train Loss: 1.4694
Fold: 1 	 Epoch: 300,                     	 Train r2: 0.6351 	 Train Loss: 0.9036
Fold: 1 	 Epoch: 350,                     	 Train r2: 0.8235 	 Train Loss: 0.4980
Fold: 1 	 Epoch: 400,                     	 Train r2: 0.7001 	 Train Loss: 1.2849
Fold: 1 	 Epoch: 450,                     	 Train r2: 0.7411 	 Train Loss: 0.7745
Fold: 1 	 Epoch: 500,                     	 Train r2: 0.7871 	 Train Loss: 0.5950
Fold: 1 	 Epoch: 550,                     	 Train r2: 0.7716 	 Train Loss: 0.5620
Fold: 1 	 Epoch:

In [3]:
torch.save(state_dicts, open('state_dicts.pt', 'wb')) # Save state dicts to load later
torch.save(dataset, 'dataset.pt')

In [4]:
from polymerlearn.explain import PolymerGNN_IV_EXPLAIN, PolymerGNNExplainer

mexplain = PolymerGNN_IV_EXPLAIN(**model_kwargs)
mexplain.load_state_dict(model.state_dict()) # Load weights from trained model over to explaining one

explainer = PolymerGNNExplainer(mexplain)

test_batch, Ytest, add_test = dataset.get_test()
test_inds = dataset.test_mask

exp_summary = []

for i in range(Ytest.shape[0]):
    scores = explainer.get_explanation(test_batch[i], add_test[i])
    scores['A'] = torch.sum(scores['A'], dim = 1)
    scores['G'] = torch.sum(scores['G'], dim = 1)
    scores['table_ind'] = test_inds[i]

    exp_summary.append(scores)

  torch.tensor(add_test).float())


In [7]:
# Summarize importance scores:
from polymerlearn.utils.graph_prep import get_AG_info

# Mw summary:
acid_scores = []
glycol_scores = []

mw_scores = []
an_scores = []
ohn_scores = []
tmp_scores = []

acid_names = pd.Series([c[1:] for c in data_mask.columns[20:33].tolist()])
glycol_names = pd.Series([c[1:] for c in data_mask.columns[34:46].tolist()])
acids, glycols, _, _ = get_AG_info(data_mask)

acid_key = {a:[] for a in acid_names}
glycol_key = {g:[] for g in glycol_names}

for i in range(len(exp_summary)):

    df_ind = exp_summary[i]['table_ind']

    for a in range(len(acids[df_ind])):
        acid_key[acids[df_ind][a]].append(exp_summary[i]['A'][a].item()) 
    
    for g in range(len(glycols[df_ind])):
        glycol_key[glycols[df_ind][g]].append(exp_summary[i]['G'][g].item()) 

    acid_scores.append(torch.sum(exp_summary[i]['A']).item())
    glycol_scores.append(torch.sum(exp_summary[i]['G']).item())

    # Break down individual scores:
    mw_scores.append(exp_summary[i]['add'][0].item())
    an_scores.append(exp_summary[i]['add'][1].item())
    ohn_scores.append(exp_summary[i]['add'][2].item())
    tmp_scores.append(exp_summary[i]['add'][3].item())

print(acid_scores)
print(glycol_scores)

print(mw_scores)
print(an_scores)
print(ohn_scores)
print(tmp_scores)

print(acid_key)
print(glycol_key)


[-0.5220816731452942, -0.871830940246582, -1.372873067855835, -0.7051889300346375, -1.479537844657898, -0.8235787749290466, -0.49047982692718506, -0.4644251763820648, -1.7646515369415283, -1.5553362369537354, -1.144914150238037, -2.20985746383667, -1.4301812648773193, -0.5704778432846069, -0.9277279376983643, -0.6290164589881897, -0.811713457107544, -0.4857237935066223, -1.2340717315673828, -0.7832549214363098, -2.0962419509887695, -0.3163483738899231, -0.6890739798545837, -1.1152818202972412, -0.6931972503662109, -0.519429087638855, -0.70504230260849, -0.5535339117050171, -0.7099688053131104, -0.5150319337844849, -1.429985761642456, -0.777635931968689, -0.45038893818855286, -0.8560777902603149, -0.5053524971008301, -0.9263220429420471, -0.41691508889198303, -1.2905853986740112, -1.724608302116394, -1.264465093612671, -1.064122200012207, -0.8285700678825378, -0.9279764890670776, -0.7006344199180603, -2.206166982650757, -0.6776403188705444, -2.2430825233459473, -0.9685702323913574, -2.1