In [1]:
import joblib
import pandas as pd

from rdkit import Chem
from sklearn.metrics import roc_auc_score
from torch_geometric.data import Data
import torch_geometric.loader as graph_loader
from torch_geometric.nn import GCNConv, global_mean_pool, global_add_pool
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import numpy as np
from rdkit.Chem import rdFingerprintGenerator
import deepchem as dc
import random
import optuna
from sklearn.decomposition import PCA
from sklearn.metrics import (
    accuracy_score,
    log_loss,
    roc_auc_score,
    precision_score,
    recall_score,
    confusion_matrix,
    f1_score,
)
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
import torch.utils.data as data_loader
import torch.nn as nn
import torch.optim as optim
    
import json

  from .autonotebook import tqdm as notebook_tqdm
No normalization for SPS. Feature removed!
No normalization for AvgIpc. Feature removed!
Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'dgl'
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'lightning'
Skipped loading some Jax models, missing a dependency. No module named 'jax'
Skipped loading some PyTorch models, missing a dependency. No module named 'tensorflow'


In [2]:
# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [46]:
# set seed for reproducibility
random_state = 42
np.random.seed(random_state)
torch.manual_seed(random_state)
if torch.cuda.is_available():
    torch.cuda.manual_seed(random_state)

# Load pretrain models

In [1]:
# Fingerprints

In [4]:
# load the results back
loaded_results = joblib.load("models/tpot_results.joblib")

# Convert results to DataFrame
tpot_df = pd.DataFrame(loaded_results)
tpot_df.head()

Unnamed: 0,Best model,PCA Components,Model Name,Parameters,AUC,Precision,Recall,Sensitivity,Specificity
0,"((ExtraTreeClassifier(max_features=0.55, min_s...",10,ExtraTreesClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_...",0.860355,0.911765,0.841629,0.841629,0.723077
1,"((ExtraTreeClassifier(criterion='entropy', max...",20,ExtraTreesClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_...",0.901636,0.930348,0.846154,0.846154,0.784615
2,"((ExtraTreeClassifier(criterion='entropy', max...",50,ExtraTreesClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_...",0.899617,0.919811,0.882353,0.882353,0.738462
3,"(MinMaxScaler(), (ExtraTreeClassifier(max_feat...",100,ExtraTreesClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_...",0.916881,0.92093,0.895928,0.895928,0.738462
4,"(KNeighborsClassifier(n_neighbors=4, weights='...",200,KNeighborsClassifier,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...",0.89906,0.938144,0.823529,0.823529,0.815385


In [5]:
fingerprints_model = loaded_results[3]["Best model"]
fingerprints_model

In [6]:
# NLP

In [7]:
class AttentionRNNModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super(AttentionRNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.attention = nn.Linear(hidden_dim, 1)  # Attention weights
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded)
        attention_weights = torch.softmax(self.attention(output), dim=1)
        context_vector = torch.sum(attention_weights * output, dim=1)
        out = self.fc(context_vector)
        return out


In [8]:
# load model from pth
with open("models/nlp_AttentionRNNModel_params.json", "r") as f:
    nlp_params = json.load(f)
print(nlp_params)

{'hidden_dim': 52, 'lr': 0.00017228662206929206, 'model_class': 'AttentionRNNModel', 'embedding_dim': 46}


In [9]:
vocab_size = 38
embedding_dim = nlp_params["embedding_dim"]
hidden_dim = nlp_params["hidden_dim"]
nlp_model = AttentionRNNModel(vocab_size, embedding_dim, hidden_dim, output_dim=1).to(device)
nlp_model.load_state_dict(torch.load("models/nlp_AttentionRNNModel.pth"))
nlp_model

  nlp_model.load_state_dict(torch.load("models/nlp_AttentionRNNModel.pth"))


AttentionRNNModel(
  (embedding): Embedding(38, 46)
  (rnn): RNN(46, 52, batch_first=True)
  (attention): Linear(in_features=52, out_features=1, bias=True)
  (fc): Linear(in_features=52, out_features=1, bias=True)
)

In [36]:
# NLP OpenAI

In [37]:
# load the results back
loaded_results = joblib.load("models/tpot_nlp_openai_pca.joblib")

# Convert results to DataFrame
tpot_df = pd.DataFrame(loaded_results)
tpot_df.head()

Unnamed: 0,Best model,PCA Components,Model Name,Parameters,AUC,Precision,Recall,Sensitivity,Specificity
0,"(ZeroCount(), (ExtraTreeClassifier(criterion='...",10,ExtraTreesClassifier,"{'bootstrap': False, 'ccp_alpha': 0.0, 'class_...",0.689871,0.82243,0.79638,0.79638,0.415385


In [38]:
nlp_openai_model = loaded_results[0]["Best model"]
nlp_openai_model

In [10]:
# Graph

In [11]:
class GCN(torch.nn.Module):
    def __init__(
        self,
        num_node_features,
        num_classes,
        num_layers=3,
        hidden_dim=64,
        dropout_prob=0.5,
        activation="relu",
    ):
        super(GCN, self).__init__()

        # Store activation function dynamically
        if activation == "relu":
            self.activation = F.relu
        elif activation == "tanh":
            self.activation = F.tanh
        else:
            raise ValueError("Unsupported activation function")

        self.dropout_prob = dropout_prob

        # Dynamically define the GCN layers
        self.convs = torch.nn.ModuleList()
        self.convs.append(GCNConv(num_node_features, hidden_dim))
        for _ in range(num_layers - 1):
            self.convs.append(GCNConv(hidden_dim, hidden_dim))

        # Final fully connected layer
        self.fc = torch.nn.Linear(hidden_dim * 2, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # Apply GCN layers dynamically
        for conv in self.convs:
            x = conv(x, edge_index)
            x = self.activation(x)

        # Global pooling (combine different pooling methods)
        x = torch.cat([global_mean_pool(x, batch), global_add_pool(x, batch)], dim=1)

        # Apply dropout
        x = F.dropout(x, p=self.dropout_prob, training=self.training)

        # Final classification layer
        return F.log_softmax(self.fc(x), dim=1)

In [12]:
# load model from pth
graph_params = {
    "hidden_dim": 210,
    "dropout_prob": 0.4169127419744815,
    "learning_rate": 0.0007529508911659432,
    "weight_decay": 9.903281752862725e-06,
    "num_layers": 5,
    "activation": "relu",
}


graph_model = GCN(
    num_node_features=70,
    num_classes=2,
    num_layers=graph_params["num_layers"],
    hidden_dim=graph_params["hidden_dim"],
    dropout_prob=graph_params["dropout_prob"],
    activation=graph_params["activation"],
).to(device)

graph_model.load_state_dict(torch.load("models/graph_gcn_x20_filtered_outliers.pth"))
graph_model

  graph_model.load_state_dict(torch.load("models/graph_gcn_x20_filtered_outliers.pth"))


GCN(
  (convs): ModuleList(
    (0): GCNConv(70, 210)
    (1-4): 4 x GCNConv(210, 210)
  )
  (fc): Linear(in_features=420, out_features=2, bias=True)
)

# Data preparation

## Fingerprints

In [13]:
# Load pd_train_fp
pd_train_fp = pd.read_parquet("data/training_class.parquet")
pd_train_fp["label"] = pd_train_fp["Class"].apply(
    lambda x: 1 if x == "Hepatotoxicity" else 0
)
print(pd_train_fp.shape)

(1241, 16094)


In [14]:
# Load pd_test_fp
pd_test_fp = pd.read_parquet("data/testing_class.parquet")
pd_test_fp["label"] = pd_test_fp["Class"].apply(
    lambda x: 1 if x == "Hepatotoxicity" else 0
)
print(pd_test_fp.shape)

(286, 16094)


In [15]:
X_train_fp = pd_train_fp.drop(columns=["Class", "label"])
y_train_fp = pd_train_fp["label"]

X_test_fp = pd_test_fp.drop(columns=["Class", "label"])
y_test_fp = pd_test_fp["label"]

In [16]:
# get X PCA components 100
pca = PCA(n_components=100)
X_train_fp_pca = pca.fit_transform(X_train_fp)
X_test_fp_pca = pca.transform(X_test_fp)

## NLP

In [17]:
# Load pd_train_nlp
pd_train_nlp = pd.read_csv("data_smiles/Training_Group.csv")
pd_train_nlp["label"] = pd_train_nlp["Liver"].apply(lambda x: 1 if x == "Hepatotoxicity" else 0)
print(pd_train_nlp.shape)
pd_train_nlp.head()

(1241, 3)


Unnamed: 0,Smiles,Liver,label
0,S=C=Nc1c2c(ccc1)cccc2,Hepatotoxicity,1
1,c1(c(cc(cc1[N+](=O)[O-])[N+](=O)[O-])[N+](=O)[...,Hepatotoxicity,1
2,c1(c(cc(cc1)[N+](=O)[O-])[N+](=O)[O-])O,Hepatotoxicity,1
3,O(CCO)CC,Hepatotoxicity,1
4,Oc1cc2c(cc1)cccc2,Hepatotoxicity,1


In [18]:
# Load pd_test_nlp
pd_test_nlp = pd.read_csv("data_smiles/Testing_Group.csv")
pd_test_nlp["label"] = pd_test_nlp["Liver"].apply(lambda x: 1 if x == "Hepatotoxicity" else 0)
print(pd_test_nlp.shape)
pd_test_nlp.head()

(286, 3)


Unnamed: 0,Smiles,Liver,label
0,C[C@@H](C(=O)N[C@@H](C)C(=O)NC1[C@H]2[C@@H]1CN...,Hepatotoxicity,1
1,C=CCOC1=C(C=C(C=C1)CC(=O)O)Cl,Hepatotoxicity,1
2,CCCN(CCC)C(=O)CC1=C(N=C2N1C=C(C=C2)Cl)C3=CC=C(...,Hepatotoxicity,1
3,C1CC2=CC=CC=C2C(C3=CC=CC=C31)NCCCCCCC(=O)O,Hepatotoxicity,1
4,C1=CC=C(C=C1)CN2C3=CC=CC=C3C(=N2)OCC(=O)O,Hepatotoxicity,1


In [19]:
# Tokenize SMILES
df = pd.concat([pd_train_nlp, pd_test_nlp], axis=0)
unique_chars = set("".join(df["Smiles"]))
char_to_idx = {char: idx for idx, char in enumerate(unique_chars)}
idx_to_char = {idx: char for char, idx in char_to_idx.items()}
vocab_size = len(char_to_idx)


def tokenize_smiles(smiles):
    return [char_to_idx[char] for char in smiles]


# tokenize for X_train_nlp, y_train_nlp, X_test_nlp, y_test_nlp
X_train_nlp = [tokenize_smiles(s) for s in pd_train_nlp["Smiles"]]
y_train_nlp = pd_train_nlp["label"].values

X_test_nlp = [tokenize_smiles(s) for s in pd_test_nlp["Smiles"]]
y_test_nlp = pd_test_nlp["label"].values

print(f"X_train_nlp: {len(X_train_nlp)}")
print(f"y_train_nlp: {len(y_train_nlp)}")

print(f"X_test_nlp: {len(X_test_nlp)}")
print(f"y_test_nlp: {len(y_test_nlp)}")

X_train_nlp: 1241
y_train_nlp: 1241
X_test_nlp: 286
y_test_nlp: 286


In [20]:
class SmilesDataset(Dataset):
    def __init__(self, smiles, labels, max_len=200):
        self.smiles = [s[:max_len] for s in smiles]
        self.labels = labels
        self.max_len = max_len

    def __len__(self):
        return len(self.smiles)

    def __getitem__(self, idx):
        x = np.zeros(self.max_len, dtype=int)
        x[: len(self.smiles[idx])] = self.smiles[idx]
        y = self.labels[idx]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.float)


# Create train and test sets
train_dataset_nlp = SmilesDataset(X_train_nlp, y_train_nlp)
test_dataset_nlp = SmilesDataset(X_test_nlp, y_test_nlp)

train_loader_nlp = data_loader.DataLoader(train_dataset_nlp, batch_size=32, shuffle=True)
test_loader_nlp = data_loader.DataLoader(test_dataset_nlp, batch_size=32, shuffle=False)

## NLP OpenAI

In [29]:
# load the embeddings
pd_train_nlp_openai = joblib.load("data_smiles/Training_Group_openai_embeddings.pkl")
pd_test_nlp_openai = joblib.load("data_smiles/Testing_Group_openai_embeddings.pkl")

In [30]:
# Prepare features and labels for the split
X_train_nlp_openai = np.vstack(
    pd_train_nlp_openai["Embeddings"].values
)  # Stack embeddings into a 2D array
y_train_nlp_openai = pd_train_nlp_openai["label"].values

print("X_train_nlp_openai Shape:", X_train_nlp_openai.shape)
print("y_train_nlp_openai Shape:", y_train_nlp_openai.shape)

X_train_nlp_openai Shape: (1241, 1536)
y_train_nlp_openai Shape: (1241,)


In [31]:
X_test_nlp_openai = np.vstack(
    pd_test_nlp_openai["Embeddings"].values
)  # Stack embeddings into a 2D array
y_test_nlp_openai = pd_test_nlp_openai["label"].values

print("X_test_nlp_openai Shape:", X_test_nlp_openai.shape)
print("y_test_nlp_openai Shape:", y_test_nlp_openai.shape)

X_test_nlp_openai Shape: (286, 1536)
y_test_nlp_openai Shape: (286,)


In [32]:
# get X PCA components 10
pca = PCA(n_components=10)
X_train_nlp_openai_pca = pca.fit_transform(X_train_nlp_openai)
X_test_nlp_openai_pca = pca.transform(X_test_nlp_openai)

## Graph

In [33]:
def featurize_smiles(smiles):
    featurizer = dc.feat.MolGraphConvFeaturizer(use_edges=True)
    graph_data = featurizer.featurize([smiles])[0]

    # Get DeepChem atom features
    atom_features_deepchem = graph_data.node_features

    return atom_features_deepchem


# Function to generate Morgan Fingerprints (ECFP)
def generate_ecfp(smiles):
    # Morgan fingerprint generator
    mfgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=4096)

    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        return None
    return mfgen.GetFingerprintAsNumPy(molecule)


# Function to convert SMILES to PyTorch Geometric Data object using DeepChem featurizer
def smiles_to_graph_featurizer(smiles):
    # Featurization using DeepChem
    featurizer = dc.feat.MolGraphConvFeaturizer(use_edges=True)

    # Featurize the SMILES string using DeepChem
    graph_data = featurizer.featurize([smiles])[0]
    return graph_data.node_features, graph_data.edge_features, graph_data.edge_index


# Function to extract atom features
def atom_features(atom, ecfp):
    # Get the atom index for corresponding ECFP value
    atom_idx = atom.GetIdx()

    return [
        atom.GetAtomicNum(),  # Atomic number
        atom.GetDegree(),  # Number of bonds
        atom.GetTotalNumHs(),  # Total number of hydrogens
        atom.GetFormalCharge(),  # Formal charge of the atom
        atom.GetImplicitValence(),  # Implicit valence
        atom.GetNumRadicalElectrons(),  # Number of radical electrons
        int(atom.GetIsAromatic()),  # Is the atom aromatic?
        atom.GetMass(),  # Atomic mass
        atom.GetHybridization().real,  # Hybridization state (SP, SP2, SP3, etc.)
        ecfp[atom_idx],  # Morgan fingerprint (ECFP) for the atom
    ]


# Function to extract bond features
def bond_features(bond):
    bond_type = bond.GetBondTypeAsDouble()  # Bond type as a float
    is_aromatic = bond.GetIsAromatic()  # Aromatic bond
    is_conjugated = bond.GetIsConjugated()  # Conjugated bond
    is_in_ring = bond.IsInRing()  # Whether the bond is part of a ring
    stereo = bond.GetStereo()  # Bond stereochemistry

    # Convert stereo information to a one-hot encoded format
    stereo_one_hot = [0, 0, 0, 0]  # Stereo options: None, E, Z, Other
    if stereo == Chem.BondStereo.STEREONONE:
        stereo_one_hot[0] = 1
    elif stereo == Chem.BondStereo.STEREOE:
        stereo_one_hot[1] = 1
    elif stereo == Chem.BondStereo.STEREOZ:
        stereo_one_hot[2] = 1
    else:
        stereo_one_hot[3] = 1

    # Combine all features into a single tensor
    return [
        bond_type,
        float(is_aromatic),
        float(is_conjugated),
        float(is_in_ring),
    ] + stereo_one_hot


# Convert SMILES to PyTorch Geometric Data object
def smiles_to_graph(smiles, label):
    mol = Chem.MolFromSmiles(smiles)

    atom_features_list = []
    edge_index = []
    edge_attr = []

    # DeepChem features
    atom_features_deepchem = featurize_smiles(smiles)

    # Generate Morgan Fingerprint (ECFP)
    ecfp_features = generate_ecfp(smiles)

    # Generate Molecule Graph Convolution features
    mol_graph_node_features, mol_graph_edge_features, mol_graph_edge_index = (
        smiles_to_graph_featurizer(smiles)
    )

    # Nodes (atoms)
    for atom in mol.GetAtoms():
        atom_features_list.append(atom_features(atom, ecfp_features))

    atom_features_list = np.array(atom_features_list)

    # Edges (bonds)
    for bond in mol.GetBonds():
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()

        # Append bidirectional edges for undirected graphs
        edge_index.append([i, j])
        edge_index.append([j, i])

        # Append bond features for both directions
        edge_attr.append(bond_features(bond))
        edge_attr.append(bond_features(bond))

    # Convert atom features to a tensor
    combined_features = np.concatenate(
        (atom_features_list, atom_features_deepchem, mol_graph_node_features), axis=1
    )
    x = torch.tensor(combined_features, dtype=torch.float)

    # Convert edge indices and features to tensors, handle empty edge case
    edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()

    # combine edge features from ECFP and MolGraphConv
    edge_attr = np.array(edge_attr)
    edge_attr = np.concatenate((edge_attr, mol_graph_edge_features), axis=1)
    edge_attr = torch.tensor(edge_attr, dtype=torch.float)

    # Label (target)
    y = torch.tensor([label], dtype=torch.long)

    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)


# Function to load data from CSV and apply SMILES augmentation for training
def load_data_from_csv(file_path):
    df = pd.read_csv(file_path)

    smiles_list = df["Smiles"].values
    labels = df["Liver"].apply(lambda x: 1 if x == "Hepatotoxicity" else 0).values

    data_list = []

    # Initialize the SmilesEnumerator for data augmentation
    for smiles, label in zip(smiles_list, labels):
        # For test data, no augmentation, just use canonical SMILES
        graph_data = smiles_to_graph(smiles, label)
        data_list.append(graph_data)

    return data_list

In [34]:
# Load training and testing data
training_data_graph = load_data_from_csv("data_smiles/Training_Group.csv")
testing_data_graph = load_data_from_csv("data_smiles/Testing_Group.csv")

In [35]:
# Create data loaders
train_loader_graph = graph_loader.DataLoader(training_data_graph, batch_size=32, shuffle=True)
test_loader_graph = graph_loader.DataLoader(testing_data_graph, batch_size=32, shuffle=False)

# Model training

## Weighted Averaging Ensembles

In [90]:
def objective(trial):
    try:
        # Hyperparameter Suggestions
        weight_etc = trial.suggest_float("weight_etc", 0.0, 1.0)
        weight_gcn = trial.suggest_float("weight_gcn", 0.0, 1.0 - weight_etc)
        weight_nlp = trial.suggest_float("weight_nlp", 0.0, 1.0 - weight_etc - weight_gcn)
        weight_nlp_openai = 1.0 - weight_etc - weight_gcn - weight_nlp

        # Device setup
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


        ## PREDICTIONS ##
        etc_probs = fingerprints_model.predict_proba(X_test_fp_pca)[:, 1]

        # Get predictions from GCN
        graph_model.eval()
        gcn_probs = []
        for batch in test_loader_graph:
            batch = batch.to(device)
            with torch.no_grad():
                out = graph_model(batch)
            gcn_probs.append(out[:, 1].cpu().numpy())  # Probabilities for class 1
        gcn_probs = np.concatenate(gcn_probs)


        # Get predictions from NLP
        nlp_model.eval()
        nlp_probs = []
        for batch in test_loader_nlp:
            inputs, labels = batch  # Unpack the tuple
            inputs = inputs.to(device)  # Move inputs to the specified device
            with torch.no_grad():
                out = nlp_model(inputs)
            nlp_probs.append(out[:, 0].cpu().numpy())
        nlp_probs = np.concatenate(nlp_probs)

        # Get predictions from NLP OpenAI
        nlp_openai_probs = nlp_openai_model.predict_proba(X_test_nlp_openai_pca)[:, 1]

        # Ensemble predictions with average weights
        final_probs_i = (
            weight_etc * etc_probs
            + weight_gcn * gcn_probs
            + weight_nlp * nlp_probs
            + weight_nlp_openai * nlp_openai_probs
        )

        # Evaluate AUC
        auc = roc_auc_score(y_test_fp, final_probs_i)

        # Save the best model
        global best_model_auc, best_weights
        if auc > best_model_auc["auc"]:
            best_model_auc["auc"] = auc
            best_weights["weight_etc"] = weight_etc
            best_weights["weight_gcn"] = weight_gcn
            best_weights["weight_nlp"] = weight_nlp
            best_weights["weight_nlp_openai"] = weight_nlp_openai

        # Return AUC
        return auc

    except Exception as e:
        # Log the error and continue
        print(f"Trial failed with error: {e}")
        return float("-inf")  # Return a very low value to indicate failure

In [91]:
# Run Optuna
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

[I 2024-11-22 22:03:01,230] A new study created in memory with name: no-name-46717c64-6f9f-45e7-8a1c-52dfa120eeaa
[I 2024-11-22 22:03:01,497] Trial 0 finished with value: 0.8872258962756701 and parameters: {'weight_etc': 0.8700429955207939, 'weight_gcn': 0.11431731521002467, 'weight_nlp': 0.001967138420191507}. Best is trial 0 with value: 0.8872258962756701.
[I 2024-11-22 22:03:01,638] Trial 1 finished with value: 0.8336233901844763 and parameters: {'weight_etc': 0.14264184656062173, 'weight_gcn': 0.5207453527202859, 'weight_nlp': 0.09739042437991409}. Best is trial 0 with value: 0.8872258962756701.
[I 2024-11-22 22:03:01,757] Trial 2 finished with value: 0.89975635224504 and parameters: {'weight_etc': 0.9262725645875486, 'weight_gcn': 0.06498356453834128, 'weight_nlp': 0.0017398943170201702}. Best is trial 2 with value: 0.89975635224504.
[I 2024-11-22 22:03:01,838] Trial 3 finished with value: 0.8574312565262792 and parameters: {'weight_etc': 0.5735899608420294, 'weight_gcn': 0.323210

Trial failed with error: The `low` value must be smaller than or equal to the `high` value (low=0.0, high=-0.08399700468066393).


[I 2024-11-22 22:03:02,686] Trial 14 finished with value: 0.9164636268708668 and parameters: {'weight_etc': 0.7557804867279583, 'weight_gcn': 0.008346279163048944, 'weight_nlp': 0.03756107764754335}. Best is trial 14 with value: 0.9164636268708668.
[I 2024-11-22 22:03:02,780] Trial 15 finished with value: 0.9016359206404455 and parameters: {'weight_etc': 0.7502811647380484, 'weight_gcn': 0.04479350832196252, 'weight_nlp': 0.03664380392775722}. Best is trial 14 with value: 0.9164636268708668.
[I 2024-11-22 22:03:02,869] Trial 16 finished with value: 0.8547163243995823 and parameters: {'weight_etc': 0.5844160005888921, 'weight_gcn': 0.3488692341133256, 'weight_nlp': 0.02689408449978329}. Best is trial 14 with value: 0.9164636268708668.
[I 2024-11-22 22:03:02,957] Trial 17 finished with value: 0.9023320570831883 and parameters: {'weight_etc': 0.7972644011901205, 'weight_gcn': 0.0445398471006634, 'weight_nlp': 0.03650899186780428}. Best is trial 14 with value: 0.9164636268708668.
[I 2024-1

In [92]:
# Output the Best Parameters and AUC
print("Best AUC Score:", best_model_auc["auc"])
print("Best Weights:", best_weights)

Best AUC Score: 0.9208492864601462
Best Weights: {'weight_etc': 0.8613187142966218, 'weight_gcn': 0.01305786893311622, 'weight_nlp': 0.0002993376690055543, 'weight_nlp_openai': 0.12532407910125645}
