In [6]:
import pandas as pd
import torch
import torch.utils.data
import seaborn as sns
from torchmetrics import MetricCollection, classification
from scipy.stats import friedmanchisquare

from Modules import PyTorch_Training, Fingerprint_Generator, Misc_Utils, Fingerprint_Comparator

In [7]:
# Seed PyTorch's global RNG so repeated runs of the notebook are comparable.
torch.manual_seed(42)

# Read the pre-split training and test tables (columns shown below: smiles, label).
df_train, df_test = map(
    pd.read_csv,
    ('Transformed_Data/rega_train.csv', 'Transformed_Data/rega_test.csv'),
)

# Quick sanity check of the first two training rows.
print(df_train.iloc[:2])

              smiles  label
0  CN(C)C(=N)N=C(N)N      0
1   COC(=O)C=CC(=O)O      0


In [8]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# torch.cuda.get_device_name() raises when CUDA is unavailable, so only query
# the GPU name on the CUDA path; the CPU path just reports the device type.
if device == "cuda":
    print(f"Using {device} device: {torch.cuda.get_device_name(0)}")
else:
    print(f"Using {device} device")

# Build the predictor with the sizes (2048, 512, 1) and move it to the chosen device.
# NOTE(review): presumably (input bits, hidden units, outputs) - confirm against
# PyTorch_Training.DILI_Models.DILI_Predictor_Sequential.
DILI_model = PyTorch_Training.DILI_Models.DILI_Predictor_Sequential(2048, 512, 1).to(device)

Using cuda device: NVIDIA GeForce GTX 1070


In [9]:
# Binary-classification metrics that are evaluated together during training.
binary_metrics = [
    # NOTE(review): the results table reports this metric as "test_BinaryAccuracy",
    # so confirm whether average='macro' really yields balanced accuracy for
    # task='binary' or whether it is plain accuracy.
    classification.Accuracy(task='binary', average='macro'),
    classification.BinaryAUROC(),
    classification.BinaryMatthewsCorrCoef(),
    classification.BinaryPrecision(),
    classification.BinaryF1Score(),
    classification.BinarySpecificity(),
    classification.BinaryJaccardIndex(),
]

# Bundle the metrics and keep their state on the same device as the model.
metric_collection = MetricCollection(binary_metrics).to(device)

In [10]:
# Abridged fingerprint set (abridged_count=2) to keep the comparison short.
fingerprint_lists = Fingerprint_Generator.Fingerprint_Lists()
regular_fingerprints = fingerprint_lists.regular_fingerprints(abridged_set=True, abridged_count=2)

# First column of the training table holds the SMILES strings, second the labels.
train_smiles = df_train.iloc[:, 0]
train_labels = df_train.iloc[:, 1]
comparator = Fingerprint_Comparator.Pytorch_Train(
    train_smiles,
    train_labels,
    DILI_model,
    2048,
    metric_collection,
)

# 10-fold cross-validation, 10 epochs per fold, for every fingerprint in the set.
comparator_results, comparator_results_multiindex = comparator.regular_fingerprint(
    regular_fingerprints,
    k_folds=10,
    epochs=10,
)

# Flatten the index into an ordinary column (done in place on the returned frame).
comparator_results.reset_index(inplace=True)


-------------------------------------------------------------maccs-------------------------------------------------------------


  return torch.tensor([features], dtype=torch.float32), torch.tensor([labels], dtype=torch.float32)


Fold 1 final results after 10 epochs: Train Acc: 0.509 Train Loss: 0.667 (n = 2109) | Test Acc: 0.558 Test Loss: 0.658 (n = 235) 
Fold 2 final results after 10 epochs: Train Acc: 0.437 Train Loss: 0.648 (n = 2109) | Test Acc: 0.586 Test Loss: 0.674 (n = 235) 
Fold 3 final results after 10 epochs: Train Acc: 0.557 Train Loss: 0.653 (n = 2109) | Test Acc: 0.597 Test Loss: 0.663 (n = 235) 
Fold 4 final results after 10 epochs: Train Acc: 0.456 Train Loss: 0.659 (n = 2109) | Test Acc: 0.599 Test Loss: 0.657 (n = 235) 
Fold 5 final results after 10 epochs: Train Acc: 0.478 Train Loss: 0.666 (n = 2110) | Test Acc: 0.468 Test Loss: 0.674 (n = 234) 
Fold 6 final results after 10 epochs: Train Acc: 0.636 Train Loss: 0.648 (n = 2110) | Test Acc: 0.542 Test Loss: 0.658 (n = 234) 
Fold 7 final results after 10 epochs: Train Acc: 0.635 Train Loss: 0.665 (n = 2110) | Test Acc: 0.578 Test Loss: 0.641 (n = 234) 
Fold 8 final results after 10 epochs: Train Acc: 0.506 Train Loss: 0.651 (n = 2110) | Test

In [11]:
# Show the first row of the per-fold results to check the column layout.
comparator_results.iloc[:1]

Unnamed: 0,index,Fingerprint,Fold,test_BinaryAccuracy,test_BinaryAUROC,test_BinaryMatthewsCorrCoef,test_BinaryPrecision,test_BinaryF1Score,test_BinarySpecificity,test_BinaryJaccardIndex
0,0,maccs,0,0.558,0.473,0.647,0.732,0.294,0.637,0.675


In [None]:
# Summarise test accuracy per fingerprint.
# NOTE(review): presumably returns a per-fingerprint frame plus an overall mean
# score; confirm against Misc_Utils.get_average_score.
mean_comparator_output, mean_score = Misc_Utils.get_average_score(comparator_results, "test_BinaryAccuracy", "Fingerprint", normalize_scores=True)

# One bar per fingerprint, drawn starting from `mean_score` (passed as `bottom`).
fp_plot = sns.barplot(
    data=mean_comparator_output,
    x='Fingerprint',
    y='test_BinaryAccuracy',
    hue="Fingerprint",
    errorbar=None,
    bottom=mean_score,
)

# Rotate and right-align the existing tick labels instead of re-setting the ticks.
# The previous version rebuilt the ticks from `comparator_results`, which has one
# row per fold and therefore repeats every fingerprint; styling the labels that
# the bar plot already created avoids the duplicates and needs no set_xticklabels.
fp_plot.tick_params(axis="x", labelrotation=40)
for tick_label in fp_plot.get_xticklabels():
    tick_label.set_horizontalalignment("right")

In [None]:
# Friedman test on per-fold test accuracy: one sample set per fingerprint.

# The column selection does not depend on the fingerprint, so do it once.
fold_accuracy = comparator_results.filter(["Fingerprint", "test_BinaryAccuracy"], axis=1)

# One two-column frame (Fingerprint, test_BinaryAccuracy) per fingerprint.
list_of_scores = []
for fingerprint in regular_fingerprints:
    list_of_scores.append(fold_accuracy.query('Fingerprint == @fingerprint'))

# scipy's friedmanchisquare raises ValueError for fewer than three sample sets,
# which happens whenever an abridged fingerprint set with fewer than three
# entries is used. Report that instead of crashing the notebook.
if len(list_of_scores) < 3:
    friedman_stat = friedman_p = float("nan")
    print(f"Friedman test skipped: needs at least 3 fingerprints, got {len(list_of_scores)}")
else:
    friedman_stat, friedman_p = friedmanchisquare(*(scores.iloc[:, 1] for scores in list_of_scores))
    print(friedman_p)

In [None]:
import matplotlib.pyplot as plt


def plot_loss_curve(results: dict[str, list[float]]):
    """Draw the training and test loss curves on one figure.

    Args:
        results: Mapping that must contain the keys "train_loss" and
            "test_loss"; each value is plotted against its position in the
            sequence.

    Raises:
        KeyError: If either key is missing from ``results``.
    """
    # Look up both series first so a missing key fails before any figure exists.
    curves = {name: results[name] for name in ("train_loss", "test_loss")}

    fig, ax = plt.subplots(figsize=(10, 6))

    # The legend label of each curve is the key it was read from.
    for name, values in curves.items():
        ax.plot(values, label=name)

    ax.set_title("Loss vs Epochs")
    ax.set_ylabel("Loss")
    ax.set_xlabel("Epochs")
    ax.legend()


# NOTE(review): `model0_loss` is not defined in any visible cell of this notebook;
# presumably a leftover from an earlier training run - confirm it exists on a
# fresh kernel or replace it with the loss history produced above.
plot_loss_curve(model0_loss)

In [None]:
# from Modules import My_Pytorch_Utilities

# dummy_data = torch.rand([1, 2048])

# dummy_data.shape

# My_Pytorch_Utilities.save(model0, "DILIst", dummy_data.to(device))