<a href="https://colab.research.google.com/github/shreekar2005/Cheminformatics_DC/blob/main/DC2_their_RF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
from scipy import io
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from tabulate import tabulate

# Load the Tox21 label tables, dense feature tables and sparse feature matrices.
# Every CSV input is gzip-compressed and uses its first column as the row index.
_gz_csv_opts = {"index_col": 0, "compression": "gzip"}

# Label tables stay as DataFrames so that assays can be addressed by column name.
y_tr = pd.read_csv('drive/MyDrive/DATASET/tox21_labels_train.csv.gz', **_gz_csv_opts)
y_te = pd.read_csv('drive/MyDrive/DATASET/tox21_labels_test.csv.gz', **_gz_csv_opts)

# Dense features are reduced to their underlying NumPy arrays.
x_tr_dense = pd.read_csv('drive/MyDrive/DATASET/tox21_dense_train.csv.gz', **_gz_csv_opts).values
x_te_dense = pd.read_csv('drive/MyDrive/DATASET/tox21_dense_test.csv.gz', **_gz_csv_opts).values

# Sparse features are read from Matrix Market files and converted to CSC format.
x_tr_sparse = io.mmread('drive/MyDrive/DATASET/tox21_sparse_train.mtx.gz').tocsc()
x_te_sparse = io.mmread('drive/MyDrive/DATASET/tox21_sparse_test.mtx.gz').tocsc()

# Filter out very sparse features: keep a sparse column only if it is non-zero
# in more than 5% of the training rows. The mask is computed from the training
# split alone and applied to both splits so they end up with the same columns.
# np.asarray(...).ravel() flattens the per-column mean whether it comes back as
# an np.matrix or a plain ndarray (the `.A` attribute exists only on np.matrix).
sparse_col_idx = np.asarray((x_tr_sparse > 0).mean(0) > 0.05).ravel()

# Final design matrices: dense features followed by the retained sparse columns.
x_tr = np.hstack([x_tr_dense, x_tr_sparse[:, sparse_col_idx].toarray()])
x_te = np.hstack([x_te_dense, x_te_sparse[:, sparse_col_idx].toarray()])

# Accumulators for the evaluation that follows:
#   results_table  - one formatted row per evaluated assay
#   macro_*        - per-assay class-1 metrics (averaged unweighted later)
#   weighted_*     - per-assay class-1 metrics scaled by that assay's support
#   total_support  - running sum of the supports used as weights
results_table = []
macro_precisions, macro_recalls, macro_f1s = [], [], []
weighted_precisions, weighted_recalls, weighted_f1s = [], [], []
total_support = 0

# Report banner: a blank line, then the title framed by two 60-dash rules.
# NOTE: this is emitted before the per-assay "training of ..." progress lines.
_banner_rule = "-" * 60
print(
    "\n" + _banner_rule,
    "Final Classification Report for Selected Assays (Class 1 only):",
    _banner_rule,
    sep="\n",
)

# Loop over each assay: fit one random forest per label column and score its
# class-1 predictions on the test split.
# NOTE: the forest is built without a random_state, so the reported numbers
# can differ from run to run.
for i, target in enumerate(y_tr.columns):
    print(f"training of {i} : {target}")

    # Rows whose label for this assay is not finite (e.g. NaN) are excluded
    # from both fitting and scoring.
    rows_tr = np.isfinite(y_tr[target]).values
    rows_te = np.isfinite(y_te[target]).values

    rf = RandomForestClassifier(n_estimators=100, n_jobs=4)
    rf.fit(x_tr[rows_tr], y_tr[target][rows_tr])

    preds = rf.predict(x_te[rows_te])
    true_labels = y_te[target][rows_te].values

    # Evaluate only if positives exist in both predictions and labels.
    # Skipped assays contribute nothing to the table or to the averages, so
    # say so explicitly instead of dropping them silently.
    if np.sum(preds) == 0 or np.sum(true_labels) == 0:
        print(f"skipping {i} : {target} (no positive predictions or no positive labels)")
        continue

    # labels=[0, 1] pins the matrix to 2x2 even when one class is absent from
    # both arrays; without it the 4-way unpacking below would fail. The labels
    # are finite 0/1 values at this point, so the integer cast is lossless.
    tn, fp, fn, tp = confusion_matrix(
        true_labels.astype(int), preds.astype(int), labels=[0, 1]
    ).ravel()
    support_1 = fn + tp                    # number of true class-1 samples
    support_total = tn + fp + fn + tp      # number of scored test samples

    # Metrics for class 1 (defined as 0.0 when the denominator is empty)
    precision_1 = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall_1 = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1_1 = (2 * precision_1 * recall_1) / (precision_1 + recall_1) if (precision_1 + recall_1) > 0 else 0.0

    # NOTE: the "Support" cell holds the total number of scored samples,
    # whereas the weighted averages below are weighted by class-1 support.
    results_table.append([
        f"Assay {i+1}: {target}", f"{precision_1:.4f}", f"{recall_1:.4f}", f"{f1_1:.4f}", support_total,
        f"{tn}", f"{fp}", f"{fn}", f"{tp}"
    ])

    # For average computation
    macro_precisions.append(precision_1)
    macro_recalls.append(recall_1)
    macro_f1s.append(f1_1)

    weighted_precisions.append(precision_1 * support_1)
    weighted_recalls.append(recall_1 * support_1)
    weighted_f1s.append(f1_1 * support_1)
    total_support += support_1

# Display the per-assay table (one row per assay that was actually evaluated).
headers = ["Assay", "Precision", "Recall", "F1-Score", "Support", "TN", "FP", "FN", "TP"]
print(tabulate(results_table, headers=headers, tablefmt="fancy_grid"))

# Macro average: unweighted mean of the per-assay class-1 metrics.
# Weighted average: per-assay metrics weighted by class-1 support.
# Both are undefined when no assay was evaluated (empty lists, zero support),
# so that case is reported explicitly instead of printing NaN with warnings.
summary_table = []
if macro_f1s and total_support > 0:
    summary_table = [[
        "Macro Avg", f"{np.mean(macro_precisions):.6f}", f"{np.mean(macro_recalls):.6f}",
        f"{np.mean(macro_f1s):.6f}"
    ], [
        "Weighted Avg", f"{np.sum(weighted_precisions)/total_support:.6f}",
        f"{np.sum(weighted_recalls)/total_support:.6f}",
        f"{np.sum(weighted_f1s)/total_support:.6f}"
    ]]
    print("\n" + tabulate(summary_table, headers=["Metric", "Precision", "Recall", "F1-Score"], tablefmt="fancy_grid"))
else:
    print("\nNo assays were evaluated; macro and weighted averages are undefined.")



------------------------------------------------------------
Final Classification Report for Selected Assays (Class 1 only):
------------------------------------------------------------
training of 0 : NR.AhR
training of 1 : NR.AR
training of 2 : NR.AR.LBD
training of 3 : NR.Aromatase
training of 4 : NR.ER
training of 5 : NR.ER.LBD
training of 6 : NR.PPAR.gamma
training of 7 : SR.ARE
training of 8 : SR.ATAD5
training of 9 : SR.HSE
training of 10 : SR.MMP
training of 11 : SR.p53
╒═══════════════════════╤═════════════╤══════════╤════════════╤═══════════╤══════╤══════╤══════╤══════╕
│ Assay                 │   Precision │   Recall │   F1-Score │   Support │   TN │   FP │   FN │   TP │
╞═══════════════════════╪═════════════╪══════════╪════════════╪═══════════╪══════╪══════╪══════╪══════╡
│ Assay 1: NR.AhR       │      0.7097 │   0.3014 │     0.4231 │       610 │  528 │    9 │   51 │   22 │
├───────────────────────┼─────────────┼──────────┼────────────┼───────────┼──────┼──────┼──────┼────