# Figure 1: metrics from final trained model - ROC, PR curves

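Run all cells to test the trained model on the befree and svensson (sven) gene sets and save the resulting reports and figures to the `befree/` and `sven/` directories.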

In [23]:
import os
import pickle as pkl
import pandas as pd
from sklearn.metrics import *
import matplotlib.pyplot as plt




# make an output directory for each positive gene set
# exist_ok=True keeps this cell re-runnable and avoids a separate
# check-then-create step
for output_dir in ('befree', 'sven'):
    os.makedirs(output_dir, exist_ok=True)

## 1. Load trained model

In [19]:
# NOTE: unpickling can execute arbitrary code contained in the file; only load
# model files from a trusted source.
# The `with` block closes the file handle as soon as the model is loaded.
with open('../results/final_model/nash_SVC_trained.pkl', 'rb') as model_file:
    nash_svc = pkl.load(model_file)

# Components of the loaded object that the rest of the notebook uses.
feature_selector = nash_svc.skb
model = nash_svc.svc

## 2. Define functions for testing

In [25]:
def evaluate(svc, test_X, test_y, save_path):
    """
    Calculates metrics for svc on a test set and saves them under save_path.

    Prints the classification report and writes classification_report.csv,
    roc.png, PR.png and confusion_matrix.png into save_path.

    :param svc: classifier providing predict(); it is also passed to the
        sklearn plot_* helpers below
    :param test_X: feature rows to predict on
    :param test_y: true labels, aligned with the rows of test_X
    :param save_path: directory the outputs are written to (created if missing)
    """
    def _save_show_close(filename):
        # Save first, then display, then release the figure. Calling
        # plt.show() after plt.close() is a no-op, so the figure would never
        # be displayed.
        plt.savefig(os.path.join(save_path, filename))
        plt.show()
        plt.close()

    os.makedirs(save_path, exist_ok=True)

    predicted_y = svc.predict(test_X)
    report = pd.DataFrame.from_dict(
        classification_report(test_y, predicted_y, output_dict=True)
    ).T
    print(report)
    report.to_csv(os.path.join(save_path, 'classification_report.csv'))

    # NOTE: plot_roc_curve, plot_precision_recall_curve and
    # plot_confusion_matrix were deprecated in scikit-learn 1.0 and removed in
    # 1.2; on newer versions use the corresponding *Display.from_estimator
    # class methods instead.
    plot_roc_curve(svc, test_X, test_y)
    plt.title('ROC')
    plt.plot([0, 1], [0, 1], 'b--')  # chance diagonal
    _save_show_close('roc.png')

    plot_precision_recall_curve(svc, test_X, test_y)
    # NOTE(review): this anti-diagonal is kept from the original figure; the
    # usual no-skill PR reference is a horizontal line at the positive-class
    # fraction - confirm which one is intended.
    plt.plot([0, 1], [1, 0], 'b--')
    plt.title('PR')
    _save_show_close('PR.png')

    plot_confusion_matrix(svc, test_X, test_y)
    _save_show_close('confusion_matrix.png')

In [21]:
def get_test_set(nash_svc, pos_genes):
    """
    Builds the test features and labels for one positive gene set.

    :param nash_svc: object exposing `dataset` (indexed by gene) and
        `neg_test_genes` (list of negative test genes)
    :param pos_genes: list of positive genes
    :return: (test_X, test_y) - rows of nash_svc.dataset for the positive
        genes followed by the negative test genes, and the matching labels
        (1 for each positive gene, 0 for each negative gene)
    """
    negatives = nash_svc.neg_test_genes
    row_labels = pos_genes + negatives
    labels = [1 for _ in range(len(pos_genes))]
    labels += [0 for _ in range(len(negatives))]
    features = nash_svc.dataset.loc[row_labels, ]
    return features, labels

## 3. Test on befree genes and save

In [24]:
# Positive set: befree genes loaded through the trained object's `M` attribute.
# NOTE(review): the meaning of threshold=0 is defined by load_befree_genes,
# which is not visible here - confirm.
befree_genes = nash_svc.M.load_befree_genes(threshold=0)  
# Positives are labelled 1, nash_svc.neg_test_genes are labelled 0 (see get_test_set).
test_X, test_y = get_test_set(nash_svc, befree_genes)
# NOTE(review): feature_selector is not applied to test_X before prediction -
# confirm that `model` expects the rows of nash_svc.dataset as-is.
# Report and figures are written to the 'befree' directory.
evaluate(model, test_X, test_y, 'befree')

              precision    recall  f1-score     support
0              0.615942  0.850000  0.714286  200.000000
1              0.870690  0.655844  0.748148  308.000000
accuracy       0.732283  0.732283  0.732283    0.732283
macro avg      0.743316  0.752922  0.731217  508.000000
weighted avg   0.770395  0.732283  0.734816  508.000000


## 4. Test on svensson genes and save

In [None]:
# Positive set: svensson genes loaded through the trained object's `M` attribute.
# NOTE(review): the meaning of threshold=0 is defined by load_svensson_genes,
# which is not visible here - confirm.
sven_genes = nash_svc.M.load_svensson_genes(threshold=0)  
# Positives are labelled 1, nash_svc.neg_test_genes are labelled 0 (see get_test_set).
# This rebinds test_X / test_y from the befree cell above.
test_X, test_y = get_test_set(nash_svc, sven_genes)
# Report and figures are written to the 'sven' directory.
evaluate(model, test_X, test_y, 'sven')

Modules.py                 refined_model.py
__pycache__                roc_thresholds_118sven.csv
benchmark.py               train_final_model.ipynb
module_params.ipynb
