# Gridsearch SVC Tuning

#### Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn import svm

import warnings
warnings.filterwarnings('ignore')

## Data Imports

#### Morgan Fingerprint

In [2]:
cyp2c19_df = pd.read_pickle(".././data/cyp_datasets/cyp2c19.pkl")
cyp2c9_df = pd.read_pickle(".././data/cyp_datasets/cyp2c9.pkl")
cyp1a2_df = pd.read_pickle(".././data/cyp_datasets/cyp1a2.pkl")
cyp2d6_df = pd.read_pickle(".././data/cyp_datasets/cyp2d6.pkl")
cyp3a4_df = pd.read_pickle(".././data/cyp_datasets/cyp3a4.pkl")

#### SwissADME Feature Set

In [3]:
cyp2c19_swiss_feat = pd.read_pickle('.././data/cyp_datasets/cyp2c19_swiss_feat.pkl')
cyp2c9_swiss_feat = pd.read_pickle('.././data/cyp_datasets/cyp2c9_swiss_feat.pkl')
cyp2d6_swiss_feat = pd.read_pickle('.././data/cyp_datasets/cyp2d6_swiss_feat.pkl')
cyp1a2_swiss_feat = pd.read_pickle('.././data/cyp_datasets/cyp1a2_swiss_feat.pkl')
cyp3a4_swiss_feat = pd.read_pickle('.././data/cyp_datasets/cyp3a4_swiss_feat.pkl')

In [4]:
cyp2c19_df.head()

Unnamed: 0,index,PUBCHEM_ACTIVITY_OUTCOME,Inhibition Observed,0,1,2,3,4,5,6,...,54,55,56,57,58,59,60,61,62,63
0,5,2,1,1,1,1,0,1,1,0,...,1,1,0,0,0,0,0,1,0,0
1,10,2,1,1,0,0,0,1,1,0,...,0,0,0,1,1,1,0,0,1,0
2,15,1,1,0,0,0,0,0,0,0,...,1,1,1,0,1,0,0,1,0,0
3,20,1,1,0,0,0,0,0,1,0,...,1,0,1,1,1,0,0,1,0,1
4,25,1,1,1,1,1,1,1,0,0,...,0,1,1,0,1,1,0,1,0,1


## Gridsearch SVC Tuning

In [5]:
   
def svc_tune(swiss_feat):
    
    X = swiss_feat.drop(columns=['index', 'PUBCHEM_ACTIVITY_OUTCOME', 'Inhibition Observed'], axis=1)
    y = swiss_feat['Inhibition Observed']
    
        # Split the dataset in two equal parts
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=87)
    
    print(__doc__)

    # To apply an classifier on this data, we need to flatten the image, to
    # turn the data in a (samples, feature) matrix:
    n_samples = len(X_train)


    # Set the parameters by cross-validation
    tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                         'C': [1, 10, 100, 1000]},
                        {'kernel': ['classification'], 'C': [1, 10, 100, 1000]}]

    scores = ['precision', 'recall']

    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()

        clf = GridSearchCV(
            SVC(), tuned_parameters, scoring='%s_macro' % score,
            n_jobs=-1
        )
        clf.fit(X_train, y_train)

        print("Best parameters set found on development set:")
        print()
        print(clf.best_params_)
        print()
        print("Grid scores on development set:")
        print()
        means = clf.cv_results_['mean_test_score']
        stds = clf.cv_results_['std_test_score']
        for mean, std, params in zip(means, stds, clf.cv_results_['params']):
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean, std * 2, params))
        print()

        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print()
    return classification_report(y_true, y_pred)

### Morgan Fingerprint svc_tune

#### CYP2c9

In [6]:
cyp2c19_svc_tune = svc_tune(cyp2c19_df)
# cyp2c19_svc_tune.to_csv('././data/model_output/cyp2c19_svc_tune.csv')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.474 (+/-0.000) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
nan (+/-nan) for {'C': 1, 'kernel': 'classification'}
nan (+/-nan) for {'C': 10, 'kernel': 'classification'}
nan (+/-nan) for {'C': 100, 'kernel': 'classification'}
nan (+/-nan) for {'C': 1000, 'kernel': 'classification'}

Detailed classification repo

In [7]:
type(cyp2c19_svc_tune)

str

#### CYP2c9

In [8]:
cyp2c9_svc_tune = svc_tune(cyp2c9_df)
# cyp2c9_svc_tune.to_pickle('././data/model_output/cyp2c9_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.471 (+/-0.000) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
nan (+/-nan) for {'C': 1, 'kernel': 'classification'}
nan (+/-nan) for {'C': 10, 'kernel': 'classification'}
nan (+/-nan) for {'C': 100, 'kernel': 'classification'}
nan (+/-nan) for {'C': 1000, 'kernel': 'classification'}

Detailed classification repo

#### CYP2d6

In [9]:
cyp2d6_svc_tune = svc_tune(cyp3a4_df)
# cyp2d6_svc_tune.to_pickle('././data/model_output/cyp2d6_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.471 (+/-0.000) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
nan (+/-nan) for {'C': 1, 'kernel': 'classification'}
nan (+/-nan) for {'C': 10, 'kernel': 'classification'}
nan (+/-nan) for {'C': 100, 'kernel': 'classification'}
nan (+/-nan) for {'C': 1000, 'kernel': 'classification'}

Detailed classification repo

#### CYP1a2

In [10]:
cyp1a2_svc_tune = svc_tune(cyp1a2_df)
# cyp1a2_svc_tune.to_pickle('././data/model_output/cyp1a2_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.474 (+/-0.000) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.474 (+/-0.000) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
nan (+/-nan) for {'C': 1, 'kernel': 'classification'}
nan (+/-nan) for {'C': 10, 'kernel': 'classification'}
nan (+/-nan) for {'C': 100, 'kernel': 'classification'}
nan (+/-nan) for {'C': 1000, 'kernel': 'classification'}

Detailed classification repo

#### CYp3a4

In [11]:
cyp3a4_svc_tune = svc_tune(cyp3a4_df)
# cyp3a4_svc_tune.to_pickle('././data/model_output/cyp3a4_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.471 (+/-0.000) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.471 (+/-0.000) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
nan (+/-nan) for {'C': 1, 'kernel': 'classification'}
nan (+/-nan) for {'C': 10, 'kernel': 'classification'}
nan (+/-nan) for {'C': 100, 'kernel': 'classification'}
nan (+/-nan) for {'C': 1000, 'kernel': 'classification'}

Detailed classification repo

In [12]:
   
def svc_tune_sw(swiss_feat):
    
    X = swiss_feat.drop(columns=['index', 'Panel Name', 'Inhibition Observed'], axis=1)
    y = swiss_feat['Inhibition Observed']
    
        # Split the dataset in two equal parts
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=895)
    
    print(__doc__)

    # To apply an classifier on this data, we need to flatten the image, to
    # turn the data in a (samples, feature) matrix:
    n_samples = len(X_train)


    # Set the parameters by cross-validation
    tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                         'C': [1, 10, 100, 1000]},
                        {'kernel': ['classification'], 'C': [1, 10, 100, 1000]}]

    scores = ['precision', 'recall']

    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()

        clf = GridSearchCV(
            SVC(), tuned_parameters, scoring='%s_macro' % score,
            n_jobs=-1
        )
        clf.fit(X_train, y_train)

        print("Best parameters set found on development set:")
        print()
        print(clf.best_params_)
        print()
        print("Grid scores on development set:")
        print()
        means = clf.cv_results_['mean_test_score']
        stds = clf.cv_results_['std_test_score']
        for mean, std, params in zip(means, stds, clf.cv_results_['params']):
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean, std * 2, params))
        print()

        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print()
    return classification_report(y_true, y_pred)

### SwissADME Features MC_NN

#### CYP2c19

In [13]:
cyp2c19sw_svc_tune = svc_tune_sw(cyp2c19_swiss_feat)
# cyp2c19sw_svc_tune.to_pickle('././data/model_output/cyp2c19sw_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.469 (+/-0.000) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.469 (+/-0.000) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.469 (+/-0.000) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.469 (+/-0.000) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.494 (+/-0.054) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.469 (+/-0.000) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.492 (+/-0.036) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.469 (+/-0.000) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
nan (+/-nan) for {'C': 1, 'kernel': 'classification'}
nan (+/-nan) for {'C': 10, 'kernel': 'classification'}
nan (+/-nan) for {'C': 100, 'kernel': 'classification'}
nan (+/-nan) for {'C': 1000, 'kernel': 'classification'}

Detailed classification re

#### CYP2c9

In [14]:
cyp2c9sw_svc_tune = svc_tune_sw(cyp2c9_swiss_feat)
# cyp2c9sw_svc_tune.to_pickle('././data/model_output/cyp2c9sw_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}

Grid scores on development set:

0.470 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.470 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.470 (+/-0.001) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.470 (+/-0.001) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.492 (+/-0.051) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.470 (+/-0.001) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.496 (+/-0.030) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.529 (+/-0.146) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
nan (+/-nan) for {'C': 1, 'kernel': 'classification'}
nan (+/-nan) for {'C': 10, 'kernel': 'classification'}
nan (+/-nan) for {'C': 100, 'kernel': 'classification'}
nan (+/-nan) for {'C': 1000, 'kernel': 'classification'}

Detailed classification 

#### CYP2d6

In [15]:
cyp2d6sw_svc_tune = svc_tune_sw(cyp2d6_swiss_feat)
# cyp2d6sw_svc_tune.to_pickle('././data/model_output/cyp2d6sw_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.469 (+/-0.000) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.469 (+/-0.000) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.486 (+/-0.067) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.469 (+/-0.000) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.503 (+/-0.032) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.469 (+/-0.000) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.496 (+/-0.034) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.494 (+/-0.101) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
nan (+/-nan) for {'C': 1, 'kernel': 'classification'}
nan (+/-nan) for {'C': 10, 'kernel': 'classification'}
nan (+/-nan) for {'C': 100, 'kernel': 'classification'}
nan (+/-nan) for {'C': 1000, 'kernel': 'classification'}

Detailed classification re

#### CYP1a2

In [16]:
cyp1a2sw_svc_tune = svc_tune_sw(cyp1a2_swiss_feat)
# cyp1a2sw_svc_tune.to_pickle('././data/model_output/cyp1a2sw_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}

Grid scores on development set:

0.470 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.470 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.490 (+/-0.080) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.470 (+/-0.001) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.514 (+/-0.049) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.570 (+/-0.401) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.495 (+/-0.038) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.495 (+/-0.101) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
nan (+/-nan) for {'C': 1, 'kernel': 'classification'}
nan (+/-nan) for {'C': 10, 'kernel': 'classification'}
nan (+/-nan) for {'C': 100, 'kernel': 'classification'}
nan (+/-nan) for {'C': 1000, 'kernel': 'classification'}

Detailed classification r

#### CYp3a4

In [17]:
cyp3a4sw_svc_tune = svc_tune_sw(cyp3a4_swiss_feat)
# cyp3a4sw_svc_tune.to_pickle('././data/model_output/cyp3a4sw_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.470 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.470 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.487 (+/-0.067) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.470 (+/-0.001) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.514 (+/-0.065) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.470 (+/-0.001) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.513 (+/-0.040) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.470 (+/-0.001) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
nan (+/-nan) for {'C': 1, 'kernel': 'classification'}
nan (+/-nan) for {'C': 10, 'kernel': 'classification'}
nan (+/-nan) for {'C': 100, 'kernel': 'classification'}
nan (+/-nan) for {'C': 1000, 'kernel': 'classification'}

Detailed classification re