# Gridsearch SVC Tuning

#### Imports

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC

import warnings
warnings.filterwarnings('ignore')

## Data Imports

#### Morgan Fingerprint

In [2]:
cyp2c19_df = pd.read_pickle("././data/cyp_datasets/cyp2c19.pkl")
cyp2c9_df = pd.read_pickle("././data/cyp_datasets/cyp2c9.pkl")
cyp1a2_df = pd.read_pickle("././data/cyp_datasets/cyp1a2.pkl")
cyp2d6_df = pd.read_pickle("././data/cyp_datasets/cyp2d6.pkl")
cyp3a4_df = pd.read_pickle("././data/cyp_datasets/cyp3a4.pkl")

#### SwissADME Feature Set

In [3]:
cyp2c19_swiss_feat = pd.read_pickle('././data/cyp_datasets/cyp2c19_swiss_feat.pkl')
cyp2c9_swiss_feat = pd.read_pickle('././data/cyp_datasets/cyp2c9_swiss_feat.pkl')
cyp2d6_swiss_feat = pd.read_pickle('././data/cyp_datasets/cyp2d6_swiss_feat.pkl')
cyp1a2_swiss_feat = pd.read_pickle('././data/cyp_datasets/cyp1a2_swiss_feat.pkl')
cyp3a4_swiss_feat = pd.read_pickle('././data/cyp_datasets/cyp3a4_swiss_feat.pkl')

## Gridsearch SVC Tuning

In [4]:
   
def svc_tune(swiss_feat):
    
    X = swiss_feat.drop(columns=['index', 'Inhibition Observed'], axis=1)
    y = swiss_feat['Inhibition Observed']
    
        # Split the dataset in two equal parts
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0)
    
    print(__doc__)

    # To apply an classifier on this data, we need to flatten the image, to
    # turn the data in a (samples, feature) matrix:
    n_samples = len(X_train)


    # Set the parameters by cross-validation
    tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                         'C': [1, 10, 100, 1000]},
                        {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

    scores = ['precision', 'recall']

    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()

        clf = GridSearchCV(
            SVC(), tuned_parameters, scoring='%s_macro' % score,
            n_jobs=-1
        )
        clf.fit(X_train, y_train)

        print("Best parameters set found on development set:")
        print()
        print(clf.best_params_)
        print()
        print("Grid scores on development set:")
        print()
        means = clf.cv_results_['mean_test_score']
        stds = clf.cv_results_['std_test_score']
        for mean, std, params in zip(means, stds, clf.cv_results_['params']):
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean, std * 2, params))
        print()

        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print()
    return classification_report(y_true, y_pred)

### Markov Fingerprint svc_tune

#### CYP2c9

In [5]:
cyp2c19_svc_tune = svc_tune(cyp2c19_df)
# cyp2c19_svc_tune.to_csv('././data/model_output/cyp2c19_svc_tune.csv')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.467 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 10, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 100, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 1000, 'kernel': 'linear'}

Detailed classification report:

The model i

In [6]:
type(cyp2c19_svc_tune)

str

#### CYP2c9

In [7]:
cyp2c9_svc_tune = svc_tune(cyp2c9_df)
# cyp2c9_svc_tune.to_pickle('././data/model_output/cyp2c9_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.468 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1, 'kernel': 'linear'}
0.468 (+/-0.001) for {'C': 10, 'kernel': 'linear'}
0.468 (+/-0.001) for {'C': 100, 'kernel': 'linear'}
0.468 (+/-0.001) for {'C': 1000, 'kernel': 'linear'}

Detailed classification report:

The model i

#### CYP2d6

In [8]:
cyp2d6_svc_tune = svc_tune(cyp3a4_df)
# cyp2d6_svc_tune.to_pickle('././data/model_output/cyp2d6_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.468 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.540 (+/-0.136) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1, 'kernel': 'linear'}
0.500 (+/-0.078) for {'C': 10, 'kernel': 'linear'}
0.523 (+/-0.103) for {'C': 100, 'kernel': 'linear'}
0.531 (+/-0.105) for {'C': 1000, 'kernel': 'linear'}

Detailed classification report:

The mode

#### CYP1a2

In [9]:
cyp1a2_svc_tune = svc_tune(cyp1a2_df)
# cyp1a2_svc_tune.to_pickle('././data/model_output/cyp1a2_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.467 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 10, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 100, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 1000, 'kernel': 'linear'}

Detailed classification report:

The model i

#### CYp3a4

In [10]:
cyp3a4_svc_tune = svc_tune(cyp3a4_df)
# cyp3a4_svc_tune.to_pickle('././data/model_output/cyp3a4_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.468 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.540 (+/-0.136) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1, 'kernel': 'linear'}
0.500 (+/-0.078) for {'C': 10, 'kernel': 'linear'}
0.523 (+/-0.103) for {'C': 100, 'kernel': 'linear'}
0.531 (+/-0.105) for {'C': 1000, 'kernel': 'linear'}

Detailed classification report:

The mode

### SwissADME Features MC_NN

#### CYP2c19

In [11]:
cyp2c19sw_svc_tune = svc_tune(cyp2c19_swiss_feat)
# cyp2c19sw_svc_tune.to_pickle('././data/model_output/cyp2c19sw_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}

Grid scores on development set:

0.467 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.467 (+/-0.002) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.487 (+/-0.037) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.485 (+/-0.035) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.492 (+/-0.100) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 10, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 100, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 1000, 'kernel': 'linear'}

Detailed classification report:

The mod

#### CYP2c9

In [12]:
cyp2c9sw_svc_tune = svc_tune(cyp2c9_swiss_feat)
# cyp2c9sw_svc_tune.to_pickle('././data/model_output/cyp2c9sw_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}

Grid scores on development set:

0.468 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.540 (+/-0.135) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.569 (+/-0.402) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.515 (+/-0.066) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.519 (+/-0.202) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.493 (+/-0.030) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.468 (+/-0.002) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.468 (+/-0.001) for {'C': 1, 'kernel': 'linear'}
0.468 (+/-0.001) for {'C': 10, 'kernel': 'linear'}
0.468 (+/-0.001) for {'C': 100, 'kernel': 'linear'}
0.468 (+/-0.001) for {'C': 1000, 'kernel': 'linear'}

Detailed classification report:

The model

#### CYP2d6

In [13]:
cyp2d6sw_svc_tune = svc_tune(cyp2d6_swiss_feat)
# cyp2d6sw_svc_tune.to_pickle('././data/model_output/cyp2d6sw_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.467 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.517 (+/-0.201) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.510 (+/-0.072) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.505 (+/-0.039) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.513 (+/-0.131) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.467 (+/-0.001) for {'C': 1, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 10, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 100, 'kernel': 'linear'}
0.467 (+/-0.001) for {'C': 1000, 'kernel': 'linear'}

Detailed classification report:

The model 

#### CYP1a2

In [None]:
cyp1a2sw_svc_tune = svc_tune(cyp1a2_swiss_feat)
# cyp1a2sw_svc_tune.to_pickle('././data/model_output/cyp1a2sw_svc_tune.pkl')

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for precision



#### CYp3a4

In [None]:
cyp3a4sw_svc_tune = svc_tune(cyp3a4_swiss_feat)
# cyp3a4sw_svc_tune.to_pickle('././data/model_output/cyp3a4sw_svc_tune.pkl')