### Training the final models

In [1]:
%matplotlib qt4
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.metrics import roc_curve, auc,roc_auc_score,f1_score,confusion_matrix
import numpy as np
print (__doc__)

# Locations of the two preprocessed CSV exports. In both files the first
# column is used as the class label and the remaining columns as features.
bare_bones="../preprocessed/amlodipine_bare_bones.csv"
no_blood="../preprocessed/amlodipine_no_blood.csv"

# Load the bare-bones dataset. The `with` block closes the file handle as
# soon as parsing is done; the previous version left both handles open.
with open(bare_bones) as f:
    f.readline()  # skip the header
    data = np.loadtxt(fname=f, delimiter=',', dtype='double')
Y_bare_bones = data[:, 0]
X_bare_bones = data[:, 1:data.shape[1]]

# Load the no-blood dataset in exactly the same way.
with open(no_blood) as f:
    f.readline()  # skip the header
    data = np.loadtxt(fname=f, delimiter=',', dtype='double')
Y_no_blood = data[:, 0]
X_no_blood = data[:, 1:data.shape[1]]


Automatically created module for IPython interactive environment


In [2]:
# Recode the group labels: group 1 -> 0 and group 3 -> 1.
# NOTE: the label arrays are modified in place and the mapping is not
# idempotent -- executing this cell a second time would turn the freshly
# written 1s into 0s. Re-run the loading cell before re-running this one.
Y_no_blood[Y_no_blood==1]=0
Y_no_blood[Y_no_blood==3]=1

Y_bare_bones[Y_bare_bones==1]=0
Y_bare_bones[Y_bare_bones==3]=1

# Standardize every feature column to zero mean and unit variance.
# A constant column has std == 0 and would become NaN/inf after the division,
# so its divisor is replaced by 1 (the column then simply becomes all zeros).
mean = X_bare_bones.mean(axis=0)
std = X_bare_bones.std(axis=0)
std[std == 0] = 1.0
X_bare_bones = (X_bare_bones - mean) / std

# `mean` and `std` are rebound here, so after this cell they hold the
# no-blood statistics only; the bare-bones statistics are not kept.
mean = X_no_blood.mean(axis=0)
std = X_no_blood.std(axis=0)
std[std == 0] = 1.0
X_no_blood = (X_no_blood - mean) / std

In [3]:
# Tick labels shared by every confusion-matrix plot (one entry per class).
class_labels_=np.array(['Group 1','Group 3'])
def plot_confusion_matrix(cm,name,title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as a heat map on the current pyplot axes.

    Parameters
    ----------
    cm : array-like
        Matrix handed to ``plt.imshow``. The axes are labelled 'True label'
        (y) and 'Predicted label' (x) and ticked with ``class_labels_``.
    name : object
        Accepted for compatibility with existing callers; not used here.
    title : str
        Title placed above the plot.
    cmap : matplotlib colormap
        Colormap used for the cells.

    Returns
    -------
    None. The function only draws; showing or saving is left to the caller.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(class_labels_))
    plt.xticks(tick_marks,class_labels_, rotation=45)
    plt.yticks(tick_marks,class_labels_)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # tight_layout is a one-shot adjustment based on what is drawn at call
    # time, so it has to run after the axis labels have been added.
    plt.tight_layout()

In [11]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn import linear_model

# Fixed seed: without it the random split -- and with it the fitted model and
# the reported score -- changes on every execution of this cell.
RANDOM_STATE = 42

# Split the no-blood dataset into 70% training and 30% test samples.
X_train, X_test, y_train, y_test = train_test_split(
    X_no_blood, Y_no_blood, test_size=.3, random_state=RANDOM_STATE)

# Fit a logistic regression on the training part and evaluate on the test part.
clf = linear_model.LogisticRegression()
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
y_predic = clf.predict(X_test)

# "Confidence factor": the raw decision_function values, min-max rescaled to
# [0, 1]. NOTE: the scaling uses the min and max of this particular test set,
# so the values are only comparable within this run and are not probabilities.
y_conf = clf.decision_function(X_test)
# Single-argument print(...) calls emit the same text on Python 2 and 3.
print('min= %s  and max=  %s' % (y_conf.min(axis=0), y_conf.max(axis=0)))
y_conf = (y_conf - y_conf.min(axis=0)) / (y_conf.max(axis=0) - y_conf.min(axis=0))
print(y_conf.min(axis=0))
print('Logistic Regression Score:  %s' % (score,))
print('The confidence factor for the test set :  %s' % (y_conf,))
print('The classification result is :  %s' % (y_predic,))

min= -7.61614969163  and max=  8.95765210687
0.0
Logistic Regression Score:  1.0
The confidence factor for the test set :  [ 0.83567748  0.18728335  0.57486947  0.26708492  0.76476342  0.6304039   1.
  0.19602383  0.05905391  0.87046448  0.88693297  0.12253358  0.15826262
  0.76066571  0.74923676  0.54269308  0.15550521  0.01040882  0.79922237
  0.          0.10882229  0.94380791  0.77825771  0.50540674  0.13602786
  0.11016258  0.12829963  0.11937792  0.01371129  0.90206138  0.80695038
  0.01371129  0.69932157  0.60965646  0.74978204  0.10439653  0.8158721
  0.89433315  0.10439653  0.77847471  0.73392733]
The classification result is :  [ 1.  0.  1.  0.  1.  1.  1.  0.  0.  1.  1.  0.  0.  1.  1.  1.  0.  0.
  1.  0.  0.  1.  1.  1.  0.  0.  0.  0.  0.  1.  1.  0.  1.  1.  1.  0.
  1.  1.  0.  1.  1.]


**Saving the Trained Model**

In [5]:
import os
from sklearn.externals import joblib

# NOTE: `clf` is whichever estimator was fitted most recently (the name is
# reused by the SVM cell), so run this right after the logistic regression
# cell to be sure the no-blood model is the one being saved.
# joblib.dump does not create missing folders, so make sure 'models' exists.
if not os.path.isdir('models'):
    os.makedirs('models')
joblib.dump(clf, 'models/no_blood.pkl')

['no_blood.pkl',
 'no_blood.pkl_01.npy',
 'no_blood.pkl_02.npy',
 'no_blood.pkl_03.npy',
 'no_blood.pkl_04.npy']

In [6]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn import linear_model

# Fixed seed: without it the random split -- and with it the fitted model and
# the reported score -- changes on every execution of this cell.
RANDOM_STATE = 42

# Split the bare-bones dataset into 70% training and 30% test samples.
X_train, X_test, y_train, y_test = train_test_split(
    X_bare_bones, Y_bare_bones, test_size=.3, random_state=RANDOM_STATE)

# Fit an SVM (default kernel, probability estimates enabled) on the training
# part and evaluate it on the held-out part.
clf = SVC(probability=True)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
y_predic = clf.predict(X_test)

# "Confidence factor": the raw decision_function values, min-max rescaled to
# [0, 1]. NOTE: the scaling uses the min and max of this particular test set,
# so the values are only comparable within this run and are not probabilities.
y_conf = clf.decision_function(X_test)
y_conf = (y_conf - y_conf.min(axis=0)) / (y_conf.max(axis=0) - y_conf.min(axis=0))
# Single-argument print(...) calls emit the same text on Python 2 and 3.
print('SVM Classification Score:  %s' % (score,))
print('The confidence factor for the test set :  %s' % (y_conf,))
print('The classification result is :  %s' % (y_predic,))

SVM Classification Score:  1.0
The confidence factor for the test set :  [ 0.86823894  0.06417253  0.07309179  0.89872246  0.75682218  0.05541037
  0.02220725  0.02432834  0.01028897  0.          0.03571522  0.89153526
  0.05847782  0.02893455  0.9785775   0.92531499  0.03141219  0.06033073
  0.06689777  0.89069221  0.06005167  0.03593348  0.70024955  0.89243608
  0.02087152  0.83218788  0.90971747  0.9373964   0.98840043  0.03661995
  0.05849149  0.91023813  0.05985323  0.87758812  0.88125954  0.12973968
  1.          0.99270369  0.96739011  0.05681649  0.92466511]
The classification result is :  [ 1.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  1.  0.  0.  1.  1.  0.  0.
  0.  1.  0.  0.  1.  1.  0.  1.  1.  1.  1.  0.  0.  1.  0.  1.  1.  0.
  1.  1.  1.  0.  1.]


In [7]:
import os
from sklearn.externals import joblib

# NOTE: `clf` is whichever estimator was fitted most recently (the name is
# shared with the logistic regression cell), so run this right after the SVM
# cell to be sure the bare-bones model is the one being saved.
# joblib.dump does not create missing folders, so make sure 'models' exists.
if not os.path.isdir('models'):
    os.makedirs('models')
joblib.dump(clf, 'models/bare_bones.pkl')

['bare_bones.pkl',
 'bare_bones.pkl_01.npy',
 'bare_bones.pkl_02.npy',
 'bare_bones.pkl_03.npy',
 'bare_bones.pkl_04.npy',
 'bare_bones.pkl_05.npy',
 'bare_bones.pkl_06.npy',
 'bare_bones.pkl_07.npy',
 'bare_bones.pkl_08.npy',
 'bare_bones.pkl_09.npy',
 'bare_bones.pkl_10.npy',
 'bare_bones.pkl_11.npy']

In [8]:
# Number of feature columns in the no-blood design matrix.
print(X_no_blood.shape[1])

5


In [9]:
# Number of feature columns in the bare-bones design matrix.
print(X_bare_bones.shape[1])

4
