In [7]:
# Package imports
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
import matplotlib
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import roc_curve
from sklearn import preprocessing
from sklearn.linear_model import (LinearRegression, Ridge,Lasso, RandomizedLasso)
from sklearn.feature_selection import RFE, f_regression
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn import datasets
from pybrain.utilities import percentError
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SoftmaxLayer
from pybrain.datasets.classification import ClassificationDataSet
from pybrain.tools.validation import Validator
from sklearn.metrics import confusion_matrix

def featureSelectionRFE():
    from sklearn.feature_selection import RFE
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression()
    # create the RFE model and select 3 attributes
    rfe = RFE(model, 10)
    rfe = rfe.fit(train_data[0:,1:], train_data[0:,0])
    # summarize the selection of the attributes
    print(rfe.support_)
    print(rfe.ranking_)
    print(rfe.n_features_)
    #Check the accuracy of the model
    rfe.score(train_data[0:,1:], train_data[0:,0])
    
def labelConversion(df):
# Discreet value integer encoder
    label_encoder = preprocessing.LabelEncoder()
    df['drugname'] = label_encoder.fit_transform(df['drugname'])
    df['route'] = label_encoder.fit_transform(df['route'])
    df['dose_unit'] = label_encoder.fit_transform(df['dose_unit'])
    df['dose_form'] = label_encoder.fit_transform(df['dose_form'])
    df['dose_freq'] = label_encoder.fit_transform(df['dose_freq'])
    df['mfr_sndr'] = label_encoder.fit_transform(df['mfr_sndr'])
    df['pt'] = label_encoder.fit_transform(df['pt'])
    df['outc_cod'] = label_encoder.fit_transform(df['outc_cod'])
    df['dose_amt'] = label_encoder.fit_transform(df['dose_amt'])
    return df    
    
    
def createDataFrame(str):
    perf_df = pd.read_csv(str ,sep=",",skipinitialspace=True,error_bad_lines=False, index_col=False, dtype='unicode')
    return perf_df  

# build a neural network
def build_neural_network(train_num_df_X, train_y, test_num_df_X, test_y ):
 
    #Calculating rows and columns for input dfs
    trn_rows,trn_cols=train_num_df_X.shape
    tst_rows,tst_cols=test_num_df_X.shape
       
    # build train dataset
    print("Inside build_neural_network : ")
    print("Building train dataset")
    train_data = ClassificationDataSet(trn_cols, 1 , nb_classes=7)
    for k in range(len(train_num_df_X)): 
        train_data.addSample(train_num_df_X.iloc[k],train_y.iloc[k]) 
    
    # build test dataset
    print("Building test dataset")
    test_data = ClassificationDataSet(tst_cols, 1 , nb_classes=7)
    for k in range(len(test_num_df_X)): 
        test_data.addSample(test_num_df_X.iloc[k],test_y.iloc[k])
        
 
    print("Train Dataset input length: {}".format(len(train_data['input'])))
    print("Train Dataset input|output dimensions are {}|{}".format(train_data.indim, train_data.outdim))
     
    print("Train Data length: {}".format(len(train_data)))
    print("Test Data length: {}".format(len(test_data)))
 
    # encode with one output neuron per class
    train_data._convertToOneOfMany()
    test_data._convertToOneOfMany()
 
    print("Train Data input|output dimensions are {}|{}".format(train_data.indim, train_data.outdim))
    print("Test Data input|output dimensions are {}|{}".format(test_data.indim, test_data.outdim))
 
    # build network (INPUT=10,HIDDEN=5,CLASSES=2,outclass=SoftmaxLayer)
    print("Building Neural network with 5 hidden layer")
    network = buildNetwork(train_data.indim,100,train_data.outdim,outclass=SoftmaxLayer)
 
    # train network
    print("Training the network, it may take a while(20-30 min)...")
    trainer = BackpropTrainer(network,dataset=train_data,momentum=0.1,verbose=True,weightdecay=0.01)
    trainer.trainOnDataset(train_data, 10) #training model on One epoch
 
    print("Total epochs: {}".format(trainer.totalepochs))
 
    # test network
    print("Predicting the output array with the trained model")
    output = network.activateOnDataset(test_data).argmax(axis=1)
     
    #Neural network Percent error and accuracy    
    print("Percent error: {}".format(percentError(output, test_data['class'])))
    accuracy=Validator.classificationPerformance(output, test_y)
    print("Model Accuracy: {}".format(accuracy))
    print("Classification report for Test data %s:\n%s\n"% (network, metrics.classification_report(test_y, output)))
    
    #Compute confusion metrics
    cm = confusion_matrix(test_y,output)
    print(cm)
   




In [8]:
#Creating DataFrame
print("Creating Test Dataframe...")
main_df= createDataFrame("../dockerImage/MergedFile_.csv")
print("Creating Training and Test Dataframe...")
train_df=main_df.sample(frac=0.8,random_state=200)
test_df =main_df.drop(train_df.index)

Creating Test Dataframe...
Creating Training and Test Dataframe...


In [9]:
#Setting the input parameter for Neural Network
print("#Setting the input parameter for Neural Network...")
cols_to_keep=['drugname','route','dose_amt','dose_unit','dose_form','dose_freq','mfr_sndr','pt']

#Setting the input parameter for Neural Network...


In [10]:
print("Checking all required columns in train and test dataframes")
train_num_df=labelConversion(train_df)
test_num_df= labelConversion(test_df)

Checking all required columns in train and test dataframes


In [11]:
#Setting the input parameter for Neural Network
print("Creating X and y variables for Train and Test Dataframes")
train_num_df_X = train_num_df[cols_to_keep]
outcome_num_train_y = train_num_df['outc_cod']

test_num_df_X = test_num_df[cols_to_keep]
outcome_num_test_y = test_num_df['outc_cod']

Creating X and y variables for Train and Test Dataframes


In [12]:
#Calling Neural Network
print("Calling neural network with train and test dataframes")
build_neural_network(train_num_df_X, outcome_num_train_y, test_num_df_X, outcome_num_test_y)

Calling neural network with train and test dataframes
Inside build_neural_network : 
Building train dataset
Building test dataset
Train Dataset input length: 297522
Train Dataset input|output dimensions are 8|1
Train Data length: 297522
Test Data length: 74380
Train Data input|output dimensions are 8|7
Test Data input|output dimensions are 8|7
Building Neural network with 5 hidden layer
Training the network, it may take a while(20-30 min)...
Total error:  0.0508140061168
Total error:  0.0508040353009
Total error:  0.0506605201272
Total error:  0.0507604312534
Total error:  0.0505839244297
Total error:  0.050685096024
Total error:  0.0505912943743
Total error:  0.0507837223023
Total error:  0.0507016171197
Total error:  0.0506183643558
Total epochs: 10
Predicting the output array with the trained model
Percent error: 66.27184727077172
Model Accuracy: 0.3372815272922829
Classification report for Test data FeedForwardNetwork-17
   Modules:
    [<BiasUnit 'bias'>, <LinearLayer 'in'>, <Sigm

  'precision', 'predicted', average, warn_for)
