In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV

## Load data

In [None]:
# Read the feature table (path is relative to the notebook's working directory)
# into the notebook-wide `data` frame that the modelling cells below index into.
data = pd.read_csv('MA_Features.csv', encoding= 'utf-8')
# Print the column / dtype / non-null summary as a quick sanity check of the load.
data.info()

## Model analysis functions

In [5]:
def rocPlot(modelName, y_test, y_pred_prob):
    """Draw the ROC curve of one model and print its area under the curve.

    Parameters
    ----------
    modelName : str
        Used as the curve label and as the prefix of the plot title.
    y_test : array-like
        True labels, passed to ``roc_curve`` / ``roc_auc_score``.
    y_pred_prob : array-like
        Predicted scores for the positive class.
    """
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, y_pred_prob)

    # Dashed black diagonal from (0, 0) to (1, 1) as a visual reference line.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(false_pos_rate, true_pos_rate, label=modelName)
    plt.title(modelName + ' ROC Curve')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.show()

    auc = roc_auc_score(y_test, y_pred_prob)
    print('AUC: ', auc)

In [6]:
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix.

    Parameters
    ----------
    cm : numpy.ndarray
        Square matrix of counts; rows are drawn on the "True label" axis and
        columns on the "Predicted label" axis.
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool, default False
        When True every row is divided by its row total before printing and
        plotting. Rows whose total is zero are left as all zeros.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colormap used for the heat map.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A row with no samples would otherwise be divided by zero, filling it
        # with NaN and turning the text-colour threshold below into NaN too.
        # Dividing such a row by 1 keeps it at 0.
        row_totals[row_totals == 0] = 1
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Write each cell's value on top of the heat map; use white text on dark
    # cells (above half of the maximum) and black text on light ones.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run tight_layout only after the axis labels exist so that they are
    # taken into account when the layout is computed.
    plt.tight_layout()
    plt.show()

In [7]:
def gridSearch(model, param_grid, cv, x, y):
    """Fit a ``GridSearchCV`` over ``param_grid`` and return the winner.

    Parameters
    ----------
    model : estimator
        Estimator handed to ``GridSearchCV``.
    param_grid : dict or list of dicts
        Parameter grid to search.
    cv : int or cross-validation splitter
        Forwarded as the ``cv`` argument of ``GridSearchCV``.
    x, y : array-like
        Features and target the search is fitted on.

    Returns
    -------
    tuple
        ``(best_params_, best_score_)`` of the fitted search.
    """
    search = GridSearchCV(model, param_grid, cv=cv)
    search.fit(x, y)
    best_params, best_score = search.best_params_, search.best_score_
    return (best_params, best_score)

In [8]:
def parameterTuning(name, train_accuracy, train_f1, test_accuracy, test_f1, values):
    """Plot accuracy and F1 score against the values of a tuned parameter.

    Accuracy curves go on the left y-axis, F1 curves on a twin right y-axis;
    ``values`` is the shared x-axis. ``name`` prefixes the plot title.
    """
    plt.title(name +': Varying parameter')
    accuracy_ax = plt.gca()
    f1_ax = accuracy_ax.twinx()

    # Left axis: testing curve first, then training, so the default colour
    # cycle assigns the same colours as before.
    accuracy_curves = (
        (test_accuracy, 'Testing Accuracy'),
        (train_accuracy, 'Training Accuracy'),
    )
    for scores, curve_label in accuracy_curves:
        accuracy_ax.plot(values, scores, label=curve_label)

    # Right axis: F1 curves with explicit colours.
    f1_curves = (
        (test_f1, 'Testing F1 Score', 'red'),
        (train_f1, 'Training F1 Score', 'green'),
    )
    for scores, curve_label, curve_colour in f1_curves:
        f1_ax.plot(values, scores, label=curve_label, color=curve_colour)

    accuracy_ax.set_xlabel('values')
    accuracy_ax.set_ylabel('Accuracy')
    f1_ax.set_ylabel('F1 Score')

    accuracy_ax.legend(loc=0)
    f1_ax.legend(loc=0)
    plt.show()

In [18]:
def crossValidation(model, X, y, cv=10):
    """Evaluate ``model`` with cross-validated predictions and report metrics.

    Prints the classification report, macro F1 and accuracy, then plots the
    row-normalised confusion matrix.

    Parameters
    ----------
    model : estimator
        Unfitted estimator passed to ``cross_val_predict``.
    X, y : array-like
        Features and true labels; every metric is computed against this ``y``.
    cv : int or cross-validation splitter, default 10
        Forwarded to ``cross_val_predict``.
    """
    predictions = cross_val_predict(model, X, y, cv=cv)

    # scikit-learn metrics take (y_true, y_pred) in that order. Passing them
    # reversed swaps precision with recall in the report and transposes the
    # confusion matrix relative to the plot's "True label" rows.
    report = classification_report(y, predictions)
    conf_matrix = confusion_matrix(y, predictions)
    print(report)
    print('F1 score achieved {}'.format(f1_score(y, predictions, average="macro")))
    print('Accuracy score achieved {}'.format(accuracy_score(y, predictions)))
    # NOTE(review): tick labels assume the sorted class labels mean
    # Show, then No Show - confirm against the encoding of the target column.
    plot_confusion_matrix(conf_matrix, ['Show', 'No Show'], normalize=True)

In [11]:
def trainAndPredict(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and report metrics on the test split.

    Prints the classification report, macro F1 and accuracy, then plots the
    row-normalised confusion matrix.

    Parameters
    ----------
    model : estimator
        Estimator with ``fit`` and ``predict``; it is fitted in place.
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Held-out features and true labels used for scoring.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # scikit-learn metrics take (y_true, y_pred) in that order. Passing them
    # reversed swaps precision with recall in the report and transposes the
    # confusion matrix relative to the plot's "True label" rows.
    report = classification_report(y_test, predictions)
    conf_matrix = confusion_matrix(y_test, predictions)
    print(report)
    print('F1 score achieved {}'.format(f1_score(y_test, predictions, average="macro")))
    print('Accuracy score achieved {}'.format(accuracy_score(y_test, predictions)))
    # NOTE(review): tick labels assume the sorted class labels mean
    # Show, then No Show - confirm against the encoding of the target column.
    plot_confusion_matrix(conf_matrix, ['Show', 'No Show'], normalize=True)

In [14]:
def modelROC(model, modelName, X_train, y_train, X_test, y_test):
    """Fit ``model`` and plot its ROC curve on the test split.

    Parameters
    ----------
    model : estimator
        Estimator with ``fit`` and ``predict_proba``; it is fitted in place.
    modelName : str
        Name shown in the ROC plot.
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Held-out features and true labels used for the curve.
    """
    # Use the estimator that was passed in, not a notebook-level variable.
    model.fit(X_train, y_train)
    # Second probability column is used as the positive-class score
    # (presumably the "No Show" class - confirm via model.classes_).
    y_pred_prob = model.predict_proba(X_test)[:, 1]
    rocPlot(modelName, y_test, y_pred_prob)

In [15]:
def modelParameterTuning(modelName, initializer, param, X_train, y_train, X_test, y_test):
    """Fit one model per parameter value and plot train/test accuracy and F1.

    Parameters
    ----------
    modelName : str
        Name used in the plot title.
    initializer : callable
        Called with a single parameter value; must return an unfitted
        estimator exposing ``fit`` and ``predict``.
    param : sequence
        Parameter values to try; also used as the x-axis of the plot.
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Held-out features and labels.
    """
    n_values = len(param)
    train_accuracy = np.empty(n_values)
    test_accuracy = np.empty(n_values)
    train_f1 = np.empty(n_values)
    test_f1 = np.empty(n_values)

    # Iterate over the values that were passed in, not a notebook-level list.
    for i, value in enumerate(param):
        model = initializer(value)
        model.fit(X_train, y_train)

        # Score the model fitted in this iteration on the training split.
        train_pred = model.predict(X_train)
        train_accuracy[i] = accuracy_score(y_train, train_pred)
        train_f1[i] = f1_score(y_train, train_pred, average="macro")

        # Score the same model on the held-out split.
        test_pred = model.predict(X_test)
        test_accuracy[i] = accuracy_score(y_test, test_pred)
        test_f1[i] = f1_score(y_test, test_pred, average="macro")

    # Plot once, after every slot of the np.empty arrays has been filled;
    # plotting inside the loop would draw uninitialised entries.
    parameterTuning(modelName, train_accuracy, train_f1, test_accuracy, test_f1, param)

In [16]:
def modelGridSearch(model, cv, param_grid=None, X=None, y=None):
    """Grid-search ``model`` and print the best parameters and score.

    Parameters
    ----------
    model : estimator
        Estimator to tune.
    cv : int or cross-validation splitter
        Forwarded to ``gridSearch``.
    param_grid : dict or list of dicts, optional
        Parameter grid. When omitted, the notebook-level ``param_grid`` is used.
    X, y : array-like, optional
        Training features and labels. When omitted, the notebook-level
        ``X_train`` / ``y_train`` are used.

    Raises
    ------
    NameError
        If an argument is omitted and the corresponding notebook-level
        variable has not been defined.
    """
    def _notebook_global(name):
        # Existing two-argument calls relied on these notebook globals, so keep
        # them as a fallback while letting callers pass the data explicitly.
        if name not in globals():
            raise NameError(
                "name '{}' is not defined; pass it to modelGridSearch explicitly".format(name)
            )
        return globals()[name]

    if param_grid is None:
        param_grid = _notebook_global('param_grid')
    if X is None:
        X = _notebook_global('X_train')
    if y is None:
        y = _notebook_global('y_train')

    best_param, best_score = gridSearch(model, param_grid, cv, X, y)

    # Print the tuned parameters and score
    print("Tuned Parameters: {}".format(best_param))
    print("Best score is {}".format(best_score))

## Logistic regression models

### Model number 1

In [17]:
# Feature columns used by the first logistic-regression model.
predictors = [
    'Patient_Age', 
    'Patient_Gender', 
    'Patient_Scholarship', 
    'Patient_Hypertension', 
    'Patient_Diabetes', 
    'Patient_Alcoholism', 
    'Patient_Handicap', 
    'SMS_Received', 
    'Appointment_Date_Month',
    'Appointment_ElapsedTime', 
    'Saturday', 
    'Ratio_Adjusted', 
    'First_Time', 
    'Previous_Appointment',
    'No_Show_per_patient'
             ]
# Requires the `data` frame created by the "Load data" cell; if that cell has
# not been executed in this kernel, the next line raises NameError.
X = data[predictors]
# Target column: the No_Show flag.
y = data.No_Show
# Hold out 25% of the rows for testing. `stratify= y` keeps the class
# proportions of y in both splits; the fixed random_state makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state= 121, stratify= y)

NameError: name 'data' is not defined

## Neural network models

### Model number 1

## Bayes net models

### Model number 1

## KNN models

### Model number 1

## Random Forest models

### Model number 1