## Logistic Regression

In [1]:
def logistic_mod(df, logProb = 1.0):
    """Fit a logistic regression of ``z`` on ``x`` and ``y`` and label each row.

    The model is trained and scored on the same rows of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the feature columns ``x`` and ``y`` and the label
        column ``z``.
    logProb : float, default 1.0
        Threshold compared against the ratio ``log_proba[1] / log_proba[0]``
        of each row. A row is labelled 1 when ``logProb`` is strictly greater
        than that ratio, otherwise 0.
        NOTE(review): both log-probabilities are <= 0, so at the default of
        1.0 this appears to pick column 1 whenever it is the more probable
        class -- assumes ``z`` is binary with the positive class in column 1;
        confirm against the caller's data.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place with a new ``predicted``
        column of 0/1 values.
    """
    from sklearn import linear_model

    ## Prepare data for model
    # ``as_matrix()`` was removed in pandas 1.0; ``.values`` gives the same
    # arrays on both old and new pandas. Selecting two columns already yields
    # an (nrow, 2) array, so no reshape is needed.
    X = df[['x', 'y']].values
    Y = df['z'].values.ravel()
    ## Compute the logistic regression model
    logr = linear_model.LogisticRegression().fit(X, Y)
    ## Compute the y values
    log_probs = logr.predict_log_proba(X)
    df['predicted'] = [1 if (logProb > p[1]/p[0]) else 0 for p in log_probs]
    return df

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or NaN if the denominator is 0."""
    if denominator == 0:
        return float('nan')
    return float(numerator) / float(denominator)


def eval_logistic(df):
    """Plot and print an evaluation of the 0/1 predictions stored in ``df``.

    Rows are split into true/false positives/negatives by comparing the
    ``predicted`` column with the ``z`` column. The four groups are drawn on
    one scatter plot of ``y`` against ``x`` (blue = correct, red = wrong;
    '+' for true positives and false negatives, 'o' for false positives and
    true negatives), then the confusion matrix, accuracy, precision and
    recall are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``x``, ``y``, ``z`` and ``predicted``.

    Returns
    -------
    str
        Always the string ``'Done'``.
    """
    import matplotlib.pyplot as plt
    import pandas as pd

    predicted_pos = df['predicted'] == 1
    predicted_neg = df['predicted'] == 0
    correct = df['z'] == df['predicted']

    truePos = df[predicted_pos & correct]
    falsePos = df[predicted_pos & ~correct]
    trueNeg = df[predicted_neg & correct]
    falseNeg = df[predicted_neg & ~correct]

    fig = plt.figure(figsize=(5, 5))
    ax = fig.gca()
    # (rows, colour, marker, marker size) for each outcome group.
    layers = [
        (truePos, 'DarkBlue', '+', 80),
        (falsePos, 'Red', 'o', 40),
        (trueNeg, 'DarkBlue', 'o', 40),
        (falseNeg, 'Red', '+', 80),
    ]
    for subset, color, marker, size in layers:
        # An empty group (e.g. no false positives) has nothing to draw, and
        # plotting an empty frame can raise in pandas, so skip it.
        if subset.empty:
            continue
        subset.plot(kind='scatter', x='x', y='y', ax=ax,
                    alpha=1.0, color=color, marker=marker, s=size)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_title('Classes vs X and Y')

    TP = truePos.shape[0]
    FP = falsePos.shape[0]
    TN = trueNeg.shape[0]
    FN = falseNeg.shape[0]

    # Rows are the actual class, columns are the predicted class.
    confusion = pd.DataFrame({'Positive': [FP, TP],
                              'Negative': [TN, FN]},
                             index=['TrueNeg', 'TruePos'])
    # Each metric is NaN instead of a ZeroDivisionError when its denominator
    # is zero (empty frame, nothing predicted positive, no actual positives).
    accuracy = _safe_ratio(TP + TN, TP + TN + FP + FN)
    precision = _safe_ratio(TP, TP + FP)
    recall = _safe_ratio(TP, TP + FN)

    print(confusion)
    print('accuracy = ' + str(accuracy))
    print('precision = ' + str(precision))
    print('recall = ' + str(recall))

    return 'Done'