In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.linear_model import LogisticRegression
import random
from itertools import chain
from collections import Counter
import numpy as np
import scipy

from ipynb.fs.full.GenerateData import generatedata
#from ipynb.fs.full.Logistic_Functions import logistic_regression,log_likelihood, sigmoid
from ipynb.fs.full.Logistic_Functions import sigmoid,loss,gradient_descent,gradient_ascent,update_weight_loss,log_likelihood,update_weight_mle

In [17]:
# Load the working DataFrame through the project helper `generatedata`
# (imported at the top of the notebook from the GenerateData notebook).
data = generatedata()

In [18]:
# Preview the first rows of the loaded data.
# NOTE(review): the rendered output includes name and date-of-birth columns —
# consider clearing or redacting this output before sharing the notebook.
data.head()

Unnamed: 0,index,id,name,first,last,sex,race,dob,age,age_cat,...,r_charge_desc,r_jail_in,r_jail_out,is_violent_recid,num_vr_cases,vr_case_number,vr_charge_degree,vr_offense_date,vr_charge_desc,release
0,0,1,miguel hernandez,miguel,hernandez,Male,Other,1947-04-18 00:00:00.000000,69,Greater than 45,...,,,,0,,,,,,1
1,1,2,michael ryan,michael,ryan,Male,Caucasian,1985-02-06 00:00:00.000000,31,25 - 45,...,,,,0,,,,,,0
2,2,3,kevon dixon,kevon,dixon,Male,African-American,1982-01-22 00:00:00.000000,34,25 - 45,...,Felony Battery (Dom Strang),,,1,,13009779CF10A,(F3),2013-07-05 00:00:00.000000,Felony Battery (Dom Strang),1
3,3,4,ed philo,ed,philo,Male,African-American,1991-05-14 00:00:00.000000,24,Less than 25,...,Driving Under The Influence,2013-06-16 09:05:47.000000,2013-06-16 07:18:55.000000,0,,,,,,0
4,4,5,marcu brown,marcu,brown,Male,African-American,1993-01-21 00:00:00.000000,23,Less than 25,...,,,,0,,,,,,0


In [19]:
# Tally the values of the `release` target (released vs. remanded)
data['release'].value_counts()

1    6804
0    4938
Name: release, dtype: int64

In [20]:
# Cross-tabulate decile_score (rows) against release (columns)
pd.crosstab(index=data['decile_score'], columns=data['release'])

release,0,1
decile_score,Unnamed: 1_level_1,Unnamed: 2_level_1
1,249,2328
2,145,1427
3,110,1149
4,623,576
5,517,517
6,464,529
7,814,86
8,733,63
9,734,68
10,549,61


In [21]:
# Create 0/1 indicator (dummy) columns for sex and race.
# The dtype is pinned so the indicators stay numeric 0/1 (as in the displayed output)
# on every pandas version: from pandas 2.0 on, get_dummies defaults to bool.
sex_dummies = pd.get_dummies(data['sex'], dtype=np.uint8)
race_dummies = pd.get_dummies(data['race'], dtype=np.uint8)

data['sex_1_male'] = sex_dummies['Male']

# Race label as it appears in the `race` column -> identifier-friendly column name
race_columns = {
    'African-American': 'African_American',
    'Asian': 'Asian',
    'Caucasian': 'Caucasian',
    'Hispanic': 'Hispanic',
    'Native American': 'Native_American',
    'Other': 'Other',
}
for race_label, column_name in race_columns.items():
    data[column_name] = race_dummies[race_label]

data.head()

Unnamed: 0,index,id,name,first,last,sex,race,dob,age,age_cat,...,vr_offense_date,vr_charge_desc,release,sex_1_male,African_American,Asian,Caucasian,Hispanic,Native_American,Other
0,0,1,miguel hernandez,miguel,hernandez,Male,Other,1947-04-18 00:00:00.000000,69,Greater than 45,...,,,1,1,0,0,0,0,0,1
1,1,2,michael ryan,michael,ryan,Male,Caucasian,1985-02-06 00:00:00.000000,31,25 - 45,...,,,0,1,0,0,1,0,0,0
2,2,3,kevon dixon,kevon,dixon,Male,African-American,1982-01-22 00:00:00.000000,34,25 - 45,...,2013-07-05 00:00:00.000000,Felony Battery (Dom Strang),1,1,1,0,0,0,0,0
3,3,4,ed philo,ed,philo,Male,African-American,1991-05-14 00:00:00.000000,24,Less than 25,...,,,0,1,1,0,0,0,0,0
4,4,5,marcu brown,marcu,brown,Male,African-American,1993-01-21 00:00:00.000000,23,Less than 25,...,,,0,1,1,0,0,0,0,0


In [23]:
# Keep only the target plus the columns used below, then build the train / test split.
# NOTE: this is just a subset of the features we would examine in reality.
data = data.loc[:, ['release', 'decile_score', 'age', 'is_violent_recid']]

# target / features
y = data.release
X = data.drop(columns='release')
class_names = y.unique()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=212)

# Set decile_score aside as its own series for each split ...
P_alg_train, P_alg_test = X_train['decile_score'], X_test['decile_score']

# ... and remove it from the feature matrices
X_train, X_test = (part.drop(columns='decile_score') for part in (X_train, X_test))

In [None]:
def initialize_parameters(num_params):
    """
    Build the starting values for the model parameters.

    Argument:
    num_params -- number of coefficients to create (rows of B)

    Returns:
    parameters -- python dictionary with three entries:
                    "B" -- numpy array of shape (num_params, 1) holding small random values
                    "b" -- constant term, starts at 0
                    "w" -- scalar, starts at 0
    """
    # Fixed seed so every call yields the same B. Note this reseeds numpy's global RNG.
    np.random.seed(2)

    coefficients = 0.01 * np.random.randn(num_params, 1)
    intercept, weight = 0, 0

    # Sanity checks on what is about to be returned
    assert coefficients.shape == (num_params, 1)
    for scalar in (intercept, weight):
        assert isinstance(scalar, (float, int))

    return {"B": coefficients, "b": intercept, "w": weight}

In [None]:
def sigmoid(z):
    """
    Compute the sigmoid of z in a numerically stable way.

    Uses the identity 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z))) and evaluates the
    log term with np.logaddexp, so a large negative z never overflows np.exp
    (the direct formula emits an overflow RuntimeWarning there).

    Arguments:
    z -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(z)
    """
    # NOTE(review): this definition shadows the `sigmoid` imported from
    # Logistic_Functions in the import cell; keep only one of the two.

    # log(1 + exp(-z)), computed without forming exp(-z) directly
    log_denominator = np.logaddexp(0, np.negative(z))
    s = np.exp(-log_denominator)

    return s

In [None]:
def forward_propagation(X, parameters, delta, algo_probs=None):
    """
    Compute the posterior probability P_post for every observation.

    The prior is a logistic model, P_prior = sigmoid(B.T @ X + b). Element-wise, where
    |P_prior - P_algo| < delta the two are blended as
    P_post = w * P_algo + (1 - w) * P_prior; everywhere else P_post = P_prior.

    Argument:
    X -- input data of shape (# params, # obs); this orientation is what np.dot(B.T, X) requires
    parameters -- python dictionary containing "B", "b" and "w" (output of initialization function)
    delta -- threshold for difference between P_prior & P_algo
    algo_probs -- optional P_algo values, broadcastable against P_prior (e.g. shape (# obs,)
                  or (1, # obs)). When omitted, the module-level name `P_algo` is used.

    Returns:
    P_post -- posterior probabilities, same shape as P_prior
    cache -- a dictionary containing "P_priorodds", "P_prior" and "P_post"
    """
    # Retrieve each parameter from the dictionary "parameters"
    B = parameters['B']
    b = parameters['b']
    w = parameters['w']

    if algo_probs is None:
        # Backward-compatible fallback to the global the function used to read.
        # NOTE(review): no cell in view defines `P_algo` (the split cell creates
        # `P_alg_train` / `P_alg_test`), so prefer passing algo_probs explicitly.
        algo_probs = P_algo
    algo_probs = np.asarray(algo_probs, dtype=float)

    # Implement Forward Propagation to calculate P_post (probabilities)
    P_priorodds = np.dot(B.T, np.asarray(X, dtype=float)) + b
    P_prior = sigmoid(P_priorodds)

    # Element-wise gate: a plain `if` on an array comparison raises
    # "truth value of an array is ambiguous" for more than one observation.
    within_delta = np.abs(P_prior - algo_probs) < delta
    P_post = np.where(within_delta, (w * algo_probs) + ((1 - w) * P_prior), P_prior)

    # Guards against algo_probs silently broadcasting P_post to a larger shape
    assert P_post.shape == P_prior.shape, "algo_probs must broadcast to the shape of P_prior"

    cache = {"P_priorodds": P_priorodds,
             "P_prior": P_prior,
             "P_post": P_post}

    return P_post, cache

In [None]:
def compute_cost(P_post, Y, parameters, eps=1e-15):
    """
    Computes the mean binary cross-entropy cost

        cost = -(1/m) * sum( Y * log(P_post) + (1 - Y) * log(1 - P_post) )

    Arguments:
    P_post -- The scaled sigmoid output (probability)
    Y -- "true" labels, broadcastable against P_post
    parameters -- python dictionary containing your parameters B, b, w
                  (not used by the computation; kept so existing callers keep working)
    eps -- probabilities are clipped to [eps, 1 - eps] before taking logs so that an
           exact 0 or 1 gives a large finite cost instead of inf / nan

    Returns:
    cost -- cross-entropy cost as a python float
    """
    probs = np.clip(np.asarray(P_post, dtype=float), eps, 1 - eps)
    labels = np.asarray(Y, dtype=float)

    # Number of examples. Using .size (not .shape[0]) keeps this a mean
    # when the labels come as a (1, m) row vector as well as a flat (m,) vector.
    m = labels.size

    # Compute the cross-entropy cost
    logprobs = np.multiply(np.log(probs), labels) + np.multiply(np.log(1 - probs), (1 - labels))
    cost = - np.sum(logprobs) / m

    cost = float(np.squeeze(cost))  # makes sure cost is the dimension we expect.
                                    # E.g., turns [[17]] into 17
    assert(isinstance(cost, float))

    return cost

In [None]:
def backward_propagation(parameters, cache, X, Y, delta, algo_probs=None):
    """
    Implement the backward propagation

    Applies the chain rule through
        P_priorodds = B.T @ X + b
        P_prior     = sigmoid(P_priorodds)
        P_post      = w * P_algo + (1 - w) * P_prior   where |P_prior - P_algo| < delta
                    = P_prior                          otherwise
    for the cross-entropy loss of P_post against Y.

    Arguments:
    parameters -- python dictionary containing our parameters (only "w" is read here)
    cache -- a dictionary containing P_priorodds, P_prior, and P_post
    X -- input data of shape (2, number of examples); row 0 / row 1 are the features
         belonging to B1 / B2
    Y -- "true" labels vector of shape (1, number of examples)
    delta -- threshold for difference between P_prior & P_algo (same value as in the forward pass)
    algo_probs -- optional P_algo values, broadcastable against P_prior. When omitted,
                  the module-level name `P_algo` is used.

    Returns:
    grads -- python dictionary with keys "db", "dB1", "dB2", "dw". Each entry holds the
             per-example gradient (nothing is summed or averaged over examples here).
    """
    w = parameters['w']

    # Retrieve the forward-pass values from dictionary "cache".
    P_post = np.asarray(cache['P_post'], dtype=float)
    P_prior = np.asarray(cache["P_prior"], dtype=float)

    features = np.asarray(X, dtype=float)
    labels = np.asarray(Y, dtype=float)

    if algo_probs is None:
        # Backward-compatible fallback to the global the function used to read.
        # NOTE(review): no cell in view defines `P_algo`; prefer passing algo_probs.
        algo_probs = P_algo
    algo_probs = np.asarray(algo_probs, dtype=float)

    # Same element-wise gate as the forward pass (a plain `if` on an array raises)
    within_delta = np.abs(P_prior - algo_probs) < delta

    # intermediate derivatives

    # dL / dPost
    dLdPost = (P_post - labels) / ((P_post * (1 - P_post)))

    # dPost / dw: d/dw [w*P_algo + (1-w)*P_prior] = P_algo - P_prior (0 outside the gate)
    dPostdw = np.where(within_delta, algo_probs - P_prior, 0.0)

    # dPost / dPrior
    dPostdPrior = np.where(within_delta, 1 - w, 1.0)

    # dPrior / dPriorOdds
    dPriordPriorOdds = P_prior * (1 - P_prior)

    # dPriorOdds / dB: taken from the X argument, not from a notebook global
    dPriorOddsdB1 = features[0]
    dPriorOddsdB2 = features[1]

    # Backward propagation: calculate dLdb, dLdB1, dLdB2, dLdw

    # Shared factor dL / dPriorOdds (dPriorOdds / db is 1)
    dLdPriorOdds = dLdPost * dPostdPrior * dPriordPriorOdds

    dLdw = dLdPost * dPostdw

    dLdB1 = dLdPriorOdds * dPriorOddsdB1

    dLdB2 = dLdPriorOdds * dPriorOddsdB2

    dLdb = dLdPriorOdds

    # store gradients
    grads = {"db": dLdb,
             "dB1": dLdB1,
             "dB2": dLdB2,
             "dw": dLdw}

    return grads

In [None]:
# Preview the first rows of the training features
# (call the method; a bare `X_train.head` only shows the bound-method repr)
X_train.head()