In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.linear_model import LogisticRegression
import random
from itertools import chain
from collections import Counter
import numpy as np
import scipy
import math

from ipynb.fs.full.GenerateData import generatedata


In [None]:
random.seed(321)

In [None]:
def initialize_parameters(X,lower,upper):
    """
    Argument:
    X -- training data
    lower -- lower bound of scale
    upper -- upper bound of scale
    
    Returns:
    params -- python dictionary containing model parameters:
        B -- coefficient matrix
        b -- constant
        q -- constant
        mu_var -- variance of prior 
    """
    
    np.random.seed(212) 
    
    # of parameters
    n = X.shape[1]
    
    B = np.random.rand(n,1)*0.1 # randomly initialize the B_0, B_1 coefficients
    b = 0 # intialize  constants b,c @ 0
    q = np.random.rand(1)*10
    var = np.random.rand(1)*10
    tau = random.randint(lower, upper)
    
    parameters = {"B": B,
                  "b": b,
                  "q" : q,
                  "var_prior" : var,
                  "tau" : tau}
    
    return parameters

In [None]:
def initialize_dicts(X,decile_score,z):
    # Create dictionary to store data that is not being updated
    cache = {"mu_prior" : [],
         "mu_ra" : [],
         "var_ra" : [],
         "mu_post" : [],
         "var_post" : []
         }
    cache['mu_ra'] = np.array(decile_score)

    # create dictionary to store derivatives
    derivatives = {
        "dL": [],
        "dphipost_dmupost": [],
        "dphipost_dvarpost": [],
        "dphipost_dtau": [],
        "dmuprior_dB": [],
        "dmuprior_db": [],
        "dmupost_dmuprior": [],
        "dvarpost_dvarra": [],
        "dvarra_dvarprior": [],
        "dvarpost_dq": [],
        "dmupost_dq": [],
        "dvarra_dq": [],
        "dvarpost_dvarprior": []
    }
    
    # create dictionary to store gradients
    grads = {
        "dB": [],
        "db": [],
        "dq": [],
        "dvar_dprior": [],
        "dtau" : []
    }
        
    # initialize parameters
    parameters = initialize_parameters(X,1,10)
    
    return cache, parameters, derivatives, grads

In [5]:
data = generatedata()

# create dummy variables
sex_dummies = pd.get_dummies(data['sex'])
race_dummies = pd.get_dummies(data['race'])

data['sex_1_male'] = sex_dummies['Male']
data['African_American']=race_dummies['African-American']
data['Asian']=race_dummies['Asian']
data['Caucasian']=race_dummies['Caucasian']
data['Hispanic'] = race_dummies['Hispanic']
data['Native_American']=race_dummies['Native American']
data['Other']=race_dummies['Other']

# select out relevant data & generate the train and test data

#data = data[['release','decile_score','age','is_violent_recid']] # NOTE::: This is just a subset of features we would examine in reality

# train / test
X = data[['release','decile_score','age','is_violent_recid']].drop(['release'],axis=1)
y = data['release']
class_names = data.release.unique()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=212)
decile_score = X_train['decile_score']
X_train = X_train.drop(['decile_score'],axis=1)

#X_test = X_test.drop(['decile_score'],axis=1)

cache,parameters,derivatives,grads = initialize_dicts(X_train,decile_score,5)

In [None]:
lower=1
upper=10
theta = 5
loss = []
learning_rate = 0.1
num_iterations = 5

# of parameters
n = X_train.shape[1]
    
B = np.random.rand(n,1)*0.1 # randomly initialize the B_0, B_1 coefficients
b = 0 # intialize  constants b,c @ 0
q = np.random.rand(1)*10
var_prior = np.random.rand(1)*10
tau = random.randint(lower, upper)
mu_ra = np.array(decile_score)
    
# prior belief mean
mu_prior = (np.dot(B.T,np.array(X_train).T)+b)[0]

# risk assessment variance
var_ra = np.array([q*var_prior if np.abs(mu_ra[i]-mu_prior[i])>theta else float("inf") for i in range(len(mu_prior))])

# posterior belief mean
mu_post = np.array([(mu_prior[i]*(q/(q+1)))+(mu_ra[i]/(q+1)) if np.abs(mu_ra[i]-mu_prior[i])>theta else mu_prior[i] for i in range(len(mu_prior))])

# posterior belief variance
var_post = np.array([var_ra[i]*(q/(q+1)) if np.abs(mu_ra[i]-mu_prior[i])>theta else var_prior for i in range(len(var_ra))])

Phi = []
for i in range(len(mu_post)):
    posterior = scipy.stats.norm(loc=mu_post[i],scale=np.sqrt(var_post[i]))
    Phi.append(1 - posterior.cdf(tau))

# some pred_y = 0 or 1 due to rounding in pythong, which produces an error when take log; add a tiny little number to get around this.
Phi = [1e-16 if i==0 else i for i in Phi]
Phi = np.array([1-1e-16 if i==1 else i for i in Phi])

# calculate loss
L = (-np.array(y_train)*np.log(Phi))-(1-np.array(y_train))*np.log(1-np.array(Phi))
loss.append(np.sum(L))

# Calculate Derivatives / Partial Derivatives

# dL
post_normal = scipy.stats.norm(mu_post,np.sqrt(var_post))
phi_post = post_normal.pdf(tau)

dL = phi_post*((np.array(1-y_train)*(phi_post/Phi))-(np.array(y_train)*(phi_post/(1-Phi))))

# dphi_post / dmu_post
dphipost_dmupost = ((tau-mu_post)/((var_post**(3/2))*((2*math.pi)**(1/2))))*np.exp((-((tau-mu_post)**2 / (2*var_post))).astype(float))

# dphi_post / dvar_post
dphipost_dvarpost = ((((tau-mu_post)**2)/(2*(var_post**(5/2))*((2*math.pi)**(1/2))))-(1/(2*(var_post**(3/2))*((2*math.pi)**(1/2)))))*np.exp(((-(((tau-mu_post)**2)/(2*var_post)))).astype(float))

#dphi_post / dtau
dphipost_dtau = derivatives['dphipost_dtau'] = (-((tau-mu_post)/((var_post**(3/2))*((2*math.pi)**(1/2)))))*np.exp((-(((tau-mu_post)**2)/(2*var_post))))

dmupost_dmuprior = []
dvarpost_dvarra = []
dvarra_dvarprior = []
dvarpost_dq = []
dmupost_dq = []
dvarra_dq = []
dvarpost_dvarprior = []

for i in range(len(mu_prior)):
    if np.abs(mu_prior[i] - mu_ra[i]) > theta:
        # dmu_post/dmu_prior
        dmupost_dmuprior.append((q/(q+1)))

        # dvarpost / dvarra
        dvarpost_dvarra.append((q/(q+1)))

        # dvarra / dvarprior
        dvarra_dvarprior.append(q)

        #dvarpost_dq
        dvarpost_dq.append(((var_ra[i]/(q+1))+((q*var_ra[i])/(q+1)**2)))

        #dmupost_dq
        dmupost_dq.append((mu_prior[i]/(q+1)) - (mu_ra[i]/((q+1)**2)) - ((mu_prior[i]*q)/((q+1)**2)))

        #dvarra_dq
        dvarra_dq.append((var_prior**2))

        #dvarpost_dvarprior
        dvarpost_dvarprior.append(0)
    else:
        # dmu_post/dmu_prior
        dmupost_dmuprior.append(1)

        # dvarpost / dvarra
        dvarpost_dvarra.append(0)

        # dvarra / dvarprior
        dvarra_dvarprior.append(0)

        #dvarpost_dq
        dvarpost_dq.append(0)

        #dmupost_dq
        dmupost_dq.append(0)

        #dvarra_dq
        dvarra_dq.append(0)

        #dvarpost_dvarprior
        dvarpost_dvarprior.append(1)

# Calculate gradients

# dL_dB
dB = np.dot(dL*dphipost_dmupost*dmupost_dmuprior,X_train)


# dL_db
db = np.sum(dL*((dphipost_dmupost*dmupost_dmuprior)))

#dL_dvarprior
dvar_dprior = np.sum(dL*(dphipost_dvarpost*dvarpost_dvarra*dvarra_dvarprior+dphipost_dvarpost*dvarpost_dvarprior))         

# dL_dq
dq = grads['dq'] = np.sum(dL*(dphipost_dvarpost*dvarpost_dq+dphipost_dmupost*dmupost_dq+dphipost_dvarpost*dvarpost_dvarra*dvarra_dq))

# dL_dtau
dtau = np.sum(dL*dphipost_dtau)

B = np.array([B[i]-(dB[i]*learning_rate) for i in range(B.shape[0])])
b = b-(learning_rate*db)
q = q-(learning_rate*dq)
var_prior = var_prior-(learning_rate*dvar_dprior)
tau = tau-(learning_rate*dtau)

    #j+=1


In [6]:
parameters

{'B': array([[0.02329016],
        [0.09116896]]),
 'b': 0,
 'q': array([0.51030257]),
 'var_prior': array([0.1000737]),
 'tau': 4}

In [7]:
cache

{'mu_prior': [],
 'mu_ra': array([2, 2, 3, ..., 7, 3, 6]),
 'var_ra': [],
 'mu_post': [],
 'var_post': []}

In [11]:
theta = 5
loss = []
learning_rate = 0.1
num_iterations = 3

j=0

while j<num_iterations:
    
    B=parameters['B']
    b=parameters['b']
    var_prior = parameters['var_prior']
    mu_ra = cache['mu_ra']
    q = parameters['q']
    tau = parameters['tau']

    # prior belief mean
    mu_prior = cache['mu_prior'] = (np.dot(B.T,np.array(X_train).T)+b)[0]

    # risk assessment variance
    var_ra = cache['var_ra'] = np.array([q*var_prior if np.abs(mu_ra[i]-mu_prior[i])>theta else float("inf") for i in range(len(mu_prior))])

    # posterior belief mean
    mu_post = cache['mu_post'] = np.array([(mu_prior[i]*(q/(q+1)))+(mu_ra[i]/(q+1)) if np.abs(mu_ra[i]-mu_prior[i])>theta else mu_prior[i] for i in range(len(mu_prior))])

    # posterior belief variance
    var_post = cache['var_post'] = np.array([var_ra[i]*(q/(q+1)) if np.abs(mu_ra[i]-mu_prior[i])>theta else var_prior for i in range(len(var_ra))])

    Phi = []
    for i in range(len(mu_post)):
        posterior = scipy.stats.norm(loc=mu_post[i],scale=np.sqrt(var_post[i]))
        Phi.append(1 - posterior.cdf(tau))

    # some pred_y = 0 or 1 due to rounding in pythong, which produces an error when take log; add a tiny little number to get around this.
    Phi = [1e-16 if i==0 else i for i in Phi]
    Phi = np.array([1-1e-16 if i==1 else i for i in Phi])

    # calculate loss
    L = (-np.array(y_train)*np.log(Phi.astype('float64')))-(1-np.array(y_train))*np.log(1-Phi.astype('float64'))
    loss.append(np.sum(L))

    # Calculate Derivatives / Partial Derivatives

    # dL
    post_normal = scipy.stats.norm(mu_post,np.sqrt(var_post))
    phi_post = post_normal.pdf(tau)

    dL = derivatives['dL'] = phi_post*((np.array(1-y_train)*(phi_post/Phi))-(np.array(y_train)*(phi_post/(1-Phi))))

    # dphi_post / dmu_post
    p1 = (tau-mu_post)/((var_post**(3/2))*((2*math.pi)**(1/2)))
    p2 = -((tau-mu_post)**2 / (2*var_post))
    dphipost_dmupost = derivatives['dphipost_dmupost'] = p1*np.exp(p2.astype(float))

    # dphi_post / dvar_post
    p1 = ((tau-mu_post)**2)/(2*(var_post**(5/2))*((2*math.pi)**(1/2)))
    p2 = 1/(2*(var_post**(3/2))*((2*math.pi)**(1/2)))
    p3 = (-(((tau-mu_post)**2)/(2*var_post)))
    dphipost_dvarpost = derivatives['dphipost_dvarpost'] = (p1-p2)*np.exp(p3.astype(float))

    #dphi_post / dtau
    p1 = -((tau-mu_post)/((var_post**(3/2))*((2*math.pi)**(1/2))))
    p2 = -(((tau-mu_post)**2)/(2*var_post))
    dphipost_dtau = derivatives['dphipost_dtau'] = p1*np.exp(p2)

    # dmu_prior / dB
    #derivatives['dmuprior_dB'] = X

    # dmu_prior / db
    #derivatives['dmuprior_db'] = 1

    dmupost_dmuprior = []
    dvarpost_dvarra = []
    dvarra_dvarprior = []
    dvarpost_dq = []
    dmupost_dq = []
    dvarra_dq = []
    dvarpost_dvarprior = []

    for i in range(len(mu_prior)):
        if np.abs(mu_prior[i] - mu_ra[i]) > theta:
            # dmu_post/dmu_prior
            dmupost_dmuprior.append((q/(q+1)))

            # dvarpost / dvarra
            dvarpost_dvarra.append((q/(q+1)))

            # dvarra / dvarprior
            dvarra_dvarprior.append(q)

            #dvarpost_dq
            dvarpost_dq.append(((var_ra[i]/(q+1))+((q*var_ra[i])/(q+1)**2)))

            #dmupost_dq
            dmupost_dq.append((mu_prior[i]/(q+1)) - (mu_ra[i]/((q+1)**2)) - ((mu_prior[i]*q)/((q+1)**2)))

            #dvarra_dq
            dvarra_dq.append((var_prior**2))

            #dvarpost_dvarprior
            dvarpost_dvarprior.append(0)
        else:
            # dmu_post/dmu_prior
            dmupost_dmuprior.append(1)

            # dvarpost / dvarra
            dvarpost_dvarra.append(0)

            # dvarra / dvarprior
            dvarra_dvarprior.append(0)

            #dvarpost_dq
            dvarpost_dq.append(0)

            #dmupost_dq
            dmupost_dq.append(0)

            #dvarra_dq
            dvarra_dq.append(0)

            #dvarpost_dvarprior
            dvarpost_dvarprior.append(1)

    derivatives['dmupost_dmuprior'] = np.array(dmupost_dmuprior)
    derivatives['dvarpost_dvarra'] = np.array(dvarpost_dvarra)
    derivatives['dvarra_dvarprior'] = np.array(dvarra_dvarprior)
    derivatives['dvarpost_dq'] = np.array(dvarpost_dq)
    derivatives['dmupost_dq'] = np.array(dmupost_dq)
    derivatives['dvarra_dq'] = np.array(dvarra_dq)
    derivatives['dvarpost_dvarprior'] = np.array(dvarpost_dvarprior)

    # Calculate gradients

    # dL_dB
    dB = grads['dB'] = np.dot(dL*dphipost_dmupost*dmupost_dmuprior,X_train)

    # dL_db
    db = grads['db'] = np.sum(dL*((dphipost_dmupost*dmupost_dmuprior)))

    #dL_dvarprior
    dvar_dprior = grads['dvar_dprior'] = np.sum(dL*(dphipost_dvarpost*dvarpost_dvarra*dvarra_dvarprior+dphipost_dvarpost*dvarpost_dvarprior))         

    # dL_dq
    dq = grads['dq'] = np.sum(dL*(dphipost_dvarpost*dvarpost_dq+dphipost_dmupost*dmupost_dq+dphipost_dvarpost*dvarpost_dvarra*dvarra_dq))

    # dL_dtau
    dtau = grads['dtau'] = np.sum(dL*dphipost_dtau)

    parameters['B'] = np.array([B[i]-(dB[i]*learning_rate) for i in range(B.shape[0])])
    parameters['b'] = b-(learning_rate*db)
    parameters['q'] = q-(learning_rate*dq)
    parameters['var_prior'] = var_prior-(learning_rate*dvar_dprior)
    parameters['tau'] = tau-(learning_rate*dtau)

    #j+=1


AttributeError: 'numpy.float64' object has no attribute 'exp'

In [None]:
Phi

In [None]:
# recalculate risk assessment variance
var_ra = cache['var_ra'] = np.array([q*var_prior if np.abs(mu_ra[i]-mu_prior[i])>theta else float("inf") for i in range(len(mu_prior))])
var_ra

In [None]:
parameters

In [None]:
grads

In [None]:
loss

In [None]:
grads

In [None]:
parameters