In [16]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.linear_model import LogisticRegression
import random
from itertools import chain
from collections import Counter
import numpy as np
from scipy.stats import norm
import math
from operator import add

from ipynb.fs.full.GenerateData import generatedata


**0) Define variables**

$ \{x_{1}, x_{2},...x_{k}\} \in X$, Observed features of defendant
 
$\mu_{prior}$, Judge's mean prior belief about a defendant; the *most probable* risk prediction

$\sigma_{prior}$, Standard deviation of the judge's prior belief about a defendant; corresponds to the judge's *uncertainty* about the prediction

$\mu_{ra}$, Observed algorithmic risk assessment prediction; mean of normally-distributed perceived anchor information

$\sigma_{ra}$, Perceived anchor *influence*; corresponds to the standard deviation of the risk assessment (when the assessment is perceived as highly informative the S.D. is lower, and vice versa).

$w,c$, Parameters for mapping $\sigma_{prior}$ onto $\sigma_{ra}$

$z_{\alpha}$, $z$-score distance between $\mu_{prior}$ and $\mu_{ra}$

$z$, Threshold distance between $\mu_{prior}$ and $\mu_{ra}$ after which $\mu_{ra}$ has no (or minimal) effect on $\mu_{prior}$

$\mu_{post}$, Judge's mean posterior belief

$\sigma_{post}$, Standard deviation of judge's posterior belief 

$\tau$, Decision making threshold

$\phi_{prior}(\mu_{prior},\sigma_{prior})$, Gaussian distribution representing judge's prior belief

$\phi_{ra}(\mu_{ra},\sigma_{ra})$, Gaussian distribution representing the influence of algorithmic risk assessment predictions

$\phi_{post}(\mu_{post},\sigma_{post})$, Gaussian distribution representing the judge's posterior belief

$\Phi_{post}$, Probability of drawing a belief from the posterior greater than threshold $\tau$

$y$, Judge's observed decision on defendant
 
$\alpha$, Learning rate


**1) Helper functions**

**(a) Initialize parameters**

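$\beta$ (nx1) is randomly initialized in $[0, 0.1)$; $b$ (1x1) and $\sigma_{prior}$ (1x1) are drawn uniformly from $[0, 10)$;
$q$ (1x1) is drawn uniformly from $[0, 1)$; $\tau$ is a random integer between the supplied lower and upper bounds (inclusive)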

In [17]:
def initialize_parameters(X,lower,upper):
    """
    Draw a random starting value for every model parameter.

    Arguments:
    X -- 2-D feature matrix; only X.shape[1] (the number of features) is read
    lower, upper -- inclusive integer bounds for the initial threshold tau

    Returns:
    parameters -- dict with keys
                  "B"        : (n, 1) array, uniform in [0, 0.1)
                  "b"        : scalar, uniform in [0, 10)
                  "q"        : scalar, uniform in [0, 1)
                  "sd_prior" : scalar, uniform in [0, 10)
                  "tau"      : int from random.randint(lower, upper)
    """
    n_features = X.shape[1]

    # The draw order (B, b, q, sd_prior, tau) is fixed so that seeding
    # np.random and random reproduces the same starting point.
    coef_init = 0.1*np.random.rand(n_features,1)
    bias_init, = np.random.rand(1)*10
    q_init, = np.random.rand(1)
    sd_init, = np.random.rand(1)*10
    tau_init = random.randint(lower, upper)

    return {"B": coef_init,
            "b": bias_init,
            "q" : q_init,
            "sd_prior" : sd_init,
            "tau" : tau_init}

In [18]:
# create dictionary to store derivatives
def initialize_dicts():
    """
    Build the empty containers used during one backward pass.

    Returns:
    derivatives -- dict mapping each component-derivative name to a fresh
                   empty list (one entry is appended per observation later)
    grads -- dict mapping each parameter-gradient name to a fresh empty list
    """
    derivative_names = (
        "dL_dphi",
        "dphipost_dmupost",
        "dphipost_dsdpost",
        "dphipost_dtau",
        "dmupost_dmuprior",
        "dsdpost_dq",
        "dmupost_dq",
        "dsdpost_dsdprior",
    )
    gradient_names = ("dB", "db", "dsdprior", "dq", "dtau")

    # A comprehension gives every key its own list object.
    derivatives = {name: [] for name in derivative_names}
    grads = {name: [] for name in gradient_names}

    return derivatives,grads

**2) Forward**

**(a) Estimate mean of prior belief distribution**

Given {$\beta, b, \sigma_{prior}, q, \tau, \Theta$}, we want to calculate the loss, $\mathcal{L}(\hat{y},y_i)$, using the following steps:

1. Calculate prior mean, $\mu_{prior}$
\begin{equation}
\mu_{prior} = \beta X + b
\end{equation}

Initialize the first estimated prior mean and store in the cache. 
Future calculations will use the function calc_prior_mean

In [22]:
def calc_prior_mean(X,parameters):
    """
    Evaluate the linear prior mean, mu_prior = B^T x + b, for every row of X.

    Arguments:
    X -- 2-D feature matrix with one observation per row
    parameters -- dict providing "B" (coefficient column, (n, 1)) and
                  "b" (scalar bias)

    Returns:
    list with one prior mean per row of X
    """
    coef, intercept = parameters['B'], parameters['b']

    # (1, n) . (n, m) -> (1, m): a single row holding one score per observation
    scores = np.dot(coef.T,X.T)+intercept
    first_row = scores[0]

    return first_row.tolist()

**(b) Estimate the standard deviation of the risk assessment score (i.e. perceived confidence in the anchor info):**

**We're not currently including this in further calculations**

\begin{equation}
\sigma_{ra}^2 = var_{ra} = \left \{
	\begin{array}{ll}
        q \cdot var_{prior} & if |\mu_{ra} - \mu_{prior}|<\Theta \\
        \infty & otherwise
    \end{array}
    \right.
\end{equation}

In [21]:
def calc_var_ra(sd_prior,mu_prior,mu_ra,q,theta): 
    """
    Compute the perceived variance of the risk-assessment anchor.

    The anchor variance is q * var_prior when the anchor lies within theta of
    the prior mean, and infinite (i.e. the anchor carries no information)
    otherwise. The prior is passed as a standard deviation, so it is squared
    here to obtain var_prior.

    Arguments:
    sd_prior -- standard deviation of the prior belief (scalar)
    mu_prior -- sequence of prior means, one per observation
    mu_ra -- sequence of risk-assessment predictions, same length as mu_prior
    q -- ratio of anchor variance to prior variance
    theta -- distance threshold beyond which the anchor is ignored

    Returns:
    var_ra -- list with one anchor variance per observation
    """
    var_prior = sd_prior**2

    var_ra = [q*var_prior if np.abs(mu_ra[i]-mu_prior[i])<theta else float("inf") for i in range(len(mu_prior))]

    return var_ra 

**(c) Calculate the posterior distribution of a judge's beliefs on defendant, $\phi_{post}(\mu_{post},\sigma_{post}^2)$, and probability of making decision, $\hat{y}$:**



\begin{equation}
\phi_{prior}(\mu_{prior},\sigma_{prior}^2) = \mathcal{N}(\mu_{prior},\sigma_{prior}^2)
\end{equation}


\begin{equation}
    \phi_{ra}(\mu_{ra},\sigma_{ra}^2) = \mathcal{N}(\mu_{ra},\sigma_{ra}^2)
\end{equation}

\begin{equation}
\phi_{post}(\mu_{post},\sigma_{post}^2) =  \mathcal{N}(\mu_{post},\sigma_{post}^2)
\end{equation}

\begin{equation}
    = \frac{\phi_{prior}(\mu_{prior},\sigma_{prior}^2)\phi_{ra}(\mu_{ra},\sigma_{ra}^2)}{\int \phi_{prior}(\mu_{prior},\sigma_{prior}^2)\phi_{ra}(\mu_{ra},\sigma_{ra}^2)}
\end{equation}

where:

\begin{equation}
\mu_{post} = \left \{
	\begin{array}{ll}
        \mu_{prior} \cdot \frac{q}{q+1} + \mu_{ra} \cdot \frac{1}{q+1} & if |\mu_{ra} - \mu_{prior}|<\Theta \\
        \mu_{prior} & otherwise
    \end{array}
    \right.
\end{equation}

\begin{equation}
\sigma_{post} = \left \{
	\begin{array}{ll}
        \sigma_{prior} \cdot \sqrt{\frac{q}{q+1}} & if |\mu_{ra} - \mu_{prior}|<\Theta \\
        \sigma_{prior} & otherwise
    \end{array}
    \right.
\end{equation}

In [6]:
def calc_post_mean(mu_prior, mu_ra, parameters, theta):
    """
    Blend each prior mean with its risk-assessment anchor.

    Arguments:
    mu_prior -- sequence of prior means, one per observation
    mu_ra -- sequence of risk-assessment predictions, same length
    parameters -- dict providing "q"
    theta -- distance threshold; anchors at distance >= theta are ignored

    Returns:
    mu_post -- list of posterior means: the weighted average
               mu_prior*q/(q+1) + mu_ra/(q+1) when |mu_ra - mu_prior| < theta,
               otherwise the untouched prior mean
    """
    q = parameters['q']

    mu_post = []
    for i in range(len(mu_prior)):
        prior_i = mu_prior[i]
        anchor_i = mu_ra[i]
        if abs(anchor_i-prior_i)<theta:
            blended = (prior_i*(q/(q+1)))+(anchor_i/(q+1))
        else:
            blended = prior_i
        mu_post.append(blended)

    return mu_post

In [7]:
def calc_post_sd(mu_prior, mu_ra, parameters, theta):
    """
    Compute the posterior standard deviation for every observation.

    When the anchor is within theta of the prior mean the posterior variance
    is var_prior * q/(q+1), so the standard deviation shrinks by
    sqrt(q/(q+1)) -- the same factor used for d(sd_post)/d(sd_prior) in the
    derivative code. Otherwise the prior standard deviation is kept.

    Arguments:
    mu_prior -- sequence of prior means, one per observation
    mu_ra -- sequence of risk-assessment predictions, same length
    parameters -- dict providing "q" and "sd_prior"
    theta -- distance threshold; anchors at distance >= theta are ignored

    Returns:
    sd_post -- list with one posterior standard deviation per observation
    """
    q = parameters['q']
    sd_prior = parameters['sd_prior']

    # Loop-invariant: the anchored posterior sd is identical for all rows.
    sd_anchored = sd_prior*np.sqrt(q/(q+1))

    sd_post = [sd_anchored if abs(mu_ra[i]-mu_prior[i])<theta else sd_prior for i in range(len(mu_prior))]
    
    return sd_post

**(d) Calculate the probability of detaining defendant as area under the posterior belief curve $\geq$ decision threshold, $\tau$**

\begin{equation}
\Phi(\tau;\mu_{post},\sigma_{post}) = \int_\tau^{\infty} \phi_{post}(x; \mu_{post},\sigma_{post}^2)\,dx
\end{equation}


In [8]:
def calc_Phi(mu_post,sd_post,parameters):    
    """
    Probability mass of each posterior belief that lies above the threshold.

    Arguments:
    mu_post -- sequence of posterior means, one per observation
    sd_post -- sequence of posterior standard deviations, same length
    parameters -- dict providing the decision threshold "tau"

    Returns:
    Phi -- list of upper-tail probabilities 1 - CDF(tau; mu_post, sd_post).
           Values that evaluate to exactly 0 or 1 are replaced by 1e-6 and
           1 - 1e-6 so that the logarithms in the loss stay finite.
    """
    tau = parameters['tau']

    means = np.asarray(mu_post, dtype=float)
    sds = np.asarray(sd_post, dtype=float)

    # One vectorised CDF evaluation instead of a scalar call per row.
    Phi = 1 - norm.cdf(tau,loc=means,scale=sds)

    # Phi must be an ndarray here: comparing a plain list with == yields a
    # single bool, which would silently disable the clamp.
    Phi = np.where(Phi==0,1e-6,Phi)
    Phi = np.where(Phi==1,1-(1e-6),Phi)

    return Phi.tolist()

**(e) Compute negative log likelihood**


\begin{equation}
\mathcal{L}(\Phi(\tau;\mu_{post},\sigma_{post}^2), y_i) = - (y_i \log (1-\Phi(\tau;\mu_{post},\sigma_{post}^2)) + (1-y_i) \log \Phi(\tau;\mu_{post},\sigma_{post}^2))
\end{equation}


In [12]:
def calc_L(Phi,Y):
    """
    Mean negative log-likelihood of the observed decisions.

    Arguments:
    Phi -- sequence of upper-tail probabilities, one per observation
    Y -- observed decisions, indexable by 0..len(Phi)-1; an observation with
         Y[i] == 1 contributes log(1 - Phi[i]), one with Y[i] == 0
         contributes log(Phi[i])

    Returns:
    L -- negative of the average log-likelihood over all observations
    """
    n_obs = len(Phi)

    log_lik = 0
    for i in range(n_obs):
        p_i = Phi[i]
        y_i = Y[i]
        log_lik += (y_i*np.log(1-p_i))+((1-y_i)*np.log(p_i))

    return -log_lik/n_obs

**4) Calculate derivatives for gradient descent**

\begin{equation}
    \frac{d\mathcal{L}}{d\phi_{post}} = \phi_{post}\left(\frac{y}{1-\Phi}-\frac{1-y}{\Phi}\right)
\end{equation}

\begin{equation}
    \frac{d\phi_{post}}{d\mu_{post}} = \frac{\tau-\mu_{post}}{\sigma_{post}^3\cdot\sqrt{2\pi}}\cdot exp \left(- \frac{(\tau-\mu_{post})^2}{2\cdot \sigma_{post}^2} \right)
\end{equation}

\begin{equation}
    \frac{d\phi_{post}}{d\sigma_{post}} = \left(\frac{(\tau-\mu_{post})^2}{\sigma_{post}^4\sqrt{2\pi}} - \frac{1}{\sigma_{post}^2 \sqrt{2\pi}} \right) \cdot exp \left[- \frac{(\tau-\mu_{post})^2}{2\cdot \sigma_{post}^2} \right]
\end{equation}

\begin{equation}
    \frac{d\phi_{post}}{d\tau}= -\frac{\tau-\mu_{post}}{\sigma_{post}^3 \sqrt{2\pi}}exp\left[-\frac{(\tau-\mu_{post})^2}{2\sigma_{post}^2} \right]
\end{equation}

\begin{equation}
    \frac{d\mu_{post}}{d\mu_{prior}} = 
    \left \{
	\begin{array}{ll}
        \frac{q}{q+1} & if |\mu_{ra} - \mu_{prior}|<\Theta \\
        1 & otherwise
    \end{array}
    \right.
\end{equation}

\begin{equation}
    \frac{d\mu_{post}}{dq} = 
        \left \{
	\begin{array}{ll}
        \frac{\mu_{prior}}{1+q} - \frac{\mu_{ra}}{(1+q)^2}-\frac{q\cdot\mu_{prior}}{(1+q)^2} & if |\mu_{ra} - \mu_{prior}|<\Theta \\
        0 & otherwise
    \end{array}
    \right.
\end{equation}        

\begin{equation}
    \frac{d\sigma_{post}}{dq} = 
    \left \{
	\begin{array}{ll}
        \frac{\sigma_{prior}}{2(q+1)^2 \sqrt{\frac{q}{q+1}}} & if |\mu_{ra} - \mu_{prior}|<\Theta \\
        0 & otherwise
    \end{array}
    \right.
\end{equation}

\begin{equation}
    \frac{d\sigma_{post}}{d\sigma_{prior}} = 
        \left \{
	\begin{array}{ll}
        \sqrt{\frac{q}{q+1}} & if |\mu_{ra} - \mu_{prior}|<\Theta \\
        1 & otherwise
    \end{array}
    \right.
\end{equation}

\begin{equation}
\frac{d\mu_{prior}}{d\beta} = X
\end{equation}

\begin{equation}
\frac{d\mu_{prior}}{db} = 1
\end{equation}

In [15]:
def calc_component_derivs(X,parameters, derivatives, theta, mu_prior, mu_ra,mu_post,sd_post,Phi,Y):
    """
    Append the per-observation building blocks of the chain rule.

    Arguments:
    X -- feature matrix; only len(X) (the number of observations) is used
    parameters -- dict providing "q", "sd_prior" and "tau"
    derivatives -- dict of lists keyed by derivative name; one value is
                   appended to each list per observation (mutated in place)
    theta -- distance threshold; anchors at distance >= theta are ignored
    mu_prior, mu_ra, mu_post, sd_post, Phi, Y -- per-observation sequences,
                   indexable by 0..len(X)-1

    Returns:
    derivatives -- the same dict that was passed in

    NOTE(review): the factors follow the notebook's written derivation, which
    differentiates the posterior density evaluated at tau rather than the
    tail probability Phi -- confirm this is the intended gradient.
    """
    q = parameters['q']
    sd_prior = parameters['sd_prior']
    tau = parameters['tau']

    root_two_pi = np.sqrt(2*math.pi)

    for idx in range(len(X)):
        sigma = sd_post[idx]
        label = Y[idx]
        prob = Phi[idx]
        prior_mean = mu_prior[idx]
        anchor = mu_ra[idx]
        offset = tau-mu_post[idx]

        # Gaussian kernel shared by every density-based term below
        kernel = np.exp(-(offset**2)/(2*(sigma**2)))

        # posterior density at tau, scaled by the loss derivative w.r.t. Phi
        density = (1/(sigma*root_two_pi))*kernel
        loss_term = density*((label/(1-prob))-((1-label)/prob))

        # density derivatives w.r.t. mu_post, sd_post and tau
        slope_mu = (offset/((sigma**3)*root_two_pi))*kernel
        quad_part = (offset**2)/((sigma**4)*root_two_pi)
        norm_part = 1/((sigma**2)*root_two_pi)
        slope_sd = (quad_part-norm_part)*kernel
        slope_tau = -slope_mu

        anchored = np.abs(prior_mean - anchor)<theta
        if anchored:
            # anchor is close enough to pull the posterior towards it
            mu_wrt_prior = q/(q+1)
            sd_wrt_q = sd_prior/(2*((q+1)**2)*np.sqrt(q/(q+1)))
            mu_wrt_q = (prior_mean/(q+1)) - (anchor/((q+1)**2)) - ((prior_mean*q)/((q+1)**2))
            sd_wrt_prior = np.sqrt(q/(q+1))
        else:
            # anchor ignored: the posterior equals the prior
            mu_wrt_prior, sd_wrt_q, mu_wrt_q, sd_wrt_prior = 1, 0, 0, 1

        row = {
            'dL_dphi': loss_term,
            'dphipost_dmupost': slope_mu,
            'dphipost_dsdpost': slope_sd,
            'dphipost_dtau': slope_tau,
            'dmupost_dmuprior': mu_wrt_prior,
            'dsdpost_dq': sd_wrt_q,
            'dmupost_dq': mu_wrt_q,
            'dsdpost_dsdprior': sd_wrt_prior,
        }
        for name, value in row.items():
            derivatives[name].append(value)

    return derivatives


In [None]:
"""
def calc_component_derivs(parameters, derivatives, theta, mu_prior, mu_ra,mu_post,var_post,Phi,y):
    
    #retrieve variables
    B = parameters['B']
    b = parameters['b']
    q = parameters['q']
    var_prior = parameters['var_prior']
    tau = parameters['tau']
    
    # calculate derivatives / partial derivatives
    
    # a few expressions for reuse
    exponent = [math.exp(-((tau-mu_post[i])**2)/(2*var_post[i])) for i in range(len(mu_post))]
    var_post_5 = [var**5 for var in var_post]
    var_post_3 = [var**3 for var in var_post]
    
    # dL/dphi
    phi_post_0 = [1/math.sqrt(2*math.pi*var_post[i]) for i in range(len(var_post))]
    phi_post = [ph*ex for ph,ex in zip(phi_post_0,exponent)]
    dL_dphi = [ph*((y0/(1-Ph))-((1-y0)/Ph)) for ph,y0,Ph in zip(phi_post,y,Phi)]
    
    # dphipost_dmupost
    dphipost_dmupost_0 = [(tau-mu_post[i])/math.sqrt(2*math.pi*var_post_3[i]) for i in range(len(mu_post))]
    dphipost_dmupost = [ph*ex for ph,ex in zip(dphipost_dmupost_0,exponent)]
    
    # dphipost_dvarpost
    dphi_dvar_1 = [((tau-mu_post[i])**2)/math.sqrt(8*math.pi*var_post_5[i]) for i in range(len(mu_post))]
    dphi_dvar_2 = [1/math.sqrt(8*math.pi*var_post_3[i]) for i in range(len(mu_post))]
    dphipost_dvarpost = [(ph1-ph2)*ex for ph1,ph2,ex in zip(dphi_dvar_1,dphi_dvar_2,exponent)]
    
    # dphipost_dtau
    dphipost_dtau = [(-(tau-mp)/math.sqrt(2*math.pi*vp))*ex for mp,vp,ex in zip(mu_post,var_post_3,exponent)]

    # dmupost_dmuprior
    dmupost_dmuprior = [q/(q+1) if np.abs(mu_prior[i] - mu_ra[i])<theta else 1 for i in range(len(mu_prior))]
    
    # dvarpost_dq
    dvarpost_dq = [((var_prior/(q+1))-((q*var_prior)/((q+1)**2))) if np.abs(mu_prior[i] - mu_ra[i])<theta else 0 for i in range(len(mu_prior))]
    
    # dmupost_dq
    dmupost_dq = [(mu_prior[i]/(q+1)) - (mu_ra[i]/((q+1)**2)) - ((mu_prior[i]*q)/((q+1)**2)) if np.abs(mu_prior[i] - mu_ra[i])<theta else 0 for i in range(len(mu_prior))]

    # dvarpost_dvarprior
    dvarpost_dvarprior = [(q/(q+1)) if np.abs(mu_prior[i] - mu_ra[i])<theta else 1 for i in range(len(mu_prior))]
    
    derivatives = {
        "dL_dphi": dL_dphi,
        "dphipost_dmupost": dphipost_dmupost,
        "dphipost_dvarpost": dphipost_dvarpost,
        "dphipost_dtau": dphipost_dtau,
        "dmupost_dmuprior": dmupost_dmuprior,
        "dvarpost_dq": dvarpost_dq,
        "dmupost_dq": dmupost_dq,
        "dvarpost_dvarprior": dvarpost_dvarprior
    }

    return derivatives
    """

**5) Use chain rule to calculate gradients**

\begin{equation}
    \frac{d\mathcal{L}}{d\beta} =  \frac{d\mathcal{L}}{d\phi_{post}}\cdot\frac{\partial\phi_{post}}{\partial\mu_{post}}\cdot \frac{\partial\mu_{post}}{\partial\mu_{prior}}\cdot \frac{d\mu_{prior}}{d\beta}
\end{equation}

\begin{equation}
    \frac{d\mathcal{L}}{db} =  \frac{d\mathcal{L}}{d\phi_{post}}\cdot\frac{\partial\phi_{post}}{\partial\mu_{post}}\cdot \frac{\partial\mu_{post}}{\partial\mu_{prior}}\cdot \frac{d\mu_{prior}}{db}
\end{equation}

\begin{equation}
    \frac{d\mathcal{L}}{d\sigma_{prior}} = \frac{d\mathcal{L}}{d\phi_{post}}\cdot\frac{d\phi_{post}}{d\sigma_{post}} \cdot \frac{d\sigma_{post}}{d\sigma_{prior}} 
\end{equation}

\begin{equation}
    \frac{d\mathcal{L}}{dq} = \frac{d\mathcal{L}}{d\phi_{post}}\cdot\frac{d\phi_{post}}{d\sigma_{post}} \cdot \frac{d\sigma_{post}}{dq} + \frac{d\mathcal{L}}{d\phi_{post}}\cdot\frac{d\phi_{post}}{d\mu_{post}} \cdot \frac{d\mu_{post}}{dq} 
\end{equation}

\begin{equation}
    \frac{d\mathcal{L}}{d\tau} = \frac{d\mathcal{L}}{d\phi_{post}}\cdot\frac{d\phi_{post}}{d\tau}
\end{equation}

In [11]:
def calc_gradients(X,grads,derivatives): 
    """
    Combine the per-observation derivatives into mean parameter gradients.

    Arguments:
    X -- 2-D feature matrix with one observation per row; a pandas DataFrame
         or any array-like that np.asarray can turn into a float array
    grads -- dict that receives the gradients (mutated in place)
    derivatives -- dict of per-observation derivative sequences with keys
                   "dL_dphi", "dphipost_dmupost", "dphipost_dsdpost",
                   "dphipost_dtau", "dmupost_dmuprior", "dsdpost_dq",
                   "dmupost_dq" and "dsdpost_dsdprior"; only the first
                   len(X) entries of each are used

    Returns:
    grads -- the same dict, with "dB" (list of n floats) and the scalars
             "db", "dsdprior", "dq" and "dtau", each averaged over rows

    Raises:
    ZeroDivisionError -- if X has no rows (the mean is undefined)
    """
    features = np.asarray(X, dtype=float)
    n_obs, _ = features.shape
    if n_obs == 0:
        raise ZeroDivisionError("calc_gradients needs at least one observation")

    def _column(name):
        # The lists may hold more than n_obs entries; use the leading ones only.
        return np.asarray(derivatives[name][:n_obs], dtype=float)

    dL_dphipost = _column('dL_dphi')

    # Products shared by several gradients
    via_mu = dL_dphipost*_column('dphipost_dmupost')
    via_sd = dL_dphipost*_column('dphipost_dsdpost')
    via_mu_prior = via_mu*_column('dmupost_dmuprior')

    # dL_dB: d(mu_prior)/dB is the feature row, so weight each row and sum
    grads['dB'] = (np.dot(via_mu_prior,features)/float(n_obs)).tolist()

    # dL_db: d(mu_prior)/db = 1
    grads['db'] = via_mu_prior.sum()/float(n_obs)

    # dL_dsdprior
    grads['dsdprior'] = (via_sd*_column('dsdpost_dsdprior')).sum()/float(n_obs)

    # dL_dq: q acts through both the posterior sd and the posterior mean
    grads['dq'] = ((via_sd*_column('dsdpost_dq'))+(via_mu*_column('dmupost_dq'))).sum()/float(n_obs)

    # dL_dtau
    grads['dtau'] = (dL_dphipost*_column('dphipost_dtau')).sum()/float(n_obs)
    
    return grads

In [None]:
"""
def calc_gradients(grads,derivatives,X): 
    # Retrieve variables
    dL_dphipost = derivatives['dL_dphi']
    dphipost_dmupost = derivatives['dphipost_dmupost']
    dphipost_dvarpost = derivatives['dphipost_dvarpost']
    dphipost_dtau = derivatives['dphipost_dtau']
    dmupost_dmuprior = derivatives['dmupost_dmuprior']
    dvarpost_dq = derivatives['dvarpost_dq']
    dmupost_dq = derivatives['dmupost_dq']
    dvarpost_dvarprior = derivatives['dvarpost_dvarprior']

    # dL_dB
    dL_dB = sum([dphipost*dmupost*dmuprior*x for dphipost,dmupost,dmuprior,x in zip(dL_dphipost,dphipost_dmupost,dmupost_dmuprior,X)])
    
    # dL_db
    dL_db = sum([dphipost*dmupost*dmuprior for dphipost,dmupost,dmuprior in zip(dL_dphipost,dphipost_dmupost,dmupost_dmuprior)])
    
    #dL_dvarprior
    dL_dvarprior = sum([dphipost*dvarpost*dvarprior for dphipost,dvarpost,dvarprior in zip(dL_dphipost,dphipost_dvarpost,dvarpost_dvarprior)])         

    # dL_dq
    dL_dq1 = [dphipost*dvarpost*dq for dphipost,dvarpost,dq in zip(dL_dphipost,dphipost_dvarpost,dvarpost_dq)]
    dL_dq2 = [dphipost*dmupost*dq for dphipost,dmupost,dq in zip(dL_dphipost,dphipost_dmupost,dmupost_dq)]
    dL_dq = sum([x1+x2 for x1,x2 in zip(dL_dq1,dL_dq2)])

    # dL_dtau
    dL_dtau = sum([dphipost*dtau for dphipost,dtau in zip(dL_dphipost,dphipost_dtau)])

    grads = {
            "dB":dL_dB,
            "db":dL_db,
            "dvarprior":dL_dvarprior,
            "dq":dL_dq,
            "dtau":dL_dtau
    }
    
    return grads
"""

**8) Update parameters with gradients**

\begin{equation}
\beta' = \beta - \alpha \frac{d\mathcal{L}}{d\beta} \\
b' = b - \alpha \frac{d\mathcal{L}}{db} \\
q' = q - \alpha \frac{d\mathcal{L}}{dq} \\
\sigma_{prior}' = \sigma_{prior}-\alpha\frac{d\mathcal{L}}{d\sigma_{prior}} \\
\tau' = \tau - \alpha \frac{dL}{d\tau}
\end{equation}

In [None]:
def update_parameters(parameters, grads, learning_rate):
    """
    Take one gradient-descent step on every parameter.

    Arguments:
    parameters -- dict with "B" (array), "b", "q", "sd_prior" and "tau";
                  updated in place
    grads -- dict with "dB" (list or array holding one value per entry of B),
             "db", "dq", "dsdprior" and "dtau"
    learning_rate -- step size

    Returns:
    parameters -- dictionary containing updated parameters 
    """
    # grads['dB'] may be a plain list of n values while B is an (n, 1) column:
    # convert it and give it B's shape so the in-place subtraction lines up.
    dB = np.asarray(grads['dB'], dtype=float).reshape(np.shape(parameters['B']))

    parameters['B'] -= learning_rate*dB
    parameters['b'] -= learning_rate*grads['db']
    # NOTE(review): nothing here keeps q or sd_prior positive; confirm the
    # training loop guards against steps that drive them to zero or below.
    parameters['q'] -= learning_rate*grads['dq']
    parameters['sd_prior'] -= learning_rate*grads['dsdprior']
    parameters['tau'] -= learning_rate*grads['dtau']
    
    return parameters