\documentclass[12pt,t,handout]{beamer}
\usepackage{graphicx}
\setbeameroption{show notes}
\setbeamertemplate{note page}[plain]
\usepackage{listings}
\usepackage{datetime}
\usepackage{url}
% specifications for presenter mode
\beamerdefaultoverlayspecification{<+->}
\setbeamercovered{transparent}
% math shorthand
\usepackage{bm}
\usepackage{amsmath}
\usepackage{mathtools}
\newcommand{\D}{\mathcal{D}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\F}{\mathcal{F}}
\newcommand{\X}{\mathcal{X}}
\newcommand{\lik}{\mathcal{L}}
\DeclarePairedDelimiterX{\infdivx}[2]{(}{)}{%
#1\;\delimsize\|\;#2%
}
\newcommand{\infdiv}{D\infdivx}
\DeclarePairedDelimiter{\norm}{\lVert}{\rVert}
\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\argmax}{arg\,max}
% Bibliography
\usepackage{natbib}
\bibpunct{(}{)}{,}{a}{}{;}
\usepackage{bibentry}
%\nobibliography*
\def\notescolors{1}
\input{header.tex}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% end of header
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% title info
\title{Fair New World}
\subtitle{\scriptsize for the seminar: \textit{Fairness in Machine Learning}, \\
organized by M.~Hardt, Fall 2017, UC Berkeley}
\author{\href{http://nimahejazi.org}{Nima Hejazi}
\\[-10pt]
}
\institute{Division of Biostatistics \\
University of California, Berkeley \\
\href{https://www.stat.berkeley.edu/~nhejazi}
{\tt \scriptsize \color{foreground}
stat.berkeley.edu/\textasciitilde{}nhejazi
}
\\[4pt]
\includegraphics[height=20mm]{Figs/seal-berkeley.png}
\\[-12pt]
}
\date{
\href{http://nimahejazi.org}
{\tt \scriptsize \color{foreground} nimahejazi.org}
\\[-4pt]
\href{https://twitter.com/nshejazi}
{\tt \scriptsize \color{foreground} twitter/@nshejazi}
\\[-4pt]
\href{https://github.com/nhejazi}
{\tt \scriptsize \color{foreground} github/nhejazi}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
% title slide
{
\setbeamertemplate{footline}{} % no page number here
\frame{
\titlepage
\vspace{-1em}
\centerline{\href{https://goo.gl/8RWEy5}{\tt \scriptsize
\underline{slides}: goo.gl/8RWEy5}}
\vspace{-1.5em}
\vfill \hfill \includegraphics[height=6mm]{Figs/cc-zero.png} \vspace*{-0.5cm}
\note{This slide deck is for a reading group presentation on the manuscript
``Fair Inference on Outcomes'' (Nabi \& Shpitser, 2017), for the seminar on
``Fairness in Machine Learning'', organized in Fall 2017 by Moritz Hardt, at
the University of California, Berkeley.
Source: {\tt https://github.com/nhejazi/talk\_fair-outcomes} \\
Slides: {\tt https://goo.gl/i3CxL9} \\
With notes: {\tt https://goo.gl/8RWEy5}
}
}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Preview: Summary}
\only<1>{\addtocounter{framenumber}{-1}}
\begin{center}
\begin{itemize}
\itemsep6pt
\item Mediation analysis provides a framework under which intuitive
definitions of fairness may be expressed.
\item ``Fair inference'' is analogous to causal inference, except that the
counterfactuals explored refer to a ``fair'' world (n.~b., intentionally
vague).
\item Fairness may be characterized as the absence (or dampening) of a
\textbf{path-specific effect (PSE)}.
\item Restriction of a PSE is easily expressed as a likelihood maximization
problem that constrains the magnitude of the undesirable PSE.
\item This approach to fairness avoids throwing away information (i.e.,
``fairness through unawareness'') but leaves the definition of fairness to
the analyst.
\end{itemize}
\end{center}
\note{
We'll go over this summary again at the end of the talk. Hopefully, it will make
more sense then.
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Preliminaries: Notation}
\begin{center}
\begin{itemize}
\itemsep10pt
\item Data $\D = (Y, \bm{X})$; outcome $Y$ and feature vector $\bm{X}$.
\item Sensitive features: $S \in \bm{X}$, where inference on $Y$ using $S$
\textit{might} result in discrimination.
\item Treatment variable: $A \in \bm{X}$.
\item Mediator variables: $M \in \bm{X}$ or $\bm{M} \subseteq \bm{X}$.
\item Potential outcome: $Y(a)$, realization of $Y$ under $A = a$.
\end{itemize}
\end{center}
\note{
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Preliminaries: Mediation Analysis}
\begin{center}
\begin{itemize}
\itemsep10pt
\item \textbf{Goal:} understand the mechanism by which $A$ influences $Y$.
\item Decompose the \textbf{ACE} into \textit{direct} and \textit{indirect}
effects mediated by a variable $M$.
\item Partition the feature space $\bm{X}$ into $A$ (treatment), $M$
(mediator), and $\bm{C} = \bm{X} \setminus \{A, M\}$ (baseline factors).
\item Counterfactual contrasts are expressed via \textit{nested} potential
outcomes (i.e., $Y(a, M(a'))$).
\end{itemize}
\end{center}
\note{
\begin{itemize}
\item Nested potential outcomes read as ``the outcome $Y$ if $A$ were set
to $a$ while $M$ were set to whatever value it would have attained had
$A$ been set to $a'$''.
\end{itemize}
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{The Average Causal Effect (ACE)}
\begin{center}
\begin{itemize}
\itemsep10pt
\item $\text{ACE} = \E[Y(a)] - \E[Y(a')]$
\item Not computed via $\E[Y \mid A],$ as associations between $A$ and $Y$ may
be ``partly causal'' or spurious (see the simulation sketch after this
frame).
\item Decomposition: $\text{ACE} = \text{NDE} + \text{NIE},$ where
\textbf{NDE} is the \textit{Natural Direct Effect} and \textbf{NIE} is the
\textit{Natural Indirect Effect}.
\end{itemize}
\begin{equation*}
\begin{split}
\text{ACE} & = \E[Y(a)] - \E[Y(a')] \\ & = \E[Y(a)] - \E[Y(a, M(a'))] \\ & +
\E[Y(a, M(a'))] - \E[Y(a')]
\end{split}
\end{equation*}
\end{center}
\note{
\begin{itemize}
\item Decomposition of the ACE gives us a way to express undesirable PSEs
using mediators.
\item The decomposition is just a telescoping sum argument: add and subtract
$\E[Y(a, M(a'))]$.
\end{itemize}
}
\end{frame}
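%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c,fragile]{Aside: Why Not $\E[Y \mid A]$? (sketch)}
\footnotesize
A minimal simulation sketch (not from the paper; the data-generating process
is hypothetical) of how confounding makes the naive contrast
$\E[Y \mid A = 1] - \E[Y \mid A = 0]$ diverge from the ACE:
\begin{lstlisting}[language=Python, basicstyle=\tiny\ttfamily]
# hypothetical DGP: a confounder C drives both A and Y; true ACE = 2
import numpy as np

rng = np.random.default_rng(0)
n = 500_000
C = rng.binomial(1, 0.5, n)                      # confounder
A = rng.binomial(1, np.where(C == 1, 0.9, 0.1))  # treatment depends on C
Y = 2 * A + 3 * C + rng.normal(size=n)           # outcome model

naive = Y[A == 1].mean() - Y[A == 0].mean()      # biased upward by C
# adjustment: average the C-stratified contrasts, weighted by p(C)
ace = sum(
    (Y[(A == 1) & (C == c)].mean() - Y[(A == 0) & (C == c)].mean())
    * np.mean(C == c)
    for c in (0, 1)
)
print(f"naive: {naive:.2f}, adjusted: {ace:.2f}")  # ~4.4 vs ~2.0
\end{lstlisting}
\end{frame}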
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{The Natural \textit{Direct} Effect (NDE)}
\begin{center}
\begin{itemize}
\itemsep10pt
\item Comparison of the mean outcome under only the part of the treatment that
directly affects it ($A = a$) and the placebo treatment (i.e., $A = a'$).
\item Note that the \textit{indirect} effect of the treatment (through the
mediator $M$) is ``turned off'' (i.e., $M(A = a')$).
\end{itemize}
\vspace{1em}
\begin{definition}{Natural \textbf{Direct} Effect}
$$\text{NDE} = \E[Y(a, M(a'))] - \E[Y(a')]$$
\end{definition}
\end{center}
\note{
With additional causal assumptions, the NDE is identified as the
\begin{definition}{Mediation formula}
$$
\sum_{\bm{C}, M} (\E[Y \mid a, M, \bm{C}] - \E[Y \mid a', M, \bm{C}])
p(M \mid a', \bm{C}) p(\bm{C})
$$
\end{definition}
\begin{itemize}
\item Estimation may be performed using plug-in estimators (a sketch follows
on the next slide).
\end{itemize}
}
\end{frame}
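%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c,fragile]{Aside: Plug-in NDE Estimation (sketch)}
\footnotesize
A minimal plug-in sketch of the mediation formula from the previous notes,
assuming discrete $A$, $M$, $\bm{C}$; the function and variable names are
hypothetical, not the paper's implementation:
\begin{lstlisting}[language=Python, basicstyle=\tiny\ttfamily]
import numpy as np

def nde_plugin(Y, A, M, C, a=1, a0=0):
    """Sum_{c,m} (E[Y|a,m,c] - E[Y|a0,m,c]) p(m|a0,c) p(c)."""
    nde = 0.0
    for c in np.unique(C):
        p_c = np.mean(C == c)
        for m in np.unique(M):
            # p(M = m | A = a0, C = c), by empirical frequency
            p_m = np.mean(M[(A == a0) & (C == c)] == m)
            mu_a = Y[(A == a) & (M == m) & (C == c)].mean()
            mu_a0 = Y[(A == a0) & (M == m) & (C == c)].mean()
            nde += (mu_a - mu_a0) * p_m * p_c
    return nde
\end{lstlisting}
All conditional quantities are estimated by empirical frequencies, so each
$(a, m, c)$ cell must be non-empty in the sample.
\end{frame}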
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{The Natural \textit{Indirect} Effect (NIE)}
\begin{center}
\begin{itemize}
\itemsep10pt
\item Comparison of the outcome affected by all treatment (both direct and
indirect) and the outcome where the effect through the mediator ($M$) is
``turned off'' (i.e., $M(A = a')$).
\item Although in a roundabout manner, this quantity captures the
path-specific effect of the treatment on the outcome through the mediator.
\end{itemize}
\vspace{1em}
\begin{definition}{Natural \textbf{Indirect} Effect}
$$\text{NIE} = \E[Y(a)] - \E[Y(a, M(a'))]$$
\end{definition}
\end{center}
\note{
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Example: \textit{Thank You for Smoking}}
For a better intuition of $Y(a, M(a'))$, consider the following:
\begin{center}
\begin{itemize}
\itemsep4pt
\item Let $Y$ be a health outcome (e.g., survival probability), $A$ be a
treatment (e.g., smoking).
\item Consider a decomposition of the effect of $A$ on $Y$ --- that is, let
$M$ be a mediator (e.g., cancer).
\item $A$ affects $Y$ directly (nicotine exposure) and indirectly (inducing
lung cancer, through $M$).
\item Here, $Y(a, M(a'))$ corresponds to ``the response of $Y$ to an
intervention that sets the nicotine exposure (direct effect) to what it
would be in smokers, and the smoke exposure (indirect effect) to what it
would be in non-smokers'' (e.g., nicotine patch).
\end{itemize}
\end{center}
\note{
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Path-Specific Effects}
\begin{center}
\begin{itemize}
\itemsep10pt
\item A more general idea than the NDE and NIE --- such effects are easily
formulated as nested counterfactuals.
\item \textit{Intuition}: along a path of interest, all nodes behave as if the
active rule were imposed (i.e., $A = a$) while, along all other paths, nodes
behave as though the alternative were the case (i.e., $A = a'$).
\end{itemize}
\vspace{1em}
\begin{definition}{Path-Specific Effect (PSE)}
\\
\vspace{1em}
(Along a path, say $A \rightarrow W \rightarrow Y$)
$$\E[Y(a', W(M(a'), a), M(a'))] - \E[Y(a')]$$
\end{definition}
\end{center}
\note{
With more complex assumptions, we get the
\begin{definition}{Edge G-formula}
$$
\sum_{\bm{C}, M, W} \E[Y \mid a', W, M, \bm{C}] p(W \mid a, M, \bm{C})
p(M \mid a', \bm{C}) p(\bm{C})
$$
\end{definition}
\begin{itemize}
\item Estimation may be performed using plug-in estimators (a sketch follows
on the next slide).
\end{itemize}
}
\end{frame}
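%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c,fragile]{Aside: Plug-in Edge G-Formula (sketch)}
\footnotesize
A minimal plug-in sketch of the edge g-formula from the previous notes, for
the PSE along $A \rightarrow W \rightarrow Y$, assuming discrete variables;
names are hypothetical, not the paper's implementation:
\begin{lstlisting}[language=Python, basicstyle=\tiny\ttfamily]
import numpy as np

def edge_gformula(Y, A, M, W, C, a=1, a0=0):
    """Sum_{c,m,w} E[Y|a0,w,m,c] p(w|a,m,c) p(m|a0,c) p(c)."""
    out = 0.0
    for c in np.unique(C):
        p_c = np.mean(C == c)
        for m in np.unique(M):
            p_m = np.mean(M[(A == a0) & (C == c)] == m)
            for w in np.unique(W):
                # only the edge A -> W receives the active level a
                p_w = np.mean(W[(A == a) & (M == m) & (C == c)] == w)
                mu = Y[(A == a0) & (W == w) & (M == m) & (C == c)].mean()
                out += mu * p_w * p_m * p_c
    return out

def pse(Y, A, M, W, C, a=1, a0=0):
    # subtract E[Y(a0)], identified by adjusting for baseline C
    ey_a0 = sum(Y[(A == a0) & (C == c)].mean() * np.mean(C == c)
                for c in np.unique(C))
    return edge_gformula(Y, A, M, W, C, a, a0) - ey_a0
\end{lstlisting}
\end{frame}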
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Finding Fairness}
\begin{center}
\begin{itemize}
\itemsep4pt
\item Much work has focused on defining fairness via associative relationships
(including equalized odds). Such criteria can produce unintuitive results
when the sensitive feature is not randomly assigned.
\item Here, an approach that ought to provide intuitive results (wrt
fairness), even when the sensitive attribute is associated with the outcome
(perhaps by way of an unobserved feature), is proposed.
\item Associative fairness metrics fail to properly model sources of
confounding (between $S$ and $Y$).
\item Generally, this failure is rooted in the fact that ``counterfactual
probabilities are complex functions of the observed data, not just
conditional densities.''
\end{itemize}
\end{center}
\note{
To see the problem with associative fairness criteria, consider the following
example (see section 4, paragraph 3 of paper for details):
\begin{itemize}
\itemsep10pt
\item Letting $p(H \mid C, G)$ be a hiring rule, we would assess fairness as
follows: $p(H = 1 \mid C = 1) = p(H = 1 \mid C = 0) \approx 0.022$, using
associative fairness criteria.
\item Intuitively, fairness of a hiring rule would lead to equal hiring
probabilities for cases and controls in a hypothetical randomized trial
(RCT) --- i.e., $p(H(C = 1)) = p(H(C = 0))$.
\item In our example, application of the adjustment formula would yield $p(H(C
= 1)) = 0.025$ and $p(H(C = 0)) = 0.25$, a rather striking difference.
\item The difference between $p(H(C = 0))$ and $p(H \mid C = 0)$ is driven by
extreme values of $p(C \mid G)$.
\item This is basically Simpson's paradox, or rather close to it (a toy
numerical sketch follows on the next slide).
\end{itemize}
}
\end{frame}
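%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c,fragile]{Aside: Adjustment vs.\ Conditioning (sketch)}
\footnotesize
A toy numerical sketch (the probabilities below are hypothetical, not the
paper's) of how the adjustment formula $p(H(c)) = \sum_g p(H = 1 \mid c, g)
p(g)$ can diverge from $p(H = 1 \mid c)$ when $p(C \mid G)$ is extreme:
\begin{lstlisting}[language=Python, basicstyle=\tiny\ttfamily]
# hypothetical distributions for the hiring example
p_g = {0: 0.5, 1: 0.5}                  # p(G = g)
p_c1_given_g = {0: 0.01, 1: 0.99}       # p(C = 1 | G = g), extreme
p_h = {(1, 0): 0.50, (1, 1): 0.01,      # p(H = 1 | C = c, G = g)
       (0, 0): 0.20, (0, 1): 0.30}

# interventional: adjust for the confounder G
p_h_do_c1 = sum(p_h[(1, g)] * p_g[g] for g in p_g)

# observational: condition on C = 1 via Bayes' rule
p_c1 = sum(p_c1_given_g[g] * p_g[g] for g in p_g)
p_h_obs_c1 = sum(p_h[(1, g)] * p_c1_given_g[g] * p_g[g]
                 for g in p_g) / p_c1

print(p_h_do_c1, p_h_obs_c1)  # 0.255 vs ~0.015: a striking gap
\end{lstlisting}
\end{frame}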
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{In Pursuit of ``Fair Inference''}
\begin{center}
\begin{itemize}
\itemsep10pt
\item Fairness is, at its core, rooted in counterfactuals. Thus, we can see
\textit{``fair inference''} as a branch of causal inference wherein the
counterfactuals to be considered are with respect to a ``fair'' world.
\item \textit{Discrimination} may be expressed as the presence of a particular
PSE, with choice of the specific PSE left as a domain-specific issue.
\item Thus, minimization of specific PSEs corresponds to minimizing
discrimination and is a problem of constrained inference on statistical
models.
\end{itemize}
\end{center}
\note{
\textit{From the legal literature:} \\
``The central question in any employment-discrimination case is whether the
employer would have taken the same action had the employee been of a different
race (age, sex, religion, national origin, etc.) and everything else had been
the same.''
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Fairness as PSE Minimization}
\begin{center}
\begin{itemize}
\itemsep10pt
\item Let $p(Y, \bm{X})$ be a statistical model, assumed to be induced by a
\textit{causal model}.
\item Discrimination (wrt $Y$ based on $S \in \bm{X}$) in this model is a PSE,
identified as the functional $f(p(Y, \bm{X}))$.
\item Let $(\epsilon_l, \epsilon_u)$ be lower and upper bounds on the PSE,
giving the degree of unfairness considered tolerable (n.b., the PSE is
removed entirely in the special case $\epsilon_l = \epsilon_u = 0$).
\item \underline{\textbf{Proposal}}: transform $p(Y, \bm{X})$ into $p^*(Y,
\bm{X})$ under the constraint that the PSE of interest lies within
$(\epsilon_l, \epsilon_u)$, where the two distributions are close in the
sense of KL-divergence.
\end{itemize}
\end{center}
\note{
\begin{definition}{Kullback-Leibler Divergence}
$$\infdiv{P}{Q} = \int_{-\infty}^{\infty} p(x) \log \frac{p(x)}{q(x)}
\,dx$$
\end{definition}
\begin{itemize}
\item Why do we restrict ourselves to KL-divergence?
\item Well, as we'll see shortly, our approach amounts to a constrained
likelihood maximization problem. This makes KL-divergence a natural choice.
\item Were we to consider formulating our fairness metric differently, we
could use other distance metrics (e.g., Hellinger distance, total variation
distance).
\end{itemize}
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Finding Fair Worlds I}
\begin{center}
\begin{itemize}
\itemsep10pt
\item \underline{\textbf{Proposal}}: We can make \textit{any} function of $p$
\textbf{\textit{fair}}, merely by computing it from $p^*$ (instead of from
$p$).
\item To ensure fairness, we must make inference only in the ``fair world'',
just as we only perform inference on counterfactuals in causal inference.
\item To do this, map any $x^i$ from $p$ to a sensible version of it drawn
from $p^*$ --- i.e., find a $g: x^i_{p} \mapsto x^i_{p^*}$.
\item I want to be fair, so what exactly do I do?
\end{itemize}
\end{center}
\note{
\begin{itemize}
\item The intuition here is simply that $p^*$ includes all the same
information that was found in $p$, except the information that leads to
discrimination (i.e., that used in the undesirable PSE).
\item We'll shortly see what it means to construct a fair world $p^*$.
\end{itemize}
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Finding Fair Worlds II}
\begin{center}
\begin{itemize}
\itemsep10pt
\item Consider the following general setup:
\begin{itemize}
\itemsep3pt
\item finite samples $\D$ drawn from $p(Y, \bm{X})$
\item a likelihood function $\lik_{Y, \bm{X}}(\D; \alpha)$
\item a discriminative PSE $f(p(Y, \bm{X}))$ with bounds $(\epsilon_l,
\epsilon_u)$
\item an estimator of the PSE $g(\D)$.
\end{itemize}
\item We obtain fairness by solving:
$$\hat{\alpha} = \argmax_{\alpha} \lik_{Y, \bm{X}} (\D; \alpha),$$
subject to $\epsilon_l \leq g(\D) \leq \epsilon_u.$
\item In this setup, fairness is achieved by constraining parts of $p(Y,
\bm{X}; \alpha),$ with the choice of $g$ determining exactly what is
constrained (see the sketch after this frame).
\end{itemize}
\end{center}
\note{
\begin{itemize}
\item Note that knowing $p$ and $p^*$ exactly would make the problem rather
easy, since we would merely use $x_{\bm{W}}^i,$ where $\bm{W}$ is simply the
largest subset of $\bm{X}$ that constrains the PSE in a desirable manner
(i.e., where $p(\bm{W}) = p^*(\bm{W})$).
\item Further, it's important to consider ``large'' statistical models ---
perhaps infinite-dimensional models? --- in order to ensure that prediction
(and inference, if so desired) can be optimized with respect to
out-of-sample observations.
\item In regard to both prediction and inference, it turns out that most
estimators that rely on the outcome ($Y$) model are not robust to its
misspecification. Thus, it is important to either use simpler (IPW)
estimators or more modern approaches (e.g., \textit{triply robust}
estimators).
\end{itemize}
}
\end{frame}
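%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c,fragile]{Aside: Constrained MLE (sketch)}
\footnotesize
A minimal sketch of the constrained likelihood maximization, under strong
simplifying assumptions: linear-Gaussian outcome and mediator models, in
which the NDE reduces to the direct coefficient $\gamma_a$. All names are
hypothetical; this is not the paper's implementation.
\begin{lstlisting}[language=Python, basicstyle=\tiny\ttfamily]
import numpy as np
from scipy.optimize import NonlinearConstraint, minimize

def neg_loglik(theta, Y, A, M, C):
    # theta = (g0, ga, gm, gc, b0, ba, bc); unit-variance Gaussian errors
    g0, ga, gm, gc, b0, ba, bc = theta
    r_y = Y - (g0 + ga * A + gm * M + gc * C)  # outcome model
    r_m = M - (b0 + ba * A + bc * C)           # mediator model
    return 0.5 * (np.sum(r_y ** 2) + np.sum(r_m ** 2))

def fit_fair(Y, A, M, C, eps_l, eps_u):
    # fairness constraint: eps_l <= NDE (= theta[1]) <= eps_u
    pse = NonlinearConstraint(lambda th: th[1], eps_l, eps_u)
    res = minimize(neg_loglik, np.zeros(7), args=(Y, A, M, C),
                   method="trust-constr", constraints=[pse])
    return res.x
\end{lstlisting}
For a PSE that is a nonlinear functional of $\theta$ (e.g., one identified
by the edge g-formula), the constraint function would evaluate that
functional instead.
\end{frame}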
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Fairness is (Partial?) (Un)Awareness}
\begin{center}
\begin{itemize}
\itemsep10pt
\item Since using all of the information contained in $p$ leads to unfairness,
this approach amounts to discarding information that is exclusively in $p$,
relative to $p^*$.
\item The goal of this approach is to use the available information as well as
possible, but only in so far as our inferences are drawn from the ``fair
world.''
\item In this approach, fairness is characterized as the \textit{a priori}
\underline{inadmissibility} of certain paths in the DAG of interest --- that
is, paths other than a single-edge path might cause discrimination.
\end{itemize}
\end{center}
\note{
\begin{itemize}
\item Consider the experiment of the authors using the ``adult'' dataset:
here, there is concern about the direct effect of gender on income class as
well as the effect of gender on income class through marital status. Using
the proposed constrained optimization approach, they are able to avoid
dropping sensitive variables from the analysis while improving upon the
predictive accuracy of ``blind'' models.
\item Specifically, they estimate the PSE in the unconstrained model to be
$3.16$, and then restrict the PSE to lie within $(0.95, 1.00)$ in the
``fair'' (constrained) model. The ``fair'' model achieves an accuracy of
$72\%$, while the ``blind'' model only achieves $42\%$ (the unconstrained
model does better, with an accuracy of $82\%$).
\end{itemize}
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Review: Summary}
\begin{center}
\begin{itemize}
\itemsep6pt
\item Mediation analysis provides a framework under which intuitive
definitions of fairness may be expressed.
\item ``Fair inference'' is analogous to causal inference, except that the
counterfactuals explored refer to a ``fair'' world (n.~b., intentionally
vague).
\item Fairness may be characterized as the absence (or dampening) of a
\textbf{path-specific effect (PSE)}.
\item Restriction of a PSE is easily expressed as a likelihood maximization
problem that constrains the magnitude of the undesirable PSE.
\item This approach to fairness avoids throwing away information (i.e.,
``fairness through unawareness'') but leaves the definition of fairness to
the analyst.
\end{itemize}
\end{center}
\note{
It's always good to include a summary. You've seen this all before.
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% don't want dimming with references
\setbeamercovered{}
\beamerdefaultoverlayspecification{}
\begin{frame}[c,allowframebreaks]{References}
\bibliographystyle{apalike}
\nocite{*}
\bibliography{references}
%\note{Here's some work we've talked about. Go check these out if interested.}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]{Thank you.}
\Large
Slides: \href{https://goo.gl/i3CxL9}{goo.gl/i3CxL9} \quad
\includegraphics[height=5mm]{Figs/cc-zero.png}
\vspace{3mm}
Notes: \href{https://goo.gl/8RWEy5}{goo.gl/8RWEy5}
\vspace{3mm}
Source (repo): \href{https://goo.gl/qJSoz6}{goo.gl/qJSoz6}
\vspace{3mm}
\href{https://www.stat.berkeley.edu/~nhejazi}{\tt
stat.berkeley.edu/\textasciitilde{}nhejazi}
\vspace{3mm}
\href{http://nimahejazi.org}{\tt nimahejazi.org}
\vspace{3mm}
\href{https://twitter.com/nshejazi}{\tt twitter/@nshejazi}
\vspace{3mm}
\href{https://github.com/nhejazi}{\tt github/nhejazi}
\note{Here's where you can find me, as well as the materials from this talk.}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}