# Conditional expectation

In [None]:
import pandas as pd
import numpy as np

from tabulate import tabulate

In [None]:
# Building blocks for the toy data set: two student labels, two test
# numbers, and a 0/1 flag list that runs parallel to `test`.
student = ["R", "S"]
test = [1, 2]
test_difficult = [0, 1]

In [None]:
# Long-format score table: one row per (student, test) pair.
# np.tile cycles through the students (R, S, R, S) while np.repeat holds
# each test attribute for two consecutive rows (1, 1, 2, 2), so the
# columns line up row by row.
score_df = pd.DataFrame(
    dict(
        student=np.tile(student, 2),
        test_number=np.repeat(test, 2),
        test_difficult=np.repeat(test_difficult, 2),
        score=[95, 100, 80, 70],
    )
)
# Show the table grouped by student, then by test number.
tabulate(
    score_df.sort_values(['student', 'test_number']),
    headers='keys',
    tablefmt='html',
)

$$
\mathbf{E} \left[ \text{score} \vert \text{student} = \text{R} \right]
\quad \quad
\text{or}
\quad \quad
\mathbf{E} \left[ \text{score} \vert \text{student} = \text{S} \right]
$$

In [None]:
# E[score | student]: average the scores within each student's rows.
print_series = score_df.groupby('student')['score'].agg('mean')
tabulate(
    print_series.to_frame(),
    headers='keys',
    tablefmt='html',
)

Components of conditional expectation:
$$
\begin{aligned}
\mathbf{E} \left[ \text{score} \vert \text{student} = \text{R} \right] 
&= \sum_{s_i \in \{ 95, 80\}} s_i \mathbf{P} (s_i \vert \text{student} = \text{R}) \\
&= 95 \cdot \mathbf{P} (95 \vert \text{student} = \text{R}) + 80 \cdot \mathbf{P} (80 \vert \text{student} = \text{R}) \\
&= 95 \cdot 0.5 + 80 \cdot 0.5 \\
&= 87.5
\end{aligned}
$$


Conditional expectation given the test difficulty (`test_difficult`: 0 = not difficult, 1 = difficult):
$$
\mathbf{E} \left[ \text{score} \vert \text{difficulty} = 0 \right]
\quad \quad
\text{or}
\quad \quad
\mathbf{E} \left[ \text{score} \vert \text{difficulty} = 1 \right]
$$

In [None]:
# E[score | test_difficult]: average the scores within each difficulty flag.
print_series = score_df.groupby('test_difficult')['score'].agg('mean')
tabulate(
    print_series.to_frame(),
    headers='keys',
    tablefmt='html',
)


In [None]:
# Add a third test (test_difficult = 0) taken by both students.
test_3 = pd.DataFrame({
    'student': student,
    'test_number': np.repeat([3], repeats=2),
    'test_difficult': np.repeat([0], repeats=2),
    'score': [95, 70]
})

# ignore_index=True renumbers the rows 0..n-1. Without it the result keeps
# each input's own labels, so 0 and 1 would appear twice and label-based
# lookups such as .loc[0] would return two rows.
new_score_df = pd.concat([score_df, test_3], ignore_index=True)
new_score_df.sort_values(by=['student', 'test_number'])

Components of conditional expectation after adding test 3:
$$
\begin{aligned}
\mathbf{E} \left[ \text{score} \vert \text{student} = \text{R} \right] 
&= \sum_{s \in \{ 95, 80\}} s \mathbf{P} (s \vert \text{student} = \text{R}) \\
&= 95 \cdot \mathbf{P} (95 \vert \text{student} = \text{R}) + 80 \cdot \mathbf{P} (80 \vert \text{student} = \text{R}) \\
&= 95 \cdot \frac{2}{3} + 80 \cdot \frac{1}{3} \\
&= 90
\end{aligned}
$$


In [None]:
# E[score | student] recomputed on the table that includes test 3.
print_series = new_score_df.groupby('student')['score'].agg('mean')
tabulate(
    print_series.to_frame(),
    headers='keys',
    tablefmt='html',
)

Law of iterated expectations:
$$
\mathbf{E} [ \mathbf{E} [Y \vert X]] = \mathbf{E}[Y]
$$

In [None]:
# Law of iterated expectations: E[E[Y | X]] = E[Y].
# The outer expectation is taken over the distribution of X, so each
# conditional mean E[score | student] must be weighted by P(student),
# i.e. that student's share of the rows. A plain mean of the group means
# only matches E[score] when every group has the same number of rows.
by_student = new_score_df.groupby('student')['score'].agg(['mean', 'size'])
student_prob = by_student['size'] / by_student['size'].sum()
iterated_expectation = (by_student['mean'] * student_prob).sum()

# Compare with a tolerance: the two sides are computed through different
# floating-point operations and need not be bit-identical.
assert np.isclose(iterated_expectation, new_score_df['score'].mean())