# Homework Week 05

Material from Chapters 8 and 9 (Conditional Manatees and Markov Chain Monte Carlo)

## Data and preliminaries

In [1]:
import numpy as np
import pandas as pd
import arviz as az
import matplotlib.pyplot as plt
import pymc3 as pm
from sklearn import preprocessing

import warnings
# Silence FutureWarning messages so they do not clutter cell output
# (the original note attributes these warnings to ArviZ).
warnings.simplefilter(action="ignore", category=FutureWarning)

In [2]:
# Load the wine-judging data; the file uses ";" as its field separator.
wines_path = "Data/Wines2012.csv"
d = pd.read_csv(wines_path, sep=";")
d.head()

Unnamed: 0,judge,flight,wine,score,wine.amer,judge.amer
0,Jean-M Cardebat,white,A1,10.0,1,0
1,Jean-M Cardebat,white,B1,13.0,1,0
2,Jean-M Cardebat,white,C1,14.0,0,0
3,Jean-M Cardebat,white,D1,15.0,0,0
4,Jean-M Cardebat,white,E1,8.0,1,0


### Standardize score (rescale by the maximum)

In [3]:
# Rescale scores by the largest observed score, so the top score maps to 1.
top_score = d["score"].max()
d["score_s"] = d["score"].div(top_score)

### Create index variables for judge and wine

First, change data type to category

In [4]:
# Convert the grouping columns to pandas categoricals so that each level
# gets an integer code.
for cat_col in ("judge", "wine"):
    d[cat_col] = d[cat_col].astype("category")

In [5]:
# Show which integer code corresponds to which judge.
{code: judge_name for code, judge_name in enumerate(d["judge"].cat.categories)}

{0: 'Daniele Meulder',
 1: 'Francis Schott',
 2: 'Jamal Rayyis',
 3: 'Jean-M Cardebat',
 4: 'John Foy',
 5: 'Linda Murphy',
 6: 'Olivier Gergaud',
 7: 'Robert Hodgson',
 8: 'Tyler Colman'}

In [6]:
# Number of distinct category levels; these size the parameter vectors in the models.
n_judge, n_wine = (len(d[cat_col].cat.categories) for cat_col in ("judge", "wine"))
n_judge, n_wine

(9, 20)

Then, create new columns containing just the integer index codes

In [7]:
# Store each categorical's integer codes in a companion index column.
for idx_col, source_col in (("judge_idx", "judge"), ("wine_idx", "wine")):
    d[idx_col] = d[source_col].cat.codes

In [None]:
# Preview only the first rows (enough to confirm the new index columns exist)
# instead of rendering the whole frame.
d.head()

## Question 1

> In this first problem, consider only variation among judges and wines.
Construct index variables of judge and wine and then use these index variables
to construct a linear regression model. Justify your priors. You should
end up with 9 judge parameters and 20 wine parameters.

In [9]:
# Question 1 model: the expected rescaled score is a judge intercept plus a
# wine intercept. Plain integer arrays are used for the lookups so the
# indexing does not rely on implicit pandas-to-tensor conversion.
judge_idx = d["judge_idx"].values
wine_idx = d["wine_idx"].values

with pm.Model() as q1_m1:
    # One intercept per wine (n_wine) and one per judge (n_judge).
    # NOTE(review): both priors are centred on 0.5, so the prior mean of
    # mu is 1.0 -- confirm this is the intended scale for score_s.
    a_wine = pm.Normal("a_wine", 0.5, 0.2, shape=n_wine)
    a_judge = pm.Normal("a_judge", 0.5, 0.2, shape=n_judge)
    mu = a_judge[judge_idx] + a_wine[wine_idx]
    sigma = pm.Exponential("sigma", 1)
    score = pm.Normal("score", mu, sigma, observed=d["score_s"])
    # Fixed seed so the posterior draws are reproducible on re-run.
    q1_m1_trace = pm.sample(1000, random_seed=42)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [sigma, a_judge, a_wine]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 13 seconds.
The number of effective samples is smaller than 25% for some parameters.


In [10]:
# Posterior summary table for the Question 1 model, rounded to two decimals.
with q1_m1:
    q1_m1_res = az.summary(data=q1_m1_trace, round_to=2)
q1_m1_res

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_mean,ess_sd,ess_bulk,ess_tail,r_hat
a_wine[0],0.43,0.05,0.33,0.54,0.0,0.0,354.75,354.75,354.73,645.79,1.01
a_wine[1],0.43,0.05,0.33,0.52,0.0,0.0,383.48,383.48,384.97,802.79,1.01
a_wine[2],0.45,0.05,0.35,0.55,0.0,0.0,348.31,348.31,348.16,693.22,1.01
a_wine[3],0.49,0.05,0.39,0.59,0.0,0.0,379.07,378.12,378.41,878.99,1.0
a_wine[4],0.4,0.05,0.3,0.51,0.0,0.0,378.11,378.11,377.52,766.53,1.0
a_wine[5],0.36,0.05,0.26,0.46,0.0,0.0,357.4,357.4,358.86,752.9,1.01
a_wine[6],0.46,0.05,0.36,0.55,0.0,0.0,351.85,351.85,352.63,703.88,1.01
a_wine[7],0.45,0.05,0.35,0.56,0.0,0.0,387.95,387.95,389.28,642.83,1.0
a_wine[8],0.42,0.05,0.32,0.52,0.0,0.0,358.74,358.74,358.25,813.49,1.01
a_wine[9],0.43,0.05,0.33,0.53,0.0,0.0,373.46,373.46,370.74,694.11,1.0


### Interpretation

Some wines are rated higher than others. Some judges appear to score higher than others.

In [15]:
# Raw mean rescaled score for the judge with index code 8.
d.query("judge_idx == 8")["score_s"].mean()

0.6743589743589743

The judge parameter estimates are consistent (ordinally) with each judge's raw mean score, shown below.

In [18]:
# Raw mean rescaled score for every judge index code.
d["score_s"].groupby(d["judge_idx"]).mean()

judge_idx
0    0.684615
1    0.761538
2    0.760256
3    0.643590
4    0.852564
5    0.802564
6    0.748718
7    0.625641
8    0.674359
Name: score_s, dtype: float64

## Question 2

>Use indicator or index variables to model the influence of these features on
the scores. Omit the individual judge and wine index variables from Problem 1. Do not include interaction effects yet. Again use ulam, justify your priors,
and be sure to check the chains. What do you conclude about the differences
among the wines and judges? Try to relate the results to the inferences in
Problem 1.

### Create additional index variables

In [22]:
# Make flight categorical, record how many levels it has, and show the
# code -> level mapping.
d["flight"] = d["flight"].astype("category")
flight_levels = d["flight"].cat.categories
n_flight = len(flight_levels)
dict(enumerate(flight_levels))

{0: 'red', 1: 'white'}

In [21]:
# Integer code of each row's flight category, stored as its own column.
flight_codes = d["flight"].cat.codes
d["flight_idx"] = flight_codes

### Create model

Again, this is an intercept-only model:

In [23]:
# Question 2 model: the expected rescaled score is the sum of a flight
# intercept, a wine-origin intercept and a judge-origin intercept.
# Plain integer arrays are used for the lookups so the indexing does not
# rely on implicit pandas-to-tensor conversion.
flight_idx = d["flight_idx"].values
wine_amer_idx = d["wine.amer"].values
judge_amer_idx = d["judge.amer"].values

with pm.Model() as q2_m1:
    # NOTE(review): three intercepts, each with a prior centred on 0.5, are
    # added together, so the prior mean of mu is 1.5 -- confirm this is the
    # intended scale for score_s.
    a_flight = pm.Normal("a_flight", 0.5, 0.2, shape=n_flight)
    a_wineA = pm.Normal("a_wineA", 0.5, 0.2, shape=2)
    a_judgeA = pm.Normal("a_judgeA", 0.5, 0.2, shape=2)
    mu = a_flight[flight_idx] + a_wineA[wine_amer_idx] + a_judgeA[judge_amer_idx]
    sigma = pm.Exponential("sigma", 1)
    score = pm.Normal("score", mu, sigma, observed=d["score_s"])
    # Fixed seed so the posterior draws are reproducible on re-run.
    q2_m1_trace = pm.sample(1000, random_seed=42)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [sigma, a_judgeA, a_wineA, a_flight]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 21 seconds.


In [24]:
# Posterior summary table for the Question 2 model, rounded to two decimals.
with q2_m1:
    q2_m1_res = az.summary(data=q2_m1_trace, round_to=2)
q2_m1_res

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_mean,ess_sd,ess_bulk,ess_tail,r_hat
a_flight[0],0.24,0.11,0.04,0.45,0.0,0.0,831.16,784.13,830.41,1021.18,1.0
a_flight[1],0.24,0.11,0.03,0.45,0.0,0.0,835.63,796.14,836.11,1030.17,1.0
a_wineA[0],0.26,0.12,0.06,0.48,0.0,0.0,774.08,774.08,778.22,850.15,1.0
a_wineA[1],0.23,0.12,0.03,0.45,0.0,0.0,773.83,773.83,778.22,820.52,1.0
a_judgeA[0],0.23,0.11,0.04,0.44,0.0,0.0,699.22,699.22,700.23,837.05,1.0
a_judgeA[1],0.26,0.11,0.07,0.48,0.0,0.0,705.39,705.39,706.39,750.07,1.0
sigma,0.14,0.01,0.12,0.15,0.0,0.0,1252.77,1247.34,1260.73,1079.69,1.0


### Interpretation

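Hard to make sense of this: the three sets of intercepts are added together, so the individual values are not interpretable on their own; only the difference within each pair is. From the table, the two flights have essentially the same mean (0.24 vs. 0.24), the second wine-origin level is slightly lower than the first (0.23 vs. 0.26), and the second judge-origin level is slightly higher than the first (0.26 vs. 0.23). The uncertainty of each difference should be read from the posterior of the contrast, not from the individual standard deviations.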

## Question 3

>Now consider two-way interactions among the three features. You should
end up with three different interaction terms in your model. These will be
easier to build, if you use indicator variables. 

>Explain what each interaction means.
Be sure to interpret the model’s predictions on the outcome scale (mu, the
expected score), not on the scale of individual parameters. You can use link
to help with this, or just use your knowledge of the linear model instead.
What do you conclude about the features and the scores? Can you relate
the results of your model(s) to the individual judge and wine inferences from
Problem 1?

In [53]:
# Pull the three feature columns out of the frame as plain arrays.
flights, wine_amer, judge_amer = (
    d[feature_col].values
    for feature_col in ("flight_idx", "wine.amer", "judge.amer")
)

In [61]:
# Question 3 model: the Question 2 main effects plus one term for every
# two-way combination of flight, judge origin and wine origin.
# Indexing uses the arrays `flights`, `wine_amer` and `judge_amer` built in the
# preceding cell instead of re-indexing with pandas Series.
with pm.Model() as q3_m1:
    # main effects
    a_flight = pm.Normal("a_flight", 0.5, 0.2, shape=n_flight)
    a_wineA = pm.Normal("a_wineA", 0.5, 0.2, shape=2)
    a_judgeA = pm.Normal("a_judgeA", 0.5, 0.2, shape=2)

    # interactions: one parameter per cell of each two-way table
    a_fj = pm.Normal("a_fj", 0, 0.2, shape=(n_flight, 2))  # flight x judge origin
    a_fw = pm.Normal("a_fw", 0, 0.2, shape=(n_flight, 2))  # flight x wine origin
    a_jw = pm.Normal("a_jw", 0, 0.2, shape=(2, 2))  # judge origin x wine origin

    # model
    mu = (
        a_flight[flights]
        + a_wineA[wine_amer]
        + a_judgeA[judge_amer]
        + a_fj[flights, judge_amer]
        + a_fw[flights, wine_amer]
        + a_jw[judge_amer, wine_amer]
    )
    sigma = pm.Exponential("sigma", 1)
    score = pm.Normal("score", mu, sigma, observed=d["score_s"])
    # Fixed seed so the posterior draws are reproducible on re-run.
    q3_m1_trace = pm.sample(1000, random_seed=42)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [sigma, a_jw, a_fw, a_fj, a_judgeA, a_wineA, a_flight]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 54 seconds.


In [59]:
# Posterior summary table for the Question 3 model, rounded to two decimals.
with q3_m1:
    q3_m1_res = az.summary(data=q3_m1_trace, round_to=2)
q3_m1_res

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_mean,ess_sd,ess_bulk,ess_tail,r_hat
a_flight[0],0.32,0.16,0.03,0.62,0.0,0.0,1870.59,1785.96,1873.91,1298.02,1.0
a_flight[1],0.32,0.16,0.02,0.63,0.0,0.0,2332.09,2028.72,2327.63,1448.01,1.0
a_wineA[0],0.34,0.16,0.05,0.64,0.0,0.0,2062.21,1757.3,2073.84,1658.37,1.0
a_wineA[1],0.33,0.16,0.03,0.64,0.0,0.0,1773.17,1691.97,1775.26,1493.61,1.0
a_judgeA[0],0.32,0.17,-0.02,0.6,0.0,0.0,1858.13,1674.16,1860.5,1554.6,1.0
a_judgeA[1],0.34,0.16,0.05,0.64,0.0,0.0,2125.07,2002.59,2130.37,1551.13,1.0
"a_fj[0,0]",-0.09,0.16,-0.38,0.2,0.0,0.0,1737.95,1409.39,1733.88,1375.62,1.0
"a_fj[0,1]",-0.08,0.15,-0.36,0.21,0.0,0.0,1770.68,1435.53,1778.66,1531.03,1.0
"a_fj[1,0]",-0.09,0.15,-0.37,0.2,0.0,0.0,1725.13,1181.14,1731.81,1177.07,1.0
"a_fj[1,1]",-0.08,0.16,-0.37,0.21,0.0,0.0,2071.48,1337.98,2066.67,1292.88,1.0


### Interpretation

Yikes, hard to interpret.  Basically, I see no interaction between American wines and American judges.