In [75]:
# Load packages used in this notebook
import os
import json
import pandas as pd

from cmdstanpy import CmdStanModel, cmdstan_path, install_cmdstan

import arviz as az

import bebi103

import bokeh
from bokeh.io import output_notebook, show

%load_ext pycodestyle_magic

%pycodestyle_on

# Glicko2

Final project for Alp Küçükelbir's Machine Learning Probabilistic Programming (COMS6998) course.

**Box's Loop**

![Box's Loop](./boxsloop.png)


## 1. Data

## 2. Model

In [None]:
1. MCMC convergence diagnosis (rhat, pacf, acf, trace plots etc..) - compact şekilde yap talo yap rhat avg koy vb... (mert)
2. VI (OZAN)
3. farklı inference algo için test metriclerini değerlendirmek (ozan)
4. MAPi run et Orjinal inference algoritması check et - ekle convergence a bak (mert)
orjinal infernece algoritmasını nasıl crit edeceğiz? convergence bakabiliriz belki?

ilk set - convergence diagnositcs
ikinci set - kapıştırma

The model we work is based on a popular Bayesian statistical model called Glicko2, an extension of Glicko model, which is the first Bayesian rating model that models uncertainity of player merits in a time dependent manner. In Glicko2, the latent merit/skills of each player are drawn from a Normal distribution, and the variance of normal distribution are drawn from lognormal distribution. Morover, the probability of winning is modeled by a logistic curve. 

In [97]:
glicko_stan = './glicko/glicko.stan'

chess_data = './glicko/chess.data.json'

glicko_stan = CmdStanModel(stan_file=glicko_stan)

with bebi103.stan.disable_logging():
    glicko_mcmc = glicko_stan.sample(
        data=chess_data,
        chains=4,
        iter_sampling=1000,
        adapt_delta=0.95,
        seed=123,
    )

INFO:cmdstanpy:compiling stan program, exe file: /home/mk4139/Glicko2_/glicko/glicko
INFO:cmdstanpy:compiler options: stanc_options={}, cpp_options={}
INFO:cmdstanpy:compiled model file: /home/mk4139/Glicko2_/glicko/glicko


In [119]:
samples = az.from_cmdstanpy(
    posterior=glicko_mcmc,
    posterior_predictive=["score_pp"],
)

In [125]:
ell_ppc = samples.posterior_predictive[
    "score_pp"]

In [129]:
ell_ppc[0] #(chain, sample_size, game_size)


(1000, 15)

Posterior Predictice Checks:

1. Is there a statistically significant difference between number of wins for each player on average?
2. Is there a statistically significant difference wins and losses, on average, in terms of games?
3. Is there a statistically significant difference between average number of wins in each period?


In [106]:
ell_ppc = ell_ppc.transpose('sample', 'score_pp_dim_0')

In [118]:
ell_ppc.shape

(4000, 15)

In [115]:
bokeh.io.show(
    bebi103.viz.predictive_ecdf(
        ell_ppc,
        percentiles=[30, 50, 70, 99],
        data=json.load(open(chess_data))['score'],
        x_axis_label='spindle length [µm]'
    )
)

In [116]:
bokeh.io.show(
    bebi103.viz.predictive_ecdf(
        ell_ppc,
        percentiles=[30, 50, 70, 99],
        data=json.load(open(chess_data))['score'],
        diff=True,
        x_axis_label='spindle length [µm]'
    )
)



In [None]:
bokeh.io.show(
    bebi103.viz.predictive_regression(
        ell_ppc,
        samples_x=d,
        percentiles=[30, 50, 70, 99],
        data=np.vstack((d, ell)).transpose(),
        x_axis_label='droplet diameter [µm]',
        y_axis_label='spindle length [µm]'
    )
)

In [35]:
output_notebook()

bokeh.io.show(
    bebi103.viz.trace(
        samples,
        parameters=["gamma[2,2]"]
    )
)

In [41]:
bokeh.io.show(bebi103.viz.corner(samples, parameters=["gamma[2,2]"]))

In [47]:
ell_ppc = samples_posterior_pred.posterior_predictive[
    "scores"
].stack({"sample": ("chain", "draw")})

AttributeError: 'InferenceData' object has no attribute 'posterior_predictive'

In [43]:
print(az.rhat(samples))

<xarray.Dataset>
Dimensions:         (gamma_dim_0: 10, gamma_dim_1: 13, sigma_sq_dim_0: 10, sigma_sq_dim_1: 13)
Coordinates:
  * gamma_dim_0     (gamma_dim_0) int64 0 1 2 3 4 5 6 7 8 9
  * gamma_dim_1     (gamma_dim_1) int64 0 1 2 3 4 5 6 7 8 9 10 11 12
  * sigma_sq_dim_0  (sigma_sq_dim_0) int64 0 1 2 3 4 5 6 7 8 9
  * sigma_sq_dim_1  (sigma_sq_dim_1) int64 0 1 2 3 4 5 6 7 8 9 10 11 12
Data variables:
    gamma           (gamma_dim_0, gamma_dim_1) float64 1.004 1.002 ... 1.006
    sigma_sq        (sigma_sq_dim_0, sigma_sq_dim_1) float64 1.002 ... 1.011
    tau_sq          float64 1.023
    beta            float64 1.003
    rho             float64 1.023


## 3. Inference

### 1. MCMC

In [None]:
glicko_fit = glicko_stan.sample(data=chess_data, seed=123)

### 2. VI

## 4. Criticism

In [7]:
import numpy as np

In [10]:
!pip install numpy

