# Setup

In [None]:
import arviz as az
import numpy as np
import pandas as pd
import pymc3 as pm
import matplotlib.pyplot as plt
import scipy.stats as st
import seaborn as sns

from typing import List

# Problem

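The German tank problem: given the serial numbers of a small number of captured tanks, estimate the total number of tanks $N$ that were produced. We assume that the tanks are numbered consecutively from 1 to $N$ and that every serial number is equally likely to be observed.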

See e.g.:

* [Wiki](https://en.wikipedia.org/wiki/German_tank_problem)
* [Example 1](https://www.isaacslavitt.com/2015/12/19/german-tank-problem-with-pymc-and-pystan/)
* [Example 2](http://mcleonard.github.io/sampyl/examples/german_tank_problem.html)

## Generating data

In [None]:
# Defining data
def generate_data(true_N: int, size: int, save: bool=True, seed=None):
    """Simulate captured tank serial numbers and optionally save them as CSV.

    Serial numbers are drawn independently and uniformly from the integers
    ``1, ..., true_N`` (both endpoints included), i.e. with replacement.

    Parameters
    ----------
    true_N : int
        True total number of tanks; the largest serial number that can occur.
    size : int
        Number of serial numbers to draw.
    save : bool, default True
        If True, write the data to "german_tanks.csv" in the working directory.
    seed : int, optional
        Seed for a dedicated random generator. If None (default), NumPy's
        global random state is used.

    Returns
    -------
    pandas.DataFrame
        A frame with a single column "serial_no" holding the drawn numbers.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    # `randint` excludes `high`, so add 1 to make serial number `true_N` possible.
    observations = rng.randint(low=1, high=true_N + 1, size=size)
    data = pd.DataFrame({"serial_no": observations})
    if save:
        data.to_csv("german_tanks.csv", index=False)
    return data

# Generate data
# Seed NumPy's global generator so the simulated serial numbers are the same
# on every fresh run of the notebook.
np.random.seed(42)
captured_tanks = 4      # number of serial numbers to observe
german_records = 369    # true number of tanks used to simulate the data
data = generate_data(true_N=german_records, size=captured_tanks)

data

## Sampling from the posterior

In [None]:
with pm.Model() as model:
    # Prior: the total number of tanks is at least the largest observed serial
    # number and at most 10000, with every value in between equally likely.
    N = pm.DiscreteUniform("num_tanks", lower=data["serial_no"].max(), upper=10000)
    # Likelihood: each observed serial number is uniform on 1..N.
    likelihood = pm.DiscreteUniform("observed_sn", lower=1, upper=N, observed=data["serial_no"])

    # Defining MCMC algorithm (NOTE: discrete parameter --> Metropolis)
    step = pm.Metropolis()
    # Initial value: the smallest value of "num_tanks" allowed by the prior
    start = {"num_tanks": data["serial_no"].max()}

    # Sampling; a fixed seed makes the draws reproducible across runs
    trace = pm.sample(draws=100000, step=step, start=start, random_seed=42)
    # Discarding the burn-in period (first 10000 draws)
    burned_trace = trace[10000:]

## Checking convergence

In [None]:
# Trace plot of the post-burn-in samples, for a visual check of mixing and
# convergence.
az.plot_trace(burned_trace)
plt.show()

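According to our model, the expected number of tanks is approximately 369 (the true value used to generate the data). Note that the observed serial numbers are sampled at random, so the exact estimate varies between runs; see the posterior plot and summary below.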

In [None]:
# Plotting posterior distribution
# (based on the post-burn-in samples in `burned_trace`)
az.plot_posterior(burned_trace)
plt.show()

## Summarizing the posterior distribution

In [None]:
# Summary table for the post-burn-in samples; displayed as the cell's output
az.summary(burned_trace)

In [None]:
# Median of the post-burn-in samples of "num_tanks"
np.median(burned_trace["num_tanks"])

# Draft: custom loss function, etc.

In [None]:
def loss(guess, sample, risk=500):
    """Asymmetric loss of guessing `guess` when the true value is `sample`.

    Underestimates (``guess < sample``) are penalized quadratically,
    ``risk * (guess - sample)**2``; all other guesses are penalized linearly,
    ``risk * |guess - sample|``.

    Parameters
    ----------
    guess : scalar
        The guessed value.
    sample : array_like
        True value(s), e.g. posterior samples. Lists and scalars are accepted.
    risk : scalar, default 500
        Multiplicative weight applied to both branches of the loss.

    Returns
    -------
    numpy.ndarray
        Element-wise loss with the same shape as `sample`. The dtype follows
        the usual NumPy promotion of the inputs, so integer inputs give an
        integer result while a fractional `guess` or `risk` is not truncated.
    """
    sample = np.asarray(sample)
    error = guess - sample
    underestimated = guess < sample
    # Build the result from the promoted dtype of the two branches rather than
    # writing into a buffer with `sample`'s dtype, which would silently
    # truncate fractional losses when `sample` holds integers.
    return np.where(underestimated, risk * error**2, risk * np.abs(error))

In [None]:
# Candidate guesses: 0, 100, ..., 1000
guesses = np.arange(11) * 100

In [None]:
# Mean loss over the posterior samples for each candidate guess.
# The samples are extracted once, outside the loop, instead of being looked up
# again for every guess.
num_tanks_samples = burned_trace["num_tanks"]
results = [loss(g, num_tanks_samples).mean() for g in guesses]

plt.plot(guesses, results)
plt.xlabel("Guessed number of tanks")
plt.ylabel("Mean loss over posterior samples")
plt.show()

In [None]:
def deaths(samples):
    """Return five times the natural logarithm of `samples`, element-wise."""
    log_samples = np.log(samples)
    return 5 * log_samples

In [None]:
# Apply `deaths` to the post-burn-in samples of "num_tanks"
x = deaths(burned_trace["num_tanks"])

In [None]:
# Normalized histogram of the transformed posterior samples in `x`
plt.hist(x, density=True)
# Render the figure explicitly, like the other plotting cells, so the cell does
# not print the raw return value of `plt.hist`.
plt.show()

In [None]:
# Share of the values in `x` that lie in the closed interval [20, 30]
np.mean((x >= 20) & (x <= 30))