# Server model steps

This code implements a simple PyMC model and sends the results to a server.

In [48]:
import os

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pyt
import seaborn as sns
import pdb
from matplotlib.gridspec import GridSpec
import xarray as xr
import xarray_einstats
import rdata as rd
import mcbackend
import clickhouse_driver
import networkx as nx
import scipy as sp


In [2]:
# Use ArviZ's dark-grid theme for the figures in this notebook.
az.style.use("arviz-darkgrid")

# Base directories for data (bd) and figures (bf), both expressed relative to
# the current working directory.
bd = f"{os.getcwd()}/../Data/"
bf = f"{os.getcwd()}/../Figures/"

# Allow pandas to display up to 500 rows before truncating a frame.
pd.set_option('display.max_rows', 500)

In [3]:
# Helper functions
def indexall(L):
    """Return the distinct items of ``L`` and the index of every item among them.

    Parameters
    ----------
    L : sequence
        Items to index; they are compared with ``==`` and iterated twice.

    Returns
    -------
    tuple
        ``(labels, positions)`` where ``labels`` is a list of the distinct
        items in first-seen order and ``positions`` is a NumPy array giving,
        for each item of ``L``, its index in ``labels``.
    """
    labels = []
    for item in L:
        if item not in labels:
            labels.append(item)
    positions = np.array([labels.index(item) for item in L])
    return labels, positions


# Helper functions
def match(a, b):
    """Look up each element of ``a`` in the list ``b``.

    Returns a NumPy array holding, for every element of ``a``, the index of
    its first occurrence in ``b``, or ``None`` when it does not occur in ``b``.
    """
    positions = []
    for item in a:
        positions.append(b.index(item) if item in b else None)
    return np.array(positions)

def zscore(x):
    """Standardise ``x``: subtract its mean, then divide by ``np.std(x)``."""
    centred = x - np.mean(x)
    spread = np.std(x)
    return centred / spread

def unique(series: pd.Series):
    """Return the unique values of a pandas Series, taken after sorting it."""
    ordered = series.sort_values()
    return ordered.unique()

# Simulate data

In [4]:
# True parameters of the line y = m*x + b and of the Gaussian noise scale
true_m = 2.5
true_b = 1.0
true_sigma = 0.5

# Seeded random generator: without a fixed seed every kernel restart produced
# different data, so results could not be reproduced with Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# Generate x (50 evenly spaced points on [0, 10]) and the noisy response y
x = np.linspace(0, 10, 50)
y = true_m * x + true_b + rng.normal(0, true_sigma, size=len(x))

# Connect to server

In [5]:
# Initialize backend
# Connection details are read from the environment so that no secret is stored
# in the notebook. CLICKHOUSE_PASSWORD is required (KeyError if it is unset);
# host and database fall back to the values this notebook used before.
# NOTE: the password that used to be hard-coded in this cell should be rotated.
# For very long runs, additionally pass send_receive_timeout=7200 and
# settings={'max_execution_time': 7200} to the client.
ch_client = clickhouse_driver.Client(
    host=os.environ.get("CLICKHOUSE_HOST", "129.173.118.118"),
    password=os.environ["CLICKHOUSE_PASSWORD"],
    database=os.environ.get("CLICKHOUSE_DATABASE", "gsmtdb"),
)
# Backend object: passed to pm.sample(trace=...) and used to list/fetch runs
ch_backend = mcbackend.ClickHouseBackend(ch_client)

# Run initial model

In [6]:
# Linear regression in PyMC: y ~ Normal(m * x + b, sigma)
with pm.Model() as linear_model:
    # Normal(0, 10) priors on slope and intercept, HalfNormal(1) on the noise scale
    m = pm.Normal("m", mu=0, sigma=10)
    b = pm.Normal("b", mu=0, sigma=10)
    sigma = pm.HalfNormal("sigma", sigma=1)

    # Linear predictor, then the likelihood of the simulated observations y
    expected_y = m * x + b
    y_obs = pm.Normal("y_obs", mu=expected_y, sigma=sigma, observed=y)

In [7]:
# Check initial log-probabilities: sanity check before sampling that shows one
# log-probability per model variable (m, b, sigma, y_obs).
# NOTE(review): presumably evaluated at the model's initial point — confirm.
linear_model.point_logps()

{'m': -3.22, 'b': -3.22, 'sigma': -0.73, 'y_obs': -5934.48}

# Sample to server

In [8]:
# Sample the first model with the ClickHouse backend passed as `trace`; the
# value returned by pm.sample is not kept in this cell.
with linear_model:
    # Regular model: default sampler settings
    pm.sample(trace=ch_backend)
    # Huge model: alternative call with explicit draws/tune and log_likelihood disabled
    #pm.sample(draws=500, tune=1000, trace=ch_backend, idata_kwargs=dict(log_likelihood=False))
print('Done sampling')

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [m, b, sigma]


KeyboardInterrupt: 

----> HARD BREAK <----

Next, we'll pull the posteriors from the server and use them as our new priors for a second set of simulated data. First, let's simulate the data.

In [9]:
# Generate the second data set (x2, y2) from the same true line.
# y2 is now computed from x2 itself: it used to be built from the first grid
# `x`, which only worked because the two grids happen to be identical.
# A dedicated seeded generator keeps this draw reproducible; its seed should
# differ from any seed used for the first data set so the noise is independent.
rng2 = np.random.default_rng(2024)
x2 = np.linspace(0, 10, 50)
y2 = true_m * x2 + true_b + rng2.normal(0, true_sigma, size=len(x2))

Now pull the traces for the variables of interest.

In [15]:
# List backend runs available: the result is displayed as a table indexed by
# run id (rid) with `created_at` and `proto` columns.
rxid = ch_backend.get_runs()
rxid

Unnamed: 0_level_0,created_at,proto
rid,Unnamed: 1_level_1,Unnamed: 2_level_1
C341T,2024-06-28 21:52:42.099999+00:00,"RunMeta(rid='C341T', variables=[Variable(name=..."
ZAH7N,2024-07-02 16:16:10.601239+00:00,"RunMeta(rid='ZAH7N', variables=[Variable(name=..."
FC3L8,2024-11-28 13:52:01.623504+00:00,"RunMeta(rid='FC3L8', variables=[Variable(name=..."
173RB,2024-12-09 14:09:39.906066+00:00,"RunMeta(rid='173RB', variables=[Variable(name=..."
R1L4W,2024-12-10 16:12:00.929991+00:00,"RunMeta(rid='R1L4W', variables=[Variable(name=..."
AAPFY,2024-12-10 16:16:01.167073+00:00,"RunMeta(rid='AAPFY', variables=[Variable(name=..."
PP33R,2024-12-10 16:20:55.592753+00:00,"RunMeta(rid='PP33R', variables=[Variable(name=..."


In [20]:
# Fetch the run from the database (downloads just metadata from most recent run).
# rxid.index[-1] is the last run id in the listing.
# NOTE(review): that is the most recent run only if get_runs() returns rows
# ordered by created_at — confirm.
model_run = ch_backend.get_run(rxid.index[-1])

In [31]:
# Pull the stored draws for m, b and sigma from the server run. The result is
# used below with az.extract(..., group="posterior") and pm.summary.
# NOTE(review): the previous comment called this a "MultiTrace"; the method
# name indicates an InferenceData object — confirm.
idata_x = model_run.to_inferencedata(var_names=['m','b','sigma'])

In [34]:
# Script to generate interpolated PyMC distribution objects
def from_posterior(param, samples, lower=None, upper=None):
    """Turn posterior draws of one parameter into a ``pm.Interpolated`` prior.

    The density is a Gaussian KDE of ``samples`` evaluated on 100 evenly
    spaced points spanning the sampled range. One zero-density point is added
    on each side so the interpolated density falls linearly to 0 outside the
    sampled range instead of stopping abruptly.

    Parameters
    ----------
    param : str
        Name of the random variable to create.
    samples : array-like
        Posterior draws; must provide ``.min()`` / ``.max()`` returning
        objects with ``.item()`` and be accepted by ``gaussian_kde``.
    lower, upper : float, optional
        Hard bounds of the parameter's support, e.g. ``lower=0`` for a scale
        parameter. The padding points are clamped to these bounds so the
        prior has no support beyond them. With the default ``None`` the
        padding extends 3x the sampled range on that side, as before.
        If a bound lies inside the sampled range, no padding point is added
        on that side and the KDE grid itself is left untouched.

    Returns
    -------
    The variable returned by ``pm.Interpolated``. Call this inside a
    ``with pm.Model()`` block, as the notebook does.
    """
    smin, smax = samples.min().item(), samples.max().item()
    width = smax - smin
    # Local names deliberately differ from the notebook-level x / y data arrays.
    grid = np.linspace(smin, smax, 100)
    density = sp.stats.gaussian_kde(samples)(grid)

    # what was never sampled should have a small probability but not 0,
    # so we'll extend the domain and use linear approximation of density on it
    left = smin - 3 * width
    right = smax + 3 * width
    if lower is not None:
        left = max(left, lower)
    if upper is not None:
        right = min(right, upper)

    # Pad only where the (possibly clamped) end point lies strictly outside
    # the sampled range; this keeps the grid strictly increasing.
    if left < smin:
        grid = np.concatenate([[left], grid])
        density = np.concatenate([[0], density])
    if right > smax:
        grid = np.concatenate([grid, [right]])
        density = np.concatenate([density, [0]])
    return pm.Interpolated(param, grid, density)

In [37]:
# Second PyMC model: same linear likelihood, priors rebuilt from the first posterior
with pm.Model() as linear_model2:
    # Posterior draws of each parameter, taken from the run pulled from the server
    m_draws = az.extract(idata_x, group="posterior", var_names=["m"])
    b_draws = az.extract(idata_x, group="posterior", var_names=["b"])
    sigma_draws = az.extract(idata_x, group="posterior", var_names=["sigma"])

    # Interpolated priors built from those draws.
    # NOTE(review): from_posterior pads the support well beyond the sampled
    # range on both sides, so the prior for sigma may reach below zero — confirm
    # this is acceptable for a scale parameter.
    m = from_posterior('m', m_draws)
    b = from_posterior('b', b_draws)
    sigma = from_posterior('sigma', sigma_draws)

    # Likelihood of the second simulated data set
    expected_y2 = m * x2 + b
    y_obs = pm.Normal("y_obs", mu=expected_y2, sigma=sigma, observed=y2)

In [39]:
# Sample the second model and keep the result in idata_y for the summary below.
with linear_model2:
    # Regular model: result kept as the return value of pm.sample
    idata_y = pm.sample()
    # Regular model, alternative: pass the ClickHouse backend as the trace instead
    #pm.sample(trace=ch_backend)
    # Huge model: explicit draws/tune and log_likelihood disabled
    #pm.sample(draws=500, tune=1000, trace=ch_backend, idata_kwargs=dict(log_likelihood=False))
print('Done sampling')

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [m, b, sigma]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 1 seconds.


Done sampling


In [41]:
# Summary table for idata_x, the run pulled from the server (source of the priors above)
pm.summary(idata_x)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
m,2.44,0.025,2.396,2.49,0.001,0.0,2001.0,1899.0,1.0
b,1.152,0.146,0.879,1.422,0.003,0.002,1980.0,1916.0,1.0
sigma,0.531,0.056,0.426,0.633,0.001,0.001,2109.0,2110.0,1.0


In [42]:
# Summary table for idata_y, the fit of linear_model2 to the second data set
pm.summary(idata_y)

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
m,2.476,0.016,2.448,2.507,0.0,0.0,2157.0,2330.0,1.0
b,1.102,0.09,0.932,1.271,0.002,0.001,2116.0,2465.0,1.0
sigma,0.493,0.037,0.42,0.56,0.001,0.001,2695.0,2668.0,1.0
