In [None]:
import os
import random
from math import log10, floor

import arviz as az
import arviz.labels as azl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm

#Scenario switch: 1 for Scenario B (flow ratio data is incorporated), 0 otherwise.
useratiodata=1
#Noise variance switch: 0 assigns an Inverse Gamma prior on the variance of the noise variables;
#any other value uses plug-in estimates instead.
sigmadeterministic=1

In [None]:
#read the flow and change-in-stock data files; lines starting with '#' are treated as comments
dataflows = pd.read_csv("data/aluminiumflow.txt", comment='#')
datastocks = pd.read_csv("data/aluminiumstock.txt", comment='#')

#project-local helpers used throughout the rest of the notebook
from preprocessingagg import (
    preprocessing,
    createdesignmatrix,
    createratiomatrix,
    createcompactmatrix,
    createcompactratiomatrix,
)
from prior import round_to_poweroften, definepriors
from model import mfamodel
from posteriorpredictive import (
    filelabeler,
    ppplots,
    ppplotsratiodata,
    top10hdi,
)

#preprocess the raw tables; m and N are passed on to every later construction step
(
    availabledatafull,
    dataparentstockneededcols,
    dataparentflowsneededcols,
    processnamesdict,
    allflownumbersmatrix,
    m,
    N,
) = preprocessing(datastocks, dataflows)

#construct the prior from the prior files for flows and stocks

dataflowspriors = pd.read_csv("data/aluminiumflowsprior.txt", comment='#')
datastockspriors = pd.read_csv("data/aluminiumstocksprior.txt", comment='#')

(
    priormean,
    covariancevec,
    truevalues,
) = definepriors(
    datastockspriors,
    dataflowspriors,
    availabledatafull,
    m,
    N,
)

#construct design matrix (together with the data vector and the index sets returned alongside it)

(
    designmatrix,
    datavector,
    availablechildstocksandflows,
    zerostocksandflows,
    stockindex,
    flowindex,
    CoMindex,
) = createdesignmatrix(
    availabledatafull,
    dataparentstockneededcols,
    dataparentflowsneededcols,
    m,
    N,
)

#construct matrices for flow ratio data
#note: createratiomatrix also returns an updated availablechildstocksandflows, which replaces the one above

dataratios = pd.read_csv("data/aluminiumratio.txt", comment='#')
(
    ratiovector,
    ratiomatrixtop,
    ratiomatrixbottom,
    availablechildstocksandflows,
) = createratiomatrix(
    dataratios,
    m,
    N,
    availablechildstocksandflows,
)


#deduplicate the zero-valued variables and keep them in ascending order
zerostocksandflows=sorted(set(zerostocksandflows))

#deduplicate the available variables, drop those flagged as zero, and sort what is left
availablechildstocksandflows=sorted(set(availablechildstocksandflows).difference(zerostocksandflows))

#indices below m are treated as stocks, indices from m upwards as flows
availablechildstocks = [idx for idx in availablechildstocksandflows if idx < m]
availablechildflows = [idx for idx in availablechildstocksandflows if idx >= m]

#process name of every available stock, looked up by its index converted to a string key
availablechildstocksnames=[processnamesdict[str(stock)] for stock in availablechildstocks]

#restrict the design matrix to the stocks and flows which actually exist in the system/are non zero
(
    designmatrixcompact,
    designmatrixstockscompact,
    designmatrixflowscompact,
) = createcompactmatrix(designmatrix, availablechildstocksandflows, m)

#same restriction for the numerator (top) and denominator (bottom) ratio matrices
(
    ratiomatrixtopstockscompact,
    ratiomatrixtopflowscompact,
    ratiomatrixbottomstockscompact,
    ratiomatrixbottomflowscompact,
) = createcompactratiomatrix(
    ratiomatrixtop,
    ratiomatrixbottom,
    availablechildstocksandflows,
    m,
)

#prior mean, prior variances (placed on the diagonal of a matrix) and truevalues for the retained variables only
priormeancompact = priormean[availablechildstocksandflows]
covarianceveccompact = covariancevec[availablechildstocksandflows]
priorcovariancecompact = np.diag(covarianceveccompact)

truevaluescompact = truevalues[availablechildstocksandflows]

In [None]:
#run the Bayesian model; the returned trace and model are reused by every cell below

trace, model = mfamodel(
    priormean,
    covariancevec,
    designmatrix,
    ratiomatrixtop,
    ratiomatrixbottom,
    datavector,
    ratiovector,
    availablechildstocksandflows,
    m,
    stockindex,
    flowindex,
    CoMindex,
    useratiodata,
    sigmadeterministic,
)

In [None]:
#summary of samples including rhat values for each posterior variable
#(the summary table is rounded to 2 decimal places for display)

az.summary(trace).round(2)

In [None]:
#generate traceplots for the change in stock variables, one PDF per stock

labels=[processnamesdict[str(x)] for x in availablechildstocks]

#make sure the output folder exists, otherwise savefig raises FileNotFoundError
stocktracedir="outputgraphstrace"+filelabeler(useratiodata)
os.makedirs(stocktracedir, exist_ok=True)

for i, stockname in enumerate(labels):

    #replace the generic variable name "stocks" in the plot titles by the name of the plotted stock
    labeller = azl.MapLabeller(var_name_map={"stocks": "Stock:"+stockname})

    #coords selects the i-th entry of the "stocks" variable; the two colours are assigned to the chains
    posteriorstocktraceplots=az.plot_trace(trace, var_names="stocks",compact=True,show=True,backend="matplotlib", coords={'stocks_dim_0': [i]},labeller=labeller,legend=True,chain_prop={"color": ['r', 'b']})

    stocktracefig=posteriorstocktraceplots[0,0].get_figure()
    stocktracefig.savefig(os.path.join(stocktracedir, "Stocktrace"+"_"+stockname+filelabeler(useratiodata)+".pdf"))

    #release the figure once it is on disk so that figures do not pile up in memory over the loop
    plt.close(stocktracefig)

In [None]:
#generate traceplots for the flow variables, one PDF per flow

az.rcParams["plot.max_subplots"] = 400 #this increases the maximum plots displayed, default is 40.
#There are around 200 posterior flow plots so without this you only plot the first 40.

#make sure the output folder exists, otherwise savefig raises FileNotFoundError
flowtracedir="outputgraphstrace"+filelabeler(useratiodata)
os.makedirs(flowtracedir, exist_ok=True)

for i in range(0, len(availablechildflows)):
    #find the row of allflownumbersmatrix describing flow i; column 0 holds the flow number as a string,
    #columns 1 and 2 the numbers of the origin and destination process
    relevantrow=np.where(allflownumbersmatrix[:, 0] == str(availablechildflows[i]))
    relevantrow=relevantrow[0][0]
    flownumberfrom=allflownumbersmatrix[relevantrow, 1]
    flownumberto=allflownumbersmatrix[relevantrow, 2]
    flowname=processnamesdict[str(flownumberfrom)]+"_to_"+processnamesdict[str(flownumberto)]

    #replace the generic variable name "flows" in the plot titles by "<origin>_to_<destination>"
    labeller = azl.MapLabeller(var_name_map={"flows": flowname})
    posteriorflowtraceplots=az.plot_trace(trace, var_names="flows",compact=True,show=True,backend="matplotlib", coords={'flows_dim_0': [i]},labeller=labeller,legend=True,chain_prop={"color": ['r', 'b']})

    flowtracefig=posteriorflowtraceplots[0,0].get_figure()
    flowtracefig.savefig(os.path.join(flowtracedir, "Flowtrace"+flowname+filelabeler(useratiodata)+".pdf"))

    #release the figure once it is on disk so that figures do not pile up in memory over the loop
    plt.close(flowtracefig)

In [None]:
#plot marginal posterior distribution for change in stock variables, one PDF per stock

labels=[processnamesdict[str(x)] for x in availablechildstocks]

#make sure the output folder exists, otherwise savefig raises FileNotFoundError
stockplotdir="outputgraphs"+filelabeler(useratiodata)
os.makedirs(stockplotdir, exist_ok=True)

#entries of truevalues belonging to the plotted stocks, selected once instead of on every iteration
truevaluesstocks=truevalues[availablechildstocks]

for i, stockname in enumerate(labels):
    #coords selects the i-th entry of the "stocks" variable; the 95% HDI is annotated on the plot
    posteriorstockplots=az.plot_posterior(trace, var_names="stocks",show=False,backend="matplotlib",round_to=3, hdi_prob=0.95,  coords={'stocks_dim_0': [i]}, textsize=16,figsize=[9.6, 2.4])
    posteriorstockplots.title.set_text("Stock"+":"+stockname)

    #draw the red reference line on the axes of this plot instead of relying on pyplot's current axes
    posteriorstockplots.axvline(x=truevaluesstocks[i], color="red")

    stockfig=posteriorstockplots.get_figure()
    stockfig.savefig(os.path.join(stockplotdir, "Stock"+"_"+stockname+filelabeler(useratiodata)+".pdf"))

    #display the finished figure, then release it so that figures do not pile up in memory over the loop
    plt.show()
    plt.close(stockfig)

In [None]:
#plot marginal posterior distribution for flow variables, one PDF per flow

az.rcParams["plot.max_subplots"] = 400 #this increases the maximum plots displayed, default is 40.
#There are around 200 posterior flow plots so without this you only plot the first 40.

#make sure the output folder exists, otherwise savefig raises FileNotFoundError
flowplotdir="outputgraphs"+filelabeler(useratiodata)
os.makedirs(flowplotdir, exist_ok=True)

#entries of truevalues belonging to the plotted flows, selected once instead of on every iteration
truevaluesflows=truevalues[availablechildflows]

for i in range(0, len(availablechildflows)):
    #find the row of allflownumbersmatrix describing flow i; column 0 holds the flow number as a string,
    #columns 1 and 2 the numbers of the origin and destination process
    relevantrow=np.where(allflownumbersmatrix[:, 0] == str(availablechildflows[i]))
    relevantrow=relevantrow[0][0]
    flownumberfrom=allflownumbersmatrix[relevantrow, 1]
    flownumberto=allflownumbersmatrix[relevantrow, 2]

    #coords selects the i-th entry of the "flows" variable; the 95% HDI is annotated on the plot
    posteriorflowplots=az.plot_posterior(trace, var_names="flows",show=False,backend="matplotlib", round_to=3, hdi_prob=0.95,coords={'flows_dim_0': [i]}, textsize=16, figsize=[9.6, 2.4])

    #draw the red reference line on the axes of this plot instead of relying on pyplot's current axes
    posteriorflowplots.axvline(x=truevaluesflows[i], color="red")
    posteriorflowplots.title.set_text(processnamesdict[str(flownumberfrom)]+" to "+processnamesdict[str(flownumberto)])

    flowfig=posteriorflowplots.get_figure()
    flowfig.savefig(os.path.join(flowplotdir, processnamesdict[str(flownumberfrom)]+"_to_"+processnamesdict[str(flownumberto)]+filelabeler(useratiodata)+".pdf"))

    #display the finished figure, then release it so that figures do not pile up in memory over the loop
    plt.show()
    plt.close(flowfig)


In [None]:
#posterior predictive samples
#drawn for the fitted model from the posterior trace; random_seed is fixed so that the draws are repeatable

posterior_pred=pm.sample_posterior_predictive(trace=trace, model=model, random_seed=123456)

In [None]:
#plot posterior predictive 95% HDI and p values, for flow and change in stock data, and CoM conditions.
#ppplots is the project helper imported from posteriorpredictive; stockindex, flowindex and CoMindex are
#the index sets returned by createdesignmatrix.
#NOTE(review): CoM presumably stands for conservation of mass - confirm.

ppplots(posterior_pred,datavector,stockindex,flowindex,CoMindex,useratiodata)

In [None]:
#plot posterior predictive 95% HDI and p values, for ratio data, scenario B only.
#NOTE(review): useratiodata is passed to the helper; presumably it skips the plots when useratiodata is 0 -
#verify in posteriorpredictive.py.

ppplotsratiodata(posterior_pred,ratiovector,useratiodata)

In [None]:
#plot top 10 widths of marginal posterior 95% HDI
#the value returned by top10hdi is kept in ci_95_length so that it can be summarised afterwards

ci_95_length=top10hdi(trace,processnamesdict,availablechildstocksandflows,useratiodata,m)

In [None]:
#print the mean, the maximum, the minimum and the shape of ci_95_length, one per line
for summarise in (np.mean, np.max, np.min, np.shape):
    print(summarise(ci_95_length))

In [None]:
#save the posterior trace to a NetCDF file; the file name contains the scenario label from filelabeler(useratiodata)
trace.to_netcdf("model"+filelabeler(useratiodata)+".nc")

In [None]:
#show which file is about to be read, then load the stored trace from it
#note: this overwrites the in-memory trace with the contents of the file
print("model"+filelabeler(useratiodata)+".nc")
trace = az.from_netcdf("model"+filelabeler(useratiodata)+".nc")