# Posterior predictions calculations
The following code calculates the posterior predictions for the present buckwheat distribution, the past buckwheat distribution, and the counterfactual plots.

In [None]:
### Load libraries
library(here) # for paths
library(rstan) # to read the model and calculate the predictions

In [None]:
### Source the utility functions:

source("R/square.r")
source("R/calculatePredictions.r")

In [None]:
### Define the paths (each one is built with here())

# Input files
path2model <- here("outputs", "models", "parabolic_iCAR.rds")
path2env_sd <- here("data", "Sd_Env_by_county.csv")
path2stats <- here("outputs", "03_01_Env_Mean_and_SD.csv")

# Output files
path2predictions <- here("outputs", "05_01_Posterior_Predictions.csv")
# Prefix only: the time slice and ".csv" are appended when the file is written
path2past <- here("outputs", "05_02_Past_predictions_")
path2pred_summary <- here("outputs", "05_03_Predictions_summary.csv")
path2counterfactual <- here("outputs", "05_04_Counterfactual_predictions.csv")

In [None]:
# Load the inputs

fit <- readRDS(file = path2model)
env <- read.csv(file = path2env_sd)
# The first column of the stats file becomes the row names; range
# information is appended to this table further down
stats <- read.csv(file = path2stats, row.names = 1)

In [None]:
### Pull the posterior draws of every parameter out of the fitted model
# permuted = TRUE is the rstan default, written out explicitly
posterior <- rstan::extract(fit, permuted = TRUE)

## Present distribution

In [None]:
### Format new data to extract from the posterior

# Size of the sample taken from the posterior (n) and number of grid points
# used for each counterfactual variable (n_cnt)
n <- 40000
n_cnt <- 1000

# Variables listed in the same order as in the original stan model
var_ordered <- c("BIO10", "BIO17", "BIO4", "BIO9", "npp")

# Select the "<variable>_sd" columns and rename them to the bare variable names
d <- setNames(env[, paste0(var_ordered, "_sd")], var_ordered)

In [None]:
### Posterior predictions for the present-day data (non-counterfactual mode)
predictions <- calculatePredictions(
    fit, d, posterior, n,
    counterfactual = FALSE
)

In [None]:
### Save predictions:
# write.csv() always writes the header row and ignores `col.names` (with a
# warning), so the argument is dropped. The file written is unchanged.
write.csv(predictions, path2predictions, row.names = FALSE)

In [None]:
#### Calculate predictions summary:
# Every column of `predictions` is summarised separately (column == county)
means_pred <- apply(predictions, 2, mean)
quantiles_pred <- apply(predictions, 2, quantile, probs = c(0.05, 0.95))

# Stack quantiles and mean, then transpose so that each county is a row
pred <- t(rbind(quantiles_pred, means_pred))

# Exponentiate before renaming, so the exponentiated columns keep the plain
# names while the original columns receive a "_log" suffix
exp_pred <- exp(pred)
colnames(pred) <- paste0(colnames(pred), "_log")

# Logarithmic columns first, exponentiated columns after
pred <- cbind(pred, exp_pred)

# seq_len() instead of 1:nrow() so an empty table does not yield c(1, 0)
row.names(pred) <- seq_len(nrow(pred))

## Past predictions

In [None]:
### Calculate the predictions for the past:
# Each time slice is written to its own file, and its summary columns are
# appended to `pred`, which must already hold the present-day summary.
timeslices <- c(seq(1000, 8000, by = 1000), 15000)

# Loop over all time slices
for (i in timeslices) {
    print(i)

    # Keep only the columns of env whose name contains "_<time slice>"
    data <- env[, grep(paste0("_", i), colnames(env))]

    # Calculate predictions for the time slice
    predictions <- calculatePredictions(fit, data, posterior, n, counterfactual = FALSE)

    # Save past predictions. write.csv() ignores `col.names` (with a warning),
    # so the argument is dropped; the header row is still written as before.
    write.csv(predictions, paste0(path2past, i, ".csv"), row.names = FALSE)

    ### Also calculate the summary (column == county)
    # NOTE(review): quantiles drop NA values but the mean does not, so a column
    # containing NA gets quantiles with an NA mean. Confirm this is intended.
    means_pred <- apply(predictions, 2, mean)
    quantiles_pred <- apply(predictions, 2, quantile, probs = c(0.05, 0.95), na.rm = TRUE)

    # Stack quantiles and mean, then transpose so that each county is a row
    fpred <- t(rbind(quantiles_pred, means_pred))

    # Exponentiated columns are named "<stat>_<slice>"; the logarithmic ones
    # are named "<stat>_<slice>_log"
    exp_pred <- exp(fpred)
    colnames(exp_pred) <- paste(colnames(fpred), i, sep = "_")
    colnames(fpred) <- paste(colnames(fpred), i, "log", sep = "_")

    # Bind logarithmic and exponentiated predictions
    fpred <- cbind(fpred, exp_pred)

    # Was `nwor(fpred)`: an undefined function that stopped the loop on the
    # first time slice
    row.names(fpred) <- seq_len(nrow(fpred))

    pred <- cbind(pred, fpred)
}

### Write predictions to the file:
write.csv(pred, path2pred_summary)

## Counterfactual predictions

In [None]:
### Calculate counterfactual predictions

# Baseline data: a single row holding the mean of every environmental
# variable, repeated n_cnt times
means <- as.data.frame(rbind(means = colMeans(d)))
means <- means[rep(seq_len(nrow(means)), each = n_cnt), ]

# Per-variable range of the data: row 1 is the minimum, row 2 the maximum
range <- apply(d, MARGIN = 2, FUN = range)

In [None]:
# Add information about the range to the existing stats summary

# One row per variable; the columns are named here rather than by position
range_sd <- t(range)
colnames(range_sd) <- c("low_range_sd", "high_range_sd")

# The stats file is overwritten with these columns further down, so on a
# re-run the table read from disk already contains them. Drop stale copies
# first; the previous positional rename (columns 3:4) was only correct on the
# very first run.
derived_cols <- c("low_range_sd", "high_range_sd", "low_range", "high_range")
stats <- stats[, !(colnames(stats) %in% derived_cols), drop = FALSE]

# NOTE(review): rows are bound by position, which assumes the rows of stats
# are in the same variable order as the columns of d. Confirm.
stats <- cbind(stats, range_sd)

# Convert the range limits back to the original scale using each variable's
# sd and mean
stats$low_range <- stats$low_range_sd * stats$sd + stats$mean
stats$high_range <- stats$high_range_sd * stats$sd + stats$mean

In [None]:
# Replace the stats summary on disk with the extended table (this is the same
# file the table was read from)
write.csv(stats, file = path2stats, row.names = TRUE)

In [None]:
# Loop over all environmental variables
# The bound is var_ordered rather than the leftover `data` object from the
# past-slice loop, which only had the right number of columns by coincidence.
for (i in seq_along(var_ordered)) {
    print(i)

    # Start from the mean of every variable ...
    new_data <- means
    # ... and let variable i sweep its observed range in n_cnt steps
    new_data[, i] <- seq(from = range[1, i], to = range[2, i], length.out = n_cnt)
    values <- new_data[, i] # Values for which predictions are made

    # Calculate predictions for the new data
    predictions <- calculatePredictions(fit, new_data, posterior, n, counterfactual = TRUE)

    # Save counterfactual predictions, one file per variable:
    # "..._predictions.csv" becomes "..._predictions_<variable>.csv".
    # The original call had misplaced parentheses and did not parse. Only the
    # file name is edited, so a directory containing "predictions" is safe.
    out_name <- sub(
        "predictions",
        paste("predictions", var_ordered[i], sep = "_"),
        basename(path2counterfactual),
        fixed = TRUE
    )
    # write.csv() ignores `col.names` (with a warning), so it is not passed
    write.csv(predictions, file.path(dirname(path2counterfactual), out_name), row.names = FALSE)
}