In [None]:
### Load libraries
library(here) # for paths
library(rstan) # to read the model and calculate the predictions

### Source the utility functions:

source(here("R","square.r"))
source(here("R","calculatePredictions.r"))

# Inputs
# Directory holding one fitted model per crop ("<crop>.rds")
path2models <- here("outputs", "models")
# Table read below as `env_pres`
path2env_sd <- here("data", "Sd_Env_by_county.csv")
# Table read below as `env`
path2past_env <- here("data", "env_past_std.csv")
# Table read below as `stats` (and written back to later in this script)
path2stats <- here("data", "Mean_and_SD.csv")


# Outputs
path2outputs <- here("outputs")
path2predictions <- here("outputs", "predictions")
# Listed with the outputs, but this script only reads this file (as `slices`)
path2timeslices <- here("outputs", "chronology_limits.csv")

In [3]:
# Load the tabular inputs. For `stats` and `env_pres` the first CSV column is
# used as row names; `env` keeps the default row numbering.
stats <- read.csv(file = path2stats, row.names = 1)
# Original author's note on this table: change name for present
env_pres <- read.csv(file = path2env_sd, row.names = 1)
env <- read.csv(file = path2past_env)

In [4]:
### Settings used when extracting predictions from the posterior

# Size of the sample taken from the posterior (passed to calculatePredictions)
n <- 40000
# Number of grid points per variable in the counterfactual predictions
n_cnt <- 1000

# Order of the variables as in the original Stan model
var_ordered <- c("BIO10", "BIO17", "BIO4", "BIO9", "npp")



In [5]:
# Crops for which a fitted model is read and predictions are produced
crops <- c("wheat", "barley")

In [6]:
# Select the "<variable>_sd" columns of the present-day table in model order,
# then drop the suffix so the columns carry the bare variable names.
d <- env_pres[, paste0(var_ordered, "_sd")]
colnames(d) <- var_ordered

# Present distribution

In [None]:
# Present distribution: for every crop, read the fitted model, compute the
# posterior predictions for the present-day environment `d`, and save both the
# raw predictions and a per-county summary (5% / 95% quantiles and mean, on
# the log scale and exponentiated).
for (crop in crops) {
    print(crop)
    path2model <- file.path(path2models, paste0(crop, ".rds"))
    path2pred <- file.path(
        path2predictions, paste0(crop, "_posterior_predictions.csv")
    )
    path2pred_s <- file.path(
        path2predictions, paste0(crop, "_posterior_predictions_summary.csv")
    )

    # Read the fitted model and extract its posterior
    fit <- readRDS(path2model)
    posterior <- rstan::extract(fit)

    ### Calculate predictions
    print("Calculating posterior predictions...")
    predictions <- calculatePredictions(
        fit, d, posterior, n,
        counterfactual = FALSE
    )
    print("Saving posterior predictions...")

    ### Save predictions
    # write.csv() always writes the header row: it ignores `col.names` with a
    # warning, so the argument is not passed. The file content is unchanged.
    write.csv(predictions, path2pred, row.names = FALSE)
    print("Calculating summary...")

    ### Also calculate the summary (one column of `predictions` == one county)
    # Note: mean() propagates NA, whereas quantile() is told to drop NA values.
    means_pred <- apply(predictions, 2, mean)
    quantiles_pred <- apply(
        predictions, 2, quantile,
        probs = c(0.05, 0.95), na.rm = TRUE
    )
    # Bind quantiles and mean, then transpose so that each county is a row
    fpred <- t(rbind(quantiles_pred, means_pred))
    # Exponentiated copy; these columns keep the plain names
    exp_pred <- exp(fpred)
    # The untransformed columns get a "_log" suffix
    colnames(fpred) <- paste(colnames(fpred), "log", sep = "_")
    fpred <- cbind(fpred, exp_pred)
    row.names(fpred) <- seq_len(nrow(fpred))
    print("Saving summary...")

    ### Save the summary (header kept, row names omitted)
    write.csv(fpred, path2pred_s, row.names = FALSE)
}

In [25]:
# Read the chronology limits; the first CSV column becomes the row names
slices <- read.csv(file = path2timeslices, row.names = 1)

In [26]:
# Lower-case the row names so they can be indexed with the entries of `crops`
row.names(slices) <- tolower(row.names(slices))

In [27]:
# Transpose so that the former row names become columns (`slices[, crop]`)
# and the former column names become row names. t() returns a matrix.
slices <- t(slices)

In [28]:
# Exploratory cell: labels of the time slices with a value > 0 for the crop
# currently held in `crop` (left over from the loop above).
timeslices <- names(slices[, crop][slices[, crop] > 0])
# Strip the "X" that read.csv puts in front of numeric column names and turn
# the "." separator back into a space
timeslices <- gsub("\\.", " ", gsub("X", "", timeslices))
# Ordered factor whose levels follow the order of appearance in `slices`
timeslices <- factor(timeslices, levels = timeslices, ordered = TRUE)

In [75]:
# Exploratory cell: regular 1000-year grid from 1000 up to the largest slice
# currently held in `timeslices`.
# The upper bound is derived here instead of being read from a global named
# `max`, which this file only assigns further down (inside the per-crop loop);
# run top to bottom, `max` would still be the base function and seq() fails.
timeslices <- seq(
    1000,
    as.numeric(gsub(" BP", "", as.character(max(timeslices)))),
    by = 1000
)


In [31]:
# Exploratory cell: keep only the slices older than 10000 BP.
# The comparison is done on the numeric year so it works both for the numeric
# grid and for "NNNN BP" labels; comparing a number with the string "10000 BP"
# would be a character (lexicographic) comparison.
# NOTE(review): for an ordered factor the original compared level positions;
# this matches only if the levels are in ascending chronological order.
timeslices <- timeslices[
    as.numeric(gsub(" BP", "", as.character(timeslices))) > 10000
]

In [33]:
# Read the fitted model of the crop currently held in `crop`
path2model <- paste0(path2models, "/", crop, ".rds")
fit <- readRDS(file = path2model)

In [None]:
# One-off prediction with whatever `fit`, `data` and `posterior` currently hold.
# NOTE(review): no earlier line of this file assigns `data`; it is first
# created inside the loops further down. Confirm the intended input.
predictions <- calculatePredictions(
    fit, data, posterior, n,
    counterfactual = FALSE
)

In [None]:
# Load the saved barley summary table; the first CSV column holds row names
f <- read.csv(
    file = here("outputs", "predictions", "barley_predictions.csv"),
    row.names = 1
)

In [None]:
# Exploratory single-crop version of the past-prediction loop. It uses the
# `crop`, `fit` and `posterior` currently in the workspace, writes one
# prediction file per time slice and collects the per-slice summaries in `pred`.
# `pred` is built from scratch here; the original appended to a `pred` that
# this cell never initialised.
slice_summaries <- vector("list", length(timeslices))
slice_no <- 0L
for (i in timeslices) {
    print(i)
    slice_no <- slice_no + 1L
    # Keep only the columns whose name contains "_<slice>_"
    data <- env[, grep(paste0("_", i, "_"), colnames(env))]
    # Calculate predictions for the time slice
    predictions <- calculatePredictions(
        fit, data, posterior, n,
        counterfactual = FALSE
    )
    # Save past predictions. write.csv() always writes the header and ignores
    # `col.names` with a warning, so that argument is not passed.
    write.csv(
        predictions,
        paste0(path2predictions, "/", crop, "_predictions_", i, ".csv"),
        row.names = FALSE
    )

    ### Also calculate the summary (one column of `predictions` == one county)
    means_pred <- apply(predictions, 2, mean)
    quantiles_pred <- apply(
        predictions, 2, quantile,
        probs = c(0.05, 0.95), na.rm = TRUE
    )
    # Bind quantiles and mean, then transpose so that each county is a row
    fpred <- t(rbind(quantiles_pred, means_pred))
    exp_pred <- exp(fpred)
    # Exponentiated columns: "<stat>_<slice>"
    colnames(exp_pred) <- paste(colnames(fpred), i, sep = "_")
    # Untransformed (logarithmic) columns: "<stat>_<slice>_log"
    colnames(fpred) <- paste(colnames(fpred), i, "log", sep = "_")
    fpred <- cbind(fpred, exp_pred)
    row.names(fpred) <- seq_len(nrow(fpred))
    slice_summaries[[slice_no]] <- fpred
}
# One block of summary columns per time slice, side by side
pred <- do.call(cbind, slice_summaries)

# Past distributions

In [None]:
# Past distributions: for every crop, work out which time slices to predict,
# compute posterior predictions from the past environment `env` for each slice,
# save the raw predictions per slice, and save one summary table per crop
# (5% / 95% quantiles and mean per county, log scale and exponentiated).
for (crop in crops) {
    ### Time slices for this crop
    # Labels of the slices with a value > 0 in `slices`. The "X" that read.csv
    # puts in front of numeric column names is removed and "." becomes a space.
    slice_labels <- names(slices[, crop][slices[, crop] > 0])
    slice_labels <- gsub("\\.", " ", gsub("X", "", slice_labels))
    # Ordered by appearance in `slices`, so max() is the last flagged slice
    slice_labels <- factor(slice_labels, levels = slice_labels, ordered = TRUE)
    # Not called `max`, to avoid shadowing base::max
    max_bp <- as.numeric(gsub(" BP", "", as.character(max(slice_labels))))
    # Regular 1000-year grid up to that slice
    timeslices <- seq(1000, max_bp, by = 1000)
    if (crop == "buckwheat") {
        timeslices <- c(timeslices, 15000)
    }
    if (crop == "rice") {
        # Use exactly the flagged slices instead of the regular grid. This
        # reads the kept labels; the original read `timeslices` after it had
        # been overwritten by the grid, so the branch did nothing.
        timeslices <- as.numeric(
            gsub(" BP", "", as.character(unique(slice_labels)))
        )
    }

    # Read the fitted model and extract its posterior
    path2model <- file.path(path2models, paste0(crop, ".rds"))
    fit <- readRDS(path2model)
    posterior <- rstan::extract(fit)

    # Loop over all time slices, collecting one summary block per slice
    slice_summaries <- vector("list", length(timeslices))
    slice_no <- 0L
    for (i in timeslices) {
        print(i)
        slice_no <- slice_no + 1L
        # Plain-digit form of the slice (as.character(1e5) would be "1e+05")
        slice_id <- format(i, scientific = FALSE, trim = TRUE)
        # Columns of the relevant time slice: "_<slice>" must not be followed
        # by another digit, otherwise "_1000" would also pick up "_10000".
        # NOTE(review): columns are taken in the order they occur in `env`;
        # confirm this matches `var_ordered`, as the model expects.
        slice_cols <- grep(paste0("_", slice_id, "($|[^0-9])"), colnames(env))
        data <- env[, slice_cols, drop = FALSE]
        # Calculate predictions for the time slice
        predictions <- calculatePredictions(
            fit, data, posterior, n,
            counterfactual = FALSE
        )
        # Save past predictions. write.csv() always writes the header and
        # ignores `col.names` with a warning, so that argument is not passed.
        write.csv(
            predictions,
            file.path(
                path2predictions,
                paste0(crop, "_predictions_", slice_id, ".csv")
            ),
            row.names = FALSE
        )

        ### Also calculate the summary (one column == one county)
        # Note: mean() propagates NA, whereas quantile() drops NA values.
        means_pred <- apply(predictions, 2, mean)
        quantiles_pred <- apply(
            predictions, 2, quantile,
            probs = c(0.05, 0.95), na.rm = TRUE
        )
        # Bind quantiles and mean, then transpose: each county is a row
        fpred <- t(rbind(quantiles_pred, means_pred))
        exp_pred <- exp(fpred)
        # Exponentiated columns: "<stat>_<slice>"
        colnames(exp_pred) <- paste(colnames(fpred), slice_id, sep = "_")
        # Untransformed (logarithmic) columns: "<stat>_<slice>_log"
        colnames(fpred) <- paste(colnames(fpred), slice_id, "log", sep = "_")
        fpred <- cbind(fpred, exp_pred)
        row.names(fpred) <- seq_len(nrow(fpred))
        slice_summaries[[slice_no]] <- fpred
    }

    # Summary predictions for the crop: all slices side by side
    pred <- do.call(cbind, slice_summaries)
    write.csv(
        pred,
        file.path(path2predictions, paste0(crop, "_predictions.csv"))
    )
}

In [1]:
 # Write the summary currently held in `pred` for the crop in `crop`
 # (same target file as the write at the end of the per-crop loop above)
 write.csv(
     x = pred,
     file = paste0(path2predictions, "/", crop, "_predictions", ".csv")
 )

# Counterfactual predictions

In [10]:
### Inputs for the counterfactual predictions
# One-row data frame with the mean value of every environmental variable in `d`
means <- as.data.frame(rbind(means = colMeans(d)))
# Repeat that single row n_cnt times (one row per counterfactual grid point)
means <- means[rep(1L, times = n_cnt), ]
# Matrix with one column per variable: row 1 = minimum, row 2 = maximum.
# The name shadows base::range as a variable; later cells read it as `range`.
range <- apply(d, 2, range)

In [29]:
# Add information about the range to the existing stats summary.
# `range` has one column per variable (row 1 = minimum, row 2 = maximum).
# Columns are assigned by name rather than appended and renamed by position,
# so re-running this cell (or starting from a `stats` table that already has
# these columns) overwrites them instead of adding mislabeled duplicates.
# NOTE(review): rows of `stats` are assumed to be in the same order as the
# columns of `d`; values are matched by position, as before. Confirm.
stats <- as.data.frame(stats)
stats$low_range_sd <- unname(range[1, ])
stats$high_range_sd <- unname(range[2, ])
# Same limits rescaled with each variable's own mean and sd
stats$low_range <- stats$low_range_sd * stats[["sd"]] + stats[["mean"]]
stats$high_range <- stats$high_range_sd * stats[["sd"]] + stats[["mean"]]

In [34]:
# Save the augmented stats table (row names are written as the first column).
# NOTE(review): path2stats is the same file `stats` was read from, so this
# overwrites that input in place. Confirm that is intended.
write.csv(x = stats, file = path2stats)

In [None]:
# Counterfactual predictions: for every crop and every environmental variable,
# vary that one variable over its observed range (n_cnt evenly spaced values)
# while all other variables are held at their mean, and save the predictions.
for (crop in crops) {
    print(crop)
    # Read the fitted model and extract its posterior
    path2model <- file.path(path2models, paste0(crop, ".rds"))
    fit <- readRDS(path2model)
    posterior <- rstan::extract(fit)
    # Loop over all environmental variables
    for (i in seq_len(ncol(d))) {
        print(i)
        # Start from the means and replace variable i by a grid over its range
        new_data <- means
        new_data[, i] <- seq(
            from = range[1, i], to = range[2, i],
            length.out = n_cnt
        )
        # Values for which the predictions are made
        values <- new_data[, i]
        # Calculate predictions for the new data
        predictions <- calculatePredictions(
            fit, new_data, posterior, n,
            counterfactual = TRUE
        )
        # Save counterfactual predictions. write.csv() always writes the
        # header and ignores `col.names` with a warning, so it is not passed.
        write.csv(
            predictions,
            file.path(
                path2outputs,
                paste0(
                    crop, "_counterfactual_predictions_", var_ordered[i], ".csv"
                )
            ),
            row.names = FALSE
        )
    }
}