In [None]:
library(here)
library(xlsx)
library(tmap)
library(sf)
library(rgdal)
library(raster)
library(tidyr)
library(stringr)

In [2]:
### Load utility functions and themes
# Both helper scripts live in the project's R/ directory. source() evaluates
# them in the global environment by default, so calling it from inside
# lapply() defines their objects exactly as top-level calls would.
invisible(lapply(
  c("getNicheOutline.R", "theme_tmaps.R"),
  function(script) source(here("R", script))
))

In [3]:
# Define input paths:

# Administrative map of China (directory holding the CHN_adm layers)
path2china <- here("raw_data", "CHN_adm")

# Archaeological record locations
path2buckwheat <- here("data", "ESM_2.csv") # buckwheat occurrence records
path2locations <- here(
  "data",
  "guedesbocinsky2018_crops_across_eurasia_translated.xlsx"
)

# Production and predictions data
path2predictions <- here("outputs", "predictions")

In [4]:
# Define output paths:

# Directory for raster figures of the manuscript
path2outputs <- here("manuscript", "Chapter6", "Figs", "Raster")
# CSV that receives the per-site, per-period crop table
path2sites <- here("data", "site_locations.csv")
# CSV path for chronology limits (not written in this part of the script)
path2limits <- here("outputs", "chronology_limits.csv")

In [5]:
# Import data:
# Outer border of China: layer CHN_adm0, read with sf
outer_border <- st_read(layer = "CHN_adm0", dsn = path2china)
# County polygons: layer CHN_adm3, read with rgdal as a Spatial object
china <- readOGR(layer = "CHN_adm3", dsn = path2china)
# Import production data

Reading layer `CHN_adm0' from data source `G:\My Drive\my_repositories\PhD\raw_data\CHN_adm' using driver `ESRI Shapefile'
Simple feature collection with 1 feature and 70 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: 73.5577 ymin: 18.15931 xmax: 134.7739 ymax: 53.56086
geographic CRS: WGS 84
OGR data source with driver: ESRI Shapefile 
Source: "G:\My Drive\my_repositories\PhD\raw_data\CHN_adm", layer: "CHN_adm3"
with 2409 features
It has 13 fields
Integer64 fields read as strings:  ID_0 ID_1 ID_2 ID_3 


### Load crop locations

In [6]:
# Read the first worksheet of the crop-location workbook
locations <- read.xlsx(path2locations, sheetIndex = 1)

In [7]:
# Include only rows that carry a site name and were not explicitly excluded
# (a row counts as excluded when its Exclude column is non-missing)
has_site <- !is.na(locations$Site)
not_excluded <- is.na(locations$Exclude.)
locations <- locations[has_site & not_excluded, ]

In [8]:
# Coerce both coordinate columns to numeric
locations$Longitude <- as.numeric(locations$Longitude)
locations$Latitude <- as.numeric(locations$Latitude)

In [9]:
# Start the working age-range columns from the published range bounds.
# NOTE(review): the source columns are read with `$`, which partially matches
# data.frame column names -- confirm the exact names in the workbook.
locations[["lower_range"]] <- locations$Age.range.lower..BP
locations[["upper_range"]] <- locations$Age.range.upper..BP

In [47]:
# Fill in missing age-range bounds from the radiocarbon determination:
#   lower_range = 14C age + 2 * (1-sigma uncertainty)
#   upper_range = 14C age - 2 * (1-sigma uncertainty)
# Each bound uses its own missing-value mask on both sides of the assignment,
# so the rows being written always line up with the values being computed.
# (Previously the upper bound selected rows by the *lower* bound's mask while
# reading the 14C age by the *upper* bound's mask.)
missing_lower <- is.na(locations$Age.range.lower..BP)
missing_upper <- is.na(locations$Age.range.upper..BP)

two_sigma <- 2 * locations$X1.sigma.uncertainty

locations$lower_range[missing_lower] <-
  locations$X14C.age.BP[missing_lower] + two_sigma[missing_lower]
locations$upper_range[missing_upper] <-
  locations$X14C.age.BP[missing_upper] - two_sigma[missing_upper]

In [48]:
# Keep only the columns needed downstream: site, coordinates, dating
# information and one presence flag per crop
required_cols <- c(
  "Site", "Longitude", "Latitude",
  "X14C.date.on.cereal.", "lower_range", "upper_range",
  "Wheat", "Barley", "Foxtail.millet", "Broomcorn.millet",
  "Millet..unidentified.", "Rice", "Rice..wild."
)
locations_df <- locations[, required_cols]

In [49]:
# Drop records for which either bound of the age range is still missing
undated <- is.na(locations_df$lower_range) | is.na(locations_df$upper_range)
locations_df <- locations_df[!undated, ]

In [50]:
# Read the buckwheat occurrence records
buckwheat <- read.csv(file = path2buckwheat)

In [51]:
# Presence flags in the TRUE/NA convention:
#   Buckwheat        - TRUE for every taxon except Fagopyrum tataricum
#   TartaryBuckwheat - TRUE only for Fagopyrum tataricum
is_tartary <- buckwheat$Taxonomic_identification == "Fagopyrum tataricum"
buckwheat$Buckwheat <- ifelse(is_tartary, NA, TRUE)
buckwheat$TartaryBuckwheat <- ifelse(is_tartary, TRUE, NA)

# A record counts as directly dated when charred seeds were dated by AMS 14C
buckwheat$X14C.date.on.cereal. <-
  buckwheat$Sample_type == "Charred seeds" &
  buckwheat$Dating_method == "AMS 14C"

In [52]:
# Lookup of sample type per site and buckwheat flag; the site column is
# renamed from Site_short to Site
sample_type <- setNames(
  buckwheat[, c("Sample_type", "Site_short", "Buckwheat")],
  c("Sample_type", "Site", "Buckwheat")
)

In [53]:
# Reduce the buckwheat table to the columns shared with locations_df and give
# them the same names (start_date -> lower_range, end_date -> upper_range)
source_cols <- c(
  "Site_short", "longitude", "latitude", "X14C.date.on.cereal.",
  "start_date", "end_date", "Buckwheat", "TartaryBuckwheat"
)
target_cols <- c(
  "Site", "Longitude", "Latitude", "X14C.date.on.cereal.",
  "lower_range", "upper_range", "Buckwheat", "TartaryBuckwheat"
)
buckwheat <- setNames(buckwheat[, source_cols], target_cols)

In [54]:
# Buckwheat sites that also occur in the crop-location table ...
in_both <- buckwheat$Site %in% locations_df$Site
common_sites <- buckwheat$Site[in_both]
# ... and the distinct coordinates recorded for them in that table
cs_df <- unique(
  locations_df[
    locations_df$Site %in% common_sites,
    c("Site", "Longitude", "Latitude")
  ]
)

In [55]:
# Attach the crop-table coordinates to the buckwheat records by site.
# The clashing coordinate columns get suffixes: .x (buckwheat) and .y (cs_df).
buckwheat <- merge(x = buckwheat, y = cs_df, by = "Site", all = TRUE)

In [56]:
# Where the crop-location table supplies a coordinate (.y), use it; otherwise
# keep the coordinate from the buckwheat records (.x)
no_ref_lon <- is.na(buckwheat$Longitude.y)
no_ref_lat <- is.na(buckwheat$Latitude.y)
buckwheat$Longitude.x <- ifelse(
  no_ref_lon, buckwheat$Longitude.x, buckwheat$Longitude.y
)
buckwheat$Latitude.x <- ifelse(
  no_ref_lat, buckwheat$Latitude.x, buckwheat$Latitude.y
)

In [57]:
# Remove the .y coordinate columns
reference_cols <- c("Longitude.y", "Latitude.y")
buckwheat <- buckwheat[, !(colnames(buckwheat) %in% reference_cols)]

In [58]:
# Restore the plain column names (positional: assumes the eight columns are in
# the order Site, coordinates, dating columns, crop flags)
names(buckwheat) <- c(
  "Site", "Longitude", "Latitude", "X14C.date.on.cereal.",
  "lower_range", "upper_range", "Buckwheat", "TartaryBuckwheat"
)

In [59]:
# Full outer join of the crop records with the buckwheat records on site,
# coordinates and dating columns; unmatched rows from either side are kept
shared_keys <- c(
  "Site", "Longitude", "Latitude",
  "X14C.date.on.cereal.", "lower_range", "upper_range"
)
locations_df <- merge(
  x = locations_df,
  y = buckwheat,
  by = shared_keys,
  all = TRUE
)

In [60]:
# Transform location data into a spatial points data.frame:
# Longitude and Latitude become the point coordinates.
coordinates(locations_df)<-~Longitude+Latitude
# Give the points the same CRS as the county polygons. This sets the CRS
# attribute; NOTE(review): presumably no reprojection is intended -- confirm
# the coordinates are already in the CRS of `china`.
crs(locations_df) <-crs(china)

In [61]:
# Spatial subset: index the points by the `china` polygons.
# NOTE(review): with sp objects this is expected to keep only the points that
# fall inside one of the polygons -- confirm against the sp version in use.
locations_china<-locations_df[china,]

In [88]:
# One point per distinct site name (first occurrence kept), carrying only the
# Site attribute
uq <- locations_china[, "Site"]
first_occurrence <- !duplicated(uq$Site)
uq <- uq[first_occurrence, ]

In [89]:
# Look for distinct sites that share the same location
zd <- zerodist(obj = uq)

In [122]:
# Back to a plain data.frame; note that this replaces the earlier `locations`
# table with the records retained in locations_china
locations <- as.data.frame(x = locations_china)

In [125]:
# Assign occurrence records to 1000-year time slices.
#
# Records whose upper_range is at most 1500 BP form the first slice, labelled
# "1000 BP". Seventeen further slices (start, start + 1000] follow, beginning
# at start = 1500 BP; each is labelled by its midpoint ("2000 BP" up to
# "18000 BP"). A record joins a slice when either of its range bounds falls
# inside it, so one record can appear in several slices.
# NOTE(review): a record whose range spans a whole slice without either bound
# falling inside it is not assigned to that slice -- confirm this is intended.
slice_width <- 1000
slice_starts <- seq(from = 1500, by = slice_width, length.out = 17)

loc <- subset(locations, upper_range <= 1500)
# rep() keeps this valid even when no record falls into the first slice
loc$period <- rep("1000 BP", nrow(loc))

slices <- lapply(slice_starts, function(slice_start) {
  slice_end <- slice_start + slice_width
  hit <-
    (locations$upper_range > slice_start &
       locations$upper_range <= slice_end) |
    (locations$lower_range > slice_start &
       locations$lower_range <= slice_end)
  # Like subset(), treat an NA condition as "not in this slice"
  in_slice <- locations[hit & !is.na(hit), ]
  if (nrow(in_slice) == 0) {
    return(NULL)
  }
  in_slice$period <- paste(slice_start + slice_width / 2, "BP", sep = " ")
  in_slice
})

# Bind all slices in a single call instead of growing `loc` inside a loop
slices <- Filter(Negate(is.null), slices)
loc <- do.call(rbind, c(list(loc), slices))
#older_records<-subset(locations,upper_range>10500 | lower_range>10500)
#older_records$period <- "> 10 000 BP"
#loc<-rbind(loc,older_records)

In [126]:
### Aggregate by period

# The dating columns are no longer needed once every record has a period
dating_cols <- c("lower_range", "upper_range", "X14C.date.on.cereal.")
loc <- loc[, !(names(loc) %in% dating_cols)]

In [127]:
# Remove exact duplicate rows, keeping the first occurrence of each
loc <- loc[!duplicated(loc), , drop = FALSE]

In [129]:
# Aggregate sites assigned to the same period

In [130]:
# Collapse to one row per site, location and period. A crop flag is TRUE when
# any record in the group has it set to TRUE, and NA otherwise.
any_present <- function(flags) {
  if (TRUE %in% flags) TRUE else NA
}
group_cols <- c("Site", "Longitude", "Latitude", "period")
loc <- aggregate(
  loc[, !(names(loc) %in% group_cols)],
  by = list(
    Site = loc$Site,
    Longitude = loc$Longitude,
    Latitude = loc$Latitude,
    period = loc$period
  ),
  FUN = any_present
)

In [131]:
# Turn period into an ordered factor so that periods can be compared.
# Levels: "1000 BP" ... "14000 BP" in 1000-year steps, then "17000 BP".
# NOTE(review): any period label not listed here becomes NA -- confirm the
# data contain no other periods.
period_levels <- paste(c(seq(from = 1000, to = 14000, by = 1000), 17000), "BP")
loc$period <- ordered(loc$period, levels = period_levels)

In [132]:
# Drop buckwheat rows assigned to a period later in the ordering than
# "10000 BP" (i.e. older than 10000 BP).
# The period is tested for NA explicitly: an NA in a logical row index would
# otherwise insert an all-NA row into the result instead of keeping or
# dropping the record. Rows with an NA period are kept.
old_buckwheat <-
  !is.na(loc$period) & loc$period > "10000 BP" & !is.na(loc$Buckwheat)
loc <- loc[!old_buckwheat, ]

In [133]:
# Combined millet flag: TRUE when foxtail, broomcorn or unidentified millet is
# recorded, NA otherwise
no_millet <-
  is.na(loc$Foxtail.millet) &
  is.na(loc$Broomcorn.millet) &
  is.na(loc$Millet..unidentified.)
loc$Millet <- ifelse(!no_millet, TRUE, NA)

In [152]:
 # Attach the buckwheat sample type to each site/period row (left join on Site
 # and the Buckwheat flag; rows without a match get NA).
 # NOTE(review): merge() matches NA keys to each other by default, and several
 # sample_type rows for one key would duplicate the loc row -- confirm both
 # are acceptable here.
 loc <- merge(
   x = loc,
   y = sample_type,
   by = c("Site", "Buckwheat"),
   all.x = TRUE
 )

In [156]:
# Clear the buckwheat flag for the Kyung-lung Mesa site.
# Assigning into the column with an %in% mask is a no-op when the site is
# absent and tolerates NA site names; the row-subset form
# `loc[rows, ]$Buckwheat <- NA` raises an error in both situations.
loc$Buckwheat[loc$Site %in% "Kyung-lung Mesa"] <- NA

In [157]:
# Save the per-site, per-period crop table (row names are written as well)
write.csv(x = loc, file = path2sites)

In [88]:
#crops<-c("rice","millet","buckwheat","wheat","barley")
#locByCrop<-gather(loc, crop,present,str_to_title(crops),na.rm=TRUE)
#d<-aggregate(period~crop,data=locByCrop,FUN="max")