library(Hmisc);library(mice);library(miceadds);library(foreign);library(Zelig);library(Amelia);library(mitools)
library(psych);library(miceadds);library(readr)
# Example data: the built-in `epi` data set that ships with the psych package.
HSIY2014 <- epi
# Raise the print limit so long outputs are not cut off.
options(max.print = 2000000000)
colnames(HSIY2014)
# Fix the RNG seed so every run yields the same set of imputed data.
set.seed(20170327)

# Build the predictor matrix with quickpred(); the thresholds passed here are
# mincor = 0.1 and minpuc = 0.5, using Spearman correlations.
predictormatrix <- quickpred(
  HSIY2014,
  exclude = "id",
  method = "spearman",
  mincor = 0.1,
  minpuc = 0.5
)
# Run the imputation, producing m = 100 imputed data sets.
HSIY2014_imp <- mice(HSIY2014, m = 100, predictorMatrix = predictormatrix)
# Stack the results in long format; include = TRUE also keeps the
# original (unimputed) data in the output.
HSIY2014_ImputedData <- complete(
  HSIY2014_imp,
  action = "long",
  include = TRUE
)
# Save the stacked imputed data as an .RData object and as a .csv file.
# Both are written before the rename below, so they keep the .imp/.id names.
save(HSIY2014_ImputedData, file = "HSIY2014_Imputed.RData")
write.csv(
  HSIY2014_ImputedData,
  file = "HSIY2014_Imputed.csv",
  row.names = FALSE
)
# SPSS cannot read .imp and .id as variable names, so rename them for export.
names(HSIY2014_ImputedData)[1:2] <- c("Imputation", "ImpID")
colnames(HSIY2014_ImputedData)
# Write the SPSS data (.txt) and syntax (.sps) files through the exported
# foreign::write.foreign() entry point instead of reaching into the package
# namespace for the unexported foreign:::writeForeignSPSS(); with
# package = "SPSS" it dispatches to that same writer and forwards `varnames`.
foreign::write.foreign(
  HSIY2014_ImputedData,
  datafile = "HSIY2014_imputed.txt",
  codefile = "HSIY2014_imputed.sps",
  package = "SPSS",
  varnames = names(HSIY2014_ImputedData)
)
# Note: in the generated .sps file, give the full path to the .txt file, e.g.
#   DATA LIST FILE= "C:\FILEPATH.txt"  free (",")
# The SPSS syntax in that .sps file is then used to create the .sav file.


# Read the SPSS .sav file back into R as a data frame, with value labels
# applied (use.value.labels = TRUE).
HSIY2014_imp_R <- spss.get(
  "C:/FILEPATH.sav",
  use.value.labels = TRUE,
  to.data.frame = TRUE
)
colnames(HSIY2014_imp_R)


# Restore the names used on the R side: the first two columns
# (Imputation, ImpID) become .imp and .id again.
names(HSIY2014_imp_R)[1:2] <- c(".imp", ".id")
# Print the column names to confirm the rename took effect.
colnames(HSIY2014_imp_R)

options(max.print = 2000000000)

# Replacement for mice::as.mids() that rebuilds a mids object from multiply
# imputed data stored in long format.
# Source: https://stats.stackexchange.com/questions/73562/analyzing-multiply-imupted-data-from-amelia-in-r-why-do-results-from-zelig-and
#
# Arguments:
#   Data2  Long-format data: rows with imputation number 0 hold the original
#          (incomplete) data, rows numbered 1..m hold the imputed copies.
#   .imp   Column index of the imputation number (default 1).
#   .id    Column index of the row identifier (default 2), or NULL if the
#          data has no identifier column.
# Returns the object created by mice(..., maxit = 0) on the original rows,
# with each entry of its `imp` list overwritten by the imputed values taken
# from Data2.
# Stops with an error if the imputation column contains missing or
# non-numeric values, or if an imputation does not have the same number of
# rows as the original data.
as.mids2 <- function(Data2, .imp=1, .id=2){
  # Decode the imputation numbers. A factor column must go through its labels:
  # as.numeric() on a factor returns the internal level codes (1, 2, ...),
  # which would overstate the number of imputations by one.
  imp_col <- Data2[, .imp]
  imp_no <- if (is.factor(imp_col)) {
    as.numeric(as.character(imp_col))
  } else {
    as.numeric(imp_col)
  }
  if (length(imp_no) == 0L || anyNA(imp_no)) {
    stop("the imputation column must contain non-missing numbers",
         call. = FALSE)
  }
  n_imp <- max(imp_no)

  # Rows of the original (unimputed) data.
  orig_rows <- which(imp_no == 0)
  orig <- Data2[orig_rows, , drop = FALSE]

  # maxit = 0 only sets up the mids structure; no imputation is run here.
  ini <- mice(orig[, -c(.imp, .id)], m = n_imp, maxit = 0)
  var_names <- names(ini$imp)

  orig_ids <- NULL
  if (!is.null(.id)) {
    orig_ids <- orig[, .id]
    rownames(ini$data) <- orig_ids
  }

  # For every imputation, find its rows once (hoisted out of the variable
  # loop) and put them in the same order as the original rows. Rows are
  # aligned by identifier when every original id occurs exactly once in the
  # block; otherwise the order within the block is used as-is.
  rows_by_imp <- lapply(seq_len(n_imp), function(m) {
    rows <- which(imp_no == m)
    if (length(rows) != length(orig_rows)) {
      stop("imputation ", m, " has ", length(rows),
           " rows but the original data has ", length(orig_rows),
           call. = FALSE)
    }
    if (!is.null(orig_ids)) {
      pos <- match(orig_ids, Data2[rows, .id])
      if (!anyNA(pos) && anyDuplicated(pos) == 0L) {
        rows <- rows[pos]
      }
    }
    rows
  })

  for (v in var_names) {
    if (is.null(ini$imp[[v]])) {
      next
    }
    # Cells that were missing in the original data are the ones mice imputes.
    is_missing <- is.na(orig[, v])
    for (m in seq_len(n_imp)) {
      # Select within the imputation's own block instead of recycling the
      # mask across the whole long table, so the result does not depend on
      # the blocks being stored in sorted order.
      ini$imp[[v]][m] <- Data2[rows_by_imp[[m]], v][is_missing]
    }
  }
  ini
}

# Rebuild a mids object from the re-imported long data with as.mids2().
HSIY2014_mids <- as.mids2(HSIY2014_imp_R)
# Turn it back into a long data frame so the two data sets can be compared
# to make sure nothing went wrong in the conversion.
HSIY2014_imp_R_2 <- complete(
  HSIY2014_mids,
  action = "long",
  include = TRUE
)
# Frequency table of V53 before the mids conversion.
table(HSIY2014_imp_R$V53)
# Frequency table of V53 after the mids conversion.
table(HSIY2014_imp_R_2$V53)
# Inconsistency found between these datasets