# ---- Packages ----------------------------------------------------------
# Attach order is kept as in the original script so function masking is
# unchanged.
library(Hmisc)
library(mice)
library(miceadds)
library(foreign)
library(Zelig)
library(Amelia)
library(mitools)
library(psych)
library(readr)

# ---- Data --------------------------------------------------------------
# The example data set is the built-in `epi` data (original note: it comes
# from the psych package).
HSIY2014 <- epi
options(max.print = 2000000000)
colnames(HSIY2014)

# ---- Multiple imputation -----------------------------------------------
# Use this same seed number every time to obtain the same set of imputed
# data.
set.seed(20170327)
predictormatrix <- quickpred(
  HSIY2014,
  exclude = "id",
  method = "spearman",
  mincor = 0.1,
  minpuc = 0.5
)
HSIY2014_imp <- mice(
  HSIY2014,
  m = 100,
  predictorMatrix = predictormatrix
)

# Stack the imputed data sets in long format. include = TRUE is requested so
# the stacked frame also carries the original (un-imputed) data.
HSIY2014_ImputedData <- complete(
  HSIY2014_imp,
  action = "long",
  include = TRUE
)

# ---- Export ------------------------------------------------------------
# Save the imputed data as an .RData object and as a .csv file.
save(HSIY2014_ImputedData, file = "HSIY2014_Imputed.RData")
write.csv(
  HSIY2014_ImputedData,
  file = "HSIY2014_Imputed.csv",
  row.names = FALSE
)

# SPSS cannot read .imp and .id as variable names, so the first two columns
# are renamed before exporting.
names(HSIY2014_ImputedData)[1:2] <- c("Imputation", "ImpID")
colnames(HSIY2014_ImputedData)

# writeForeignSPSS is reached with `:::` (it is not exported by foreign);
# it writes the data file (.txt) and the SPSS syntax file (.sps).
foreign:::writeForeignSPSS(
  HSIY2014_ImputedData,
  "HSIY2014_imputed.txt",
  "HSIY2014_imputed.sps",
  varnames = names(HSIY2014_ImputedData)
)
# Note: in the .sps file, specify the full path to the .txt file:
#   DATA LIST FILE= "C:\FILEPATH.txt" free (",")
# The generated SPSS syntax is then used (in SPSS) to create the .sav file.
# ---- Re-import the SPSS file -------------------------------------------
HSIY2014_imp_R <- spss.get(
  "C:/FILEPATH.sav",
  use.value.labels = TRUE,
  to.data.frame = TRUE
)
colnames(HSIY2014_imp_R)

# Rename variables Imputation and ImpID back to .imp and .id so the long
# data can be converted to a mids object.
names(HSIY2014_imp_R)[1:2] <- c(".imp", ".id")
# Check to see if it worked.
colnames(HSIY2014_imp_R)
options(max.print = 2000000000)

# ---- Long data frame -> mids -------------------------------------------
# Work-around for the bug in mice::as.mids().
# Source: https://stats.stackexchange.com/questions/73562/analyzing-multiply-imupted-data-from-amelia-in-r-why-do-results-from-zelig-and
#
# Convert a stacked ("long") multiply-imputed data frame into a mids object.
#
# Arguments:
#   Data2  Long data frame holding the original data (imputation number 0)
#          followed by / together with the imputed copies (1, 2, ..., m).
#   .imp   Column position of the imputation number in Data2.
#   .id    Column position of the row identifier in Data2, or NULL if there
#          is none.
#
# Returns:
#   The mids object created by mice(..., maxit = 0) on the original rows,
#   with its `imp` slots overwritten by the imputed values found in Data2.
#
# Errors:
#   Stops if the imputation number cannot be read as a number, if an
#   imputation block does not have as many rows as the original data, or if
#   an original row id is missing from an imputation block.
as.mids2 <- function(Data2, .imp = 1, .id = 2) {
  # Decode the imputation number. as.numeric() on a factor returns the
  # internal level codes (1, 2, ...) rather than the labels (0, 1, ...),
  # which would inflate m by one and select the wrong rows, so factors are
  # decoded through their labels. NOTE(review): the column may arrive as a
  # factor after the SPSS round trip -- confirm with str(Data2).
  imp_raw <- Data2[[.imp]]
  imp_no <- if (is.factor(imp_raw)) {
    as.numeric(as.character(imp_raw))
  } else {
    as.numeric(imp_raw)
  }
  if (anyNA(imp_no)) {
    stop("The imputation column contains missing or non-numeric values.",
         call. = FALSE)
  }

  # Hoisted loop invariants: number of imputations and the original rows.
  n_imp <- max(imp_no)
  orig <- Data2[imp_no == 0, , drop = FALSE]

  # maxit = 0 only sets up the mids structure; no imputation model is
  # iterated. The placeholder imputations are overwritten below.
  ini <- mice(orig[, -c(.imp, .id), drop = FALSE], m = n_imp, maxit = 0)
  imputed_vars <- names(ini$imp)

  orig_ids <- NULL
  if (!is.null(.id)) {
    orig_ids <- orig[[.id]]
    rownames(ini$data) <- orig_ids
  }

  # seq_len() yields an empty sequence when n_imp is 0 (1:0 would not).
  for (k in seq_len(n_imp)) {
    block <- Data2[imp_no == k, , drop = FALSE]
    if (nrow(block) != nrow(orig)) {
      stop("Imputation ", k, " has ", nrow(block), " rows but the original ",
           "data has ", nrow(orig), ".", call. = FALSE)
    }

    # Align the block with the original rows by id instead of relying on
    # every block being stored in exactly the same row order.
    if (!is.null(orig_ids)) {
      pos <- match(orig_ids, block[[.id]])
      if (anyNA(pos)) {
        stop("Imputation ", k, " is missing some of the original row ids.",
             call. = FALSE)
      }
      block <- block[pos, , drop = FALSE]
    }

    for (v in imputed_vars) {
      if (!is.null(ini$imp[[v]])) {
        # Column k of imp[[v]] holds imputation k for the rows where the
        # original value of v is missing.
        ini$imp[[v]][k] <- block[[v]][is.na(orig[[v]])]
      }
    }
  }

  ini
}

# ---- Round-trip check --------------------------------------------------
# Convert to a mids object using as.mids2 ...
HSIY2014_mids <- as.mids2(HSIY2014_imp_R)
# ... and back to a data frame, so the two data sets can be compared to make
# sure nothing went wrong.
HSIY2014_imp_R_2 <- complete(HSIY2014_mids, action = "long", include = TRUE)

# Summary before the mids conversion.
table(HSIY2014_imp_R$V53)
# Summary after the mids conversion.
table(HSIY2014_imp_R_2$V53)
# Original author's note: inconsistency found between these data sets.