In [14]:
library(poLCA)

In [None]:
# Load the prepared LCA input table and encode the three categorical
# columns as factors in one step; all other columns are left as read.
df <- transform(
  read.csv("data/LCA_prep_data.csv"),
  admission_type = as.factor(admission_type),
  gender = as.factor(gender),
  age_bucket = as.factor(age_bucket)
)

# Display the loaded table.
df


In [None]:
# Fit latent class models with 6 to 15 classes and keep the best fit under
# three criteria: lowest BIC, lowest AIC, and lowest AIC + BIC.
set.seed(1)

# Start every running minimum at Inf so the first fitted model is always
# recorded. A finite sentinel (previously 1000000) can be smaller than the
# AIC/BIC of every candidate on a large data set, in which case no model
# would be stored and the LCA_best_model_* objects would not exist.
min_aic <- Inf
min_bic <- Inf
min_aic_bic_combined <- Inf

# Manifest variables on the left-hand side; `~ 1` means no covariates.
f <- cbind(
  admission_type, gender, age_bucket,
  congestive_heart_failure, cardiac_arrhythmias, valvular_disease,
  pulmonary_circulation, peripheral_vascular, hypertension, paralysis,
  other_neurological, chronic_pulmonary, diabetes_uncomplicated,
  diabetes_complicated, hypothyroidism, renal_failure, liver_disease,
  peptic_ulcer, aids, lymphoma, metastatic_cancer, solid_tumor,
  rheumatoid_arthritis, coagulopathy, obesity, weight_loss,
  fluid_electrolyte, blood_loss_anemia, deficiency_anemias,
  alcohol_abuse, drug_abuse, psychoses, depression
) ~ 1

for (i in 6:15) {
  # nrep = 7 re-estimates each model from 7 sets of starting values;
  # na.rm = FALSE keeps rows with missing manifest values in the fit.
  lc <- poLCA(f, df, nclass = i, maxiter = 7000,
              tol = 1e-5, na.rm = FALSE,
              nrep = 7, verbose = TRUE, calc.se = TRUE)

  # Track the best model by the sum of AIC and BIC
  if (lc$bic + lc$aic < min_aic_bic_combined) {
    min_aic_bic_combined <- lc$bic + lc$aic
    LCA_best_model_aic_bic_combined <- lc
  }

  # Track the best BIC model
  if (lc$bic < min_bic) {
    min_bic <- lc$bic
    LCA_best_model_bic <- lc
  }

  # Track the best AIC model
  if (lc$aic < min_aic) {
    min_aic <- lc$aic
    LCA_best_model_aic <- lc
  }
}


LCA_best_model_bic  # Best model by BIC
LCA_best_model_aic  # Best model by AIC
LCA_best_model_aic_bic_combined  # Best model by AIC + BIC

In [None]:
# Print the fitted model that had the lowest combined AIC + BIC.
LCA_best_model_aic_bic_combined

In [None]:
# Plot the model that had the lowest combined AIC + BIC.
plot(LCA_best_model_aic_bic_combined)

# Check which latent class each patient falls into

In [None]:
# Show the first rows of the posterior matrix stored on the model that had
# the lowest combined AIC + BIC.
head(LCA_best_model_aic_bic_combined$posterior)

In [None]:
# Assign the predicted class for each observation to a new column in df.
# The labels are taken from the model selected by combined AIC + BIC, the
# same model whose posterior probabilities are previewed and exported in
# this notebook. The BIC-only model may have a different number of classes,
# so using it here would make the class labels and the posterior
# probabilities disagree.
# Guard against silent recycling if the fit covers fewer rows than df.
stopifnot(length(LCA_best_model_aic_bic_combined$predclass) == nrow(df))
df$class_assignment <- LCA_best_model_aic_bic_combined$predclass

# View the updated data frame to check who falls into which group
head(df)

# Summarize the number of individuals in each class
table(df$class_assignment)


In [53]:
# Take the posterior matrix of the model selected by combined AIC + BIC and
# coerce it to a data frame so it can be written out as a table.
posterior_df <- as.data.frame(LCA_best_model_aic_bic_combined$posterior)

# Write the table to disk without row names.
write.csv(
  x = posterior_df,
  file = "data/LCA_posterior_probabilities.csv",
  row.names = FALSE
)

In [39]:
# Write df in its current state to disk without row names. The
# class_assignment column is included only if the cell that adds it has
# already been run.
write.csv(
  x = df,
  file = "data/LCA_latent_class_data.csv",
  row.names = FALSE
)