# Computing metrics for simulations

In this notebook, we compute the results for each simulation scenario and save them in the `results` folder. The combined results are then stored in the following CSV files (which are available on GitHub in the `Figures` folder):
 - `simulation_results_fa.csv`
 - `simulation_results_ctm_v1.csv`
 - `simulation_results_ctm_v2.csv`
 - `simulation_additional_results.csv`
These files will be used later for generating figures.

-----

**Alternative Names for Simulation Scenarios**

To clarify the variations in parameters for each scenario, the following alternative names are used:
- Scenario 1: `scaling_weights`
- Scenario 2: `scaling_topics_param`
- Scenario 3: `scaling_D_topics`
- Scenario 4: `scaling_mu0`
- Scenario 5: `scaling_Sigma0`
- Scenario 6: `scaling_sparsity`

## Packages & functions

In [1]:
from compute_metrics_results import *
import pandas as pd

## Scenario 1

In [None]:
# Scenario 1 (`scaling_weights`): parameter grid and scenario key that the
# compute cells of this section pass to the compute_simulation_scenario* helpers.
simulation_params = [0.0, 0.5, 1.0, 1.5, 2.0]
simulation_scenario = 'scenario1'

In [None]:
# FACTM (default results), FACTM with which_results='reconstruction', and the
# 'fa_ctm' model. Every call returns a pair that is unpacked into the
# latent-factor results (results_fa_*) and the structured-part results (results_ctm_*).
# NOTE(review): the positional 10 is presumably the number of repetitions -
# confirm against compute_metrics_results.py.
results_fa_factm, results_ctm_factm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
)
results_fa_factm_recon, results_ctm_factm_recon = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
    which_results='reconstruction',
)
results_fa_fa_ctm, results_ctm_fa_ctm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa_ctm', 10, seed=123,
)

In [None]:

# Latent-factor baselines for the current scenario. 'fa' and 'fa_oracle' go
# through compute_simulation_scenario (which returns a pair); the remaining
# models go through compute_simulation_scenario_fa (single return value).
results_fa_fa, results_ctm_fa = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa', 10, seed=123,
)
results_fa_fa_oracle, results_ctm_fa_oracle = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa_oracle', 10, seed=123,
)
results_fa_muvi = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'muvi', 10, seed=123,
)
results_fa_muvi_prior = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'muvi_prior', 10, seed=123,
)
results_fa_mofa = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'mofa', 10, seed=123,
)
results_fa_pca = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'pca', 10, seed=123,
)
results_fa_tucker = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'tucker', 10, seed=123,
)

In [None]:
# Structured-part baselines ('lda_sklearn' and 'prodlda_pyro') for the current
# scenario, computed with compute_simulation_scenario_other.
results_ctm_lda_sklearn = compute_simulation_scenario_other(
    simulation_scenario, simulation_params, 'lda_sklearn', 10, seed=123,
)
results_ctm_prodlda_pyro = compute_simulation_scenario_other(
    simulation_scenario, simulation_params, 'prodlda_pyro', 10, seed=123,
)

### Save results - structured part

In [None]:
# Labels written to the `Models` column, in the same order as `results`.
model_names = ['FACTM', 'FACTM(R)', 'CTM', 'LDA', 'ProdLDA']
# Structured-part results, one entry per model.
results = [
    results_ctm_factm,         # FACTM
    results_ctm_factm_recon,   # FACTM(R)
    results_ctm_fa_ctm,        # CTM
    results_ctm_lda_sklearn,   # LDA
    results_ctm_prodlda_pyro,  # ProdLDA
]
# Metric keys looked up in every entry of `results`.
var_list = [
    'muFA_corr_spearmann',
    'eta_corr_spearmann',
    'eta_prob_spearmann',
    'eta_prob_wasserstein',
    'eta_prob_est_wasserstein',
    'topics_wasserstein',
    'topics_corr_spearmann',
    'clusters_ARI',
]

In [8]:
# Build one long-format table (columns: param, value, Models, var, sim_scenario)
# from `results`, whose entries are aligned with `model_names`.
frames = []
for var in var_list:
    for i, model_results in enumerate(results):
        # One column per simulation parameter; melt stacks them into
        # (param, value) rows.
        frame = pd.DataFrame(model_results[var], columns=simulation_params)
        frame = pd.melt(frame, value_name='value', var_name='param')
        frame['Models'] = model_names[i]
        frame['var'] = var
        frames.append(frame)
# Concatenate once instead of growing `df` inside the loop, and never
# concatenate with an empty DataFrame. Reversing the list first and the rows
# afterwards reproduces the row order and index values of the earlier
# prepend-then-flip implementation.
df = pd.concat(frames[::-1])
df['sim_scenario'] = 'scaling_weights'
df = df.iloc[::-1]

In [10]:
# Persist the scenario-1 structured-part table. The row index is written as the
# first column (index=True is the pandas default) and is read back with
# index_col=0 when the tables are combined.
df.to_csv('./results/scenario1_ctm.csv', index=True)

### Save results - latent factors

In [None]:
# Labels written to the `Models` column, in the same order as `results`.
model_names = ['FACTM', 'FACTM(R)', 'FA+CTM', 'FA', 'FA(Oracle)', 'MOFA', 'muVI', 'muVI_prior', 'Tucker', 'PCA']
# Latent-factor results, one entry per model.
results = [
    results_fa_factm,        # FACTM
    results_fa_factm_recon,  # FACTM(R)
    results_fa_fa_ctm,       # FA+CTM
    results_fa_fa,           # FA
    results_fa_fa_oracle,    # FA(Oracle)
    results_fa_mofa,         # MOFA
    results_fa_muvi,         # muVI
    results_fa_muvi_prior,   # muVI_prior
    results_fa_tucker,       # Tucker
    results_fa_pca,          # PCA
]
# Metric keys looked up in every entry of `results`.
var_list = [
    'z_corr_rotated',
    'z_corr_best_order',
]

In [68]:
# Build one long-format table (columns: param, value, Models, var, sim_scenario)
# from `results`, whose entries are aligned with `model_names`.
frames = []
for var in var_list:
    for i, model_results in enumerate(results):
        # One column per simulation parameter; melt stacks them into
        # (param, value) rows.
        frame = pd.DataFrame(model_results[var], columns=simulation_params)
        frame = pd.melt(frame, value_name='value', var_name='param')
        frame['Models'] = model_names[i]
        frame['var'] = var
        frames.append(frame)
# Concatenate once instead of growing `df` inside the loop, and never
# concatenate with an empty DataFrame. Reversing the list first and the rows
# afterwards reproduces the row order and index values of the earlier
# prepend-then-flip implementation.
df = pd.concat(frames[::-1])
df['sim_scenario'] = 'scaling_weights'
df = df.iloc[::-1]

In [69]:
# Persist the scenario-1 latent-factor table. The row index is written as the
# first column (index=True is the pandas default) and is read back with
# index_col=0 when the tables are combined.
df.to_csv('./results/scenario1_fa.csv', index=True)

## Scenario 2

In [None]:
# Scenario 2 (`scaling_topics_param`): parameter grid and scenario key that the
# compute cells of this section pass to the compute_simulation_scenario* helpers.
# The grid mixes one float with two ints, exactly as before.
simulation_params = [1.0, 5, 10]
simulation_scenario = 'scenario2'

In [None]:
# FACTM (default results), FACTM with which_results='reconstruction', and the
# 'fa_ctm' model. Every call returns a pair that is unpacked into the
# latent-factor results (results_fa_*) and the structured-part results (results_ctm_*).
# NOTE(review): the positional 10 is presumably the number of repetitions -
# confirm against compute_metrics_results.py.
results_fa_factm, results_ctm_factm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
)
results_fa_factm_recon, results_ctm_factm_recon = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
    which_results='reconstruction',
)
results_fa_fa_ctm, results_ctm_fa_ctm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa_ctm', 10, seed=123,
)

In [None]:

# Latent-factor baselines for the current scenario. 'fa' and 'fa_oracle' go
# through compute_simulation_scenario (which returns a pair); the remaining
# models go through compute_simulation_scenario_fa (single return value).
results_fa_fa, results_ctm_fa = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa', 10, seed=123,
)
results_fa_fa_oracle, results_ctm_fa_oracle = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa_oracle', 10, seed=123,
)
results_fa_muvi = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'muvi', 10, seed=123,
)
results_fa_muvi_prior = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'muvi_prior', 10, seed=123,
)
results_fa_mofa = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'mofa', 10, seed=123,
)
results_fa_pca = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'pca', 10, seed=123,
)
results_fa_tucker = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'tucker', 10, seed=123,
)

In [None]:
# Structured-part baselines ('lda_sklearn' and 'prodlda_pyro') for the current
# scenario, computed with compute_simulation_scenario_other.
results_ctm_lda_sklearn = compute_simulation_scenario_other(
    simulation_scenario, simulation_params, 'lda_sklearn', 10, seed=123,
)
results_ctm_prodlda_pyro = compute_simulation_scenario_other(
    simulation_scenario, simulation_params, 'prodlda_pyro', 10, seed=123,
)

### Save results - structured part

In [None]:
# Labels written to the `Models` column, in the same order as `results`.
model_names = ['FACTM', 'FACTM(R)', 'CTM', 'LDA', 'ProdLDA']
# Structured-part results, one entry per model.
results = [
    results_ctm_factm,         # FACTM
    results_ctm_factm_recon,   # FACTM(R)
    results_ctm_fa_ctm,        # CTM
    results_ctm_lda_sklearn,   # LDA
    results_ctm_prodlda_pyro,  # ProdLDA
]
# Metric keys looked up in every entry of `results`.
var_list = [
    'muFA_corr_spearmann',
    'eta_corr_spearmann',
    'eta_prob_spearmann',
    'eta_prob_wasserstein',
    'eta_prob_est_wasserstein',
    'topics_wasserstein',
    'topics_corr_spearmann',
    'clusters_ARI',
]

In [None]:
# Build one long-format table (columns: param, value, Models, var, sim_scenario)
# from `results`, whose entries are aligned with `model_names`.
frames = []
for var in var_list:
    for i, model_results in enumerate(results):
        # One column per simulation parameter; melt stacks them into
        # (param, value) rows.
        frame = pd.DataFrame(model_results[var], columns=simulation_params)
        frame = pd.melt(frame, value_name='value', var_name='param')
        frame['Models'] = model_names[i]
        frame['var'] = var
        frames.append(frame)
# Concatenate once instead of growing `df` inside the loop, and never
# concatenate with an empty DataFrame. Reversing the list first and the rows
# afterwards reproduces the row order and index values of the earlier
# prepend-then-flip implementation.
df = pd.concat(frames[::-1])
df['sim_scenario'] = 'scaling_topics_param'
df = df.iloc[::-1]

In [None]:
# Persist the scenario-2 structured-part table. The row index is written as the
# first column (index=True is the pandas default) and is read back with
# index_col=0 when the tables are combined.
df.to_csv('./results/scenario2_ctm.csv', index=True)

### Save results - latent factors

In [None]:
# Labels written to the `Models` column, in the same order as `results`.
model_names = ['FACTM', 'FACTM(R)', 'FA+CTM', 'FA', 'FA(Oracle)', 'MOFA', 'muVI', 'muVI_prior', 'Tucker', 'PCA']
# Latent-factor results, one entry per model.
results = [
    results_fa_factm,        # FACTM
    results_fa_factm_recon,  # FACTM(R)
    results_fa_fa_ctm,       # FA+CTM
    results_fa_fa,           # FA
    results_fa_fa_oracle,    # FA(Oracle)
    results_fa_mofa,         # MOFA
    results_fa_muvi,         # muVI
    results_fa_muvi_prior,   # muVI_prior
    results_fa_tucker,       # Tucker
    results_fa_pca,          # PCA
]
# Metric keys looked up in every entry of `results`.
var_list = [
    'z_corr_rotated',
    'z_corr_best_order',
]

In [None]:
# Build one long-format table (columns: param, value, Models, var, sim_scenario)
# from `results`, whose entries are aligned with `model_names`.
frames = []
for var in var_list:
    for i, model_results in enumerate(results):
        # One column per simulation parameter; melt stacks them into
        # (param, value) rows.
        frame = pd.DataFrame(model_results[var], columns=simulation_params)
        frame = pd.melt(frame, value_name='value', var_name='param')
        frame['Models'] = model_names[i]
        frame['var'] = var
        frames.append(frame)
# Concatenate once instead of growing `df` inside the loop, and never
# concatenate with an empty DataFrame. Reversing the list first and the rows
# afterwards reproduces the row order and index values of the earlier
# prepend-then-flip implementation.
df = pd.concat(frames[::-1])
df['sim_scenario'] = 'scaling_topics_param'
df = df.iloc[::-1]

In [None]:
# Persist the scenario-2 latent-factor table. The row index is written as the
# first column (index=True is the pandas default) and is read back with
# index_col=0 when the tables are combined.
df.to_csv('./results/scenario2_fa.csv', index=True)

## Scenario 3

In [None]:
# Scenario 3 (`scaling_D_topics`): parameter grid and scenario key that the
# compute cells of this section pass to the compute_simulation_scenario* helpers.
simulation_params = [0.5, 1.0, 1.5]
simulation_scenario = 'scenario3'

In [None]:
# FACTM (default results), FACTM with which_results='reconstruction', and the
# 'fa_ctm' model. Every call returns a pair that is unpacked into the
# latent-factor results (results_fa_*) and the structured-part results (results_ctm_*).
# NOTE(review): the positional 10 is presumably the number of repetitions -
# confirm against compute_metrics_results.py.
results_fa_factm, results_ctm_factm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
)
results_fa_factm_recon, results_ctm_factm_recon = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
    which_results='reconstruction',
)
results_fa_fa_ctm, results_ctm_fa_ctm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa_ctm', 10, seed=123,
)

In [None]:

# Latent-factor baselines for the current scenario. 'fa' and 'fa_oracle' go
# through compute_simulation_scenario (which returns a pair); the remaining
# models go through compute_simulation_scenario_fa (single return value).
results_fa_fa, results_ctm_fa = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa', 10, seed=123,
)
results_fa_fa_oracle, results_ctm_fa_oracle = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa_oracle', 10, seed=123,
)
results_fa_muvi = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'muvi', 10, seed=123,
)
results_fa_muvi_prior = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'muvi_prior', 10, seed=123,
)
results_fa_mofa = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'mofa', 10, seed=123,
)
results_fa_pca = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'pca', 10, seed=123,
)
results_fa_tucker = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'tucker', 10, seed=123,
)

In [None]:
# Structured-part baselines ('lda_sklearn' and 'prodlda_pyro') for the current
# scenario, computed with compute_simulation_scenario_other.
results_ctm_lda_sklearn = compute_simulation_scenario_other(
    simulation_scenario, simulation_params, 'lda_sklearn', 10, seed=123,
)
results_ctm_prodlda_pyro = compute_simulation_scenario_other(
    simulation_scenario, simulation_params, 'prodlda_pyro', 10, seed=123,
)

### Save results - structured part

In [None]:
# Labels written to the `Models` column, in the same order as `results`.
model_names = ['FACTM', 'FACTM(R)', 'CTM', 'LDA', 'ProdLDA']
# Structured-part results, one entry per model.
results = [
    results_ctm_factm,         # FACTM
    results_ctm_factm_recon,   # FACTM(R)
    results_ctm_fa_ctm,        # CTM
    results_ctm_lda_sklearn,   # LDA
    results_ctm_prodlda_pyro,  # ProdLDA
]
# Metric keys looked up in every entry of `results`.
var_list = [
    'muFA_corr_spearmann',
    'eta_corr_spearmann',
    'eta_prob_spearmann',
    'eta_prob_wasserstein',
    'eta_prob_est_wasserstein',
    'topics_wasserstein',
    'topics_corr_spearmann',
    'clusters_ARI',
]

In [None]:
# Build one long-format table (columns: param, value, Models, var, sim_scenario)
# from `results`, whose entries are aligned with `model_names`.
frames = []
for var in var_list:
    for i, model_results in enumerate(results):
        # One column per simulation parameter; melt stacks them into
        # (param, value) rows.
        frame = pd.DataFrame(model_results[var], columns=simulation_params)
        frame = pd.melt(frame, value_name='value', var_name='param')
        frame['Models'] = model_names[i]
        frame['var'] = var
        frames.append(frame)
# Concatenate once instead of growing `df` inside the loop, and never
# concatenate with an empty DataFrame. Reversing the list first and the rows
# afterwards reproduces the row order and index values of the earlier
# prepend-then-flip implementation.
df = pd.concat(frames[::-1])
df['sim_scenario'] = 'scaling_D_topics'
df = df.iloc[::-1]

In [None]:
# Persist the scenario-3 structured-part table. The row index is written as the
# first column (index=True is the pandas default) and is read back with
# index_col=0 when the tables are combined.
df.to_csv('./results/scenario3_ctm.csv', index=True)

### Save results - latent factors

In [None]:
# Labels written to the `Models` column, in the same order as `results`.
model_names = ['FACTM', 'FACTM(R)', 'FA+CTM', 'FA', 'FA(Oracle)', 'MOFA', 'muVI', 'muVI_prior', 'Tucker', 'PCA']
# Latent-factor results, one entry per model.
results = [
    results_fa_factm,        # FACTM
    results_fa_factm_recon,  # FACTM(R)
    results_fa_fa_ctm,       # FA+CTM
    results_fa_fa,           # FA
    results_fa_fa_oracle,    # FA(Oracle)
    results_fa_mofa,         # MOFA
    results_fa_muvi,         # muVI
    results_fa_muvi_prior,   # muVI_prior
    results_fa_tucker,       # Tucker
    results_fa_pca,          # PCA
]
# Metric keys looked up in every entry of `results`.
var_list = [
    'z_corr_rotated',
    'z_corr_best_order',
]

In [None]:
# Build one long-format table (columns: param, value, Models, var, sim_scenario)
# from `results`, whose entries are aligned with `model_names`.
frames = []
for var in var_list:
    for i, model_results in enumerate(results):
        # One column per simulation parameter; melt stacks them into
        # (param, value) rows.
        frame = pd.DataFrame(model_results[var], columns=simulation_params)
        frame = pd.melt(frame, value_name='value', var_name='param')
        frame['Models'] = model_names[i]
        frame['var'] = var
        frames.append(frame)
# Concatenate once instead of growing `df` inside the loop, and never
# concatenate with an empty DataFrame. Reversing the list first and the rows
# afterwards reproduces the row order and index values of the earlier
# prepend-then-flip implementation.
df = pd.concat(frames[::-1])
df['sim_scenario'] = 'scaling_D_topics'
df = df.iloc[::-1]

In [None]:
# Persist the scenario-3 latent-factor table. The row index is written as the
# first column (index=True is the pandas default) and is read back with
# index_col=0 when the tables are combined.
df.to_csv('./results/scenario3_fa.csv', index=True)

## Scenario 4

In [None]:
# Scenario 4 (`scaling_mu0`): parameter grid (listed in decreasing order) and
# scenario key that the compute cells of this section pass to the
# compute_simulation_scenario* helpers.
simulation_params = [1.0, 0.75, 0.5, 0.25, 0.0]
simulation_scenario = 'scenario4'

In [None]:
# FACTM (default results), FACTM with which_results='reconstruction', and the
# 'fa_ctm' model. Every call returns a pair (results_fa_*, results_ctm_*);
# this scenario only saves the structured-part (results_ctm_*) half.
# NOTE(review): the positional 10 is presumably the number of repetitions -
# confirm against compute_metrics_results.py.
results_fa_factm, results_ctm_factm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
)
results_fa_factm_recon, results_ctm_factm_recon = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
    which_results='reconstruction',
)
results_fa_fa_ctm, results_ctm_fa_ctm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa_ctm', 10, seed=123,
)

In [None]:
# Structured-part baselines ('lda_sklearn' and 'prodlda_pyro') for the current
# scenario, computed with compute_simulation_scenario_other.
results_ctm_lda_sklearn = compute_simulation_scenario_other(
    simulation_scenario, simulation_params, 'lda_sklearn', 10, seed=123,
)
results_ctm_prodlda_pyro = compute_simulation_scenario_other(
    simulation_scenario, simulation_params, 'prodlda_pyro', 10, seed=123,
)

### Save results - structured part

In [None]:
# Labels written to the `Models` column, in the same order as `results`.
model_names = ['FACTM', 'FACTM(R)', 'CTM', 'LDA', 'ProdLDA']
# Structured-part results, one entry per model.
results = [
    results_ctm_factm,         # FACTM
    results_ctm_factm_recon,   # FACTM(R)
    results_ctm_fa_ctm,        # CTM
    results_ctm_lda_sklearn,   # LDA
    results_ctm_prodlda_pyro,  # ProdLDA
]
# Metric keys looked up in every entry of `results`.
var_list = [
    'muFA_corr_spearmann',
    'eta_corr_spearmann',
    'eta_prob_spearmann',
    'eta_prob_wasserstein',
    'eta_prob_est_wasserstein',
    'topics_wasserstein',
    'topics_corr_spearmann',
    'clusters_ARI',
]

In [None]:
# Build one long-format table (columns: param, value, Models, var, sim_scenario)
# from `results`, whose entries are aligned with `model_names`.
frames = []
for var in var_list:
    for i, model_results in enumerate(results):
        # One column per simulation parameter; melt stacks them into
        # (param, value) rows.
        frame = pd.DataFrame(model_results[var], columns=simulation_params)
        frame = pd.melt(frame, value_name='value', var_name='param')
        frame['Models'] = model_names[i]
        frame['var'] = var
        frames.append(frame)
# Concatenate once instead of growing `df` inside the loop, and never
# concatenate with an empty DataFrame. Reversing the list first and the rows
# afterwards reproduces the row order and index values of the earlier
# prepend-then-flip implementation.
df = pd.concat(frames[::-1])
df['sim_scenario'] = 'scaling_mu0'
df = df.iloc[::-1]

In [None]:
# Persist the scenario-4 structured-part table. The row index is written as the
# first column (index=True is the pandas default) and is read back with
# index_col=0 when the tables are combined.
df.to_csv('./results/scenario4_ctm.csv', index=True)

## Scenario 5

In [None]:
# Scenario 5 (`scaling_Sigma0`): parameter grid and scenario key that the
# compute cells of this section pass to the compute_simulation_scenario* helpers.
simulation_params = [0.2, 0.6, 1.0]
simulation_scenario = 'scenario5'

In [None]:
# FACTM (default results), FACTM with which_results='reconstruction', and the
# 'fa_ctm' model. Every call returns a pair (results_fa_*, results_ctm_*);
# this scenario only saves the structured-part (results_ctm_*) half.
# NOTE(review): the positional 10 is presumably the number of repetitions -
# confirm against compute_metrics_results.py.
results_fa_factm, results_ctm_factm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
)
results_fa_factm_recon, results_ctm_factm_recon = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
    which_results='reconstruction',
)
results_fa_fa_ctm, results_ctm_fa_ctm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa_ctm', 10, seed=123,
)

In [None]:
# Structured-part baselines ('lda_sklearn' and 'prodlda_pyro') for the current
# scenario, computed with compute_simulation_scenario_other.
results_ctm_lda_sklearn = compute_simulation_scenario_other(
    simulation_scenario, simulation_params, 'lda_sklearn', 10, seed=123,
)
results_ctm_prodlda_pyro = compute_simulation_scenario_other(
    simulation_scenario, simulation_params, 'prodlda_pyro', 10, seed=123,
)

### Save results - structured part

In [None]:
# Labels written to the `Models` column, in the same order as `results`.
model_names = ['FACTM', 'FACTM(R)', 'CTM', 'LDA', 'ProdLDA']
# Structured-part results, one entry per model.
results = [
    results_ctm_factm,         # FACTM
    results_ctm_factm_recon,   # FACTM(R)
    results_ctm_fa_ctm,        # CTM
    results_ctm_lda_sklearn,   # LDA
    results_ctm_prodlda_pyro,  # ProdLDA
]
# Metric keys looked up in every entry of `results`.
var_list = [
    'muFA_corr_spearmann',
    'eta_corr_spearmann',
    'eta_prob_spearmann',
    'eta_prob_wasserstein',
    'eta_prob_est_wasserstein',
    'topics_wasserstein',
    'topics_corr_spearmann',
    'clusters_ARI',
]

In [None]:
# Build one long-format table (columns: param, value, Models, var, sim_scenario)
# from `results`, whose entries are aligned with `model_names`.
frames = []
for var in var_list:
    for i, model_results in enumerate(results):
        # One column per simulation parameter; melt stacks them into
        # (param, value) rows.
        frame = pd.DataFrame(model_results[var], columns=simulation_params)
        frame = pd.melt(frame, value_name='value', var_name='param')
        frame['Models'] = model_names[i]
        frame['var'] = var
        frames.append(frame)
# Concatenate once instead of growing `df` inside the loop, and never
# concatenate with an empty DataFrame. Reversing the list first and the rows
# afterwards reproduces the row order and index values of the earlier
# prepend-then-flip implementation.
df = pd.concat(frames[::-1])
df['sim_scenario'] = 'scaling_Sigma0'
df = df.iloc[::-1]

In [None]:
# Persist the scenario-5 structured-part table. The row index is written as the
# first column (index=True is the pandas default) and is read back with
# index_col=0 when the tables are combined.
df.to_csv('./results/scenario5_ctm.csv', index=True)

## Scenario 6

This section may not function correctly because it requires different "basic" results than those for scenarios 1-5. The functions in `compute_metrics_results.py` use the basic results for scenarios 1-5 (`basic`) instead of the results for scenario 6 (`basic_sparsity`).

In [None]:
# Scenario 6 (`scaling_sparsity`): parameter grid and scenario key that the
# compute cells of this section pass to the compute_simulation_scenario* helpers.
simulation_params = [0.0, 0.5, 1.0, 1.5, 2.0]
simulation_scenario = 'scenario6'

In [None]:
# FACTM (default results), FACTM with which_results='reconstruction', and the
# 'fa_ctm' model. Every call returns a pair (results_fa_*, results_ctm_*).
# NOTE(review): none of these outputs is included in the scenario-6 table saved
# in this section (only MOFA and muVI_prior are) - confirm whether this cell is
# still needed.
results_fa_factm, results_ctm_factm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
)
results_fa_factm_recon, results_ctm_factm_recon = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'factm', 10, seed=123,
    which_results='reconstruction',
)
results_fa_fa_ctm, results_ctm_fa_ctm = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa_ctm', 10, seed=123,
)

In [None]:

# Latent-factor baselines for the current scenario. 'fa' and 'fa_oracle' go
# through compute_simulation_scenario (which returns a pair); the remaining
# models go through compute_simulation_scenario_fa (single return value).
# NOTE(review): the scenario-6 table saved in this section only uses
# results_fa_mofa and results_fa_muvi_prior.
results_fa_fa, results_ctm_fa = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa', 10, seed=123,
)
results_fa_fa_oracle, results_ctm_fa_oracle = compute_simulation_scenario(
    simulation_scenario, simulation_params, 'fa_oracle', 10, seed=123,
)
results_fa_muvi = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'muvi', 10, seed=123,
)
results_fa_muvi_prior = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'muvi_prior', 10, seed=123,
)
results_fa_mofa = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'mofa', 10, seed=123,
)
results_fa_pca = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'pca', 10, seed=123,
)
results_fa_tucker = compute_simulation_scenario_fa(
    simulation_scenario, simulation_params, 'tucker', 10, seed=123,
)

### Save results - latent factors

In [None]:
# Labels written to the `Models` column, in the same order as `results`.
model_names = ['MOFA', 'muVI_prior']
# Latent-factor results kept for scenario 6, one entry per model.
results = [
    results_fa_mofa,        # MOFA
    results_fa_muvi_prior,  # muVI_prior
]
# Metric keys looked up in every entry of `results`.
var_list = [
    'z_corr_rotated',
    'z_corr_best_order',
]

In [None]:
# Build one long-format table (columns: param, value, Models, var, sim_scenario)
# from `results`, whose entries are aligned with `model_names`.
frames = []
for var in var_list:
    for i, model_results in enumerate(results):
        # One column per simulation parameter; melt stacks them into
        # (param, value) rows.
        frame = pd.DataFrame(model_results[var], columns=simulation_params)
        frame = pd.melt(frame, value_name='value', var_name='param')
        frame['Models'] = model_names[i]
        frame['var'] = var
        frames.append(frame)
# Concatenate once instead of growing `df` inside the loop, and never
# concatenate with an empty DataFrame. Reversing the list first and the rows
# afterwards reproduces the row order and index values of the earlier
# prepend-then-flip implementation.
df = pd.concat(frames[::-1])
df['sim_scenario'] = 'scaling_sparsity'
df = df.iloc[::-1]

In [None]:
# Persist the scenario-6 latent-factor table. The row index is written as the
# first column (index=True is the pandas default) and is read back with
# index_col=0 in the "Save tables" section.
df.to_csv('./results/scenario6_fa.csv', index=True)

## Save tables

### Results for FA

In [None]:
# Per-scenario latent-factor tables written by the scenario 1-3 sections.
fa_paths = [
    './results/scenario1_fa.csv',
    './results/scenario2_fa.csv',
    './results/scenario3_fa.csv',
]
# index_col=0 turns the first CSV column (the saved row index) back into the index.
df1, df2, df3 = [pd.read_csv(path, index_col=0) for path in fa_paths]
# Stack the three scenarios into a single table.
df = pd.concat([df1, df2, df3])

In [None]:
# Write the combined latent-factor table to the current working directory;
# the row index is included (index=True is the pandas default).
df.to_csv('simulation_results_fa.csv', index=True)

### Results for structured data v1

In [None]:
# Per-scenario structured-part tables written by the scenario 1-3 sections.
ctm_v1_paths = [
    './results/scenario1_ctm.csv',
    './results/scenario2_ctm.csv',
    './results/scenario3_ctm.csv',
]
# index_col=0 turns the first CSV column (the saved row index) back into the index.
df1, df2, df3 = [pd.read_csv(path, index_col=0) for path in ctm_v1_paths]
# Stack the three scenarios into a single table.
df = pd.concat([df1, df2, df3])

In [None]:
# Write the combined structured-part table (scenarios 1-3) to the current
# working directory; the row index is included (index=True is the pandas default).
df.to_csv('simulation_results_ctm_v1.csv', index=True)

### Results for structured data v2

In [None]:
# Per-scenario structured-part tables written by the scenario 4-5 sections.
ctm_v2_paths = [
    './results/scenario4_ctm.csv',
    './results/scenario5_ctm.csv',
]
# index_col=0 turns the first CSV column (the saved row index) back into the index.
df1, df2 = [pd.read_csv(path, index_col=0) for path in ctm_v2_paths]
# Stack the two scenarios into a single table.
df = pd.concat([df1, df2])

In [None]:
# Write the combined structured-part table (scenarios 4-5) to the current
# working directory; the row index is included (index=True is the pandas default).
df.to_csv('simulation_results_ctm_v2.csv', index=True)

### Results for additional simulation scenario 6

In [None]:
# Reload the scenario-6 latent-factor table; index_col=0 turns the first CSV
# column (the saved row index) back into the index.
df = pd.read_csv(
    './results/scenario6_fa.csv',
    index_col=0,
)

In [None]:
# Write the scenario-6 table to the current working directory; the row index
# is included (index=True is the pandas default).
df.to_csv('simulation_additional_results.csv', index=True)