In [None]:
# Mount Google Drive so the project folder is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Make the project folder the working directory; the relative paths used in the
# rest of the notebook ("./data/...", "./octis_*/", "./optuna_*/") resolve against it.
%cd /content/drive/MyDrive/ChiLit_Topic_Modeling

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/ChiLit_Topic_Modeling


In [None]:
!pip install git+https://github.com/tonazzog/OCTIS.git
!pip install optuna

In [1]:
import octis
from octis.models.LDA import LDA
from octis.models.ProdLDA import ProdLDA
from octis.models.ETM import ETM
from octis.dataset.dataset import Dataset
from octis.evaluation_metrics.diversity_metrics import TopicDiversity
from octis.evaluation_metrics.coherence_metrics import Coherence
from octis.dataset.dataset import Dataset
from octis.optimization.optimizer import Optimizer
from skopt.space.space import Real
from skopt.space.space import Integer
from skopt.space.space import Categorical
import optuna
from typing import Tuple, List
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plotly.io import show
import json
import os
import pickle

Note: the OCTIS extension for multi-objective optimization does not work, so Optuna was used for the multi-objective optimization instead.

In [2]:
# Corpus variant to work on; the value is embedded in the input CSV name and in
# the names of the two output folders below.
chunk_size = 200
# Output folders, relative to the working directory. Both end with "/" because
# some later cells build file paths by plain string concatenation.
octis_folder = f"./octis_{chunk_size}/"
optuna_folder = f"./optuna_{chunk_size}/"

### Create OCTIS dataset

In [3]:
def prepare_octis_corpus(output_folder, docs):
    """Write the corpus and vocabulary files of an OCTIS custom dataset.

    Creates ``output_folder`` when it does not exist yet, then writes:

    * ``corpus.tsv`` - one document per line, in the order given. Documents
      are written as-is (no token filtering).
    * ``vocab.json`` - sorted JSON list of the distinct whitespace-separated
      tokens that are purely alphabetic and longer than two characters.

    Args:
        output_folder: Directory that receives both files.
        docs: Iterable of documents, each a string of whitespace-separated
            tokens.
    """
    # On a fresh run the folder may be missing, and open(..., "w") does not
    # create parent directories. An empty path means "current directory".
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # The documents are traversed twice (corpus, then vocabulary), so a
    # one-shot iterable has to be materialised first.
    docs = list(docs)

    # Write corpus.tsv
    with open(os.path.join(output_folder, "corpus.tsv"), "w", encoding="utf-8") as f:
        f.writelines(f"{doc}\n" for doc in docs)

    # Build the vocabulary; sorting keeps the file stable between runs.
    vocab = sorted({
        token
        for doc in docs
        for token in doc.split()
        if token.isalpha() and len(token) > 2
    })

    # Save vocab.json
    with open(os.path.join(output_folder, "vocab.json"), "w", encoding="utf-8") as f:
        json.dump(vocab, f)

In [4]:
# Read the chunked corpus for the configured chunk size and replace missing
# values with empty strings in a single chained expression.
df_chilit = pd.read_csv(f"./data/ChiLit_Chunks_{chunk_size}.csv").fillna("")
# Alternative input (paragraph-level corpus), kept for reference:
#df_chilit = pd.read_csv(f"./data/ChiLit_Paragraphs.csv")
# Optional filter that drops rows without tokens, kept for reference:
#df_chilit = df_chilit[df_chilit['tokens'] != '']

In [None]:
# Preview the first rows to sanity-check the loaded columns.
df_chilit.head()

Unnamed: 0,book_id,chapter_num,paragraph_num,paragraph_text,tokens,num_tokens
1,alice,1,2,Alice was beginning to get very tired of sitti...,begin get tired sit sister bank have do peep b...,20
2,alice,1,3,So she was considering in her own mind (as wel...,consider own mind hot day make feel sleepy stu...,23
3,alice,1,4,There was nothing so VERY remarkable in that; ...,be remarkable think way hear say dear dear lat...,41
6,alice,1,7,"Either the well was very deep, or she fell ver...",deep fall have plenty time go look wonder go h...,46
8,alice,1,9,"Down, down, down. Would the fall NEVER come to...",fall come end wonder many mile fall time say g...,43


In [5]:
# One document per row, taken from the `tokens` column. Reading the column
# directly replaces the row-by-row iterrows() loop and yields the same list.
docs = df_chilit["tokens"].tolist()

# Write the corpus and vocabulary files into the OCTIS dataset folder.
prepare_octis_corpus(octis_folder, docs)

#### Load OCTIS Dataset

In [6]:
# Build an OCTIS Dataset from the files in the OCTIS folder; `dataset` is the
# notebook-level object that every model and metric below reads from.
dataset = Dataset()
dataset.load_custom_dataset_from_folder(octis_folder)

### Custom metric for OCTIS optimizer

In [None]:
from octis.evaluation_metrics.metrics import AbstractMetric

class CoherenceDiversityCombination(AbstractMetric):
    """Single-value metric: weighted sum of topic coherence and topic diversity.

    ``score = alpha * coherence + (1 - alpha) * diversity``

    Coherence is an OCTIS ``Coherence`` metric using the ``c_npmi`` measure
    over the dataset corpus; diversity is an OCTIS ``TopicDiversity`` metric
    with ``topk=10``.
    """

    def __init__(self, dataset, alpha=0.7):
        """
        Args:
            dataset: object exposing ``get_corpus()``; the returned corpus is
                passed to the coherence metric as its reference texts.
            alpha: weight for coherence (between 0 and 1, inclusive).
                (1 - alpha) will be the weight for diversity.

        Raises:
            ValueError: if ``alpha`` is outside the interval [0, 1].
        """
        super().__init__()
        # A weight outside [0, 1] would give one of the two terms a negative
        # weight and silently turn "maximize" into "minimize" for it.
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f"alpha must be between 0 and 1, got {alpha}")
        self.alpha = alpha
        # Initialize base metrics.
        # NOTE: the attribute is named `c_v`, but the measure actually
        # computed is NPMI ("c_npmi"); the name is kept for compatibility.
        self.c_v = Coherence(texts=dataset.get_corpus(), measure="c_npmi")
        self.diversity = TopicDiversity(topk=10)

    def score(self, model_output, **kwargs):
        """Return the weighted combination for one trained model.

        Args:
            model_output: model output passed unchanged to both base metrics.
            **kwargs: accepted and ignored.

        Returns:
            ``alpha * coherence + (1 - alpha) * diversity``.
        """
        # Compute individual scores
        coherence_score = self.c_v.score(model_output)
        diversity_score = self.diversity.score(model_output)

        # Weighted combination
        return self.alpha * coherence_score + (1 - self.alpha) * diversity_score

## LDA model

### OCTIS LDA model optimization

In [None]:
# Evaluation metric optimized by OCTIS: weighted coherence/diversity combination.
eval_metric = CoherenceDiversityCombination(dataset)

# Instantiate the model whose hyperparameters are tuned.
model = LDA()

# Define the search space.
# Both Dirichlet priors are kept strictly positive: the lower bound of `eta`
# used to be 0.0, which is not a valid Dirichlet concentration and was
# inconsistent with the 0.01 lower bound already used for `alpha`.
search_space = {
    "num_topics": Integer(10, 50),
    "alpha": Real(low=0.01, high=2.0),
    "eta": Real(low=0.01, high=2.0),
    "iterations": Integer(50, 200)
}

# Initialize an optimizer object and start the optimization.
optimizer = Optimizer()
optResult = optimizer.optimize(
    model, dataset,
    eval_metric,
    search_space,
    save_path=octis_folder,
    save_name = 'OCTIS_LDA',
    number_of_call=50, # number of optimization iterations
    model_runs=5 # number of runs of the topic model
)

# Save the results of the optimization in a CSV file.
# NOTE(review): this path is relative and does not include `octis_folder`, so
# runs with different chunk sizes presumably overwrite the same file - confirm
# whether that is intended.
optResult.save_to_csv("OCTIS_LDA.csv")

#### Train model with best parameters

In [85]:
# Load the optimization log and locate the call with the highest metric value.
with open(os.path.join(octis_folder, "OCTIS_LDA.json"), "r") as f:
    results = json.load(f)
best_iter = results['f_val'].index(max(results['f_val']))

# Rebuild the model from every tuned hyperparameter. `iterations` is part of
# the search space but used to be dropped here, so the "best" model was not
# trained with the best configuration that was found.
# NOTE(review): `passes=20` is fixed here while the search ran with the model's
# default number of passes - confirm this mismatch is intended.
model = LDA(
    num_topics=results['x_iters']['num_topics'][best_iter],
    alpha=results['x_iters']['alpha'][best_iter],
    eta=results['x_iters']['eta'][best_iter],
    iterations=results['x_iters']['iterations'][best_iter],
    passes=20,
    random_state=42,  # same fixed seed as the Optuna LDA cells, for reproducibility
)

output = model.train_model(dataset)

# Persist the model output; the context manager closes the file handle.
with open(os.path.join(octis_folder, "OCTIS_LDA_output.pkl"), "wb") as f:
    pickle.dump(output, f)

### Optuna LDA multi-objective optimization

In [None]:
# Start from a clean slate: drop a previously stored "LDA_Study" so that the
# create_study() call in the next cell does not fail on a duplicate study name.
# WARNING: this permanently deletes every stored trial of that study.
os.makedirs(optuna_folder, exist_ok=True)  # SQLite cannot create the DB file inside a missing folder
try:
    optuna.delete_study(study_name="LDA_Study", storage=f"sqlite:///{optuna_folder}LDA_Study.db")
except KeyError:
    # Raised when no study with that name is stored yet (e.g. on the first run).
    print("No existing LDA_Study to delete.")

In [None]:
def objectiveLDA(trial) -> Tuple[float, float]:
    """Optuna objective: train one LDA model and score it on two criteria.

    Hyperparameters are sampled from ``trial``, the model is trained on the
    notebook-level ``dataset``, and the pair ``(coherence, diversity)`` is
    returned, where coherence uses the ``c_npmi`` measure and diversity uses
    ``topk=10``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        ``(coherence, diversity)`` for the trained model.
    """
    # Sample the hyperparameters; the dict is filled in declaration order, so
    # the suggest_* calls happen in this exact sequence.
    sampled = {
        "num_topics": trial.suggest_int("num_topics", 10, 50),
        "alpha": trial.suggest_float("alpha", 0.01, 0.2),
        "eta": trial.suggest_float("eta", 0.01, 0.2),
        "passes": trial.suggest_int("passes", 5, 30),
        "iterations": trial.suggest_int("iterations", 50, 200),
    }

    # Train the model; random_state is fixed at 42 for every trial.
    lda = LDA(random_state=42, **sampled)
    topics = lda.train_model(dataset)

    # Score the trained model on both objectives.
    npmi = Coherence(texts=dataset.get_corpus(), measure='c_npmi').score(topics)
    topic_diversity = TopicDiversity(topk=10).score(topics)

    return npmi, topic_diversity  # both are maximized by the study


# Run optimization: both returned objectives (coherence, diversity) are
# maximized and the trials are stored in a SQLite database in the Optuna folder.
# SQLite does not create missing parent folders, so make sure the folder exists.
os.makedirs(optuna_folder, exist_ok=True)
study = optuna.create_study(
    directions=["maximize","maximize"],
    storage=f"sqlite:///{optuna_folder}LDA_Study.db",
    study_name="LDA_Study")
study.optimize(objectiveLDA, n_trials=50)

[I 2025-08-12 23:35:10,519] A new study created in RDB with name: LDA_Study
[I 2025-08-12 23:40:50,226] Trial 0 finished with values: [-0.01527873111567863, 0.74] and parameters: {'num_topics': 25, 'alpha': 0.040399725923978266, 'eta': 0.012447161904934178, 'passes': 10, 'iterations': 195}.
[I 2025-08-12 23:46:25,126] Trial 1 finished with values: [-0.1409386265621207, 0.9129032258064517] and parameters: {'num_topics': 31, 'alpha': 0.19711802101251766, 'eta': 0.13859233231342827, 'passes': 26, 'iterations': 153}.
[I 2025-08-12 23:49:15,992] Trial 2 finished with values: [-0.06718309715057955, 0.8076923076923077] and parameters: {'num_topics': 13, 'alpha': 0.02329181016923864, 'eta': 0.1422413420782142, 'passes': 12, 'iterations': 176}.
[I 2025-08-12 23:56:14,521] Trial 3 finished with values: [-0.061582172735959735, 0.8377777777777777] and parameters: {'num_topics': 45, 'alpha': 0.18177054258786077, 'eta': 0.03887815152676666, 'passes': 21, 'iterations': 126}.
[I 2025-08-12 23:59:36,81

In [101]:
def train_final_LDA_model(params):
    """Fit an LDA model from a parameter mapping and report its quality.

    The model is trained on the notebook-level ``dataset`` with
    ``random_state=42``; coherence (``c_npmi``) and diversity (``topk=10``)
    are computed on the result and printed.

    Args:
        params: mapping with the keys ``num_topics``, ``alpha``, ``eta``,
            ``passes`` and ``iterations``.

    Returns:
        Tuple ``(model, output, coherence, diversity)``.
    """
    print(f"\nTraining final model with parameters: {params}")

    # Forward exactly the hyperparameters the model is built from.
    lda_kwargs = {
        key: params[key]
        for key in ("num_topics", "alpha", "eta", "passes", "iterations")
    }
    model = LDA(random_state=42, **lda_kwargs)
    output = model.train_model(dataset)

    # Final metrics, computed the same way as in the optimization objective.
    coherence = Coherence(texts=dataset.get_corpus(), measure='c_npmi').score(output)
    diversity = TopicDiversity(topk=10).score(output)

    print("Final model metrics:")
    print(f"  Coherence: {coherence:.4f}")
    print(f"  Diversity: {diversity:.4f}")

    return model, output, coherence, diversity

In [102]:
# Re-open the stored LDA study from its SQLite database, so the final model can
# be trained without re-running the optimization cell.
LDA_study = optuna.load_study(
    storage=f"sqlite:///{optuna_folder}LDA_Study.db",
    study_name="LDA_Study"
)

In [103]:
# A multi-objective study has no single best trial: `best_trials` is the
# Pareto front (trials not dominated on coherence and diversity).
pareto_trials = LDA_study.best_trials
if not pareto_trials:
    # Without this guard the dump below would hit an undefined `final_output`
    # or, worse, silently pickle a stale one left over in the kernel.
    raise RuntimeError("LDA_Study has no Pareto-optimal trials; run the optimization first.")

# Pick the first Pareto-optimal solution.
# NOTE(review): the first trial is not necessarily the most balanced
# trade-off between the two objectives - confirm this choice is intended.
selected_params = pareto_trials[0].params
final_model, final_output, final_coherence, final_diversity = train_final_LDA_model(selected_params)

print("\nFinal model trained successfully!")

# Persist the model output; the context manager closes the file handle.
with open(os.path.join(optuna_folder, "Optuna_LDA_output.pkl"), "wb") as f:
    pickle.dump(final_output, f)


Training final model with parameters: {'num_topics': 14, 'alpha': 0.08263819021129706, 'eta': 0.11478421778029362, 'passes': 29, 'iterations': 139}
Final model metrics:
  Coherence: -0.0220
  Diversity: 0.8214

Final model trained successfully!


## ProdLDA

### OCTIS ProdLDA optimization

In [4]:
# Initialize the metric optimized by OCTIS (weighted coherence/diversity).
# The cell defining CoherenceDiversityCombination must have been run in this
# kernel first; otherwise this line raises NameError.
eval_metric = CoherenceDiversityCombination(dataset)

# Initialize model
model = ProdLDA(use_partitions=False)

# Define the search space.
search_space = {
    "num_topics": Integer(10,50),
    "dropout" : Real(low=0, high=0.60),
    "num_neurons" : Categorical([50, 100, 200, 300]),
    "num_layers": Integer(1,3),
    "activation" : Categorical(["softplus","relu", "sigmoid"])
}

# Initialize an optimizer object and start the optimization.
optimizer=Optimizer()
optResult=optimizer.optimize(
    model, dataset,
    eval_metric,
    search_space,
    save_name='OCTIS_ProdLDA',
    save_path=octis_folder,
    number_of_call=50, # number of optimization iterations
    model_runs=5  # number of runs of the topic model
)

# Save the results of the optimization in a CSV file.
# NOTE(review): the path is relative and does not include `octis_folder` -
# confirm the file is meant to land outside the OCTIS output folder.
optResult.save_to_csv("OCTIS_ProdLDA.csv")

NameError: name 'CoherenceDiversityCombination' is not defined

### Train ProdLDA model with best parameters

In [97]:
# Load the optimization log and locate the call with the highest metric value.
# The context manager closes the file handle, which json.load(open(...)) leaked.
with open(os.path.join(octis_folder, "OCTIS_ProdLDA.json"), "r") as f:
    results = json.load(f)
best_iter = results['f_val'].index(max(results['f_val']))

# Rebuild ProdLDA from the best value of every tuned hyperparameter.
model = ProdLDA(
    num_topics=results['x_iters']['num_topics'][best_iter],
    dropout=results['x_iters']['dropout'][best_iter],
    num_neurons=results['x_iters']['num_neurons'][best_iter],
    num_layers=results['x_iters']['num_layers'][best_iter],
    activation=results['x_iters']['activation'][best_iter],
    use_partitions=False
)

output = model.train_model(dataset)

# Persist the model output next to the optimization log.
with open(os.path.join(octis_folder, "OCTIS_ProdLDA_output.pkl"), "wb") as f:
    pickle.dump(output, f)

Epoch: [1/100]	Samples: [5843/584300]	Train Loss: 2257.5753472370784	Time: 0:00:04.272165
Epoch: [2/100]	Samples: [11686/584300]	Train Loss: 2216.680636819699	Time: 0:00:04.100038
Epoch: [3/100]	Samples: [17529/584300]	Train Loss: 2188.057128989817	Time: 0:00:04.197414
Epoch: [4/100]	Samples: [23372/584300]	Train Loss: 2171.4995895195107	Time: 0:00:03.960664
Epoch: [5/100]	Samples: [29215/584300]	Train Loss: 2159.9728153613296	Time: 0:00:03.670171
Epoch: [6/100]	Samples: [35058/584300]	Train Loss: 2152.416890403046	Time: 0:00:04.057952
Epoch: [7/100]	Samples: [40901/584300]	Train Loss: 2146.714863137515	Time: 0:00:03.619083
Epoch: [8/100]	Samples: [46744/584300]	Train Loss: 2141.3794129995294	Time: 0:00:03.998018
Epoch: [9/100]	Samples: [52587/584300]	Train Loss: 2139.1175999860943	Time: 0:00:04.149957
Epoch: [10/100]	Samples: [58430/584300]	Train Loss: 2137.014820752396	Time: 0:00:04.178687
Epoch: [11/100]	Samples: [64273/584300]	Train Loss: 2133.2107302541503	Time: 0:00:04.289923
Epo

### Optuna ProdLDA multi-objective optimization

In [None]:
def objectiveProdLDA(trial) -> Tuple[float, float]:
    """Optuna objective: train one ProdLDA model and score it on two criteria.

    Hyperparameters are sampled from ``trial``, the model is trained on the
    notebook-level ``dataset`` with ``use_partitions=False``, and the pair
    ``(coherence, diversity)`` is returned, where coherence uses the
    ``c_npmi`` measure and diversity uses ``topk=10``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        ``(coherence, diversity)`` for the trained model.
    """
    # Sample the hyperparameters; the dict is filled in declaration order, so
    # the suggest_* calls happen in this exact sequence.
    sampled = {
        "num_topics": trial.suggest_int("num_topics", 10, 50),
        "dropout": trial.suggest_float("dropout", 0, 0.60),
        "num_neurons": trial.suggest_categorical("num_neurons", [50, 100, 200, 300]),
        "num_layers": trial.suggest_int("num_layers", 1, 2),
        "activation": trial.suggest_categorical("activation", ["softplus","relu", "sigmoid"]),
    }

    # Train the ProdLDA model with the sampled configuration.
    prod_lda = ProdLDA(use_partitions=False, **sampled)
    topics = prod_lda.train_model(dataset)

    # Score the trained model on both objectives.
    npmi = Coherence(texts=dataset.get_corpus(), measure='c_npmi').score(topics)
    topic_diversity = TopicDiversity(topk=10).score(topics)

    return npmi, topic_diversity  # both are maximized by the study

# Run optimization: both returned objectives (coherence, diversity) are
# maximized and the trials are stored in a SQLite database in the Optuna folder.
# SQLite does not create missing parent folders, so make sure the folder exists.
os.makedirs(optuna_folder, exist_ok=True)
study = optuna.create_study(
    directions=["maximize","maximize"],
    storage=f"sqlite:///{optuna_folder}ProdLDA_Study.db",
    study_name="ProdLDA_Study"
  )
study.optimize(objectiveProdLDA, n_trials=50)

[I 2025-08-13 22:03:02,934] A new study created in RDB with name: ProdLDA_Study


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 422.110667349298	Time: 0:00:24.608389
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 410.1606030605864	Time: 0:00:21.659467
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 406.31787976255123	Time: 0:00:21.464043
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 404.4543563738316	Time: 0:00:23.315591
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 403.264892229169	Time: 0:00:22.471757
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 402.3959376895382	Time: 0:00:21.106510
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 402.0111157540623	Time: 0:00:22.316645
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 401.4959139825707	Time: 0:00:22.660983
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 401.0708883141033	Time: 0:00:22.463995
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 400.4433144515088	Time: 0:00:22.273739
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 400.4027887974404	Time: 0:00:

[I 2025-08-13 22:40:46,735] Trial 0 finished with values: [0.030310650198585733, 0.9842105263157894] and parameters: {'num_topics': 19, 'dropout': 0.20138708349811507, 'num_neurons': 300, 'num_layers': 1, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 424.3356337613664	Time: 0:00:14.580425
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 415.51297521193015	Time: 0:00:11.899327
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 413.02796595688284	Time: 0:00:12.639376
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 411.6510305368074	Time: 0:00:12.408113
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 410.8688356374713	Time: 0:00:12.876275
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 410.2421379482505	Time: 0:00:12.397923
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 409.52713239287124	Time: 0:00:12.313817
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 409.19875280193287	Time: 0:00:12.396397
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 408.9437831177593	Time: 0:00:11.805877
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 408.59055525288215	Time: 0:00:12.740643
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 408.29346191663376	Time

[I 2025-08-13 23:04:27,680] Trial 1 finished with values: [-0.003990636319932234, 0.915] and parameters: {'num_topics': 20, 'dropout': 0.45703562599863395, 'num_neurons': 50, 'num_layers': 1, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 424.81051605667693	Time: 0:00:13.834156
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 415.0921675397178	Time: 0:00:12.579149
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 413.5501647733417	Time: 0:00:11.831077
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 412.9319410210026	Time: 0:00:11.957262
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 412.09935871440797	Time: 0:00:11.990128
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 411.84363348543405	Time: 0:00:18.715786
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 411.10930278447773	Time: 0:00:12.057881
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 410.9579253119593	Time: 0:00:11.923338
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 410.9293189657503	Time: 0:00:11.514043
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 410.366135416079	Time: 0:00:11.779045
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 410.31104389851566	Time: 

[I 2025-08-13 23:26:33,602] Trial 2 finished with values: [-0.03284730826914241, 0.91] and parameters: {'num_topics': 10, 'dropout': 0.5582377178350553, 'num_neurons': 100, 'num_layers': 1, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 443.67042859581875	Time: 0:00:13.099941
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 426.83665267610303	Time: 0:00:11.554904
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 423.0755272982021	Time: 0:00:11.855371
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 420.89350076461767	Time: 0:00:11.554806
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 419.77932795044853	Time: 0:00:11.660808
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 418.84737337526093	Time: 0:00:11.983233
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 418.020652684331	Time: 0:00:11.546026
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 417.65202293088936	Time: 0:00:11.743834
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 417.393177117127	Time: 0:00:11.494567
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 417.01337849484923	Time: 0:00:11.571585
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 416.8597727730878	Time:

[I 2025-08-13 23:48:14,985] Trial 3 finished with values: [-0.013398629182610847, 0.7627906976744186] and parameters: {'num_topics': 43, 'dropout': 0.5880701441619546, 'num_neurons': 50, 'num_layers': 1, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 427.3327668859433	Time: 0:00:25.133505
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 417.4972098581608	Time: 0:00:24.052715
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 413.5679756336159	Time: 0:00:23.097059
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 410.5752629732284	Time: 0:00:29.399144
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 409.1527912584984	Time: 0:00:22.180403
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 408.17424283163604	Time: 0:00:22.472501
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 407.40872457543446	Time: 0:00:22.024775
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 406.7685313992797	Time: 0:00:28.893719
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 406.4794176945808	Time: 0:00:22.247935
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 406.02086675964324	Time: 0:00:21.846270
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 405.79956963809667	Time: 

[I 2025-08-14 00:44:53,327] Trial 4 finished with values: [0.009075721183529242, 0.9628571428571429] and parameters: {'num_topics': 35, 'dropout': 0.2750981101173346, 'num_neurons': 300, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 442.56136321160574	Time: 0:00:15.622944
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 425.79693006158084	Time: 0:00:13.829149
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 422.24221012704055	Time: 0:00:13.288090
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 419.9251064058621	Time: 0:00:13.388614
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 418.49935152958795	Time: 0:00:13.409805
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 417.16149018015824	Time: 0:00:13.399751
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 416.5575041624939	Time: 0:00:13.585025
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 416.08118045858254	Time: 0:00:13.350619
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 415.32176305492084	Time: 0:00:13.626445
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 415.0676385421368	Time: 0:00:13.282521
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 414.9851155947914	Tim

[I 2025-08-14 01:10:07,005] Trial 5 finished with values: [-0.015170787215110804, 0.8131578947368421] and parameters: {'num_topics': 38, 'dropout': 0.5896432285558239, 'num_neurons': 100, 'num_layers': 1, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 421.18180856465807	Time: 0:00:12.242895
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 408.79989681187936	Time: 0:00:11.251981
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 404.80758607531925	Time: 0:00:11.456262
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 403.40059867622995	Time: 0:00:11.324232
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 402.4532883113998	Time: 0:00:11.441377
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 401.74210848901913	Time: 0:00:11.428178
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 401.34768350529885	Time: 0:00:11.573401
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 401.0047350755567	Time: 0:00:11.578340
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 400.68348652868286	Time: 0:00:11.215528
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 400.42194879500727	Time: 0:00:11.015650
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 400.21673501796056	T

[I 2025-08-14 01:33:27,805] Trial 6 finished with values: [0.04838178970449884, 0.9409090909090909] and parameters: {'num_topics': 22, 'dropout': 0.07548434734218092, 'num_neurons': 50, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 439.95748577435023	Time: 0:00:13.979254
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 423.21297239089654	Time: 0:00:11.853821
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 421.1395302699847	Time: 0:00:18.673322
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 417.236539702522	Time: 0:00:11.440985
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 414.0313911251458	Time: 0:00:11.405575
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 412.34324618102573	Time: 0:00:11.498718
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 411.3935707394752	Time: 0:00:11.717772
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 410.581199559331	Time: 0:00:11.161358
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 410.175739889533	Time: 0:00:11.328492
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 409.9196312864971	Time: 0:00:11.340007
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 409.55619950790594	Time: 0:0

[I 2025-08-14 01:55:07,892] Trial 7 finished with values: [0.01002298028311595, 0.9166666666666666] and parameters: {'num_topics': 42, 'dropout': 0.2764980570235083, 'num_neurons': 50, 'num_layers': 2, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 422.59135652928234	Time: 0:00:12.440292
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 412.80337017280004	Time: 0:00:11.205068
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 410.3204973364741	Time: 0:00:11.151071
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 409.0354300518835	Time: 0:00:11.095583
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 408.18617760375525	Time: 0:00:11.322118
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 407.4643384984696	Time: 0:00:11.417186
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 407.1449531194167	Time: 0:00:11.130333
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 406.7592543493874	Time: 0:00:10.981791
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 406.6010563301644	Time: 0:00:17.962662
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 406.4462504202165	Time: 0:00:11.421425
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 406.3183630482209	Time: 0

[I 2025-08-14 02:19:40,191] Trial 8 finished with values: [-0.015580945667647072, 0.9238095238095239] and parameters: {'num_topics': 21, 'dropout': 0.35233262278556754, 'num_neurons': 50, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 442.5028497214817	Time: 0:00:13.251888
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 425.2598549209993	Time: 0:00:11.893321
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 421.70169846794363	Time: 0:00:11.827840
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 416.621891830698	Time: 0:00:11.893568
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 414.15668523994066	Time: 0:00:12.196723
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 412.95479315652506	Time: 0:00:12.037945
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 412.1842906334631	Time: 0:00:11.977989
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 411.71998820602244	Time: 0:00:12.330050
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 411.2066329993935	Time: 0:00:11.813547
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 410.7130836160244	Time: 0:00:11.868513
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 410.3043798677523	Time: 0

[I 2025-08-14 02:41:46,215] Trial 9 finished with values: [0.020191554537591486, 0.8551020408163266] and parameters: {'num_topics': 49, 'dropout': 0.20266326363726397, 'num_neurons': 50, 'num_layers': 2, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 430.34936297534415	Time: 0:00:23.288000
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 418.6727022460717	Time: 0:00:20.448128
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 417.82672476820505	Time: 0:00:20.380069
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 416.3391943980149	Time: 0:00:28.580712
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 414.1767175539993	Time: 0:00:20.193216
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 412.32426730997986	Time: 0:00:20.817417
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 411.02339650685514	Time: 0:00:20.663075
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 409.78916266873307	Time: 0:00:21.222836
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 408.99417434516755	Time: 0:00:28.531959
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 408.0760168283472	Time: 0:00:20.578467
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 407.58396005827785	Tim

[I 2025-08-14 03:21:43,587] Trial 10 finished with values: [-0.008300724428236402, 0.95] and parameters: {'num_topics': 30, 'dropout': 0.36432313165852914, 'num_neurons': 300, 'num_layers': 2, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 423.32456116764934	Time: 0:00:13.338556
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 414.58229197521723	Time: 0:00:11.826871
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 413.8061575964558	Time: 0:00:11.763629
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 412.5765820268421	Time: 0:00:11.594547
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 411.5521232979764	Time: 0:00:11.855850
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 410.78718992873365	Time: 0:00:11.725569
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 410.39319989109634	Time: 0:00:12.773767
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 410.12753724058246	Time: 0:00:18.645275
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 409.953929068048	Time: 0:00:11.576901
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 409.6309241338839	Time: 0:00:11.533083
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 409.43107781409856	Time:

[I 2025-08-14 03:44:42,684] Trial 11 finished with values: [-0.035445390846388966, 0.92] and parameters: {'num_topics': 10, 'dropout': 0.5026603148780894, 'num_neurons': 100, 'num_layers': 2, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 428.40807316644566	Time: 0:00:16.486160
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 415.3848389792207	Time: 0:00:19.890101
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 413.11383245352346	Time: 0:00:12.611499
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 409.98623309877615	Time: 0:00:12.749105
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 406.61909518725787	Time: 0:00:12.899524
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 404.7256269673771	Time: 0:00:12.495744
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 404.0100616498698	Time: 0:00:12.778505
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 403.62845525194183	Time: 0:00:12.597137
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 403.07132649262064	Time: 0:00:13.306691
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 402.6869326343047	Time: 0:00:19.417962
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 402.174008186287	Time:

[I 2025-08-14 04:08:52,582] Trial 12 finished with values: [0.006328999413223143, 0.9695652173913043] and parameters: {'num_topics': 23, 'dropout': 0.1732542046092017, 'num_neurons': 100, 'num_layers': 2, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 437.06148350958915	Time: 0:00:19.938168
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 420.1491689852037	Time: 0:00:17.628829
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 414.93849735293014	Time: 0:00:17.280214
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 411.97413957005097	Time: 0:00:17.097738
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 410.16850004466636	Time: 0:00:17.787802
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 408.998496256841	Time: 0:00:17.342579
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 408.3043732192225	Time: 0:00:17.215280
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 407.52309331920463	Time: 0:00:24.103928
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 407.11603421884405	Time: 0:00:17.127374
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 406.41575181427726	Time: 0:00:17.707743
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 406.1478184888193	Time

[I 2025-08-14 04:42:59,654] Trial 13 finished with values: [0.031043014858498412, 0.9390243902439024] and parameters: {'num_topics': 41, 'dropout': 0.27961490715381193, 'num_neurons': 200, 'num_layers': 1, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 441.5523574290745	Time: 0:00:15.219454
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 423.84146187725685	Time: 0:00:13.035755
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 421.5638594957755	Time: 0:00:13.117027
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 420.80898436030714	Time: 0:00:12.909837
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 419.7556371451375	Time: 0:00:13.341839
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 417.8882381778121	Time: 0:00:20.044669
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 416.49231602599815	Time: 0:00:12.828860
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 415.65997530714003	Time: 0:00:12.921064
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 414.7894946177029	Time: 0:00:14.251099
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 414.13530360492837	Time: 0:00:13.570995
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 413.72889110163715	Time

[I 2025-08-14 05:07:36,904] Trial 14 finished with values: [-0.030735699460577456, 0.8729729729729729] and parameters: {'num_topics': 37, 'dropout': 0.4998873295356704, 'num_neurons': 100, 'num_layers': 2, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 420.5887863031765	Time: 0:00:23.050427
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 410.74774721364156	Time: 0:00:19.957590
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 408.37650473492863	Time: 0:00:26.309171
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 407.5347933357782	Time: 0:00:19.830797
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 406.60268151000525	Time: 0:00:19.584435
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 406.27444686436235	Time: 0:00:19.674198
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 405.93056995458136	Time: 0:00:19.912480
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 405.78743265516863	Time: 0:00:26.315792
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 405.40980409831536	Time: 0:00:19.889575
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 405.1516013185981	Time: 0:00:19.880597
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 405.1067452309839	Tim

[I 2025-08-14 05:46:13,925] Trial 15 finished with values: [0.0016312618392387013, 0.9833333333333333] and parameters: {'num_topics': 12, 'dropout': 0.3695435414172589, 'num_neurons': 300, 'num_layers': 1, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 421.68222580581653	Time: 0:00:11.903947
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 410.2168683191459	Time: 0:00:10.975296
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 406.689418449795	Time: 0:00:10.754426
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 405.14957708004215	Time: 0:00:10.846483
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 404.06052130938275	Time: 0:00:11.231547
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 403.32143782794515	Time: 0:00:10.735084
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 402.5401542547064	Time: 0:00:11.173729
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 402.07042982267455	Time: 0:00:10.721816
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 401.87728180480326	Time: 0:00:10.602965
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 401.42161460537267	Time: 0:00:11.010927
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 401.14608774061065	Tim

[I 2025-08-14 06:09:05,308] Trial 16 finished with values: [0.020866667832279653, 0.9590909090909091] and parameters: {'num_topics': 22, 'dropout': 0.13993250556253728, 'num_neurons': 50, 'num_layers': 1, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 432.19140588267794	Time: 0:00:24.329027
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 417.22922742173984	Time: 0:00:20.613610
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 413.33754739924206	Time: 0:00:20.367936
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 411.6131586011552	Time: 0:00:20.378588
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 410.4988810634591	Time: 0:00:20.451509
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 409.7410350636848	Time: 0:00:26.882241
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 409.1983689503522	Time: 0:00:20.284119
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 408.880254395523	Time: 0:00:20.771376
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 408.432766981447	Time: 0:00:20.173341
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 408.30385463390036	Time: 0:00:20.246970
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 407.9647775218277	Time: 0:

[I 2025-08-14 06:47:19,331] Trial 17 finished with values: [0.011328758028971585, 0.9171428571428571] and parameters: {'num_topics': 35, 'dropout': 0.40793422280208835, 'num_neurons': 300, 'num_layers': 1, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 423.35685046876176	Time: 0:00:19.728600
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 414.2128232581293	Time: 0:00:18.420701
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 411.4597703708014	Time: 0:00:18.068877
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 410.2470768141009	Time: 0:00:17.614308
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 409.37481031487437	Time: 0:00:24.992334
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 408.8325133602389	Time: 0:00:17.569494
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 408.2864040431289	Time: 0:00:17.708524
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 407.98471572796774	Time: 0:00:17.447431
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 407.92177365583626	Time: 0:00:17.715764
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 407.59471150229444	Time: 0:00:18.087488
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 407.23012104291263	Time

[I 2025-08-14 07:30:50,330] Trial 18 finished with values: [-0.0056879246977228235, 0.9619047619047619] and parameters: {'num_topics': 21, 'dropout': 0.4448893746990935, 'num_neurons': 200, 'num_layers': 1, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 427.7054999576845	Time: 0:00:18.855267
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 416.3579912242346	Time: 0:00:16.001401
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 415.1818272980845	Time: 0:00:15.855278
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 413.90716808482375	Time: 0:00:16.011158
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 412.55599061183517	Time: 0:00:15.828273
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 411.65619864837225	Time: 0:00:17.081808
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 411.01781996986193	Time: 0:00:16.098004
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 410.61935381873917	Time: 0:00:23.939842
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 410.1668934880553	Time: 0:00:16.430411
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 410.0585715270137	Time: 0:00:16.051772
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 409.7865707338919	Time:

[I 2025-08-14 08:01:53,741] Trial 19 finished with values: [-0.021238737195587866, 0.8941176470588236] and parameters: {'num_topics': 17, 'dropout': 0.5038375802935597, 'num_neurons': 200, 'num_layers': 2, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 435.2601405873862	Time: 0:00:23.708240
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 420.9563911912637	Time: 0:00:20.975305
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 417.24252837489655	Time: 0:00:21.168311
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 415.3661584104416	Time: 0:00:27.427544
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 414.5777170153582	Time: 0:00:20.567319
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 413.7791662185337	Time: 0:00:20.432417
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 413.3115819519084	Time: 0:00:20.268111
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 412.909660218254	Time: 0:00:27.065842
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 412.5656077652477	Time: 0:00:20.340288
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 412.29999947840264	Time: 0:00:27.441452
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 412.07004060819133	Time: 0:

[I 2025-08-14 08:41:17,853] Trial 20 finished with values: [-0.008877864617609412, 0.9027027027027027] and parameters: {'num_topics': 37, 'dropout': 0.5238555834109606, 'num_neurons': 300, 'num_layers': 1, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 432.0620901420155	Time: 0:00:24.453382
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 419.89490562467086	Time: 0:00:21.148468
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 416.0832329809392	Time: 0:00:20.826301
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 411.77953611187513	Time: 0:00:20.952808
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 408.8344380177772	Time: 0:00:28.138004
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 406.8874654790703	Time: 0:00:20.931118
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 405.5720126135466	Time: 0:00:21.341248
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 404.4583929129758	Time: 0:00:21.467269
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 403.5883648378141	Time: 0:00:29.921883
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 402.9772540205606	Time: 0:00:26.937856
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 402.3394104201342	Time: 0:

[I 2025-08-14 09:23:35,945] Trial 21 finished with values: [0.06312547111192116, 0.9230769230769231] and parameters: {'num_topics': 39, 'dropout': 0.0867707153667109, 'num_neurons': 300, 'num_layers': 2, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 433.9391776334349	Time: 0:00:15.501825
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 419.46182087288656	Time: 0:00:14.699790
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 415.6911228977422	Time: 0:00:14.477923
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 413.44710789689594	Time: 0:00:14.292728
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 411.5307711589276	Time: 0:00:14.093025
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 410.4545347087048	Time: 0:00:27.028812
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 409.4623997357632	Time: 0:00:17.386009
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 408.7069299425685	Time: 0:00:13.945980
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 408.248344883409	Time: 0:00:20.893218
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 407.9325338421185	Time: 0:00:14.046491
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 407.4557632908885	Time: 0:0

[I 2025-08-14 09:56:15,756] Trial 22 finished with values: [0.041256692534933206, 0.9] and parameters: {'num_topics': 49, 'dropout': 0.22067237951632693, 'num_neurons': 100, 'num_layers': 1, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 436.5222726481983	Time: 0:00:13.427136
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 422.34673989159	Time: 0:00:12.487130
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 419.12147260600034	Time: 0:00:12.482738
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 417.35657446294573	Time: 0:00:12.208636
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 416.0051654301136	Time: 0:00:12.103859
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 415.0810449534647	Time: 0:00:11.524460
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 414.2584992086413	Time: 0:00:12.004095
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 413.6412661239703	Time: 0:00:12.021914
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 413.1942007997043	Time: 0:00:12.416276
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 412.96772542850323	Time: 0:00:11.875192
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 412.66846963039757	Time: 0:

[I 2025-08-14 10:22:04,862] Trial 23 finished with values: [0.01097526794443546, 0.8295454545454546] and parameters: {'num_topics': 44, 'dropout': 0.42870312432478136, 'num_neurons': 50, 'num_layers': 1, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 430.2933097350932	Time: 0:00:18.770370
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 414.64390066817356	Time: 0:00:16.439563
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 409.72317314564054	Time: 0:00:16.098310
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 407.8159006311475	Time: 0:00:15.979982
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 406.43991554677507	Time: 0:00:16.189463
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 405.6482247470473	Time: 0:00:22.497713
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 404.9409695775032	Time: 0:00:16.114301
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 404.57074711549313	Time: 0:00:16.003202
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 404.1765996803416	Time: 0:00:16.011113
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 403.84325720068836	Time: 0:00:15.816745
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 403.57040058383643	Time

[I 2025-08-14 10:57:10,059] Trial 24 finished with values: [0.029703412510495842, 0.9464285714285714] and parameters: {'num_topics': 28, 'dropout': 0.2824070696826984, 'num_neurons': 200, 'num_layers': 1, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 422.82315134874796	Time: 0:00:16.630475
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 411.89731125374965	Time: 0:00:30.077381
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 408.3941036503587	Time: 0:00:19.446467
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 406.6305794021054	Time: 0:00:26.147842
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 405.61030270866246	Time: 0:00:27.563568
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 404.8860378817799	Time: 0:00:14.223277
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 404.4237249074936	Time: 0:00:14.439469
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 404.0600174904085	Time: 0:00:23.837353
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 403.8162599088806	Time: 0:00:16.509766
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 403.62386203617507	Time: 0:00:16.046315
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 403.5430743918322	Time: 

[I 2025-08-14 11:29:32,508] Trial 25 finished with values: [0.024056229502831386, 0.9541666666666667] and parameters: {'num_topics': 24, 'dropout': 0.2560502719343839, 'num_neurons': 100, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 437.54526429264934	Time: 0:00:18.529306
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 422.26581872566857	Time: 0:00:16.049788
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 420.85805014269727	Time: 0:00:16.183350
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 420.2948055440951	Time: 0:00:16.632609
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 419.0310751546867	Time: 0:00:16.464312
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 417.55413460679495	Time: 0:00:16.321143
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 416.42565915213834	Time: 0:00:22.975054
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 415.4582941400667	Time: 0:00:16.336687
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 414.79447524807466	Time: 0:00:16.219376
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 414.3051164954652	Time: 0:00:16.481835
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 413.89501038493006	Tim

[I 2025-08-14 12:02:00,878] Trial 26 finished with values: [-0.03235441120360148, 0.8647058823529412] and parameters: {'num_topics': 34, 'dropout': 0.5732847471753364, 'num_neurons': 200, 'num_layers': 2, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 442.27294646383433	Time: 0:00:20.493708
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 424.2153931903774	Time: 0:00:19.307179
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 421.7456390052565	Time: 0:00:20.044085
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 420.84215843013004	Time: 0:00:18.884866
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 418.61039064557	Time: 0:00:19.497582
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 415.9290992704102	Time: 0:00:19.179740
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 414.0862231663869	Time: 0:00:18.740645
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 412.94568720526075	Time: 0:00:19.032511
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 412.02167718236336	Time: 0:00:19.734024
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 411.4533767625583	Time: 0:00:19.190702
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 410.91125535114816	Time: 0

[I 2025-08-14 12:34:46,082] Trial 27 finished with values: [0.0020566948382613106, 0.9219512195121952] and parameters: {'num_topics': 41, 'dropout': 0.3633238032417218, 'num_neurons': 200, 'num_layers': 2, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 439.83323731533983	Time: 0:00:19.763858
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 422.78223155808035	Time: 0:00:17.258552
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 417.6725393387262	Time: 0:00:17.262731
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 415.11814128853064	Time: 0:00:16.918493
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 413.64349488746427	Time: 0:00:23.691497
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 412.49509132802507	Time: 0:00:17.199758
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 411.59048913490653	Time: 0:00:17.364006
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 410.975720414116	Time: 0:00:17.199702
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 410.53489581864045	Time: 0:00:17.636687
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 410.003056597281	Time: 0:00:16.999110
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 409.67991814153834	Time

[I 2025-08-14 13:07:44,902] Trial 28 finished with values: [0.003447487817901202, 0.926829268292683] and parameters: {'num_topics': 41, 'dropout': 0.4115247588488429, 'num_neurons': 200, 'num_layers': 1, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 437.94603204723586	Time: 0:00:12.712819
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 418.9882232865747	Time: 0:00:11.659099
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 413.8048371470182	Time: 0:00:11.509201
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 411.4651030059289	Time: 0:00:11.688500
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 410.3121104181947	Time: 0:00:11.843415
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 409.2899843682413	Time: 0:00:15.347065
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 408.7172836134972	Time: 0:00:15.351335
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 408.17911976698264	Time: 0:00:11.490119
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 407.8497599770556	Time: 0:00:11.840830
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 407.4051212192272	Time: 0:00:11.785632
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 407.1340976130529	Time: 0:

[I 2025-08-14 13:29:21,396] Trial 29 finished with values: [0.03254146393692767, 0.8976744186046511] and parameters: {'num_topics': 43, 'dropout': 0.20694688744564027, 'num_neurons': 50, 'num_layers': 1, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 421.623958311294	Time: 0:00:13.840032
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 412.07452572106797	Time: 0:00:11.934425
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 409.5923208232246	Time: 0:00:11.874464
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 407.36263615895115	Time: 0:00:11.744187
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 406.3356933777411	Time: 0:00:11.825908
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 405.9972945259018	Time: 0:00:11.734146
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 405.7165630744917	Time: 0:00:12.035409
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 405.28921192662494	Time: 0:00:12.006624
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 405.226932503538	Time: 0:00:18.699669
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 404.7698757981174	Time: 0:00:12.068629
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 404.4636183635184	Time: 0:0

[I 2025-08-14 13:53:14,662] Trial 30 finished with values: [-0.006188129312372979, 0.97] and parameters: {'num_topics': 10, 'dropout': 0.28969739027489144, 'num_neurons': 100, 'num_layers': 2, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 426.7627737357655	Time: 0:00:13.883908
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 412.7979170927603	Time: 0:00:12.550662
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 408.542021059016	Time: 0:00:12.591967
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 406.84386739613893	Time: 0:00:12.456143
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 405.950545590842	Time: 0:00:12.292850
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 405.0273622997654	Time: 0:00:12.167626
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 404.4233903505369	Time: 0:00:12.646197
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 404.13212013459855	Time: 0:00:18.725563
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 403.7604242335017	Time: 0:00:12.243565
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 403.36615708808205	Time: 0:00:12.754879
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 403.0824306760372	Time: 0:0

[I 2025-08-14 14:16:42,444] Trial 31 finished with values: [0.014637389142775908, 0.9739130434782609] and parameters: {'num_topics': 23, 'dropout': 0.2557590762065598, 'num_neurons': 100, 'num_layers': 1, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 434.3478784063981	Time: 0:00:25.269254
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 423.85509823221105	Time: 0:00:23.381662
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 420.080710212847	Time: 0:00:29.865734
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 417.15697281640246	Time: 0:00:22.860040
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 415.4478870237743	Time: 0:00:23.222323
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 414.86061867912156	Time: 0:00:30.791765
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 414.34895752522476	Time: 0:00:22.798947
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 413.98041445979555	Time: 0:00:29.079431
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 413.6424914002553	Time: 0:00:22.422825
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 413.2535511964709	Time: 0:00:28.950601
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 413.0285517254147	Time: 

[I 2025-08-14 15:18:02,178] Trial 32 finished with values: [0.001493669073961431, 0.8795454545454545] and parameters: {'num_topics': 44, 'dropout': 0.48517351505038425, 'num_neurons': 300, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 434.7885107822256	Time: 0:00:16.222797
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 418.27998846902506	Time: 0:00:14.811514
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 413.1451291401607	Time: 0:00:13.965620
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 410.75763853625966	Time: 0:00:14.364870
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 409.09258239328267	Time: 0:00:13.983302
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 408.02248433591694	Time: 0:00:14.035631
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 407.24657593370335	Time: 0:00:14.091062
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 406.3195519596193	Time: 0:00:13.902606
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 405.74905671688106	Time: 0:00:14.221128
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 405.3576333790576	Time: 0:00:13.894136
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 405.2357559841176	Time

[I 2025-08-14 15:42:39,759] Trial 33 finished with values: [0.037349219599564304, 0.9416666666666667] and parameters: {'num_topics': 36, 'dropout': 0.25301250335141695, 'num_neurons': 100, 'num_layers': 1, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 434.6297402547981	Time: 0:00:15.393408
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 416.6899637761651	Time: 0:00:12.920394
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 410.5331265383449	Time: 0:00:12.816753
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 407.67744518232104	Time: 0:00:12.280762
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 406.11453859350314	Time: 0:00:12.217550
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 404.8775560107411	Time: 0:00:12.340791
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 404.2637107376768	Time: 0:00:11.525836
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 403.41594557667804	Time: 0:00:11.422423
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 403.08593342272485	Time: 0:00:11.468013
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 402.5762277079572	Time: 0:00:12.066791
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 402.1647379685854	Time: 

[I 2025-08-14 16:04:15,903] Trial 34 finished with values: [0.06957557441532379, 0.8125] and parameters: {'num_topics': 40, 'dropout': 0.03433790037691362, 'num_neurons': 50, 'num_layers': 1, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 424.575434696304	Time: 0:00:21.383007
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 414.3252357546618	Time: 0:00:20.524199
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 409.9393815706575	Time: 0:00:20.181550
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 406.9987674140055	Time: 0:00:20.011797
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 405.3805028154503	Time: 0:00:19.825803
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 404.25865932433896	Time: 0:00:20.000566
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 403.6520216085299	Time: 0:00:20.492401
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 403.1792220661838	Time: 0:00:19.089506
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 402.8752724427918	Time: 0:00:19.673146
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 402.61551754359675	Time: 0:00:19.499851
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 402.40261531121877	Time: 0:

[I 2025-08-14 16:49:36,785] Trial 35 finished with values: [0.035804896465600665, 0.9451612903225807] and parameters: {'num_topics': 31, 'dropout': 0.18908476511582786, 'num_neurons': 200, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 429.02862122275354	Time: 0:00:25.429694
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 418.5901585450284	Time: 0:00:30.116447
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 413.6274113225711	Time: 0:00:23.477419
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 410.8408644646148	Time: 0:00:22.479271
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 409.63570552875575	Time: 0:00:29.527161
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 408.4820751964145	Time: 0:00:22.473069
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 407.75936186456215	Time: 0:00:30.102366
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 407.2120706886378	Time: 0:00:22.083966
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 406.87504022176853	Time: 0:00:29.790051
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 406.3572711260156	Time: 0:00:22.049644
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 406.2262663282014	Time: 

[I 2025-08-14 17:49:56,873] Trial 36 finished with values: [0.01943737764346765, 0.9315789473684211] and parameters: {'num_topics': 38, 'dropout': 0.29106020570876007, 'num_neurons': 300, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 413.0036099312726	Time: 0:00:20.714647
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 404.286014534788	Time: 0:00:19.722218
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 401.7207208299246	Time: 0:00:18.379890
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 400.2114027501552	Time: 0:00:19.697212
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 399.29005919175034	Time: 0:00:19.567661
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 398.7276149145109	Time: 0:00:19.972792
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 398.29431616087885	Time: 0:00:18.708905
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 397.90306788876194	Time: 0:00:19.396169
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 397.6230881252703	Time: 0:00:19.529996
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 397.1999800617571	Time: 0:00:17.894347
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 397.1133260265624	Time: 0:

[I 2025-08-14 18:33:28,803] Trial 37 finished with values: [0.06570286309629972, 0.9727272727272728] and parameters: {'num_topics': 11, 'dropout': 0.03457635564856709, 'num_neurons': 200, 'num_layers': 1, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 422.6027830378301	Time: 0:00:25.258055
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 412.9924826065645	Time: 0:00:21.492841
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 410.6789391914447	Time: 0:00:20.858736
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 409.7099144345238	Time: 0:00:21.018843
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 409.00064850714426	Time: 0:00:20.707018
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 408.33945211119106	Time: 0:00:21.022770
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 408.0504711566943	Time: 0:00:20.726864
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 407.5652859176352	Time: 0:00:21.306549
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 407.4429959832595	Time: 0:00:21.509183
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 407.3409252931524	Time: 0:00:21.593413
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 407.1273977316539	Time: 0:

[I 2025-08-14 19:11:22,180] Trial 38 finished with values: [-0.0069633691403946105, 0.9769230769230769] and parameters: {'num_topics': 13, 'dropout': 0.45518437913526, 'num_neurons': 300, 'num_layers': 1, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 415.47284982873975	Time: 0:00:26.311212
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 408.0903299007819	Time: 0:00:24.561949
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 405.9750928443307	Time: 0:00:23.968903
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 404.3205218735895	Time: 0:00:24.844696
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 402.7349133105182	Time: 0:00:24.159908
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 401.8698459201389	Time: 0:00:24.271549
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 401.2482085334504	Time: 0:00:23.659534
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 400.71958113968583	Time: 0:00:24.190609
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 400.3879271662012	Time: 0:00:23.753100
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 400.0797857366189	Time: 0:00:23.660633
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 399.83542104514595	Time: 0

[I 2025-08-14 20:10:18,597] Trial 39 finished with values: [0.03958860361429498, 0.9928571428571429] and parameters: {'num_topics': 14, 'dropout': 0.16603425305446737, 'num_neurons': 300, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 446.30865101551336	Time: 0:00:26.518254
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 427.501864233055	Time: 0:00:23.430865
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 422.26134709351254	Time: 0:00:22.692068
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 419.59884292073366	Time: 0:00:22.354223
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 418.14719165367814	Time: 0:00:22.255888
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 417.1782988388508	Time: 0:00:22.415590
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 416.31980761579166	Time: 0:00:22.462718
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 415.4147183903182	Time: 0:00:22.236204
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 414.90160293481534	Time: 0:00:22.610625
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 414.5240679348976	Time: 0:00:22.160776
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 414.2206833630717	Time:

[I 2025-08-14 20:48:43,969] Trial 40 finished with values: [-0.006879098628452791, 0.882] and parameters: {'num_topics': 50, 'dropout': 0.4874449971620841, 'num_neurons': 300, 'num_layers': 1, 'activation': 'sigmoid'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 431.15369964601905	Time: 0:00:15.405063
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 418.54631163811547	Time: 0:00:14.143606
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 415.18126562588157	Time: 0:00:14.107422
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 413.30262865970343	Time: 0:00:14.119276
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 412.57709186989865	Time: 0:00:14.108395
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 412.0391987030298	Time: 0:00:13.987583
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 411.53290706339806	Time: 0:00:14.191577
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 411.30261896240035	Time: 0:00:13.769925
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 410.93285631417854	Time: 0:00:14.801905
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 410.8928815697759	Time: 0:00:14.159595
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 410.6661592479359	Ti

[I 2025-08-14 21:20:41,777] Trial 41 finished with values: [0.001625821363162621, 0.8794871794871795] and parameters: {'num_topics': 39, 'dropout': 0.4148372733132932, 'num_neurons': 100, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 419.84130866721443	Time: 0:00:14.417639
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 408.6134832404155	Time: 0:00:13.359699
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 404.8535432476209	Time: 0:00:13.574250
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 403.07551179720997	Time: 0:00:13.344664
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 401.8472721045616	Time: 0:00:14.130000
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 400.96962319806477	Time: 0:00:13.318654
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 400.18952159383696	Time: 0:00:13.238433
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 399.6847508513057	Time: 0:00:13.142481
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 399.3624333236948	Time: 0:00:13.375597
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 399.08277669344295	Time: 0:00:17.259589
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 398.7699475728532	Time:

[I 2025-08-14 21:53:04,482] Trial 42 finished with values: [0.04600972296174738, 0.9541666666666667] and parameters: {'num_topics': 24, 'dropout': 0.055163514424525585, 'num_neurons': 100, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 427.0364686918162	Time: 0:00:23.344927
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 415.37001363940374	Time: 0:00:20.894873
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 411.84132321316906	Time: 0:00:20.594249
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 409.8761966251622	Time: 0:00:20.411951
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 408.2891679581711	Time: 0:00:19.924293
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 407.3806401204464	Time: 0:00:20.202483
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 406.7173503191882	Time: 0:00:20.183627
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 406.02690450624857	Time: 0:00:19.810897
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 405.6098099828152	Time: 0:00:20.089123
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 405.29137442521437	Time: 0:00:20.104087
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 405.14669694429165	Time:

[I 2025-08-14 22:38:14,846] Trial 43 finished with values: [0.03137382277301838, 0.9333333333333333] and parameters: {'num_topics': 36, 'dropout': 0.2789086532949821, 'num_neurons': 200, 'num_layers': 1, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 426.19904081918116	Time: 0:00:23.429932
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 412.61738864557475	Time: 0:00:22.431431
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 408.60326830703895	Time: 0:00:22.290791
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 406.9976744471362	Time: 0:00:21.974514
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 405.9218002499553	Time: 0:00:22.483143
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 405.24101350187834	Time: 0:00:22.432049
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 404.7271502520711	Time: 0:00:22.324316
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 404.3138760252694	Time: 0:00:22.256088
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 403.7752245954168	Time: 0:00:22.687158
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 403.6631147722838	Time: 0:00:21.883325
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 403.3510298829741	Time: 

[I 2025-08-14 23:17:02,873] Trial 44 finished with values: [0.025556015483082305, 0.9629629629629629] and parameters: {'num_topics': 27, 'dropout': 0.28794102864508697, 'num_neurons': 300, 'num_layers': 1, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 432.06360545582027	Time: 0:00:15.145349
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 419.0899762430767	Time: 0:00:13.350322
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 417.8855930180005	Time: 0:00:13.672976
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 416.8136889481306	Time: 0:00:13.367157
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 414.74957067394496	Time: 0:00:13.407197
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 413.3945298909083	Time: 0:00:13.775104
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 412.4867452559618	Time: 0:00:13.334641
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 411.78316690704224	Time: 0:00:13.501343
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 411.1246759851872	Time: 0:00:13.374298
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 410.82700025977016	Time: 0:00:13.583158
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 410.4543637937378	Time: 

[I 2025-08-14 23:41:25,436] Trial 45 finished with values: [-0.012669106096583975, 0.8692307692307693] and parameters: {'num_topics': 26, 'dropout': 0.5018796111645731, 'num_neurons': 100, 'num_layers': 2, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 419.4412927842074	Time: 0:00:25.983834
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 413.0467860713228	Time: 0:00:23.451568
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 411.32487346488756	Time: 0:00:21.912613
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 410.0354464344486	Time: 0:00:21.898844
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 409.02599160066626	Time: 0:00:21.361639
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 408.5860183843235	Time: 0:00:22.640198
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 408.1988438610782	Time: 0:00:21.037640
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 407.83094989050863	Time: 0:00:21.124682
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 407.6624172643732	Time: 0:00:20.792585
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 407.6960659510645	Time: 0:00:21.084926
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 407.34225709284254	Time: 

[I 2025-08-15 00:36:11,387] Trial 46 finished with values: [-0.022292517757675455, 0.95] and parameters: {'num_topics': 10, 'dropout': 0.45950325894064187, 'num_neurons': 300, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 433.93415421004005	Time: 0:00:23.257915
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 420.7973713256037	Time: 0:00:20.530296
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 415.65737426271113	Time: 0:00:20.503391
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 411.41860921279056	Time: 0:00:20.357070
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 409.2295074680988	Time: 0:00:20.545912
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 407.9135706557258	Time: 0:00:27.111398
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 406.71632872300034	Time: 0:00:20.383828
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 405.773088984804	Time: 0:00:20.733210
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 404.82798056895547	Time: 0:00:20.598747
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 404.1935006470746	Time: 0:00:20.612653
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 403.6296199935469	Time: 

[I 2025-08-15 01:14:44,584] Trial 47 finished with values: [0.049236189061073056, 0.926829268292683] and parameters: {'num_topics': 41, 'dropout': 0.10669488347501321, 'num_neurons': 300, 'num_layers': 2, 'activation': 'softplus'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 428.86398595742355	Time: 0:00:15.323080
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 416.15079493642094	Time: 0:00:14.055252
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 412.21777751477515	Time: 0:00:13.896217
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 410.3147031978767	Time: 0:00:14.059058
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 409.14679439484127	Time: 0:00:14.031136
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 408.5514023549166	Time: 0:00:13.927852
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 408.0300839360354	Time: 0:00:14.302967
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 407.63350251454006	Time: 0:00:13.895020
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 407.32589307753614	Time: 0:00:14.059989
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 406.98908506092255	Time: 0:00:13.991269
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 406.7766473073234	Tim

[I 2025-08-15 01:45:57,669] Trial 48 finished with values: [0.028531295707150617, 0.94] and parameters: {'num_topics': 35, 'dropout': 0.2890307828033601, 'num_neurons': 100, 'num_layers': 2, 'activation': 'relu'}.


Epoch: [1/100]	Samples: [26586/2658600]	Train Loss: 439.27148580755613	Time: 0:00:23.348715
Epoch: [2/100]	Samples: [53172/2658600]	Train Loss: 422.55804996638267	Time: 0:00:20.757790
Epoch: [3/100]	Samples: [79758/2658600]	Train Loss: 417.0997653399581	Time: 0:00:20.569704
Epoch: [4/100]	Samples: [106344/2658600]	Train Loss: 414.5152610572764	Time: 0:00:19.985652
Epoch: [5/100]	Samples: [132930/2658600]	Train Loss: 413.02028337870496	Time: 0:00:20.143417
Epoch: [6/100]	Samples: [159516/2658600]	Train Loss: 411.68399269851847	Time: 0:00:26.785909
Epoch: [7/100]	Samples: [186102/2658600]	Train Loss: 410.820102501963	Time: 0:00:20.331504
Epoch: [8/100]	Samples: [212688/2658600]	Train Loss: 410.1682572080348	Time: 0:00:20.044223
Epoch: [9/100]	Samples: [239274/2658600]	Train Loss: 409.4866017432225	Time: 0:00:20.087074
Epoch: [10/100]	Samples: [265860/2658600]	Train Loss: 409.06440507927107	Time: 0:00:19.948703
Epoch: [11/100]	Samples: [292446/2658600]	Train Loss: 408.8392454190528	Time: 

[I 2025-08-15 02:23:40,577] Trial 49 finished with values: [0.026318954114752924, 0.9166666666666666] and parameters: {'num_topics': 42, 'dropout': 0.377587470715686, 'num_neurons': 300, 'num_layers': 1, 'activation': 'sigmoid'}.


In [8]:
def train_final_ProdLDA_model(params):
    """Fit a ProdLDA model with the given hyperparameters and score it.

    Args:
        params: mapping that provides the keys "num_topics", "dropout",
            "num_neurons", "num_layers" and "activation". Any other keys
            are ignored.

    Returns:
        A 4-tuple ``(model, output, coherence, diversity)``: the ProdLDA
        instance, the value returned by ``model.train_model(dataset)``,
        the ``c_npmi`` coherence score and the top-10 topic diversity score.

    Note:
        Training and coherence scoring both use the notebook-level
        ``dataset`` variable rather than an argument.
    """
    print(f"\nTraining final model with parameters: {params}")

    # Forward only the tuned hyperparameters, read in a fixed order.
    tuned_keys = ("num_topics", "dropout", "num_neurons", "num_layers", "activation")
    model_kwargs = {key: params[key] for key in tuned_keys}
    model = ProdLDA(**model_kwargs, use_partitions=False)

    output = model.train_model(dataset)

    # Coherence is computed against the documents of the training corpus.
    npmi_scorer = Coherence(texts=dataset.get_corpus(), measure='c_npmi')
    coherence = npmi_scorer.score(output)

    # Diversity is computed over the top 10 words of each topic.
    diversity_scorer = TopicDiversity(topk=10)
    diversity = diversity_scorer.score(output)

    print("Final model metrics:")
    print(f"  Coherence: {coherence:.4f}")
    print(f"  Diversity: {diversity:.4f}")

    return model, output, coherence, diversity

In [7]:
# Re-open the stored "ProdLDA_Study" from its SQLite file in optuna_folder.
ProdLDA_study = optuna.load_study(
    study_name="ProdLDA_Study",
    storage=f"sqlite:///{optuna_folder}ProdLDA_Study.db",
)

In [9]:
if ProdLDA_study.best_trials:
    # Candidate solutions of the multi-objective study (its Pareto-optimal trials).
    pareto_trials = ProdLDA_study.best_trials

    # Pick the first Pareto optimal solution; no further balancing between
    # coherence and diversity is applied here.
    selected_params = pareto_trials[0].params
    final_model, final_output, final_coherence, final_diversity = train_final_ProdLDA_model(selected_params)

    print(f"\nFinal model trained successfully!")

    # Persist the training output. The context manager guarantees the file is
    # flushed and closed, which a bare open() passed to pickle.dump does not.
    with open(optuna_folder + "Optuna_ProdLDA_output.pkl", "wb") as output_file:
        pickle.dump(final_output, output_file)


Training final model with parameters: {'num_topics': 18, 'dropout': 0.1373903093404273, 'num_neurons': 300, 'num_layers': 1, 'activation': 'sigmoid'}
Epoch: [1/100]	Samples: [5843/584300]	Train Loss: 2246.3884215086428	Time: 0:00:06.148821
Epoch: [2/100]	Samples: [11686/584300]	Train Loss: 2196.185287817688	Time: 0:00:04.758910
Epoch: [3/100]	Samples: [17529/584300]	Train Loss: 2170.5700049739003	Time: 0:00:04.917258
Epoch: [4/100]	Samples: [23372/584300]	Train Loss: 2154.904042497433	Time: 0:00:04.796963
Epoch: [5/100]	Samples: [29215/584300]	Train Loss: 2144.8061088054083	Time: 0:00:04.559612
Epoch: [6/100]	Samples: [35058/584300]	Train Loss: 2136.6208911838953	Time: 0:00:05.026892
Epoch: [7/100]	Samples: [40901/584300]	Train Loss: 2131.2892764311996	Time: 0:00:04.862225
Epoch: [8/100]	Samples: [46744/584300]	Train Loss: 2126.4075543117833	Time: 0:00:04.804730
Epoch: [9/100]	Samples: [52587/584300]	Train Loss: 2124.5415561355467	Time: 0:00:04.724329
Epoch: [10/100]	Samples: [58430/5

## ETM

### OCTIS ETM optimization

In [None]:
# Evaluation metric handed to the optimizer.
eval_metric = CoherenceDiversityCombination(dataset)

# Initialize the model whose hyperparameters will be tuned.
model = ETM(
    embeddings_path='./data/chilit-19th-century-averaged-embeddings.txt',
    device='cuda',
    use_partitions=False,
)

# Define the search space: one dimension per tuned ETM hyperparameter.
search_space = {
    "num_topics": Integer(10, 50),
    "dropout": Real(low=0, high=0.60),
    "t_hidden_size": Categorical([50, 100, 200, 300]),
    "activation": Categorical(["softplus", "relu", "sigmoid"]),
}

# Initialize an optimizer object and start the optimization.
optimizer = Optimizer()
optResult = optimizer.optimize(
    model, dataset,
    eval_metric,
    search_space,
    save_name='OCTIS_ETM',
    save_path=octis_folder,
    number_of_call=50,  # number of optimization iterations
    model_runs=5,  # number of runs of the topic model
)

# Save the results of the optimization in a CSV file.
# NOTE(review): this path is relative to the working directory, not
# octis_folder — confirm that is the intended location.
optResult.save_to_csv("OCTIS_ETM.csv")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch----->13 .. LR: 0.005 .. KL_theta: 0.0 .. Rec_loss: 1726.29 .. NELBO: 1726.29
****************************************************************************************************
Epoch: 14 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.0 .. Rec_loss: 1714.74 .. NELBO: 1714.74
Epoch: 14 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.0 .. Rec_loss: 1723.1 .. NELBO: 1723.1
****************************************************************************************************
Epoch----->14 .. LR: 0.005 .. KL_theta: 0.0 .. Rec_loss: 1726.23 .. NELBO: 1726.23
****************************************************************************************************
Epoch: 15 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.0 .. Rec_loss: 1714.52 .. NELBO: 1714.52
Epoch: 15 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.0 .. Rec_loss: 1722.99 .. NELBO: 1722.99
****************************************************************************************

### Train ETM model with best parameters

In [105]:
# Load the optimization results (presumably the file saved by the OCTIS
# optimization cell under save_name='OCTIS_ETM' — verify). The context manager
# closes the file handle instead of leaking it.
with open(os.path.join(octis_folder, "OCTIS_ETM.json"), 'r') as results_file:
    results = json.load(results_file)

# Position of the largest 'f_val' entry; the first one wins on ties.
best_iter = results['f_val'].index(max(results['f_val']))

# Rebuild the ETM with the hyperparameter values recorded at that position.
model = ETM(
    num_topics=results['x_iters']['num_topics'][best_iter],
    dropout=results['x_iters']['dropout'][best_iter],
    t_hidden_size=results['x_iters']['t_hidden_size'][best_iter],
    activation=results['x_iters']['activation'][best_iter],
    device='cuda',
    embeddings_path='./data/chilit-19th-century-averaged-embeddings.txt',
    use_partitions=False
)

output = model.train_model(dataset)

# Persist the training output; the file is closed as soon as the dump finishes.
with open(octis_folder + "OCTIS_ETM_Output.pkl", "wb") as output_file:
    pickle.dump(output, output_file)



model: ETM(
  (t_drop): Dropout(p=0.5920383989283502, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=10, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): ReLU()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=300, out_features=10, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=10, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.34 .. Rec_loss: 1991.97 .. NELBO: 1992.31
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.5 .. Rec_loss: 1878.18 .. NELBO: 1878.68
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.54 .. Rec_loss: 1866.79 .. NELBO: 1867.33
***************************************************************************

### Optuna ETM multi-objective optimization

In [None]:
# Remove the stored "ETM_Study" from the SQLite storage in optuna_folder.
optuna.delete_study(
    storage=f"sqlite:///{optuna_folder}ETM_Study.db",
    study_name="ETM_Study",
)

In [None]:
def objectiveETM(trial) -> Tuple[float, float]:
    """Optuna objective: train one ETM and return ``(coherence, diversity)``.

    Args:
        trial: Optuna trial used to sample "num_topics", "dropout",
            "t_hidden_size" and "activation".

    Returns:
        The ``c_npmi`` coherence score and the top-10 topic diversity score
        of the model trained with the sampled hyperparameters.

    Note:
        Training and coherence scoring both use the notebook-level
        ``dataset`` variable.
    """
    # Sample the hyperparameters; the dict literal keeps the sampling order fixed.
    sampled = {
        "num_topics": trial.suggest_int("num_topics", 10, 50),
        "dropout": trial.suggest_float("dropout", 0, 0.60),
        "t_hidden_size": trial.suggest_categorical("t_hidden_size", [50, 100, 200, 300]),
        "activation": trial.suggest_categorical("activation", ["softplus", "relu", "sigmoid"]),
    }

    # Build and train the ETM with the sampled values plus the fixed settings.
    etm = ETM(
        **sampled,
        device='cuda',
        embeddings_path='./data/chilit-19th-century-averaged-embeddings.txt',
        use_partitions=False,
        num_epochs=50,
    )
    trained = etm.train_model(dataset)

    # First objective: coherence measured against the corpus documents.
    npmi = Coherence(texts=dataset.get_corpus(), measure='c_npmi').score(trained)

    # Second objective: diversity over the top 10 words of each topic.
    topic_diversity = TopicDiversity(topk=10).score(trained)

    # Both values are maximized by the study that runs this objective.
    return npmi, topic_diversity

# Run optimization
# The study is stored in optuna_folder, the same SQLite location that the
# delete_study cell for "ETM_Study" targets and where the ProdLDA study is
# kept. Using octis_folder here meant the delete cell could never reset it.
study = optuna.create_study(
    directions=["maximize", "maximize"],  # one direction per returned objective
    storage=f"sqlite:///{optuna_folder}ETM_Study.db",
    study_name="ETM_Study"
)
study.optimize(objectiveETM, n_trials=50)

[I 2025-08-18 16:32:42,639] A new study created in RDB with name: ETM_Study


model: ETM(
  (t_drop): Dropout(p=0.5017876433740717, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=26, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=100, out_features=26, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=26, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.31 .. Rec_loss: 2001.76 .. NELBO: 2002.07
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.41 .. Rec_loss: 1883.95 .. NELBO: 1884.36
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.42 .. Rec_loss: 1871.89 .. NELBO: 1872.31
**************************************************************************

[I 2025-08-18 16:34:00,834] Trial 0 finished with values: [-0.00842551799202895, 0.046153846153846156] and parameters: {'num_topics': 26, 'dropout': 0.5017876433740717, 't_hidden_size': 100, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.23344863649595227, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=21, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=300, out_features=21, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=21, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.76 .. Rec_loss: 2015.32 .. NELBO: 2017.08
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.93 .. Rec_loss: 1891.9 .. NELBO: 1892.83
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.83 .. Rec_loss: 1879.11 .. NELBO

[I 2025-08-18 16:35:15,249] Trial 1 finished with values: [-0.00816951656097918, 0.05238095238095238] and parameters: {'num_topics': 21, 'dropout': 0.23344863649595227, 't_hidden_size': 300, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.37822912279481546, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=34, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=300, out_features=34, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=34, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.98 .. Rec_loss: 2006.28 .. NELBO: 2007.26
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.55 .. Rec_loss: 1886.94 .. NELBO: 1887.49
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.49 .. Rec_loss: 1874.65 .. NELBO: 1875.14
****************************************************************

[I 2025-08-18 16:36:30,867] Trial 2 finished with values: [-0.008843022643195624, 0.03235294117647059] and parameters: {'num_topics': 34, 'dropout': 0.37822912279481546, 't_hidden_size': 300, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.39586080414106467, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=31, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=200, out_features=31, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=31, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.16 .. Rec_loss: 2006.36 .. NELBO: 2007.52
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.66 .. Rec_loss: 1887.0 .. NELBO: 1887.66
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.59 .. Rec_loss: 1874.72 .. NELBO

[I 2025-08-18 16:37:47,480] Trial 3 finished with values: [-0.008319798632324162, 0.035483870967741936] and parameters: {'num_topics': 31, 'dropout': 0.39586080414106467, 't_hidden_size': 200, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.4078696512690512, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=50, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): ReLU()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=300, out_features=50, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=50, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.29 .. Rec_loss: 2008.51 .. NELBO: 2008.8
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.27 .. Rec_loss: 1887.96 .. NELBO: 1888.23
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.27 .. Rec_loss: 1875.49 .. NELBO: 1875.76
***************************************************************************

[I 2025-08-18 16:39:12,344] Trial 4 finished with values: [0.0045602776711707645, 0.134] and parameters: {'num_topics': 50, 'dropout': 0.4078696512690512, 't_hidden_size': 300, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.4640083858922023, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=22, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=100, out_features=22, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=22, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.95 .. Rec_loss: 1999.37 .. NELBO: 2000.32
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.57 .. Rec_loss: 1883.03 .. NELBO: 1883.6
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.51 .. Rec_loss: 1871.14 .. NELBO:

[I 2025-08-18 16:40:22,109] Trial 5 finished with values: [-0.007972072099875022, 0.045454545454545456] and parameters: {'num_topics': 22, 'dropout': 0.4640083858922023, 't_hidden_size': 100, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.48463792990665217, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=18, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): ReLU()
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=200, out_features=18, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=18, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.65 .. Rec_loss: 1996.26 .. NELBO: 1996.91
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.57 .. Rec_loss: 1880.68 .. NELBO: 1881.25
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.55 .. Rec_loss: 1868.94 .. NELBO: 1869.49
*************************************************************************

[I 2025-08-18 16:41:35,803] Trial 6 finished with values: [-0.006370235112611755, 0.17222222222222222] and parameters: {'num_topics': 18, 'dropout': 0.48463792990665217, 't_hidden_size': 200, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.3755583179318846, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=22, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=300, out_features=22, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=22, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.75 .. Rec_loss: 2009.72 .. NELBO: 2011.47
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.91 .. Rec_loss: 1888.86 .. NELBO: 1889.77
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.81 .. Rec_loss: 1876.4 .. NELBO:

[I 2025-08-18 16:42:48,239] Trial 7 finished with values: [-0.00827759561610265, 0.05] and parameters: {'num_topics': 22, 'dropout': 0.3755583179318846, 't_hidden_size': 300, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.4173045599935797, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=47, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=100, out_features=47, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=47, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.33 .. Rec_loss: 2004.64 .. NELBO: 2005.97
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.78 .. Rec_loss: 1886.01 .. NELBO: 1886.79
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.7 .. Rec_loss: 1873.82 .. NELBO:

[I 2025-08-18 16:44:01,900] Trial 8 finished with values: [-0.008356155852233012, 0.023404255319148935] and parameters: {'num_topics': 47, 'dropout': 0.4173045599935797, 't_hidden_size': 100, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.18875913947455583, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=45, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=100, out_features=45, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=45, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.31 .. Rec_loss: 2010.73 .. NELBO: 2011.04
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.37 .. Rec_loss: 1888.94 .. NELBO: 1889.31
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.38 .. Rec_loss: 1876.33 .. NELBO: 1876.71
*************************************************************************

[I 2025-08-18 16:45:19,162] Trial 9 finished with values: [0.002552780807302627, 0.12444444444444444] and parameters: {'num_topics': 45, 'dropout': 0.18875913947455583, 't_hidden_size': 100, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.24738805161804528, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=20, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=100, out_features=20, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=20, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.0 .. Rec_loss: 2001.65 .. NELBO: 2002.65
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.59 .. Rec_loss: 1883.82 .. NELBO: 1884.41
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.57 .. Rec_loss: 1871.75 .. NELBO: 1872.32
*****************************************************************

[I 2025-08-18 16:46:29,581] Trial 10 finished with values: [-0.007835759647631554, 0.055] and parameters: {'num_topics': 20, 'dropout': 0.24738805161804528, 't_hidden_size': 100, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.35252520908516577, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=12, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=100, out_features=12, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=12, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.29 .. Rec_loss: 1995.2 .. NELBO: 1996.49
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.73 .. Rec_loss: 1879.76 .. NELBO: 1880.49
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.67 .. Rec_loss: 1868.09 .. NELBO: 1868.76
*****************************************************************

[I 2025-08-18 16:47:39,667] Trial 11 finished with values: [-0.007972072099875023, 0.08333333333333333] and parameters: {'num_topics': 12, 'dropout': 0.35252520908516577, 't_hidden_size': 100, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.28698367901587823, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=31, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=300, out_features=31, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=31, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.94 .. Rec_loss: 2011.46 .. NELBO: 2013.4
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 1.01 .. Rec_loss: 1889.87 .. NELBO: 1890.88
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.9 .. Rec_loss: 1877.31 .. NELBO:

[I 2025-08-18 16:49:02,168] Trial 12 finished with values: [-0.008319798632324162, 0.035483870967741936] and parameters: {'num_topics': 31, 'dropout': 0.28698367901587823, 't_hidden_size': 300, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.3296475363484476, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=18, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=100, out_features=18, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=18, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 3.22 .. Rec_loss: 1999.95 .. NELBO: 2003.17
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 2.08 .. Rec_loss: 1880.82 .. NELBO: 1882.9
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 1.86 .. Rec_loss: 1868.95 .. NELBO:

[I 2025-08-18 16:50:13,065] Trial 13 finished with values: [-0.007972072099875022, 0.05555555555555555] and parameters: {'num_topics': 18, 'dropout': 0.3296475363484476, 't_hidden_size': 100, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.14595764585550755, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=43, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): ReLU()
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=200, out_features=43, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=43, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.3 .. Rec_loss: 2005.7 .. NELBO: 2006.0
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.27 .. Rec_loss: 1886.3 .. NELBO: 1886.57
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.26 .. Rec_loss: 1874.02 .. NELBO: 1874.28
*****************************************************************************

[I 2025-08-18 16:51:34,329] Trial 14 finished with values: [0.0011476612687747398, 0.13023255813953488] and parameters: {'num_topics': 43, 'dropout': 0.14595764585550755, 't_hidden_size': 200, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.31064109780951843, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=40, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=50, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=50, out_features=40, bias=True)
  (logsigma_q_theta): Linear(in_features=50, out_features=40, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.22 .. Rec_loss: 2005.09 .. NELBO: 2006.31
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.77 .. Rec_loss: 1885.92 .. NELBO: 1886.69
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.71 .. Rec_loss: 1873.65 .. NELBO: 1874.36
*********************************************************************

[I 2025-08-18 16:52:46,837] Trial 15 finished with values: [-0.007863069193821251, 0.0275] and parameters: {'num_topics': 40, 'dropout': 0.31064109780951843, 't_hidden_size': 50, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.5171898059281129, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=28, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=300, out_features=28, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=28, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.88 .. Rec_loss: 2004.53 .. NELBO: 2005.41
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.51 .. Rec_loss: 1886.0 .. NELBO: 1886.51
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.47 .. Rec_loss: 1873.8 .. NELBO: 1874.27
*******************************************************************

[I 2025-08-18 16:54:02,792] Trial 16 finished with values: [-0.00810621117329217, 0.039285714285714285] and parameters: {'num_topics': 28, 'dropout': 0.5171898059281129, 't_hidden_size': 300, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.07029839529521899, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=16, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=200, out_features=16, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=16, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.7 .. Rec_loss: 2000.59 .. NELBO: 2001.29
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.4 .. Rec_loss: 1883.59 .. NELBO: 1883.99
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.4 .. Rec_loss: 1871.61 .. NELBO: 1872.01
*******************************************************************

[I 2025-08-18 16:55:13,497] Trial 17 finished with values: [-0.007972072099875022, 0.0625] and parameters: {'num_topics': 16, 'dropout': 0.07029839529521899, 't_hidden_size': 200, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.2733758614729633, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=22, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=300, out_features=22, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=22, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.55 .. Rec_loss: 2004.99 .. NELBO: 2006.54
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.87 .. Rec_loss: 1885.33 .. NELBO: 1886.2
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.8 .. Rec_loss: 1873.05 .. NELBO: 1873.85
*******************************************************************

[I 2025-08-18 16:56:30,119] Trial 18 finished with values: [-0.008032606468240756, 0.05] and parameters: {'num_topics': 22, 'dropout': 0.2733758614729633, 't_hidden_size': 300, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.13245590924643258, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=44, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=300, out_features=44, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=44, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 2.34 .. Rec_loss: 2016.94 .. NELBO: 2019.28
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 1.22 .. Rec_loss: 1892.89 .. NELBO: 1894.11
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 1.09 .. Rec_loss: 1880.01 .. NELB

[I 2025-08-18 16:57:49,308] Trial 19 finished with values: [-0.008184440322500942, 0.02727272727272727] and parameters: {'num_topics': 44, 'dropout': 0.13245590924643258, 't_hidden_size': 300, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.3348440784817614, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=26, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=50, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=50, out_features=26, bias=True)
  (logsigma_q_theta): Linear(in_features=50, out_features=26, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.12 .. Rec_loss: 1998.41 .. NELBO: 1999.53
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.69 .. Rec_loss: 1882.35 .. NELBO: 1883.04
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.63 .. Rec_loss: 1870.49 .. NELBO: 187

[I 2025-08-18 16:59:01,338] Trial 20 finished with values: [-0.008573141269459303, 0.04230769230769231] and parameters: {'num_topics': 26, 'dropout': 0.3348440784817614, 't_hidden_size': 50, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.3594262187553921, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=17, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): ReLU()
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=200, out_features=17, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=17, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.69 .. Rec_loss: 2000.89 .. NELBO: 2001.58
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.58 .. Rec_loss: 1883.14 .. NELBO: 1883.72
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.59 .. Rec_loss: 1871.12 .. NELBO: 1871.71
**************************************************************************

[I 2025-08-18 17:00:13,920] Trial 21 finished with values: [-0.0016968190483323267, 0.20588235294117646] and parameters: {'num_topics': 17, 'dropout': 0.3594262187553921, 't_hidden_size': 200, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.38702064203453895, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=45, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): ReLU()
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=200, out_features=45, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=45, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.41 .. Rec_loss: 2006.69 .. NELBO: 2007.1
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.5 .. Rec_loss: 1886.43 .. NELBO: 1886.93
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.5 .. Rec_loss: 1874.05 .. NELBO: 1874.55
****************************************************************************

[I 2025-08-18 17:01:34,079] Trial 22 finished with values: [-0.0004960556170698477, 0.11777777777777777] and parameters: {'num_topics': 45, 'dropout': 0.38702064203453895, 't_hidden_size': 200, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.11897931196390538, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=16, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=200, out_features=16, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=16, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.81 .. Rec_loss: 1998.79 .. NELBO: 1999.6
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.45 .. Rec_loss: 1882.43 .. NELBO: 1882.88
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.44 .. Rec_loss: 1870.52 .. NELBO: 1870.96
*****************************************************************

[I 2025-08-18 17:02:44,841] Trial 23 finished with values: [-0.007718446778067803, 0.06875] and parameters: {'num_topics': 16, 'dropout': 0.11897931196390538, 't_hidden_size': 200, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.3229344587670168, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=47, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=100, out_features=47, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=47, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.97 .. Rec_loss: 2010.54 .. NELBO: 2011.51
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.56 .. Rec_loss: 1889.4 .. NELBO: 1889.96
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.5 .. Rec_loss: 1876.87 .. NELBO: 

[I 2025-08-18 17:03:57,905] Trial 24 finished with values: [-0.00813832527444089, 0.023404255319148935] and parameters: {'num_topics': 47, 'dropout': 0.3229344587670168, 't_hidden_size': 100, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.08802529452985816, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=32, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=200, out_features=32, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=32, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.12 .. Rec_loss: 2003.74 .. NELBO: 2004.86
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.62 .. Rec_loss: 1885.13 .. NELBO: 1885.75
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.56 .. Rec_loss: 1872.93 .. NELBO: 1873.49
****************************************************************

[I 2025-08-18 17:05:16,570] Trial 25 finished with values: [-0.008140502139030073, 0.034375] and parameters: {'num_topics': 32, 'dropout': 0.08802529452985816, 't_hidden_size': 200, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.2200493978790428, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=15, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=50, out_features=15, bias=True)
  (logsigma_q_theta): Linear(in_features=50, out_features=15, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.36 .. Rec_loss: 1995.09 .. NELBO: 1995.45
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.27 .. Rec_loss: 1880.47 .. NELBO: 1880.74
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.26 .. Rec_loss: 1868.79 .. NELBO: 1869.05
*******************************************************************************

[I 2025-08-18 17:06:28,677] Trial 26 finished with values: [-0.007643347445374546, 0.17333333333333334] and parameters: {'num_topics': 15, 'dropout': 0.2200493978790428, 't_hidden_size': 50, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.5829101244003742, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=30, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=100, out_features=30, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=30, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.32 .. Rec_loss: 2005.54 .. NELBO: 2006.86
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.78 .. Rec_loss: 1886.48 .. NELBO: 1887.26
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.7 .. Rec_loss: 1874.24 .. NELBO:

[I 2025-08-18 17:07:40,972] Trial 27 finished with values: [-0.008114284618493513, 0.04] and parameters: {'num_topics': 30, 'dropout': 0.5829101244003742, 't_hidden_size': 100, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.5405996787437367, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=35, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): ReLU()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=300, out_features=35, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=35, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.37 .. Rec_loss: 2004.13 .. NELBO: 2004.5
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.45 .. Rec_loss: 1885.15 .. NELBO: 1885.6
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.45 .. Rec_loss: 1872.98 .. NELBO: 1873.43
****************************************************************************

[I 2025-08-18 17:09:05,514] Trial 28 finished with values: [-0.001033423423210282, 0.14285714285714285] and parameters: {'num_topics': 35, 'dropout': 0.5405996787437367, 't_hidden_size': 300, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.320319944785655, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=15, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=50, out_features=15, bias=True)
  (logsigma_q_theta): Linear(in_features=50, out_features=15, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.67 .. Rec_loss: 1997.13 .. NELBO: 1997.8
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.53 .. Rec_loss: 1881.23 .. NELBO: 1881.76
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.49 .. Rec_loss: 1869.42 .. NELBO: 1869.91
*********************************************************************************

[I 2025-08-18 17:10:17,955] Trial 29 finished with values: [0.00045604524557746296, 0.25333333333333335] and parameters: {'num_topics': 15, 'dropout': 0.320319944785655, 't_hidden_size': 50, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.30614914824238526, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=41, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=300, out_features=41, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=41, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.01 .. Rec_loss: 2006.51 .. NELBO: 2007.52
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.54 .. Rec_loss: 1887.15 .. NELBO: 1887.69
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.49 .. Rec_loss: 1874.87 .. NELBO: 1875.36
****************************************************************

[I 2025-08-18 17:11:35,470] Trial 30 finished with values: [-0.008162655007304185, 0.026829268292682926] and parameters: {'num_topics': 41, 'dropout': 0.30614914824238526, 't_hidden_size': 300, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.08467938286173438, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=16, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=200, out_features=16, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=16, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.07 .. Rec_loss: 1998.05 .. NELBO: 1999.12
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.6 .. Rec_loss: 1881.68 .. NELBO: 1882.28
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.56 .. Rec_loss: 1869.83 .. NELBO: 1870.39
*****************************************************************

[I 2025-08-18 17:12:45,368] Trial 31 finished with values: [-0.00877536518409483, 0.06875] and parameters: {'num_topics': 16, 'dropout': 0.08467938286173438, 't_hidden_size': 200, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.17684923924858745, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=28, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=100, out_features=28, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=28, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.93 .. Rec_loss: 2003.99 .. NELBO: 2004.92
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.57 .. Rec_loss: 1885.34 .. NELBO: 1885.91
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.52 .. Rec_loss: 1873.14 .. NELB

[I 2025-08-18 17:13:57,718] Trial 32 finished with values: [-0.008214751497585479, 0.04285714285714286] and parameters: {'num_topics': 28, 'dropout': 0.17684923924858745, 't_hidden_size': 100, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.5381536241250484, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=26, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=200, out_features=26, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=26, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.1 .. Rec_loss: 2002.3 .. NELBO: 2003.4
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.64 .. Rec_loss: 1884.69 .. NELBO: 1885.33
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.57 .. Rec_loss: 1872.64 .. NELBO: 1

[I 2025-08-18 17:15:12,528] Trial 33 finished with values: [-0.007597076811601464, 0.04230769230769231] and parameters: {'num_topics': 26, 'dropout': 0.5381536241250484, 't_hidden_size': 200, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.027477818419284695, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=50, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=100, out_features=50, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=50, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.71 .. Rec_loss: 2006.41 .. NELBO: 2007.12
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.44 .. Rec_loss: 1886.81 .. NELBO: 1887.25
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.44 .. Rec_loss: 1874.46 .. NELBO: 1874.9
****************************************************************

[I 2025-08-18 17:16:28,883] Trial 34 finished with values: [-0.008768270912557233, 0.024] and parameters: {'num_topics': 50, 'dropout': 0.027477818419284695, 't_hidden_size': 100, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.5279578289947756, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=38, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=50, out_features=38, bias=True)
  (logsigma_q_theta): Linear(in_features=50, out_features=38, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.28 .. Rec_loss: 2007.44 .. NELBO: 2007.72
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.23 .. Rec_loss: 1887.45 .. NELBO: 1887.68
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.25 .. Rec_loss: 1875.08 .. NELBO: 1875.33
*******************************************************************************

[I 2025-08-18 17:17:43,919] Trial 35 finished with values: [-0.00853941538966046, 0.02894736842105263] and parameters: {'num_topics': 38, 'dropout': 0.5279578289947756, 't_hidden_size': 50, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.09441093479072561, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=41, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=300, out_features=41, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=41, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.1 .. Rec_loss: 2010.71 .. NELBO: 2011.81
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.61 .. Rec_loss: 1889.4 .. NELBO: 1890.01
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.55 .. Rec_loss: 1876.88 .. NELBO: 1877.43
******************************************************************

[I 2025-08-18 17:19:02,621] Trial 36 finished with values: [-0.008188369416399083, 0.02926829268292683] and parameters: {'num_topics': 41, 'dropout': 0.09441093479072561, 't_hidden_size': 300, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.1838537319428485, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=20, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=200, out_features=20, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=20, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.83 .. Rec_loss: 2002.53 .. NELBO: 2003.36
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.46 .. Rec_loss: 1884.4 .. NELBO: 1884.86
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.42 .. Rec_loss: 1872.25 .. NELBO: 1872.67
******************************************************************

[I 2025-08-18 17:20:12,955] Trial 37 finished with values: [-0.007972072099875022, 0.05] and parameters: {'num_topics': 20, 'dropout': 0.1838537319428485, 't_hidden_size': 200, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.47553347551975217, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=21, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=300, out_features=21, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=21, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.77 .. Rec_loss: 2007.49 .. NELBO: 2008.26
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.43 .. Rec_loss: 1887.56 .. NELBO: 1887.99
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.39 .. Rec_loss: 1875.21 .. NELBO: 1875.6
*****************************************************************

[I 2025-08-18 17:21:26,332] Trial 38 finished with values: [-0.008742037993155257, 0.05238095238095238] and parameters: {'num_topics': 21, 'dropout': 0.47553347551975217, 't_hidden_size': 300, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.5283015047924261, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=43, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=200, out_features=43, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=43, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.33 .. Rec_loss: 2009.18 .. NELBO: 2010.51
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.72 .. Rec_loss: 1888.6 .. NELBO: 1889.32
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.64 .. Rec_loss: 1876.19 .. NELBO:

[I 2025-08-18 17:22:42,806] Trial 39 finished with values: [-0.008414693526731562, 0.02558139534883721] and parameters: {'num_topics': 43, 'dropout': 0.5283015047924261, 't_hidden_size': 200, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.14693900958923492, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=44, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=50, out_features=44, bias=True)
  (logsigma_q_theta): Linear(in_features=50, out_features=44, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.25 .. Rec_loss: 2006.1 .. NELBO: 2006.35
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.22 .. Rec_loss: 1886.75 .. NELBO: 1886.97
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.24 .. Rec_loss: 1874.43 .. NELBO: 1874.67
*******************************************************************************

[I 2025-08-18 17:23:58,747] Trial 40 finished with values: [-0.008259848806871927, 0.025] and parameters: {'num_topics': 44, 'dropout': 0.14693900958923492, 't_hidden_size': 50, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.11139056710698976, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=44, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=100, out_features=44, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=44, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.11 .. Rec_loss: 2007.1 .. NELBO: 2008.21
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.67 .. Rec_loss: 1887.23 .. NELBO: 1887.9
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.61 .. Rec_loss: 1874.85 .. NELBO:

[I 2025-08-18 17:25:12,941] Trial 41 finished with values: [-0.008219068714424911, 0.025] and parameters: {'num_topics': 44, 'dropout': 0.11139056710698976, 't_hidden_size': 100, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.3104505269586035, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=24, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): ReLU()
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=200, out_features=24, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=24, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.3 .. Rec_loss: 2000.95 .. NELBO: 2001.25
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.47 .. Rec_loss: 1883.29 .. NELBO: 1883.76
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.48 .. Rec_loss: 1871.25 .. NELBO: 1871.73
***************************************************************************

[I 2025-08-18 17:26:29,992] Trial 42 finished with values: [0.0026266247342269836, 0.2125] and parameters: {'num_topics': 24, 'dropout': 0.3104505269586035, 't_hidden_size': 200, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.2769868853252398, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=11, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): ReLU()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=300, out_features=11, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=11, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.16 .. Rec_loss: 1989.99 .. NELBO: 1991.15
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.83 .. Rec_loss: 1876.44 .. NELBO: 1877.27
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.82 .. Rec_loss: 1865.14 .. NELBO: 1865.96
**************************************************************************

[I 2025-08-18 17:27:47,069] Trial 43 finished with values: [0.00755896331352282, 0.45454545454545453] and parameters: {'num_topics': 11, 'dropout': 0.2769868853252398, 't_hidden_size': 300, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.4565340664619044, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=10, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=100, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=100, out_features=10, bias=True)
  (logsigma_q_theta): Linear(in_features=100, out_features=10, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.26 .. Rec_loss: 1992.66 .. NELBO: 1993.92
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.71 .. Rec_loss: 1878.47 .. NELBO: 1879.18
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.64 .. Rec_loss: 1866.99 .. NELBO

[I 2025-08-18 17:28:57,021] Trial 44 finished with values: [-0.007972072099875023, 0.1] and parameters: {'num_topics': 10, 'dropout': 0.4565340664619044, 't_hidden_size': 100, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.005461920059068204, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=37, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=50, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=50, out_features=37, bias=True)
  (logsigma_q_theta): Linear(in_features=50, out_features=37, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.07 .. Rec_loss: 2002.49 .. NELBO: 2003.56
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.72 .. Rec_loss: 1884.44 .. NELBO: 1885.16
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.68 .. Rec_loss: 1872.32 .. NELBO: 1

[I 2025-08-18 17:30:09,863] Trial 45 finished with values: [-0.008765935457076166, 0.02972972972972973] and parameters: {'num_topics': 37, 'dropout': 0.005461920059068204, 't_hidden_size': 50, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.5425847953641002, inplace=False)
  (theta_act): Sigmoid()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=11, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): Sigmoid()
  )
  (mu_q_theta): Linear(in_features=300, out_features=11, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=11, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.46 .. Rec_loss: 2001.64 .. NELBO: 2003.1
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.81 .. Rec_loss: 1882.97 .. NELBO: 1883.78
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.74 .. Rec_loss: 1870.93 .. NELBO: 1871.67
******************************************************************

[I 2025-08-18 17:31:22,012] Trial 46 finished with values: [-0.008952028691322594, 0.1] and parameters: {'num_topics': 11, 'dropout': 0.5425847953641002, 't_hidden_size': 300, 'activation': 'sigmoid'}.


model: ETM(
  (t_drop): Dropout(p=0.15699856688065783, inplace=False)
  (theta_act): Softplus(beta=1.0, threshold=20.0)
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=11, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=200, bias=True)
    (1): Softplus(beta=1.0, threshold=20.0)
    (2): Linear(in_features=200, out_features=200, bias=True)
    (3): Softplus(beta=1.0, threshold=20.0)
  )
  (mu_q_theta): Linear(in_features=200, out_features=11, bias=True)
  (logsigma_q_theta): Linear(in_features=200, out_features=11, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 1.59 .. Rec_loss: 2003.17 .. NELBO: 2004.76
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.91 .. Rec_loss: 1883.83 .. NELBO: 1884.74
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.81 .. Rec_loss: 1871.69 .. NELB

[I 2025-08-18 17:32:31,871] Trial 47 finished with values: [-0.007972072099875022, 0.09090909090909091] and parameters: {'num_topics': 11, 'dropout': 0.15699856688065783, 't_hidden_size': 200, 'activation': 'softplus'}.


model: ETM(
  (t_drop): Dropout(p=0.25320981797696485, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=38, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): ReLU()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=300, out_features=38, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=38, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.31 .. Rec_loss: 2005.89 .. NELBO: 2006.2
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.28 .. Rec_loss: 1886.5 .. NELBO: 1886.78
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.28 .. Rec_loss: 1874.19 .. NELBO: 1874.47
***************************************************************************

[I 2025-08-18 17:33:52,459] Trial 48 finished with values: [-0.00402310855691646, 0.11315789473684211] and parameters: {'num_topics': 38, 'dropout': 0.25320981797696485, 't_hidden_size': 300, 'activation': 'relu'}.


model: ETM(
  (t_drop): Dropout(p=0.447096182596987, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=13, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): ReLU()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=300, out_features=13, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=13, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.12 .. Rec_loss: 1993.05 .. NELBO: 1993.17
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.28 .. Rec_loss: 1879.34 .. NELBO: 1879.62
****************************************************************************************************
Epoch----->1 .. LR: 0.005 .. KL_theta: 0.32 .. Rec_loss: 1867.82 .. NELBO: 1868.14
***************************************************************************

[I 2025-08-18 17:35:06,444] Trial 49 finished with values: [-0.009427572215897437, 0.2076923076923077] and parameters: {'num_topics': 13, 'dropout': 0.447096182596987, 't_hidden_size': 300, 'activation': 'relu'}.


In [95]:
def train_final_ETM_model(
    params,
    num_epochs=50,
    device='cuda',
    embeddings_path='./data/chilit-19th-century-averaged-embeddings.txt',
):
    """Train the final ETM model with the selected hyperparameters and score it.

    The model is fitted on the notebook-level ``dataset`` (without
    partitions) and then evaluated with NPMI coherence and topic diversity.

    Args:
        params: Mapping with the keys ``'num_topics'``, ``'dropout'``,
            ``'t_hidden_size'`` and ``'activation'``.
        num_epochs: Number of training epochs passed to ``ETM``.
        device: Device string passed to ``ETM``.
        embeddings_path: Path of the word-embedding file passed to ``ETM``.

    Returns:
        Tuple ``(model, output, coherence, diversity)`` where ``output`` is
        the value returned by ``model.train_model`` and the two scores come
        from ``Coherence(measure='c_npmi')`` and ``TopicDiversity(topk=10)``.

    Raises:
        KeyError: If any required hyperparameter is missing from ``params``.
    """
    # Fail early, and name every missing key at once, instead of surfacing
    # them one at a time from inside the model constructor call.
    required_keys = ('num_topics', 'dropout', 't_hidden_size', 'activation')
    missing_keys = [key for key in required_keys if key not in params]
    if missing_keys:
        raise KeyError(f"params is missing required hyperparameter(s): {missing_keys}")

    print(f"\nTraining final model with parameters: {params}")

    model = ETM(
        num_topics=params['num_topics'],
        dropout=params['dropout'],
        t_hidden_size=params['t_hidden_size'],
        activation=params['activation'],
        device=device,
        embeddings_path=embeddings_path,
        use_partitions=False,
        num_epochs=num_epochs,
    )

    # `dataset` is the notebook-level OCTIS dataset defined in an earlier cell.
    output = model.train_model(dataset)

    # Calculate final metrics on the same corpus the model was trained on.
    coherence_metric = Coherence(texts=dataset.get_corpus(), measure='c_npmi')
    coherence = coherence_metric.score(output)

    diversity_metric = TopicDiversity(topk=10)
    diversity = diversity_metric.score(output)

    print("Final model metrics:")
    print(f"  Coherence: {coherence:.4f}")
    print(f"  Diversity: {diversity:.4f}")

    return model, output, coherence, diversity

In [94]:
# Reload the persisted ETM study from its SQLite storage in the Optuna folder.
ETM_study = optuna.load_study(
    study_name="ETM_Study",
    storage=f"sqlite:///{optuna_folder}ETM_Study.db",
)

In [96]:
if ETM_study.best_trials:
    # Trials on the Pareto front of the multi-objective study.
    pareto_trials = ETM_study.best_trials

    # Pick the first Pareto-optimal solution; no balancing between the
    # objectives is applied here.
    selected_params = pareto_trials[0].params
    final_model, final_output, final_coherence, final_diversity = train_final_ETM_model(selected_params)

    print("\nFinal model trained successfully!")

    # Persist the training output. The context manager closes the file handle
    # even if pickling fails, so the pickle is never left half-written and open.
    with open(optuna_folder + "Optuna_ETM_output.pkl", "wb") as output_file:
        pickle.dump(final_output, output_file)
else:
    # Make the skipped training visible instead of silently doing nothing.
    print("No Pareto-optimal trials found in ETM_study; final model was not trained.")



Training final model with parameters: {'num_topics': 11, 'dropout': 0.2769868853252398, 't_hidden_size': 300, 'activation': 'relu'}
model: ETM(
  (t_drop): Dropout(p=0.2769868853252398, inplace=False)
  (theta_act): ReLU()
  (rho): Linear(in_features=300, out_features=23743, bias=False)
  (alphas): Linear(in_features=300, out_features=11, bias=False)
  (q_theta): Sequential(
    (0): Linear(in_features=23743, out_features=300, bias=True)
    (1): ReLU()
    (2): Linear(in_features=300, out_features=300, bias=True)
    (3): ReLU()
  )
  (mu_q_theta): Linear(in_features=300, out_features=11, bias=True)
  (logsigma_q_theta): Linear(in_features=300, out_features=11, bias=True)
)
Epoch: 1 .. batch: 20/46 .. LR: 0.005 .. KL_theta: 0.77 .. Rec_loss: 1990.77 .. NELBO: 1991.54
Epoch: 1 .. batch: 40/46 .. LR: 0.005 .. KL_theta: 0.82 .. Rec_loss: 1876.91 .. NELBO: 1877.73
****************************************************************************************************
Epoch----->1 .. LR: 0.00