In [1]:
import mlflow



import sys
sys.path.append('../src')
from config_loader import load_config
from mlflow_setup import setup_mlflow
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation as LDA
from gensim.corpora import Dictionary
from gensim.models import CoherenceModel

Общие функции

In [2]:
def extract_topics(model, vectorizer, n_words=10):
    """Return the highest-weighted vocabulary terms for every topic of a fitted model.

    Args:
        model: Fitted topic model exposing ``components_`` (one weight row per topic).
        vectorizer: Fitted vectorizer exposing ``get_feature_names_out()``.
        n_words: How many top terms to keep per topic.

    Returns:
        A list with one entry per topic; each entry is a list of terms ordered
        from the largest weight to the smallest.
    """
    vocabulary = vectorizer.get_feature_names_out()

    def top_terms(weights):
        # argsort is ascending, so reverse it and keep the leading n_words positions.
        ranked_positions = weights.argsort()[::-1][:n_words]
        return [vocabulary[position] for position in ranked_positions]

    return [top_terms(weights) for weights in model.components_]

def calculate_coherence(topics, texts):
    """Compute the ``c_v`` coherence of ``topics`` against a whitespace-tokenised corpus.

    Args:
        topics: List of topics, each given as a list of words.
        texts: Iterable of documents; every item is cast to ``str`` and split
            on whitespace to obtain its tokens.

    Returns:
        The value returned by gensim's ``CoherenceModel.get_coherence()``.
    """
    corpus_tokens = []
    for document in texts:
        corpus_tokens.append(str(document).split())

    # The gensim dictionary is built from the very same tokenised corpus.
    vocabulary = Dictionary(corpus_tokens)
    scorer = CoherenceModel(
        topics=topics,
        texts=corpus_tokens,
        dictionary=vocabulary,
        coherence='c_v',
    )
    return scorer.get_coherence()


LDA 1

In [5]:
# Load the configuration of the first LDA experiment
cfg = load_config("lda_1")

# Select the MLflow experiment described by the config
setup_mlflow(cfg)

with mlflow.start_run(run_name="lda_topic_model") as run:

    # 1. Log the hyperparameters
    mlflow.log_params({
        "vectorizer_max_features": cfg['vectorizer']['max_features'],
        "vectorizer_ngram_range": str(cfg['vectorizer']['ngram_range']),
        "vectorizer_max_df": cfg['vectorizer']['max_df'],
        "vectorizer_min_df": cfg['vectorizer']['min_df'],
        "lda_n_components": cfg['lda']['n_components'],
        "lda_max_iter": cfg['lda']['max_iter'],
        "lda_learning_method": cfg['lda']['learning_method'],
        # evaluate_every is passed to the model below, so record it with the run too
        "lda_evaluate_every": cfg['lda']['evaluate_every'],
        "lda_random_state": cfg['lda']['random_state'],
        # the evaluation section sits at the root level of the config
        "evaluation_top_words": cfg['evaluation']['top_words_count'],
    })

    # Load the data
    print(f"–ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö –∏–∑ {cfg['data']['input']['path']}")
    df_processed = pd.read_csv(
        cfg['data']['input']['path'],
        sep=cfg['data']['input']['delimiter'],
        encoding=cfg['data']['input']['encoding']
    )

    # Create and fit the vectorizer
    count_vectorizer = CountVectorizer(
        max_features=cfg['vectorizer']['max_features'],
        # the config stores the range as a list; CountVectorizer expects a tuple
        ngram_range=tuple(cfg['vectorizer']['ngram_range']),
        max_df=cfg['vectorizer']['max_df'],
        min_df=cfg['vectorizer']['min_df'],
    )

    # Missing messages become empty documents
    texts = df_processed['message_clean_no_stopwords'].fillna('')
    dataset = count_vectorizer.fit_transform(texts)

    # Create and fit the LDA model
    lda = LDA(
        n_components=cfg['lda']['n_components'],
        max_iter=cfg['lda']['max_iter'],
        learning_method=cfg['lda']['learning_method'],
        evaluate_every=cfg['lda']['evaluate_every'],
        random_state=cfg['lda']['random_state'],
        verbose=1
    )

    lda.fit(dataset)
    print(f"LDA –º–æ–¥–µ–ª—å –æ–±—É—á–µ–Ω–∞: {cfg['lda']['n_components']} —Ç–µ–º")

    # Extract the top words of every topic
    topics = extract_topics(lda, count_vectorizer, cfg['evaluation']['top_words_count'])

    # Compute the metrics
    coherence = calculate_coherence(topics, texts.tolist())

    # Evaluate perplexity once so the printed value is exactly the logged one
    perplexity = lda.perplexity(dataset)

    metrics = {
        "perplexity": perplexity,
        "log_likelihood": lda.score(dataset),
        "coherence": coherence
    }

    mlflow.log_metrics(metrics)
    print(f"–ö–æ–≥–µ—Ä–µ–Ω—Ç–Ω–æ—Å—Ç—å –º–æ–¥–µ–ª–∏: {coherence:.4f}")
    print(f"Perplexity: {perplexity:.2f}")

    # Log the fitted models
    mlflow.sklearn.log_model(count_vectorizer, "count_vectorizer")
    mlflow.sklearn.log_model(lda, "lda_model1")

    # Format the topics once; the same lines are printed and stored as an artifact
    topic_lines = [
        f"–¢–µ–º–∞ {idx}: {', '.join(topic_words)}"
        for idx, topic_words in enumerate(topics, 1)
    ]

    # Print the topics
    print("\n–¢–µ–º—ã –º–æ–¥–µ–ª–∏:")
    for line in topic_lines:
        print(line)

    # Log the topics as a text artifact
    mlflow.log_text("\n".join(topic_lines), "topics.txt")

    print(f"LDA –º–æ–¥–µ–ª—å –∑–∞–ª–æ–≥–∏—Ä–æ–≤–∞–Ω–∞ –≤ MLflow. Run ID: {run.info.run_id}")

–ó–∞–≥—Ä—É–∂–∞—é –∫–æ–Ω—Ñ–∏–≥ –∏–∑: ../config/lda_1.yaml
‚úì Tracking URI: http://127.0.0.1:8080
‚úì Experiment: topic_modeling
‚úì –ì–æ—Ç–æ–≤–æ –∫ –∑–∞–ø—É—Å–∫—É –Ω–æ–≤–æ–≥–æ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞
–ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö –∏–∑ ../data/data_processed2.csv
iteration: 1 of max_iter: 50
iteration: 2 of max_iter: 50
iteration: 3 of max_iter: 50
iteration: 4 of max_iter: 50
iteration: 5 of max_iter: 50, perplexity: 11.9279
iteration: 6 of max_iter: 50
iteration: 7 of max_iter: 50
iteration: 8 of max_iter: 50
iteration: 9 of max_iter: 50
iteration: 10 of max_iter: 50, perplexity: 11.5192
iteration: 11 of max_iter: 50
iteration: 12 of max_iter: 50
iteration: 13 of max_iter: 50
iteration: 14 of max_iter: 50
iteration: 15 of max_iter: 50, perplexity: 11.3988
iteration: 16 of max_iter: 50
iteration: 17 of max_iter: 50
iteration: 18 of max_iter: 50
iteration: 19 of max_iter: 50
iteration: 20 of max_iter: 50, perplexity: 11.3897
LDA –º–æ–¥–µ–ª—å –æ–±—É—á–µ–Ω–∞: 7 —Ç–µ–º
–ö–æ–≥–µ—Ä–µ–Ω—Ç–Ω–



Perplexity: 11.39





–¢–µ–º—ã –º–æ–¥–µ–ª–∏:
–¢–µ–º–∞ 1: —É–ª–∏—Ü–∞, –æ—Å—Ç–∞–Ω–æ–≤–∫–∞, –∞–≤—Ç–æ–±—É—Å, –º–∞—Ä—à—Ä—É—Ç, –±—ã—Ç—å, –ø—É—Ç—å, —Ä–∞–±–æ—Ç–∞—Ç—å, —Ü–µ–Ω—Ç—Ä, –¥–≤–∏–∂–µ–Ω–∏–µ, –ª–∏–Ω–∏—è
–¢–µ–º–∞ 2: –¥–≤–∏–∂–µ–Ω–∏–µ, —Ü–µ–Ω—Ç—Ä, —É–ª–∏—Ü–∞, –±—ã—Ç—å, —Ä–∞–±–æ—Ç–∞—Ç—å, –ø—É—Ç—å, –ø–æ–µ–∑–¥–∫–∞, –º–∞—Ä—à—Ä—É—Ç, –∞–≤—Ç–æ–±—É—Å, –ª–∏–Ω–∏—è
–¢–µ–º–∞ 3: –ø–æ–µ–∑–¥–∫–∞, –ø—É—Ç—å, –±—ã—Ç—å, —Ä–∞–±–æ—Ç–∞—Ç—å, –¥–≤–∏–∂–µ–Ω–∏–µ, —Ü–µ–Ω—Ç—Ä, –º–∞—Ä—à—Ä—É—Ç, —É–ª–∏—Ü–∞, –∞–≤—Ç–æ–±—É—Å, –ø–æ–µ–∑–¥
–¢–µ–º–∞ 4: –ø–æ–¥—Å–ª—É—à–∞—Ç—å, –ø—É—Ç—å, –ø–æ–µ–∑–¥, –∞–≤—Ç–æ–±—É—Å, –ª–∏–Ω–∏—è, –¥–≤–∏–∂–µ–Ω–∏–µ, –±—ã—Ç—å, —Ä–∞–±–æ—Ç–∞—Ç—å, –æ—Å—Ç–∞–Ω–æ–≤–∫–∞, –º–∞—Ä—à—Ä—É—Ç
–¢–µ–º–∞ 5: –º–∞—Ä—à—Ä—É—Ç, –±—ã—Ç—å, –∞–≤—Ç–æ–±—É—Å, —Ä–∞–±–æ—Ç–∞—Ç—å, –æ—Å—Ç–∞–Ω–æ–≤–∫–∞, –ø—É—Ç—å, –¥–≤–∏–∂–µ–Ω–∏–µ, —É–ª–∏—Ü–∞, —Ü–µ–Ω—Ç—Ä, –ª–∏–Ω–∏—è
–¢–µ–º–∞ 6: –ø–æ–µ–∑–¥, –¥–≤–∏–∂–µ–Ω–∏–µ, –ø—É—Ç—å, –±—ã—Ç—å, –æ—Å—Ç–∞–Ω–æ–≤–∫–∞, –º–∞—Ä—à—Ä—É—Ç, –ª–∏–Ω–∏—è, –ø–æ–µ–∑–¥–∫–∞, —Ä–∞–±–æ—Ç–∞—Ç—å, –∞–≤—Ç–æ–±—É—Å
–¢–µ–º–∞ 7: –ª–∏–Ω–∏—è, —Ä–∞–±–æ—Ç–∞—Ç—å

LDA 2

In [9]:
# Load the config
cfg = load_config("lda_2")

# Select the MLflow experiment described by the config
setup_mlflow(cfg)

with mlflow.start_run(run_name=f"lda_{cfg['lda']['n_components']}topics") as run:

    # Artifacts of earlier runs are fetched through the tracking client
    client = mlflow.tracking.MlflowClient()

    # Download and load the vectorizer logged by a previous run
    vectorizer_path = client.download_artifacts(
        cfg['data']['vectorizer']['run_id'],
        "count_vectorizer"
    )
    vectorizer = mlflow.sklearn.load_model(vectorizer_path)

    # Download and read the dataset logged by a previous run
    data_path = client.download_artifacts(
        cfg['data']['data']['run_id'],
        "data/data_processed2/data_processed2.csv"
    )
    df = pd.read_csv(data_path)

    # Log the parameters, including the run ids the inputs came from
    mlflow.log_params({
        "n_topics": cfg['lda']['n_components'],
        "vectorizer_source": cfg['data']['vectorizer']['run_id'],
        "data_source": cfg['data']['data']['run_id'],
        "max_iter": cfg['lda']['max_iter']
    })

    # Prepare the data; the downloaded vectorizer is only applied, not refitted
    texts = df['message_clean_no_stopwords'].fillna('')
    dataset = vectorizer.transform(texts)

    # Fit the LDA model
    lda = LDA(
        n_components=cfg['lda']['n_components'],
        max_iter=cfg['lda']['max_iter'],
        learning_method=cfg['lda']['learning_method'],
        evaluate_every=cfg['lda']['evaluate_every'],
        random_state=cfg['lda']['random_state'],
        verbose=1,
        n_jobs=-1
    )
    lda.fit(dataset)

    topics = extract_topics(lda, vectorizer, n_words=10)

    # Compute the metrics
    coherence = calculate_coherence(topics, texts.tolist())

    mlflow.log_metrics({
        "perplexity": lda.perplexity(dataset),
        "log_likelihood": lda.score(dataset),
        "coherence": coherence
    })

    # Store the model
    mlflow.sklearn.log_model(lda, "lda_model2")

    # Format the topics once; the same lines are printed and stored as an artifact
    topic_lines = [
        f"–¢–µ–º–∞ {i}: {', '.join(words)}"
        for i, words in enumerate(topics, 1)
    ]

    # Print the topics
    print("\n–¢–µ–º—ã:")
    for line in topic_lines:
        print(line)

    # Store the topics straight in the run instead of going through a file in
    # the working directory: no stray topics.txt is left behind and the text
    # no longer depends on the platform's default file encoding.
    # Every line keeps its trailing newline, as in the file written before.
    mlflow.log_text("".join(f"{line}\n" for line in topic_lines), "topics.txt")

    print(f"LDA –º–æ–¥–µ–ª—å –∑–∞–ª–æ–≥–∏—Ä–æ–≤–∞–Ω–∞ –≤ MLflow. Run ID: {run.info.run_id}")

–ó–∞–≥—Ä—É–∂–∞—é –∫–æ–Ω—Ñ–∏–≥ –∏–∑: ../config/lda_2.yaml
‚úì Tracking URI: http://127.0.0.1:8080
‚úì Experiment: topic_modeling
‚úì –ì–æ—Ç–æ–≤–æ –∫ –∑–∞–ø—É—Å–∫—É –Ω–æ–≤–æ–≥–æ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

iteration: 1 of max_iter: 100
iteration: 2 of max_iter: 100
iteration: 3 of max_iter: 100
iteration: 4 of max_iter: 100
iteration: 5 of max_iter: 100, perplexity: 14.2321
iteration: 6 of max_iter: 100
iteration: 7 of max_iter: 100
iteration: 8 of max_iter: 100
iteration: 9 of max_iter: 100
iteration: 10 of max_iter: 100, perplexity: 14.0795
iteration: 11 of max_iter: 100
iteration: 12 of max_iter: 100
iteration: 13 of max_iter: 100
iteration: 14 of max_iter: 100
iteration: 15 of max_iter: 100, perplexity: 14.1199





–¢–µ–º—ã:
–¢–µ–º–∞ 1: –æ—Å—Ç–∞–Ω–æ–≤–∫–∞, –º–∞—Ä—à—Ä—É—Ç, –±—ã—Ç—å, —É–ª–∏—Ü–∞, –∞–≤—Ç–æ–±—É—Å, –ø—É—Ç—å, —Ä–∞–±–æ—Ç–∞—Ç—å, –¥–≤–∏–∂–µ–Ω–∏–µ, –ª–∏–Ω–∏—è, —Ü–µ–Ω—Ç—Ä
–¢–µ–º–∞ 2: –¥–≤–∏–∂–µ–Ω–∏–µ, —É–ª–∏—Ü–∞, –±—ã—Ç—å, —Ü–µ–Ω—Ç—Ä, –º–∞—Ä—à—Ä—É—Ç, –ø–æ–µ–∑–¥–∫–∞, –ø—É—Ç—å, —Ä–∞–±–æ—Ç–∞—Ç—å, –∞–≤—Ç–æ–±—É—Å, –æ—Å—Ç–∞–Ω–æ–≤–∫–∞
–¢–µ–º–∞ 3: –ø–æ–µ–∑–¥–∫–∞, –±—ã—Ç—å, –¥–≤–∏–∂–µ–Ω–∏–µ, —Ü–µ–Ω—Ç—Ä, –º–∞—Ä—à—Ä—É—Ç, –ø—É—Ç—å, –ø–æ–µ–∑–¥, —É–ª–∏—Ü–∞, —Ä–∞–±–æ—Ç–∞—Ç—å, –ª–∏–Ω–∏—è
–¢–µ–º–∞ 4: –¥–≤–∏–∂–µ–Ω–∏–µ, –±—ã—Ç—å, –ø—É—Ç—å, –ø–æ–µ–∑–¥–∫–∞, –º–∞—Ä—à—Ä—É—Ç, –ø–æ–µ–∑–¥, —Ü–µ–Ω—Ç—Ä, –ª–∏–Ω–∏—è, –∞–≤—Ç–æ–±—É—Å, —Ä–∞–±–æ—Ç–∞—Ç—å
–¢–µ–º–∞ 5: –∞–≤—Ç–æ–±—É—Å, –±—ã—Ç—å, —É–ª–∏—Ü–∞, –º–∞—Ä—à—Ä—É—Ç, —Ä–∞–±–æ—Ç–∞—Ç—å, –æ—Å—Ç–∞–Ω–æ–≤–∫–∞, –¥–≤–∏–∂–µ–Ω–∏–µ, –ø—É—Ç—å, –ª–∏–Ω–∏—è, —Ü–µ–Ω—Ç—Ä
–¢–µ–º–∞ 6: –ø—É—Ç—å, –ø–æ–µ–∑–¥, –¥–≤–∏–∂–µ–Ω–∏–µ, –±—ã—Ç—å, –ø–æ–µ–∑–¥–∫–∞, –æ—Å—Ç–∞–Ω–æ–≤–∫–∞, –º–∞—Ä—à—Ä—É—Ç, –ª–∏–Ω–∏—è, –∞–≤—Ç–æ–±—É—Å, —É–ª–∏—Ü–∞
–¢–µ–º–∞ 7: –ª–∏–Ω–∏—è, –±—ã—Ç—å, –º–∞—Ä—à—Ä—É—Ç, –¥–≤–∏–∂–µ–Ω–∏–µ, –

LDA 3 (новый векторизатор + новый датасет)

In [11]:
# Load the config and select the MLflow experiment it describes
cfg = load_config("lda_3")
setup_mlflow(cfg)

with mlflow.start_run(run_name=f"lda_{cfg['lda']['n_components']}topics_new") as run:

    # Download the dataset logged by a previous run
    client = mlflow.tracking.MlflowClient()
    data_path = client.download_artifacts(
        cfg['data']['data_run_id'],
        "output_data/data_without_stopwords.csv"
    )
    df = pd.read_csv(data_path)

    # Vectorizer (fitted from scratch in this run)
    vectorizer = CountVectorizer(
        max_features=cfg['vectorizer']['max_features'],
        # the config stores the range as a list; CountVectorizer expects a tuple
        ngram_range=tuple(cfg['vectorizer']['ngram_range']),
        max_df=cfg['vectorizer']['max_df'],
        min_df=cfg['vectorizer']['min_df']
    )

    # Prepare the data; missing messages become empty documents
    texts = df['message_clean_no_stopwords'].fillna('')
    dataset = vectorizer.fit_transform(texts)
    texts_list = texts.tolist()

    # Log the parameters
    mlflow.log_params({
        "n_topics": cfg['lda']['n_components'],
        "data_source": cfg['data']['data_run_id'],
        "vectorizer_max_features": cfg['vectorizer']['max_features'],
        "vectorizer_ngram_range": str(cfg['vectorizer']['ngram_range']),
        "vectorizer_max_df": cfg['vectorizer']['max_df'],
        "vectorizer_min_df": cfg['vectorizer']['min_df'],
        "lda_max_iter": cfg['lda']['max_iter']
    })

    # LDA
    lda = LDA(
        n_components=cfg['lda']['n_components'],
        max_iter=cfg['lda']['max_iter'],
        learning_method=cfg['lda']['learning_method'],
        evaluate_every=cfg['lda']['evaluate_every'],
        random_state=cfg['lda']['random_state'],
        verbose=1,
        n_jobs=-1
    )
    lda.fit(dataset)

    # Extract the top terms of every topic
    topics = extract_topics(lda, vectorizer, n_words=10)

    # Split multi-word terms into single words for the coherence computation,
    # because the coherence corpus is tokenised on whitespace
    valid_topics = []
    for topic in topics:
        words = [word for phrase in topic for word in str(phrase).split()]
        # Drop duplicates while keeping first-seen order, then cap at 10 words
        valid_topics.append(list(dict.fromkeys(words))[:10])

    # Coherence is computed on at most the first `coherence_max_docs` documents
    coherence_max_docs = 2000
    coherence_texts = texts_list[:coherence_max_docs]

    try:
        coherence = calculate_coherence(valid_topics, coherence_texts)
    except Exception as exc:
        # Keep the best-effort 0.0 fallback, but make the failure visible both
        # in the notebook and on the run, so 0.0 is not read as a real score.
        # Unlike a bare `except:`, KeyboardInterrupt still stops the cell.
        print(f"Coherence calculation failed, logging 0.0 instead: {exc!r}")
        mlflow.set_tag("coherence_error", repr(exc)[:500])
        coherence = 0.0

    # Evaluate perplexity once so the printed value is exactly the logged one
    perplexity = lda.perplexity(dataset)

    # Metrics
    mlflow.log_metrics({
        "perplexity": perplexity,
        "coherence": coherence,
        "log_likelihood": lda.score(dataset)
    })

    # Store the fitted vectorizer and model
    mlflow.sklearn.log_model(vectorizer, "vectorizer")
    mlflow.sklearn.log_model(lda, "lda_model")

    # Topics (original terms, not the split words) as a text artifact
    topics_text = "\n".join([f"–¢–µ–º–∞ {i}: {', '.join(words)}"
                           for i, words in enumerate(topics, 1)])
    mlflow.log_text(topics_text, "topics.txt")

    # Results
    print(f"–¢–µ–º—ã: {cfg['lda']['n_components']}")
    print(f"Perplexity: {perplexity:.2f}")
    print(f"Coherence: {coherence:.4f}")
    print(f"Run ID: {run.info.run_id}")

–ó–∞–≥—Ä—É–∂–∞—é –∫–æ–Ω—Ñ–∏–≥ –∏–∑: ../config/lda_3.yaml
‚úì Tracking URI: http://127.0.0.1:8080
‚úì Experiment: topic_modeling
‚úì –ì–æ—Ç–æ–≤–æ –∫ –∑–∞–ø—É—Å–∫—É –Ω–æ–≤–æ–≥–æ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

iteration: 1 of max_iter: 100
iteration: 2 of max_iter: 100
iteration: 3 of max_iter: 100
iteration: 4 of max_iter: 100
iteration: 5 of max_iter: 100, perplexity: 424.9296
iteration: 6 of max_iter: 100
iteration: 7 of max_iter: 100
iteration: 8 of max_iter: 100
iteration: 9 of max_iter: 100
iteration: 10 of max_iter: 100, perplexity: 396.5783
iteration: 11 of max_iter: 100
iteration: 12 of max_iter: 100
iteration: 13 of max_iter: 100
iteration: 14 of max_iter: 100
iteration: 15 of max_iter: 100, perplexity: 392.7948
iteration: 16 of max_iter: 100
iteration: 17 of max_iter: 100
iteration: 18 of max_iter: 100
iteration: 19 of max_iter: 100
iteration: 20 of max_iter: 100, perplexity: 392.1191
iteration: 21 of max_iter: 100
iteration: 22 of max_iter: 100
iteration: 23 of max_iter: 100
iteration: 24 of max_iter: 100
iteration: 25 of max_iter: 100, perplexity: 391.8181
iteration: 26 of max_iter: 100
iteration: 27 of max_iter: 100
iteration: 28 of max_iter: 100
iteration: 29 of max_iter: 100




–¢–µ–º—ã: 15
Perplexity: 387.14
Coherence: 0.4703
Run ID: 9c87e1b8b13446ac9d420d611f900312
üèÉ View run lda_15topics_new at: http://127.0.0.1:8080/#/experiments/4/runs/9c87e1b8b13446ac9d420d611f900312
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/4
