In [17]:
from gensim.corpora import Dictionary
from gensim.models import LdaModel
from nltk.corpus import stopwords
import nltk

# 데이터 셋
documents = [
    "I can't believe we're really doing this, going into the enchanted forest."
    "Yeah, it's been my dream since I was a kid."
    "Your dream? I thought you were afraid of magic."
    "I was, but dreams can change, can't they?"
    "Sure, as long as you're ready for whatever comes next."
    "Are you saying there's something we should worry about?"
    "You can never be too sure, especially when magic is involved."
    "But we have each other, right? That should count for something."
    "Absolutely, friendship is the most powerful magic of all."
    "Just promise me one thing: if things get tough, you won't leave me behind."
    "I promise, not only because of our friendship but also because we're stronger together."
    "Good, because we're about to cross the threshold, and there's no turning back."
    "As we step in, I can feel the energy change. Can you?"
    "Yes, it's like we've just walked into another dimension."
    "Look, there's a clearing ahead. Should we go there?"
    "Might as well, it could be a good place to get our bearings."
    "The clearing is beautiful, full of luminescent plants and mysterious creatures."
    "It's enchanting, but let's not forget why we're here."
    "Right, we need to find the Heart of the Forest. It's said to grant a single wish to those who find it."
    "The wish can be anything?"
    "Yes, but it comes with a price, and we must be willing to pay it."
    "Well, if it's for a good cause, I'm willing to take the risk."
    "Me too, but let's be cautious. The forest is full of tricks and illusions."
    "Agreed, sticking together is our best chance of finding it and making it out alive."
    "Suddenly, a soft voice echoes, 'Who dares to seek the Heart of the Forest?'"
    "It must be the Guardian. What should we say?"
    "Let's be honest. We seek it to bring balance to our world."
    "The Guardian seems pleased. 'Very well, you may proceed. But remember, the Heart will test you both.'"
    "We nod, knowing the real journey has just begun."
    "Our hands tighten around each other's. Whatever comes next, we're ready."
]

# 전처리
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

tokenized_data = []
for doc in documents:
    tokens = doc.lower().split()
    filtered_tokens = [word for word in tokens if word not in stop_words]
    tokenized_data.append(filtered_tokens)

# 사전과 코퍼스 생성
dictionary = Dictionary(tokenized_data)
corpus = [dictionary.doc2bow(doc) for doc in tokenized_data]

# LDA 모델 학습 😎
lda_model = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2)

# 토픽 출력 😊
topics = lda_model.print_topics()
for topic in topics:
    print('-' * 20)
    print(f"Topic {topic[0]}: {topic[1]}")

--------------------
Topic 0: 0.014*"we're" + 0.013*"heart" + 0.011*"there's" + 0.011*"comes" + 0.010*"must" + 0.010*"get" + 0.010*"clearing" + 0.009*"willing" + 0.009*"wish" + 0.009*"can't"
--------------------
Topic 1: 0.022*"we're" + 0.013*"comes" + 0.013*"there's" + 0.012*"heart" + 0.011*"seek" + 0.010*"good" + 0.010*"magic" + 0.010*"well," + 0.010*"let's" + 0.010*"full"


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\lch85\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [37]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from nltk.corpus import stopwords
import nltk

# 데이터 셋
documents = [
    "I can't believe we're really doing this, going into the enchanted forest.",
    "Yeah, it's been my dream since I was a kid.",
    "Your dream? I thought you were afraid of magic.",
    "I was, but dreams can change, can't they?",
    "Sure, as long as you're ready for whatever comes next.",
    "Are you saying there's something we should worry about?",
    "You can never be too sure, especially when magic is involved.",
    "But we have each other, right? That should count for something.",
    "Absolutely, friendship is the most powerful magic of all.",
    "Just promise me one thing: if things get tough, you won't leave me behind.",
    "I promise, not only because of our friendship but also because we're stronger together.",
    "Good, because we're about to cross the threshold, and there's no turning back.",
    "As we step in, I can feel the energy change. Can you?",
    "Yes, it's like we've just walked into another dimension.",
    "Look, there's a clearing ahead. Should we go there?",
    "Might as well, it could be a good place to get our bearings.",
    "The clearing is beautiful, full of luminescent plants and mysterious creatures.",
    "It's enchanting, but let's not forget why we're here.",
    "Right, we need to find the Heart of the Forest. It's said to grant a single wish to those who find it.",
    "The wish can be anything?",
    "Yes, but it comes with a price, and we must be willing to pay it.",
    "Well, if it's for a good cause, I'm willing to take the risk.",
    "Me too, but let's be cautious. The forest is full of tricks and illusions.",
    "Agreed, sticking together is our best chance of finding it and making it out alive.",
    "Suddenly, a soft voice echoes, 'Who dares to seek the Heart of the Forest?'",
    "It must be the Guardian. What should we say?",
    "Let's be honest. We seek it to bring balance to our world.",
    "The Guardian seems pleased. 'Very well, you may proceed. But remember, the Heart will test you both.'",
    "We nod, knowing the real journey has just begun.",
    "Our hands tighten around each other's. Whatever comes next, we're ready."
]

# 전처리
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

documents = [ ' '.join([word for word in document.lower().split() if word not in stop_words]) for document in documents]

# TF-IDF 벡터화
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(documents)

# LDA 모델링
lda = LatentDirichletAllocation(n_components=5, random_state=42)
lda.fit(X)

# 토픽 모델 평가
topic_words = lda.components_.argsort(axis=1)[:, ::-1]
for i in range(lda.n_components):
    top_words = [vectorizer.get_feature_names_out()[j] for j in topic_words[i, :5]]
    print(f"Topic {i}: {', '.join(top_words)}")

Topic 0: there, all, absolutely, powerful, dreams
Topic 1: well, wish, something, willing, anything
Topic 2: dream, let, say, guardian, must
Topic 3: promise, especially, never, involved, also
Topic 4: re, we, here, forget, enchanting


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\lch85\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [48]:
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd

documents2 = [
    "I can't believe we're really doing this, going into the enchanted forest."
    "Yeah, it's been my dream since I was a kid."
    "Your dream? I thought you were afraid of magic."
    "I was, but dreams can change, can't they?"
    "Sure, as long as you're ready for whatever comes next."
    "Are you saying there's something we should worry about?"
    "You can never be too sure, especially when magic is involved."
    "But we have each other, right? That should count for something."
    "Absolutely, friendship is the most powerful magic of all."
    "Just promise me one thing: if things get tough, you won't leave me behind."
    "I promise, not only because of our friendship but also because we're stronger together."
    "Good, because we're about to cross the threshold, and there's no turning back."
    "As we step in, I can feel the energy change. Can you?"
    "Yes, it's like we've just walked into another dimension."
    "Look, there's a clearing ahead. Should we go there?"
    "Might as well, it could be a good place to get our bearings."
    "The clearing is beautiful, full of luminescent plants and mysterious creatures."
    "It's enchanting, but let's not forget why we're here."
    "Right, we need to find the Heart of the Forest. It's said to grant a single wish to those who find it."
    "The wish can be anything?"
    "Yes, but it comes with a price, and we must be willing to pay it."
    "Well, if it's for a good cause, I'm willing to take the risk."
    "Me too, but let's be cautious. The forest is full of tricks and illusions."
    "Agreed, sticking together is our best chance of finding it and making it out alive."
    "Suddenly, a soft voice echoes, 'Who dares to seek the Heart of the Forest?'"
    "It must be the Guardian. What should we say?"
    "Let's be honest. We seek it to bring balance to our world."
    "The Guardian seems pleased. 'Very well, you may proceed. But remember, the Heart will test you both.'"
    "We nod, knowing the real journey has just begun."
    "Our hands tighten around each other's. Whatever comes next, we're ready."
]

vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(documents2)
feature_names = vectorizer.get_feature_names_out()

dense = X.todense()
denselist = dense.tolist()
df = pd.DataFrame(denselist, columns=feature_names)
for i, row in df.iterrows():
    top_five = row.nlargest(10)
    print(f"문장 {i+1}의 상위 5 TF-IDF 단어:")
    print(top_five)
    print("-----")

문장 1의 상위 5 TF-IDF 단어:
forest      0.278693
comes       0.209020
good        0.209020
heart       0.209020
just        0.209020
let         0.209020
magic       0.209020
change      0.139347
clearing    0.139347
dream       0.139347
Name: 0, dtype: float64
-----


In [44]:
from nltk import ngrams

documents2 = [
    "I can't believe we're really doing this, going into the enchanted forest."
    "Yeah, it's been my dream since I was a kid."
    "Your dream? I thought you were afraid of magic."
    "I was, but dreams can change, can't they?"
    "Sure, as long as you're ready for whatever comes next."
    "Are you saying there's something we should worry about?"
    "You can never be too sure, especially when magic is involved."
    "But we have each other, right? That should count for something."
    "Absolutely, friendship is the most powerful magic of all."
    "Just promise me one thing: if things get tough, you won't leave me behind."
    "I promise, not only because of our friendship but also because we're stronger together."
    "Good, because we're about to cross the threshold, and there's no turning back."
    "As we step in, I can feel the energy change. Can you?"
    "Yes, it's like we've just walked into another dimension."
    "Look, there's a clearing ahead. Should we go there?"
    "Might as well, it could be a good place to get our bearings."
    "The clearing is beautiful, full of luminescent plants and mysterious creatures."
    "It's enchanting, but let's not forget why we're here."
    "Right, we need to find the Heart of the Forest. It's said to grant a single wish to those who find it."
    "The wish can be anything?"
    "Yes, but it comes with a price, and we must be willing to pay it."
    "Well, if it's for a good cause, I'm willing to take the risk."
    "Me too, but let's be cautious. The forest is full of tricks and illusions."
    "Agreed, sticking together is our best chance of finding it and making it out alive."
    "Suddenly, a soft voice echoes, 'Who dares to seek the Heart of the Forest?'"
    "It must be the Guardian. What should we say?"
    "Let's be honest. We seek it to bring balance to our world."
    "The Guardian seems pleased. 'Very well, you may proceed. But remember, the Heart will test you both.'"
    "We nod, knowing the real journey has just begun."
    "Our hands tighten around each other's. Whatever comes next, we're ready."
]

# 모든 문장에 대해 N-gram 분석 수행
for i, sentence in enumerate(documents2):
    print(f"--- 문장 {i+1}의 N-grams ---")
    n = 2  # bigram
    bigrams = ngrams(sentence.split(), n)
    for grams in bigrams:
        print(grams)

--- 문장 1의 N-grams ---
('I', "can't")
("can't", 'believe')
('believe', "we're")
("we're", 'really')
('really', 'doing')
('doing', 'this,')
('this,', 'going')
('going', 'into')
('into', 'the')
('the', 'enchanted')
('enchanted', 'forest.Yeah,')
('forest.Yeah,', "it's")
("it's", 'been')
('been', 'my')
('my', 'dream')
('dream', 'since')
('since', 'I')
('I', 'was')
('was', 'a')
('a', 'kid.Your')
('kid.Your', 'dream?')
('dream?', 'I')
('I', 'thought')
('thought', 'you')
('you', 'were')
('were', 'afraid')
('afraid', 'of')
('of', 'magic.I')
('magic.I', 'was,')
('was,', 'but')
('but', 'dreams')
('dreams', 'can')
('can', 'change,')
('change,', "can't")
("can't", 'they?Sure,')
('they?Sure,', 'as')
('as', 'long')
('long', 'as')
('as', "you're")
("you're", 'ready')
('ready', 'for')
('for', 'whatever')
('whatever', 'comes')
('comes', 'next.Are')
('next.Are', 'you')
('you', 'saying')
('saying', "there's")
("there's", 'something')
('something', 'we')
('we', 'should')
('should', 'worry')
('worry', 'abou

In [42]:
from gensim.corpora import Dictionary
from gensim.models import LdaMulticore

# 문장을 단어로 분리합니다.
texts = [doc.lower().split() for doc in documents]

dictionary = Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

lda = LdaMulticore(corpus, id2word=dictionary, num_topics=5, chunksize=5)
topics = lda.print_topics()
for topic in topics:
    print('-' * 20)
    print(f"Topic {topic[0]}: {topic[1]}")

--------------------
Topic 0: 0.020*"we're" + 0.020*"heart" + 0.020*"seek" + 0.020*"suddenly," + 0.020*"forest?'" + 0.020*"dares" + 0.020*"sticking" + 0.020*"voice" + 0.020*"echoes," + 0.020*"soft"
--------------------
Topic 1: 0.034*"we're" + 0.034*"there's" + 0.020*"comes" + 0.018*"whatever" + 0.018*"let's" + 0.018*"around" + 0.018*"ready." + 0.018*"other's." + 0.018*"hands" + 0.018*"threshold,"
--------------------
Topic 2: 0.035*"find" + 0.019*"wish" + 0.019*"heart" + 0.019*"it." + 0.019*"single" + 0.019*"good" + 0.019*"forest." + 0.019*"said" + 0.019*"right," + 0.019*"need"
--------------------
Topic 3: 0.027*"full" + 0.027*"clearing" + 0.015*"let's" + 0.015*"there's" + 0.015*"yes," + 0.015*"things" + 0.015*"comes" + 0.015*"sure," + 0.015*"one" + 0.015*"get"
--------------------
Topic 4: 0.028*"we're" + 0.027*"well," + 0.027*"friendship" + 0.026*"willing" + 0.015*"can't" + 0.015*"heart" + 0.015*"pleased." + 0.015*"remember," + 0.015*"guardian" + 0.015*"forest."


In [55]:
test1 = (
    "I can't believe we're really doing this, going into the enchanted forest."
    "Yeah, it's been my dream since I was a kid."
    "Your dream? I thought you were afraid of magic."
    "I was, but dreams can change, can't they?"
    "Sure, as long as you're ready for whatever comes next."
    "Are you saying there's something we should worry about?"
    "You can never be too sure, especially when magic is involved."
    "But we have each other, right? That should count for something."
    "Absolutely, friendship is the most powerful magic of all."
    "Just promise me one thing: if things get tough, you won't leave me behind."
    "I promise, not only because of our friendship but also because we're stronger together."
    "Good, because we're about to cross the threshold, and there's no turning back."
    "As we step in, I can feel the energy change. Can you?"
    "Yes, it's like we've just walked into another dimension."
    "Look, there's a clearing ahead. Should we go there?"
    "Might as well, it could be a good place to get our bearings."
    "The clearing is beautiful, full of luminescent plants and mysterious creatures."
    "It's enchanting, but let's not forget why we're here."
    "Right, we need to find the Heart of the Forest. It's said to grant a single wish to those who find it."
    "The wish can be anything?"
    "Yes, but it comes with a price, and we must be willing to pay it."
    "Well, if it's for a good cause, I'm willing to take the risk."
    "Me too, but let's be cautious. The forest is full of tricks and illusions."
    "Agreed, sticking together is our best chance of finding it and making it out alive."
    "Suddenly, a soft voice echoes, 'Who dares to seek the Heart of the Forest?'"
    "It must be the Guardian. What should we say?"
    "Let's be honest. We seek it to bring balance to our world."
    "The Guardian seems pleased. 'Very well, you may proceed. But remember, the Heart will test you both.'"
    "We nod, knowing the real journey has just begun."
    "Our hands tighten around each other's. Whatever comes next, we're ready."
)



In [56]:
from keybert import KeyBERT


In [57]:
model = KeyBERT()

keywords = model.extract_keywords(test1, keyphrase_ngram_range=(1, 2), stop_words=None)
print(keywords)

[('enchanted forest', 0.3829), ('absolutely friendship', 0.3753), ('the enchanted', 0.3745), ('powerful magic', 0.3717), ('enchanted', 0.368)]
