In [1]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import numpy as np
import pandas as pd

In [2]:
# Load the PlayStation Blog comment export into a DataFrame.
# NOTE(review): the directory name "Final Dataset " ends with a space —
# confirm this matches the folder on disk before "fixing" it.
psblog = pd.read_csv(
    "../../../Data/Final Dataset /PSBlog/PSBlog_Final.csv"
)

# Sanity check: first rows, then (n_rows, n_cols), then the column labels,
# each printed on its own line.
print(psblog.head(), psblog.shape, psblog.columns, sep="\n")

         Author                         Timestamp  \
0      engobruh  December 13, 2023 at 8:49 am PST   
1     Tim102597  December 13, 2023 at 8:53 am PST   
2    cyberpangu  December 13, 2023 at 8:54 am PST   
3     StingrayX  December 13, 2023 at 8:55 am PST   
4  Prosopopoico  December 13, 2023 at 8:56 am PST   

                                             Comment  \
0                                  Finally, quality.   
1  Anyone else had buzz listing year star command...   
2  I would be happier if it was Ride 5 instead of...   
3                                       Solid month!   
4  Awesome month! But I just purchased GTAV last ...   

                                        Article Link Platform  
0  https://blog.playstation.com/2023/12/13/playst...  PS Plus  
1  https://blog.playstation.com/2023/12/13/playst...  PS Plus  
2  https://blog.playstation.com/2023/12/13/playst...  PS Plus  
3  https://blog.playstation.com/2023/12/13/playst...  PS Plus  
4  https://blog.playstat

In [3]:
# Keep only the rows that actually have comment text; the vectorizer cannot
# tokenize missing values.
psblog_data_clean = psblog.loc[psblog['Comment'].notna()]

# Bag-of-words counts: ignore terms found in more than 95% of the comments or
# in fewer than 2 comments, and remove the built-in English stop words.
vectorizer = CountVectorizer(
    stop_words='english',
    min_df=2,
    max_df=0.95,
)
data_vectorized = vectorizer.fit_transform(psblog_data_clean['Comment'])

# Shape of the resulting document-term matrix.
data_vectorized.shape

(13349, 10079)

In [4]:
# Number of LDA topics to fit, and how many top terms per topic a later cell
# will list when summarising them.
num_topics, no_top_words = 10, 15

# random_state is pinned so that re-running the notebook reproduces the same
# topics; fit() returns the estimator itself, so it can be chained here.
lda_model = LatentDirichletAllocation(
    n_components=num_topics,
    random_state=0,
).fit(data_vectorized)
lda_model

In [5]:
def display_topics(model, feature_names, no_top_words):
    """Summarise every topic of ``model`` as a string of its top terms.

    Despite the name, nothing is printed; the summary is returned.

    Parameters
    ----------
    model : object exposing ``components_``
        ``components_`` is iterated row by row (one row per topic); each row
        must provide an ``argsort()`` method.
    feature_names : sequence
        Looked up with the positions produced by ``argsort()``.
    no_top_words : int
        Number of highest-weighted terms to keep for each topic.

    Returns
    -------
    dict
        Maps ``"Topic <idx>"`` (0-based, in row order) to the selected terms
        joined by single spaces, highest weight first.
    """
    def top_terms(weights):
        # argsort() is ascending, so reverse it and keep the leading entries.
        ranked = weights.argsort()[::-1][:no_top_words]
        return " ".join(feature_names[pos] for pos in ranked)

    return {
        f"Topic {idx}": top_terms(weights)
        for idx, weights in enumerate(model.components_)
    }

In [6]:
# Map each fitted topic to its highest-weighted vocabulary terms.
topics = display_topics(
    model=lda_model,
    feature_names=vectorizer.get_feature_names_out(),
    no_top_words=no_top_words,
)
topics

{'Topic 0': 'game like play ps4 just ps5 games really hope played new time best people killzone',
 'Topic 1': 'play playstation app thanks wait add game store ps games let edition remote sale digital',
 'Topic 2': 'games game time just sony ps3 ps1 like ps support ps2 great ps5 want update',
 'Topic 3': 'ps4 games 2023 edition star ps5 wars unknown evil strange adventure life mafia definitive game',
 'Topic 4': 'game games vr good looks psvr2 great like really looking forward just play month amazing',
 'Topic 5': 'games ps plus game price extra man sony don premium just spider month like playstation',
 'Topic 6': 'ps5 pc games ps4 sony console just xbox like playstation play ps game make version',
 'Topic 7': 'game games just like controller sony year don buy play playstation psvr2 make physical right',
 'Topic 8': 'game ps5 version new mode like player release just turismo gran ps4 play make edition',
 'Topic 9': 'game free games play just download dead day need version month money dl

In [7]:
# Wide layout: one column per topic and a single row (index 0) holding that
# topic's term string.
topics_df = pd.DataFrame(data=topics, index=[0])
topics_df

Unnamed: 0,Topic 0,Topic 1,Topic 2,Topic 3,Topic 4,Topic 5,Topic 6,Topic 7,Topic 8,Topic 9
0,game like play ps4 just ps5 games really hope ...,play playstation app thanks wait add game stor...,games game time just sony ps3 ps1 like ps supp...,ps4 games 2023 edition star ps5 wars unknown e...,game games vr good looks psvr2 great like real...,games ps plus game price extra man sony don pr...,ps5 pc games ps4 sony console just xbox like p...,game games just like controller sony year don ...,game ps5 version new mode like player release ...,game free games play just download dead day ne...


In [8]:
# Long layout: one row per topic. The "Ranking" labels are 1-based
# ("Topic 1" .. "Topic N"), whereas the keys of `topics` are 0-based.
topic_df_clean = pd.DataFrame({
    "Ranking": [f"Topic {rank}" for rank in range(1, len(topics) + 1)],
    "Topic": [*topics.values()],
})
topic_df_clean

Unnamed: 0,Ranking,Topic
0,Topic 1,game like play ps4 just ps5 games really hope ...
1,Topic 2,play playstation app thanks wait add game stor...
2,Topic 3,games game time just sony ps3 ps1 like ps supp...
3,Topic 4,ps4 games 2023 edition star ps5 wars unknown e...
4,Topic 5,game games vr good looks psvr2 great like real...
5,Topic 6,games ps plus game price extra man sony don pr...
6,Topic 7,ps5 pc games ps4 sony console just xbox like p...
7,Topic 8,game games just like controller sony year don ...
8,Topic 9,game ps5 version new mode like player release ...
9,Topic 10,game free games play just download dead day ne...
