In [1]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import numpy as np
import pandas as pd

In [2]:
# Load the Metacritic CSV export into a DataFrame.
# NOTE(review): the "Final Dataset " path component ends with a space --
# confirm this matches the folder name on disk.
metacritic = pd.read_csv(
    "../../../Data/Final Dataset /Metacritic/Metacritic_Final.csv"
)

# Quick sanity check: first rows, then (rows, columns), then the column labels.
print(metacritic.head(), metacritic.shape, metacritic.columns, sep="\n")

                 Game Title Console Commenter Name Reviewer Type Comment  \
0           synthetic lover     NaN            NaN        Critic     NaN   
1           synthetic lover     NaN            NaN          User     NaN   
2                 railbreak     NaN            NaN        Critic     NaN   
3                 railbreak     NaN            NaN          User     NaN   
4  mortal kombat 1 quan chi     NaN            NaN        Critic     NaN   

  Date Commented Score Number  Release Date  \
0            NaN          tbd  Dec 22, 2023   
1            NaN          NaN  Dec 22, 2023   
2            NaN          tbd  Dec 22, 2023   
3            NaN          NaN  Dec 22, 2023   
4            NaN          tbd           NaN   

                                         Description  \
0  In the year 2066, biologically engineered huma...   
1  In the year 2066, biologically engineered huma...   
2  Power through hordes of zombies, '90s arcade s...   
3  Power through hordes of zombies, 

In [3]:
# Keep only the rows that actually carry a comment; the raw frame is untouched.
metacritic_data_clean = metacritic.dropna(subset=['Comment'])

# Bag-of-words counts: drop terms appearing in more than 95% of comments or in
# fewer than 2 comments, and remove scikit-learn's built-in English stop words.
# NOTE(review): only English stop words are removed, yet the recorded topic
# output contains French/Spanish/Portuguese function words ("et", "le", "que",
# "el", ...), and "game" survives max_df in every topic -- consider filtering
# comments by language and/or tightening max_df.
vectorizer = CountVectorizer(
    stop_words='english',
    min_df=2,
    max_df=0.95,
)
data_vectorized = vectorizer.fit_transform(metacritic_data_clean['Comment'])

# (number of comments, vocabulary size)
data_vectorized.shape

(2225, 5543)

In [4]:
# Modelling knobs: how many topics to fit and how many top terms to show per
# topic (no_top_words is consumed when the topics are summarised later).
num_topics, no_top_words = 10, 15

# random_state is pinned so repeated runs produce the same topics.
lda_model = LatentDirichletAllocation(
    random_state=0,
    n_components=num_topics,
)
lda_model.fit(data_vectorized)

In [5]:
def display_topics(model, feature_names, no_top_words):
    """Summarise each row of ``model.components_`` as a string of its top terms.

    Args:
        model: Object exposing ``components_``, an iterable of weight rows;
            each row must support ``.argsort()`` (e.g. a NumPy array).
        feature_names: Sequence indexed by the positions ``argsort`` returns,
            giving the term for each weight column.
        no_top_words: Number of highest-weighted terms to keep per row.

    Returns:
        dict mapping ``"Topic <row index>"`` to the selected terms joined by
        single spaces, ordered from highest to lowest weight.
    """
    def _top_terms(weights):
        # argsort is ascending; the negative-step slice walks it from the end,
        # yielding the positions of the no_top_words largest weights first.
        positions = weights.argsort()[:-no_top_words - 1:-1]
        return " ".join(feature_names[j] for j in positions)

    return {
        f"Topic {k}": _top_terms(row)
        for k, row in enumerate(model.components_)
    }

In [6]:
# Map each fitted topic to its no_top_words highest-weighted vocabulary terms.
topics = display_topics(
    model=lda_model,
    feature_names=vectorizer.get_feature_names_out(),
    no_top_words=no_top_words,
)
topics

{'Topic 0': 'game fun et new plus like world le je est не experience combat qui blood',
 'Topic 1': 'game adventure gameplay fun story great experience like play lot games unique little new just',
 'Topic 2': 'game experience vr play games new gameplay time fun great world just best good way',
 'Topic 3': 'game fun series like experience based games great fans vr good best story genre turn',
 'Topic 4': 'game games time like just fun great good way new better best play don make',
 'Topic 5': 'game es la juego que en el los experience best una 10 fans muy games',
 'Topic 6': 'game story great good really play fun time gameplay like just best characters played amazing',
 'Topic 7': 'game like just best ve games play fun jogo good experience que gameplay content great',
 'Topic 8': 'game like just really little combat gameplay games excellent play world fantasy 10 good better',
 'Topic 9': 'game like games make just new good fun time world puzzle story original series want'}

In [7]:
# Wide layout: a single row (index label 0) with one column per topic.
topics_df = pd.DataFrame(data=topics, index=[0])

topics_df

Unnamed: 0,Topic 0,Topic 1,Topic 2,Topic 3,Topic 4,Topic 5,Topic 6,Topic 7,Topic 8,Topic 9
0,game fun et new plus like world le je est не e...,game adventure gameplay fun story great experi...,game experience vr play games new gameplay tim...,game fun series like experience based games gr...,game games time like just fun great good way n...,game es la juego que en el los experience best...,game story great good really play fun time gam...,game like just best ve games play fun jogo goo...,game like just really little combat gameplay g...,game like games make just new good fun time wo...


In [8]:
# Long layout: one row per topic with a "Topic N" label and its term string.
# NOTE(review): labels here are 1-based ("Topic 1".."Topic N") while the keys
# of `topics` are 0-based ("Topic 0"..); the column is called "Ranking" but the
# order is simply the order of `topics` -- confirm both are intended.
topic_df_clean = pd.DataFrame(
    {
        "Ranking": [f"Topic {label}" for label in range(1, len(topics) + 1)],
        "Topic": [*topics.values()],
    }
)

topic_df_clean

Unnamed: 0,Ranking,Topic
0,Topic 1,game fun et new plus like world le je est не e...
1,Topic 2,game adventure gameplay fun story great experi...
2,Topic 3,game experience vr play games new gameplay tim...
3,Topic 4,game fun series like experience based games gr...
4,Topic 5,game games time like just fun great good way n...
5,Topic 6,game es la juego que en el los experience best...
6,Topic 7,game story great good really play fun time gam...
7,Topic 8,game like just best ve games play fun jogo goo...
8,Topic 9,game like just really little combat gameplay g...
9,Topic 10,game like games make just new good fun time wo...
