In [1]:
from keybert import KeyBERT
from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize
import pandas as pd
import torch
import os
from tqdm import tqdm, trange
from random import randint
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

In [2]:
# Register tqdm's pandas integration; the `.progress_apply` calls used in this notebook rely on it.
tqdm.pandas()

In [3]:
def morphs(text, noun = True, verb = False, adjective = False, adverb = False):
    """Return the tokens of `text` whose part of speech is one of the selected classes.

    The text is split with `word_tokenize` and tagged with `pos_tag` using the
    'universal' tagset; a token is kept when its tag is among the enabled ones.

    Parameters:
        text: the string to tokenize and tag.
        noun: keep tokens tagged 'NOUN' (enabled by default).
        verb: keep tokens tagged 'VERB'.
        adjective: keep tokens tagged 'ADJ'.
        adverb: keep tokens tagged 'ADV'.

    Returns:
        A list of the matching tokens, in their original order.
    """
    # Map each switch to the universal tag it enables, then keep the enabled tags.
    tag_switches = (
        ('NOUN', noun),
        ('VERB', verb),
        ('ADJ', adjective),
        ('ADV', adverb),
    )
    wanted_tags = {tag for tag, enabled in tag_switches if enabled}

    tagged_tokens = pos_tag(word_tokenize(text), tagset = 'universal')

    return [token for token, tag in tagged_tokens if tag in wanted_tags]

In [4]:
# Model names to compare; each one is handed to `KeyBERT(...)` in turn.
models = [
    'all-mpnet-base-v2',
    'multi-qa-mpnet-base-dot-v1',
    'all-distilroberta-v1',
    'all-MiniLM-L12-v2',
    'multi-qa-distilbert-cos-v1',
]

In [5]:
# File names read from ./datasets/original/ and written, under the same name, to
# ./datasets/keybert-<model_name>/ . Each file must provide a `content` column.
dataset_files = [
    'roblox1.csv',
    'roblox2.csv',
    'roblox3.csv',
    'roblox4.csv',
    'roblox5.csv',
    'zepeto.csv',
]

for model_name in models:
    print(f'TRAINING ON {model_name}')

    model = KeyBERT(model_name)

    # Create the output directory before the long-running extraction starts, and
    # accept one that already exists: failing here *after* the work is done (for
    # example on a re-run) would throw away every result computed for this model.
    output_dir = f'./datasets/keybert-{model_name}'
    os.makedirs(output_dir, exist_ok = True)

    for file_name in dataset_files:
        reviews = pd.read_csv(f'./datasets/original/{file_name}', index_col = 0, low_memory = False)

        # Top-10 KeyBERT keywords for every row's `content`.
        reviews['keybert_keywords'] = reviews['content'].progress_apply(
            lambda text : model.extract_keywords(text, top_n = 10)
        )

        # Write each file as soon as it is finished so an interrupted run keeps
        # the datasets that were already processed.
        reviews.reset_index(drop = True).to_csv(f'{output_dir}/{file_name}')

TRAINING ON all-mpnet-base-v2


  5%|▌         | 5339/100000 [10:29<3:05:53,  8.49it/s]


KeyboardInterrupt: 

In [1]:
import pandas as pd

In [4]:
# Load one saved keyword file for inspection; the first CSV column is used as the index.
# NOTE(review): 'distilbert-base-nli-mean-tokens' is not one of the entries in `models`,
# so this directory presumably comes from an earlier run - confirm it still exists.
df = pd.read_csv(
    './datasets/keybert-distilbert-base-nli-mean-tokens/roblox1.csv',
    index_col = 0,
)

In [5]:
# Print the review text and its stored keywords for the rows labelled 0 through 4,
# separated by a blank line.
for row_label in range(5):
    sample = df.loc[row_label]
    print(sample['content'])
    print(sample['keybert_keywords'])
    print()

As a person who has played this game for many years, I love it. There is one issue that keeps repeating itself in multiple games(I'm on mobile). Whenever I try to chat, my keyboard will either not load at all and cause a small lag spike or I get lucky and it does. One other issue I keep having is that I will be running a game smoothly, then out of nowhere it'll freeze for a second then crash. I don't know if it's just me experiencing these issues, but it's making it very difficult to play.
[('keyboard', 0.0893), ('crash', 0.0827), ('games', 0.0636), ('difficult', 0.0385), ('game', 0.0314), ('love', 0.0193), ('lucky', 0.0139), ('mobile', 0.0101), ('played', -0.0043), ('play', -0.025)]

Overall the game is pretty good, but there are a couple of bugs. Like, when I first load the game up, it freezes on the home screen, so I have to reloat it again, which is extremely frustrating at times. Also, whenever I'm playing games like ZO or Evade, or something of the sort, the screen freezes after 

In [2]:
# Load the keyword file saved for the 'all-mpnet-base-v2' model; the first CSV column
# is used as the index.
df = pd.read_csv(
    './datasets/keybert-all-mpnet-base-v2/roblox1.csv',
    index_col = 0,
)

In [3]:
# Print the review text and its stored keywords for the rows labelled 0 through 4,
# separated by a blank line.
for row_label in range(5):
    sample = df.loc[row_label]
    print(sample['content'])
    print(sample['keybert_keywords'])
    print()

As a person who has played this game for many years, I love it. There is one issue that keeps repeating itself in multiple games(I'm on mobile). Whenever I try to chat, my keyboard will either not load at all and cause a small lag spike or I get lucky and it does. One other issue I keep having is that I will be running a game smoothly, then out of nowhere it'll freeze for a second then crash. I don't know if it's just me experiencing these issues, but it's making it very difficult to play.
[('lag', 0.4936), ('issues', 0.4356), ('keyboard', 0.3528), ('issue', 0.352), ('crash', 0.305), ('chat', 0.2749), ('game', 0.2671), ('games', 0.2658), ('play', 0.2033), ('freeze', 0.2008)]

Overall the game is pretty good, but there are a couple of bugs. Like, when I first load the game up, it freezes on the home screen, so I have to reloat it again, which is extremely frustrating at times. Also, whenever I'm playing games like ZO or Evade, or something of the sort, the screen freezes after 10-20 min