In [2]:
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer
import pandas as pd
import textwrap
import numpy as np
from scipy.optimize import nnls
from scipy import stats
import plotly.express as px
from sklearn.neighbors import NearestNeighbors
import random

# Instantiate the model first, so the same instance can be assigned to multiple copies of the engine.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device='cpu')

  from tqdm.autonotebook import tqdm, trange





In [3]:
# Load the raw games table, dropping rows that contain any missing value.
df = pd.read_parquet('games.parquet').dropna()
# 'Tags' and 'Genres' hold comma-separated strings; split each into a list.
df['Tags'] = df['Tags'].apply(lambda x : x.split(','))
df['Genres'] = df['Genres'].apply(lambda x : x.split(','))
# Remove columns that the code in this notebook section does not read.
df = df.drop(['game_vector', 'game_indices', 'Score', 'Recommendations'], axis=1)

# Vocabulary of distinct tags, sorted so that position i is stable between runs.
sample_list = df['Tags'].values.tolist()
tag_list = sorted({tag for tags in sample_list for tag in tags})
# Encode the whole vocabulary in one batched call rather than one encode()
# call per tag. list() keeps tag_encoded a list of 1-D vectors, as before.
# NOTE(review): batched embeddings are expected to match the per-tag ones up
# to floating-point rounding.
tag_encoded = list(model.encode(tag_list))

# Cosine-distance neighbour index over the tag embeddings; row i <-> tag_list[i].
index = NearestNeighbors(n_neighbors=20, metric='cosine').fit(tag_encoded)

In [39]:
def covariate_tagging(str1, inflection_point):
	"""Pick tags for a text via non-negative least squares over the tag embeddings.

	The embedding of ``str1`` is approximated as a non-negative combination of
	the vectors in the module-level ``tag_encoded``. Tags with a strictly
	positive weight are kept in ascending weight order and then cut at one of
	the (up to three) positions where the weight curve bends most sharply,
	i.e. where the absolute second difference is largest.

	Side effects: prints the wrapped text, the candidate cut positions and the
	selected tags, and shows a plotly bar chart of the positive weights.

	Relies on the module-level names ``model``, ``tag_encoded``, ``tag_list``
	and ``index``.

	Args:
		str1: Text to tag.
		inflection_point: Which candidate cut position to use. Candidates are
			ordered from the highest position to the lowest, so 0 keeps the
			fewest tags and 2 the most. If fewer candidates exist than
			requested, the nearest available one is used; if there are none,
			every positive-weight tag is kept.

	Returns:
		A tuple ``(scores, tags_top, knn_tags)``: the positive weights in
		ascending order, the labels kept after the cut, and a 0/1 list with one
		entry per neighbour returned by ``index`` that says whether the
		neighbour's tag is in ``tags_top``.
	"""

	print(textwrap.fill(str1, width=120), '\n')

	# Embed once; the same vector feeds both the NNLS fit and the kNN lookup.
	k = model.encode(str1)

	# One column per tag embedding, so that X @ w approximates k.
	X = np.column_stack(tag_encoded)
	# Non-Negative Least Squares (NNLS)
	w, _ = nnls(X, k)

	# Keep strictly positive weights, ascending by (weight, label).
	positive = [pair for pair in sorted(zip(w, tag_list)) if pair[0] > 0]
	df_ = pd.DataFrame(positive, columns=['Score', 'Label'])
	scores = df_['Score'].values.tolist()

	# Discrete curvature of the sorted weight curve (second differences).
	abs_second_derivatives = np.abs(np.diff(scores, n=2))

	# Up to three positions with the largest curvature, highest position first.
	inflection_points = sorted(np.argsort(-abs_second_derivatives)[:3].tolist(), reverse=True)
	if inflection_points:
		# Clamp the request to the candidates that exist; short weight curves
		# yield fewer than three. Negative values keep Python index semantics.
		n_points = len(inflection_points)
		choice = max(-n_points, min(inflection_point, n_points - 1))
		cutoff = inflection_points[choice]
	else:
		# Too few positive weights to measure curvature: keep them all.
		cutoff = 0
	tags_top = df_['Label'].iloc[cutoff:].tolist()
	# NOTE(review): positions are printed shifted by one while the cut above
	# uses the unshifted value, as in the original code; confirm this is intended.
	print([x+1 for x in inflection_points[::-1]])
	print(tags_top)

	# Bar chart of the positive NNLS weights, largest first.
	fig = px.bar(df_[::-1], x=df_.index, y='Score')
	fig.show()

	# compare it with raw knn: the positions are scattered, we don't just keep the top k ones
	_, indices = index.kneighbors([k])
	selected = set(tags_top)
	knn_tags = [1 if tag_list[i] in selected else 0 for i in indices[0]]

	return scores, tags_top, knn_tags

# Pick a random game. random.randint is inclusive on both ends, so
# randint(0, len(df)) could return len(df) and make df.iloc[r] raise
# IndexError; randrange excludes the upper bound.
r = random.randrange(len(df))
print('index:', r)
# r = 4 # if you are following the article, choose r = 4
str1 = df.iloc[r]['About the game']
scores, tags_top, knn_tags = covariate_tagging(str1, inflection_point=2)
print(knn_tags)

Death is lonely. He has zero friends on his FaceTome account and no one to hang out with. So, in order to feel better he
begins “Project Deadlings”. Death buys a factory where he can build his laboratory and begin training a massive army of
zombie minions. As the army of Deadlings grows, the mazes of the laboratory become deadlier, loaded with puzzles and
death-defying traps. Different Deadlings have their own unique abilities: Bonesack is agile - he can run and jump, Creep
can climb on walls and ceilings, Lazybrain treads slowly but carefully and Stencher... well Stencher has gastric
problems so he can use his powerful gas clouds to fly. You will have to combine all of these abilities to find your way
in Death's Maze. Can you help Death to kill his boredom? Will you be able to navigate all 60+ levels available in
Deadlings? Will you complete Project Deadlings, and successfully train all of your zombie minions? Arcade side-scroller
with strategy elements! Four different types of zombie

[1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]


### comparison with bart-large-mnli

In [None]:
# Zero-shot classification pipeline backed by facebook/bart-large-mnli, used
# here as a baseline to compare against the NNLS tagging above.
from transformers import pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

In [44]:
# Score the description of the game at row 4 against every known tag; the
# classifier result is the cell's displayed output.
candidate_labels = tag_list
sequence_to_classify = df['About the game'].iloc[4]
classifier(sequence_to_classify, candidate_labels)

{'sequence': "Death is lonely. He has zero friends on his FaceTome account and no one to hang out with. So, in order to feel better he begins “Project Deadlings”. Death buys a factory where he can build his laboratory and begin training a massive army of zombie minions. As the army of Deadlings grows, the mazes of the laboratory become deadlier, loaded with puzzles and death-defying traps. Different Deadlings have their own unique abilities: Bonesack is agile - he can run and jump, Creep can climb on walls and ceilings, Lazybrain treads slowly but carefully and Stencher... well Stencher has gastric problems so he can use his powerful gas clouds to fly. You will have to combine all of these abilities to find your way in Death's Maze. Can you help Death to kill his boredom? Will you be able to navigate all 60+ levels available in Deadlings? Will you complete Project Deadlings, and successfully train all of your zombie minions? Arcade side-scroller with strategy elements! Four different t

In [None]:
# Second baseline: the claritylab zero-shot bi-encoder.
from sentence_transformers import SentenceTransformer, util as sbert_util

# NOTE(review): this rebinds the global `model` that covariate_tagging reads at
# call time. Calling covariate_tagging after this cell would embed the text
# with this model while tag_encoded still holds vectors from the earlier one.
model = SentenceTransformer(model_name_or_path='claritylab/zero-shot-implicit-bi-encoder')

In [46]:
# Candidate labels are the full tag vocabulary.
labels = tag_list
# The prompt is "<aspect> <separator token> <description>", with the separator
# taken from the model tokenizer's first additional special token.
aspect = 'best tag:' # classification
aspect_sep_token = model.tokenizer.additional_special_tokens[0]
text = df['About the game'].iloc[4]
text = f'{aspect} {aspect_sep_token} {text}'

# Embed the prompt and every label with the same model.
label_embeds = model.encode(labels)
text_embed = model.encode(text)
# Cosine similarity between the prompt and each label, rounded to 3 decimals.
scores = [
    round(sbert_util.cos_sim(text_embed, label_vec).item(), 3)
    for label_vec in label_embeds
]
# Highest-scoring labels first.
list(reversed(sorted(zip(scores, labels))))

[(0.654, 'Relaxing'),
 (0.564, 'Story Rich'),
 (0.548, 'Dungeons & Dragons'),
 (0.547, 'Adventure'),
 (0.537, 'Immersive Sim'),
 (0.534, 'Family Friendly'),
 (0.532, 'RPGMaker'),
 (0.524, 'Party'),
 (0.521, 'Magic'),
 (0.512, 'Life Sim'),
 (0.503, 'Web Publishing'),
 (0.496, 'Party-Based RPG'),
 (0.496, 'Futuristic'),
 (0.495, 'Survival'),
 (0.494, 'Roguevania'),
 (0.492, 'Conversation'),
 (0.487, 'Realistic'),
 (0.478, 'Supernatural'),
 (0.47, 'JRPG'),
 (0.466, 'Immersive'),
 (0.464, 'Lore-Rich'),
 (0.463, 'Touch-Friendly'),
 (0.462, 'Level Editor'),
 (0.459, 'Grand Strategy'),
 (0.458, 'Experience'),
 (0.456, 'Action'),
 (0.454, 'Job Simulator'),
 (0.452, 'Strategy RPG'),
 (0.447, 'Casual'),
 (0.447, 'Action RTS'),
 (0.444, 'Strategy'),
 (0.443, 'Tactical'),
 (0.443, 'Interactive Fiction'),
 (0.442, 'Warhammer 40K'),
 (0.442, 'Crafting'),
 (0.441, 'Rogue-lite'),
 (0.44, 'MMORPG'),
 (0.434, 'Boss Rush'),
 (0.432, 'Medical Sim'),
 (0.43, 'Colony Sim'),
 (0.429, 'Simulation'),
 (0.429, 

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]