In [None]:
import re
import time

import numpy as np
import pandas as pd
from scipy import spatial
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MultiLabelBinarizer
from tabulate import tabulate

# Column layout of one feature vector. The blocks appear in the order in which the
# encoded frames are concatenated into `feature_vectors`: categories first, then
# genres, then steamspy tags, then the four engineered category blocks.
# NOTE(review): the block widths (29 / 29 / 339 / 10 each) are hard-coded - confirm
# they still match the encoded frames whenever the dataset changes.
_CATEGORIES_COLUMNS = slice(0, 29)
_GENRES_COLUMNS = slice(29, 58)
_TAGS_COLUMNS = slice(58, 397)
# Blocks that are available for tweaking the metric but are currently not weighted:
#   time_engaged    -> [397:407]
#   player_acclaim  -> [407:417]
#   adopters_choice -> [417:427]
_GAME_NEXUS_COLUMNS = slice(427, 437)


def get_distance(game1, game2):
    """Weighted cosine distance between two encoded games (0 means identical).

    The distance is computed separately for each feature block and combined as
    0.6 * tags + 0.2 * genres + 0.1 * categories + 0.1 * game nexus.

    The genre weight is applied to columns 29-57 and the category weight to
    columns 0-28, because categories come first in `feature_vectors`.

    Parameters:
        game1, game2: 1-D sequences holding the concatenated encoded features of a game.

    Returns:
        The weighted sum of the per-block cosine distances.

    NOTE(review): the cosine distance is undefined for a block that is all zeros -
    confirm every game has at least one set column in each weighted block.
    """
    categories_distance = spatial.distance.cosine(game1[_CATEGORIES_COLUMNS], game2[_CATEGORIES_COLUMNS])
    genres_distance = spatial.distance.cosine(game1[_GENRES_COLUMNS], game2[_GENRES_COLUMNS])
    tags_distance = spatial.distance.cosine(game1[_TAGS_COLUMNS], game2[_TAGS_COLUMNS])
    game_nexus_distance = spatial.distance.cosine(game1[_GAME_NEXUS_COLUMNS], game2[_GAME_NEXUS_COLUMNS])

    # Apply weights
    return (0.6 * tags_distance) + (0.2 * genres_distance) + (0.1 * categories_distance) + (0.1 * game_nexus_distance)


# Load the formatted Steam games table
df = pd.read_csv('formattedSteamGames.csv')

# Turn the ';'-separated text columns into lists of labels (missing values become [''])
for list_column in ('categories', 'genres', 'steamspy_tags'):
    df[list_column] = df[list_column].fillna('').str.split(';')

# Multi-hot encode the label lists. One binarizer is refitted for every column,
# so `classes_` is read immediately after each fit_transform call.
mlb = MultiLabelBinarizer()
categories_encoded = pd.DataFrame(
    mlb.fit_transform(df['categories']),
    columns=mlb.classes_,
    index=df.index,
)
genres_encoded = pd.DataFrame(
    mlb.fit_transform(df['genres']),
    columns=mlb.classes_,
    index=df.index,
)
steamspy_tags_encoded = pd.DataFrame(
    mlb.fit_transform(df['steamspy_tags']),
    columns=mlb.classes_,
    index=df.index,
)

# One-hot encode the single-valued category columns
time_engaged_category_encoded = pd.get_dummies(
    df['time_engaged_category'], prefix='tec', dtype=int
)
player_acclaim_category_encoded = pd.get_dummies(
    df['player_acclaim_category'], prefix='pac', dtype=int
)
adopters_choice_category_encoded = pd.get_dummies(
    df['adopters_choice_category'], prefix='acc', dtype=int
)
game_nexus_category_encoded = pd.get_dummies(
    df['game_nexus_category'], prefix='gnc', dtype=int
)

# All encoded blocks, in the column order the distance metric slices them
encoded_blocks = [
    categories_encoded,
    genres_encoded,
    steamspy_tags_encoded,
    time_engaged_category_encoded,
    player_acclaim_category_encoded,
    adopters_choice_category_encoded,
    game_nexus_category_encoded,
]

# Feature vectors: every encoded block side by side
feature_vectors = pd.concat(encoded_blocks, axis=1)

# Same columns with the game name in front
df_encoded = pd.concat([df['name'], *encoded_blocks], axis=1)

# Instantiate and fit the KNN model (11 neighbours, custom weighted distance)
knn_model = NearestNeighbors(n_neighbors=11, metric=get_distance)
knn_model.fit(feature_vectors.values)

# Function to get recommendations for a given game name
def get_recommendations(game_name):
    """Return the nearest neighbours of the game called `game_name`.

    Parameters:
        game_name: exact value of the `name` column; the first matching row is used.

    Returns:
        A two-element list `[distances, names]`: the distances reported by
        `knn_model` and the corresponding game names, nearest first. The queried
        game itself is never part of the result, and the result always holds one
        entry fewer than the number of neighbours the model returns.

    Raises:
        IndexError: if no row of `df` has this name.
    """
    # Work with row positions (not index labels), because .iloc is positional.
    matching_positions = (df['name'] == game_name).to_numpy().nonzero()[0]
    if len(matching_positions) == 0:
        raise IndexError("Game not found: " + repr(game_name))
    game_position = int(matching_positions[0])

    # Query with a plain 2-D array, the same kind of input the model was fitted on.
    query_vector = feature_vectors.iloc[[game_position]].values
    distances, indices = knn_model.kneighbors(query_vector)

    # Drop the queried game by position instead of assuming it is the first hit:
    # with tied distances another game can be ranked before it.
    neighbours = [
        (distance, position)
        for distance, position in zip(distances[0].tolist(), indices[0].tolist())
        if position != game_position
    ]
    # Keep the result size constant even when the queried game was not among the hits.
    neighbours = neighbours[:len(indices[0]) - 1]

    neighbour_distances = [distance for distance, _ in neighbours]
    neighbour_positions = [position for _, position in neighbours]
    similar_games = df.iloc[neighbour_positions]['name'].astype(str).values.tolist()

    distancesAndGameNames = [neighbour_distances, similar_games]

    return distancesAndGameNames

In [None]:
#Get user input
gameName = input("Enter the game name:")

#Build a pattern that matches any word of the query ("|" is the regex "or" operator).
#Every word is escaped so that characters such as "(" or "+" are matched literally,
#and splitting on whitespace avoids empty alternatives that would match every game.
searchTerms = [re.escape(term) for term in gameName.split()]
formatedGameName = "|".join(searchTerms)

#na=False treats rows without a name as "no match". An empty query finds nothing.
if searchTerms:
    nameMatches = df['name'].str.contains(formatedGameName, case=False, na=False)
    foundGames = df[nameMatches]['name'].astype(str).values.tolist()[:10]
else:
    foundGames = []

if not foundGames:
    print("No games found for: " + gameName)
else:
    #Print games which are found
    for (idx, game) in enumerate(foundGames, start=1):
        print (str(idx) + " " + game)
    print("--------------------------------------------------------------------------")

    #Sleep for 0.5 sec to avoid console print issues
    time.sleep(0.5)

    #Get users input. Which game user wants from the list we provided.
    #Keep asking until the answer is a number from the printed list; this also
    #rejects 0, which would otherwise silently pick the last game (index -1).
    while True:
        selectedGame = input("We have found these games, please enter the number for which one you want to get recommendation:")
        try:
            #Numeration started from 1, but the foundGames list starts from index 0.
            selectedIndex = int(selectedGame) - 1
        except ValueError:
            selectedIndex = -1
        if 0 <= selectedIndex < len(foundGames):
            break
        print("Please enter a number between 1 and " + str(len(foundGames)) + ".")

    selectedGameName = foundGames[selectedIndex]

    #Get recommended games.
    returnedGamesAndDistances = get_recommendations(selectedGameName)

    #Format response in table format. Distances are turned into a similarity percentage.
    formattedResultList = []
    for similarGameName, gameDistance in zip(returnedGamesAndDistances[1], returnedGamesAndDistances[0]):
        formattedResultList.append([similarGameName, "{:.2f}".format((1 - gameDistance) * 100)])

    #Print similar games
    print("")
    print("")
    print ("For selected game: " + selectedGameName + ". We have found these similar games:")
    print(tabulate(formattedResultList, headers=['Game Name', 'Similarity  (%)'], tablefmt="outline"))

1 Counter-Strike
2 Counter-Strike: Condition Zero
3 Counter-Strike: Source
4 Counter-Strike: Global Offensive
5 Harvest: Massive Encounter
6 Serious Sam HD: The First Encounter
7 Serious Sam: The Random Encounter
8 Counter-Strike Nexon: Zombies
9 Counter Spell
10 Ghost Encounters: Deadwood - Collector's Edition
--------------------------------------------------------------------------


For selected game: Counter-Strike: Global Offensive. We have found these similar games:
+------------------------------+-------------------+
| Game Name                    |   Similarity  (%) |
| Team Fortress 2              |             74.29 |
| Dirty Bomb®                  |             72.65 |
| Paladins®                    |             71.55 |
| Batla                        |             70.61 |
| Day of Defeat: Source        |             70.43 |
| Quake Live™                  |             69.57 |
| Call of Duty®: Black Ops III |             69.14 |
| Call of Duty®: Black Ops II  |             

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Feature matrix for the diversity score: only the encoded categories, genres and steamspy tags
custom_feature_vectors = pd.concat([categories_encoded, genres_encoded, steamspy_tags_encoded], axis=1)

def calculate_diversity(feature_matrix=None):
    """Return 1 minus the mean pairwise cosine similarity of the rows.

    The mean is taken over every ordered pair of rows, including each row paired
    with itself. Rows that are all zeros count as having similarity 0 to everything.

    The n x n similarity matrix is never built: the sum of all pairwise cosine
    similarities equals the squared length of the sum of the unit-normalised rows,
    so only an n x d array is needed.

    Parameters:
        feature_matrix: optional 2-D array-like (rows are items). Defaults to the
            module-level `custom_feature_vectors`.

    Returns:
        The diversity score as a float.

    Raises:
        ValueError: if the input is not 2-D or has no rows.
    """
    if feature_matrix is None:
        feature_matrix = custom_feature_vectors

    vectors = np.asarray(feature_matrix, dtype=float)
    if vectors.ndim != 2 or vectors.shape[0] == 0:
        raise ValueError("feature_matrix must be a non-empty 2-D array")

    # Normalise each row to unit length; all-zero rows are left as zeros.
    row_norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    row_norms[row_norms == 0] = 1.0
    unit_vector_sum = (vectors / row_norms).sum(axis=0)

    item_count = vectors.shape[0]
    mean_similarity = (unit_vector_sum @ unit_vector_sum) / (item_count * item_count)
    diversity = 1 - mean_similarity
    return diversity

# Compute the diversity score (1 - mean pairwise cosine similarity) and print it
diversity_score = calculate_diversity()
print(diversity_score)

0.6263883364972274


In [None]:

# Train a content-based model (e.g., Nearest Neighbors).
# The model gets its own name so the `knn_model` that get_recommendations() relies on
# is not replaced by this all-neighbours cosine model.
custom_feature_vectors1 = pd.concat([categories_encoded, genres_encoded, steamspy_tags_encoded, game_nexus_category_encoded], axis=1)
novelty_knn_model = NearestNeighbors(n_neighbors=len(custom_feature_vectors1), metric='cosine')
novelty_knn_model.fit(custom_feature_vectors1)

# Get nearest neighbors for each item in the catalog.
# Only the neighbour positions are needed, so the distances are not requested
# (this avoids keeping a second n x n array alive).
# NOTE(review): n_neighbors equals the catalogue size, so the result is still an
# n x n array - consider a much smaller neighbourhood if this cell does not finish.
indices1 = novelty_knn_model.kneighbors(custom_feature_vectors1, return_distance=False)

# Calculate the average popularity (uniqueness) of recommended items
# NOTE(review): `index` is a whole row of neighbour positions (item ids), not a rank,
# so this is 1 / (item position + 1) - confirm this is the intended novelty measure.
novelty_scores = []
for index in indices1:
    novelty_score = 1 / (index + 1)  # Inverse rank as a measure of novelty
    novelty_scores.append(novelty_score)

# Average the novelty scores (element-wise over the rows, so the result is an array)
average_novelty = sum(novelty_scores) / len(novelty_scores)

print(average_novelty)

KeyboardInterrupt: 

In [None]:
# Display the table that combines the game names with all encoded feature columns
df_encoded

Unnamed: 0,name,Captions available,Co-op,Commentary available,Cross-Platform Multiplayer,Full controller support,In-App Purchases,Includes Source SDK,Includes level editor,Local Co-op,...,gnc_AboveAverage,gnc_Average,gnc_BelowAverage,gnc_Excellent,gnc_Good,gnc_Masterpiece,gnc_Mediocre,gnc_Outstanding,gnc_Subpar,gnc_Unacceptable
0,Counter-Strike,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,Team Fortress Classic,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,Day of Defeat,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,Deathmatch Classic,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,Half-Life: Opposing Force,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27070,Room of Pandora,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
27071,Cyber Gun,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
27072,Super Star Blast,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
27073,New Yankee 7: Deer Hunters,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
