In [1]:
import pandas as pd
import numpy as np
import timeit

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

from scipy.sparse import hstack


# Turn off pandas' chained-assignment check for the whole notebook, so no
# SettingWithCopyWarning is emitted by any later cell.
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
def reduce_mem_usage(df):
    """Downcast the columns of ``df`` to smaller dtypes to reduce memory usage.

    The frame is modified in place and also returned. Memory usage before and
    after, and the percentage saved, are printed.

    * ``object`` / pandas string columns are converted to ``category``.
    * Signed-integer columns are cast to the smallest of int8/16/32/64 whose
      range contains the column's min and max (bounds inclusive).
    * Float columns are cast to float16 or float32 when min and max fit,
      otherwise float64. Only the value *range* is checked, so float16/float32
      may lose precision.
    * Every other dtype (bool, unsigned int, datetime, category, nullable
      extension types, ...) is left untouched, which also makes calling the
      function twice on the same frame safe.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object or isinstance(col_type, pd.StringDtype):
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy signed ints and floats are downcast; min()/max() or a
        # float cast would fail or corrupt anything else (e.g. bool -> float16).
        if not isinstance(col_type, np.dtype) or col_type.kind not in 'if':
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind == 'i':
            # An empty column has NaN min/max: no candidate matches and the
            # dtype is kept as is.
            for candidate in (np.int8, np.int16, np.int32, np.int64):
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in (np.float16, np.float32):
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Out of float32 range, infinite, or all-NaN column.
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard the ratio so a frame reporting zero bytes cannot divide by zero.
    saved_pct = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(saved_pct))

    return df


def import_data(file):
    """Read a CSV file into a DataFrame and shrink it with ``reduce_mem_usage``.

    Parameters
    ----------
    file : str or path-like or file-like
        Anything ``pandas.read_csv`` accepts as its first argument.

    Returns
    -------
    pandas.DataFrame
        The loaded frame after downcasting.
    """
    # ``keep_date_col`` was dropped: it only applies when ``parse_dates``
    # combines several columns, which ``parse_dates=True`` never does, and the
    # keyword is deprecated in recent pandas releases.
    df = pd.read_csv(file, parse_dates=True)
    return reduce_mem_usage(df)

In [3]:
# Load the games table from the Kaggle input dataset.
# (An earlier variant, now disabled, loaded '/kaggle/input/rs-final/clean_data.csv'
# through import_data() and stringified each description in a loop.)
united_df = pd.read_csv(filepath_or_buffer='/kaggle/input/gamify2/newData.csv')

In [4]:
# Drop games without a description or without tags.
united_df = united_df.dropna(subset=['About the game', 'Tags'])
# reset_index() without drop=True keeps the old row labels as an 'index'
# column, which the column listing further down shows.
united_df = united_df.reset_index()
# Every column that is later concatenated into `description` must be a string:
# a single NaN would turn that row's whole concatenation into NaN. 'Name' and
# 'Developers' are concatenated as well, so they are filled too.
united_df = united_df.fillna({
    'Name': '',
    'Genres': '',
    'Categories': '',
    'Developers': '',
    'Publishers': '',
})

In [5]:
# Normalise the comma-separated list columns to "a, b, c". Swallowing any
# whitespace that already follows a comma keeps the cell idempotent: running it
# again (or on values that are already spaced) no longer yields "a,  b".
for list_col in ('Tags', 'Genres', 'Developers', 'Publishers', 'Categories'):
    united_df[list_col] = united_df[list_col].str.replace(r',\s*', ', ', regex=True)

In [6]:
# Preview the combined text for the first row. The columns already hold plain
# strings, so the former `.str.join('')` calls (which re-join each string
# character by character and return it unchanged) were removed.
(
    '[NAME]: ' + united_df['Name']
    + ', [DESC]: ' + united_df['About the game']
    + ', [CATEG]: ' + united_df['Categories']
    + ', [TAGS]: ' + united_df['Tags']
    + ', [GENRES]: ' + united_df['Genres']
    + ', [DEVELOPER]:' + united_df['Developers']
    + ', [PUBLISHER]:' + united_df['Publishers']
).loc[0]

"[NAME]: Deadlings: Rotten Edition, [DESC]: Death is lonely. He has zero friends on his FaceTome account and no one to hang out with. So, in order to feel better he begins “Project Deadlings”. Death buys a factory where he can build his laboratory and begin training a massive army of zombie minions. As the army of Deadlings grows, the mazes of the laboratory become deadlier, loaded with puzzles and death-defying traps. Different Deadlings have their own unique abilities: Bonesack is agile - he can run and jump, Creep can climb on walls and ceilings, Lazybrain treads slowly but carefully and Stencher... well Stencher has gastric problems so he can use his powerful gas clouds to fly. You will have to combine all of these abilities to find your way in Death's Maze. Can you help Death to kill his boredom? Will you be able to navigate all 60+ levels available in Deadlings? Will you complete Project Deadlings, and successfully train all of your zombie minions? Arcade side-scroller with strat

In [7]:
# One labelled text blob per game; this is the string that gets embedded.
# The columns already hold plain strings, so the former `.str.join('')` calls
# (a character-by-character re-join that returns the same string) were removed.
united_df['description'] = (
    '[NAME]: ' + united_df['Name']
    + ', [DESC]: ' + united_df['About the game']
    + ', [CATEG]: ' + united_df['Categories']
    + ', [TAGS]: ' + united_df['Tags']
    + ', [GENRES]: ' + united_df['Genres']
    + ', [DEVELOPER]:' + united_df['Developers']
    + ', [PUBLISHER]:' + united_df['Publishers']
)

In [8]:
# Persist the cleaned table (with the new `description` column) in the working directory.
# NOTE(review): the file name has no '.csv' extension — confirm that is what readers of this file expect.
united_df.to_csv(path_or_buf='newData2', index=False)

In [9]:
# Inspect the column names of the cleaned table.
united_df.columns

Index(['index', 'AppID', 'Name', 'Release date', 'Estimated owners',
       'Peak CCU', 'Required age', 'Price', 'DLC count', 'About the game',
       'Reviews', 'Metacritic score', 'User score', 'Positive', 'Negative',
       'Score rank', 'Achievements', 'Recommendations',
       'Average playtime forever', 'Average playtime two weeks',
       'Median playtime forever', 'Median playtime two weeks', 'Developers',
       'Publishers', 'Categories', 'Genres', 'Tags', 'description'],
      dtype='object')

In [10]:
# Show the first rows of the cleaned table as a sanity check.
united_df.head()

Unnamed: 0,index,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,...,Average playtime forever,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,description
0,0,320150,Deadlings: Rotten Edition,"Nov 11, 2014",50000 - 100000,0,0,3.99,0,Death is lonely. He has zero friends on his Fa...,...,703,0,782,0,ONE MORE LEVEL,ONE MORE LEVEL,"Single-player, Steam Achievements, Steam Tradi...","Action, Adventure, Indie","Action, Indie, Adventure, Puzzle-Platformer, A...","[NAME]: Deadlings: Rotten Edition, [DESC]: Dea..."
1,1,1026420,WARSAW,"Oct 2, 2019",20000 - 50000,5,0,23.99,0,Use everything at your disposal to help a team...,...,67,0,93,0,Pixelated Milk,"Pixelated Milk, gaming company","Single-player, Steam Achievements, Steam Tradi...","Indie, RPG","Tactical RPG, Turn-Based Strategy, Wargame, Hi...","[NAME]: WARSAW, [DESC]: Use everything at your..."
2,2,485000,Cthulhu Realms,"Jul 1, 2016",50000 - 100000,0,0,0.0,1,Star Realms has gone insane! Introducing Cthul...,...,224,0,257,0,"Wise Wizard Games, LLC","Wise Wizard Games, LLC","Single-player, Multi-player, Shared/Split Scre...",Strategy,"Card Game, Strategy, Deckbuilding, Lovecraftia...","[NAME]: Cthulhu Realms, [DESC]: Star Realms ha..."
3,3,825930,Royal Battleships,"Apr 6, 2018",20000 - 50000,0,0,2.99,0,Sometimes there is nothing better than a tourn...,...,11,0,11,0,Educational Games,Educational Games,"Single-player, Steam Achievements, Steam Leade...","Casual, Indie","Indie, Casual","[NAME]: Royal Battleships, [DESC]: Sometimes t..."
4,4,22670,Alien Breed 3: Descent,"Nov 17, 2010",200000 - 500000,3,0,9.99,0,Alien Breed™ 3: Descent is the final explosive...,...,44,0,24,0,Team17 Digital Ltd,Team17 Digital Ltd,"Single-player, Multi-player, Co-op, Steam Achi...",Action,"Action, Shooter, Sci-fi, Aliens, Third Person,...","[NAME]: Alien Breed 3: Descent, [DESC]: Alien ..."


In [11]:
!pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: sentence-transformers
  Building wheel for sentence-transformers (setup.py) ... [?25ldone
[?25h  Created wheel for sentence-transformers: filename=sentence_transformers-2.2.2-py3-none-any.whl size=125938 sha256=3b83166c960a90a2e630660ae2e4b959db086f4bb5c6bb3e12c01fff88a47277
  Stored in directory: /root/.cache/pip/wheels/83/71/2b/40d17d21937fed496fb99145227eca8f20b4891240ff60c86f
Successfully built sentence-transformers
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-2.2.2
[0m

In [12]:
from sentence_transformers import SentenceTransformer

# `start` opens the timing window that a later cell closes with
# `stop = timeit.default_timer()`, so the printed time covers everything that
# happens between the two cells, model loading included.
start = timeit.default_timer()
# Load the pretrained sentence-embedding model by name.
model = SentenceTransformer('bert-base-nli-mean-tokens')

Downloading (…)821d1/.gitattributes:   0%|          | 0.00/391 [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)8d01e821d1/README.md:   0%|          | 0.00/3.95k [00:00<?, ?B/s]

Downloading (…)d1/added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)01e821d1/config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)821d1/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/399 [00:00<?, ?B/s]

Downloading (…)8d01e821d1/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)1e821d1/modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

In [13]:
# Plain Python list of the description strings, in row order, as input for the encoder.
alls = united_df['description'].tolist()

In [14]:
# reviews_by_user = united_df1.groupby('steamid')['review'].apply(list).reset_index(name='reviews')

In [15]:
# len(reviews_by_user['reviews'][0])

In [16]:
# pytorch library
import torch # the main pytorch library
import torch.nn.functional as f # the sub-library containing different functions for manipulating with tensors

In [17]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [18]:
# Switch the model to evaluation mode and move it to the selected device.
model = model.eval().to(device)

In [19]:
# Embed every description string; later cells treat the result as row-aligned
# with `united_df` (it is stacked column-wise with a per-row feature).
sentence_embeddings = model.encode(alls)

Batches:   0%|          | 0/441 [00:00<?, ?it/s]

In [20]:
# Scale 'Peak CCU' by the L2 norm of the whole column and append it to the
# text embeddings as one extra feature column.
numerical_columns = united_df[['Peak CCU']].to_numpy()
norm = np.linalg.norm(numerical_columns)
normalized_data = numerical_columns / norm
# NOTE(review): nothing in the visible cells reads `sentence_embeddings1`; the
# similarity matrix is built from `sentence_embeddings` — confirm that is intended.
sentence_embeddings1 = np.concatenate((sentence_embeddings, normalized_data), axis=1)

In [21]:
# Pairwise cosine similarity of every embedding with every other one
# (with a single argument the matrix is compared against itself).
cosine_sim4 = cosine_similarity(sentence_embeddings)

# Close the timing window opened where `start` was set and report the elapsed seconds.
stop = timeit.default_timer()
print('Time: ', stop - start)

Time:  84.76605688100005


In [65]:
# The main function that makes recommendations
def get_rec1(stmid, cosine, top_n=14):
    """Return the games most similar to the game called ``stmid``.

    Parameters
    ----------
    stmid : str
        Exact value of the ``Name`` column of the query game. If several rows
        share that name, the first one is used.
    cosine : 2-D array-like
        Pairwise similarity matrix; ``cosine[i]`` holds the similarity of row
        ``i`` of ``united_df`` to every row. The query's index label is used as
        a row position, so ``united_df`` must carry the default 0..n-1 index
        (as it does after ``reset_index``).
    top_n : int, default 14
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``united_df`` (a fixed subset of columns),
        ordered from most to least similar. The query game itself is never
        included.

    Raises
    ------
    IndexError
        If no row of ``united_df`` has ``Name == stmid``.
    """
    # Determine the index of the query game
    matches = united_df.index[united_df['Name'] == stmid]
    if len(matches) == 0:
        raise IndexError('No game named {!r} in united_df'.format(stmid))
    ind = matches[0]

    # Rank all rows by similarity, best first. The stable sort keeps ties in
    # row order, like sorted(..., reverse=True) does.
    scores = np.asarray(cosine[ind])
    ranked = np.argsort(-scores, kind='stable')

    # Remove the query by position rather than assuming it is ranked first:
    # a duplicate or tied row with a smaller index would otherwise push the
    # query itself into the recommendations.
    ranked = ranked[ranked != ind][:top_n]

    return united_df[['Name', 'Release date', 'Estimated owners',
       'Peak CCU','About the game',
       'Metacritic score', 'Positive', 'Negative',
       'Average playtime forever', 'Median playtime two weeks', 'Developers',
       'Publishers', 'Categories', 'Genres', 'Tags', 'description']].iloc[ranked]

In [23]:
# Look up the description text of one game by exact name (single .loc call, no chained indexing).
united_df.loc[united_df['Name'] == "Need for Speed™ Payback", "About the game"]

461    Set in the underworld of Fortune Valley, you a...
Name: About the game, dtype: object

In [81]:
# Name of the query game; it must match a value of the 'Name' column exactly.
naz = "Tom Clancy's Splinter Cell Chaos Theory®"
print('Recommendation for', naz, '\n')
print('Recommendations using BERT embeddings and cosine similarity \n')
# print(get_rec1(naz, cosine_sim4).head(20))
# get_rec1 slices its ranking to 14 entries, so .head(20) never truncates anything here.
# NOTE(review): the recorded output lists 10 rows, which suggests it was produced
# by an earlier version of get_rec1 — re-run to refresh it.
get_rec1(naz, cosine_sim4).head(20)

Recommendation for Tom Clancy's Splinter Cell Chaos Theory® 

Recommendations using BERT embeddings and cosine similarity 



Unnamed: 0,Name,Release date,Estimated owners,Peak CCU,About the game,Metacritic score,Positive,Negative,Average playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,description
2272,Tom Clancy's Splinter Cell®,"Apr 1, 2008",200000 - 500000,29,"Infiltrate terrorists' positions, acquire crit...",91,1929,280,293,0,Ubisoft,Ubisoft,Single-player,Action,"Stealth, Action, Third Person, Singleplayer, C...","[NAME]: Tom Clancy's Splinter Cell®, [DESC]: I..."
518,"Invisible, Inc.","May 12, 2015",500000 - 1000000,33,Take control of Invisible's agents in the fiel...,82,4701,419,215,0,Klei Entertainment,Klei Entertainment,"Single-player, Steam Achievements, Steam Tradi...","Action, Indie, Strategy","Stealth, Turn-Based Strategy, Cyberpunk, Turn-...","[NAME]: Invisible, Inc., [DESC]: Take control ..."
2637,Quantum Replica,"May 31, 2018",0 - 20000,1,"It's 2084, and the world bows to a ruthless co...",0,57,49,11,0,ON3D Studios,PQube,"Single-player, Steam Achievements, Full contro...","Action, Indie","Action, Indie, Cyberpunk, Stealth","[NAME]: Quantum Replica, [DESC]: It's 2084, an..."
3181,The Price of Freedom,"Dec 22, 2016",20000 - 50000,0,Ask not what this country can do for you Janua...,0,360,27,26,0,Construct Studio,Construct Studio Inc.,Single-player,"Adventure, Indie","Adventure, Indie, VR, Story Rich, Atmospheric,...","[NAME]: The Price of Freedom, [DESC]: Ask not ..."
2714,Dark Sector,"Mar 24, 2009",50000 - 100000,2,Dark Sector thrusts players into the role of H...,66,500,127,439,0,Digital Extremes,ND Games,"Single-player, Multi-player, Partial Controlle...",Action,"Action, Third-Person Shooter, Singleplayer, Th...","[NAME]: Dark Sector, [DESC]: Dark Sector thrus..."
5522,The Bureau: XCOM Declassified,"Aug 19, 2013",2000000 - 5000000,27,The year is 1962 and the Cold War has the nati...,66,7863,3377,310,0,2K Marin,2K,"Single-player, Steam Achievements, Full contro...",Action,"Action, Tactical, Sci-fi, Third-Person Shooter...","[NAME]: The Bureau: XCOM Declassified, [DESC]:..."
13981,"Warhammer 40,000: Darktide","Nov 30, 2022",500000 - 1000000,63616,Take back the city of Tertium from hordes of b...,0,4486,2104,409,318,Fatshark,Fatshark,"Multi-player, Co-op, Online Co-op, Steam Achie...","Action, Adventure, Indie","Warhammer 40K, PvE, FPS, Hack and Slash, Onlin...","[NAME]: Warhammer 40,000: Darktide, [DESC]: Ta..."
439,Shot In The Dark,"Jun 10, 2015",20000 - 50000,0,"In a dystopian future, multiple factions have ...",0,78,86,133,0,Technomancy Studios,Technomancy Studios,"Multi-player, Steam Achievements, Steam Tradin...","Action, Free to Play, Indie, Early Access","Early Access, Indie, Free to Play, Action, FPS...","[NAME]: Shot In The Dark, [DESC]: In a dystopi..."
8698,Sniper Elite,"Jul 16, 2009",500000 - 1000000,34,"As World War II draws to a close, the first co...",76,2114,636,356,112,Rebellion,Rebellion,"Single-player, Steam Trading Cards",Action,"Action, Sniper, Stealth, World War II, Third-P...","[NAME]: Sniper Elite, [DESC]: As World War II ..."
794,Phantom Doctrine,"Aug 14, 2018",200000 - 500000,65,Phantom Doctrine is a strategic turn-based esp...,73,2889,971,682,0,CreativeForge Games,Good Shepherd Entertainment,"Single-player, Multi-player, PvP, Online PvP, ...","Action, RPG, Strategy","Turn-Based Tactics, Strategy, Cold War, Stealt...","[NAME]: Phantom Doctrine, [DESC]: Phantom Doct..."


In [77]:
# Lift the column-width limit only inside this block so the full text is printed.
with pd.option_context('display.max_colwidth', None):
    print(united_df.loc[united_df['Name'] == "Invisible, Inc.", 'description'])

518    [NAME]: Invisible, Inc., [DESC]: Take control of Invisible's agents in the field and infiltrate the world's most dangerous corporations. Stealth, precision, and teamwork are essential in high-stakes, high-profit missions, where every move may cost an agent their life. Klei Entertainment, the independent studio behind the hit games Mark of the Ninja and Don't Starve, presents: Invisible, Inc. Key Features Character selection: Start with any of the 10 unlockable agents in the game, plus 6 agent variants, and 6 starting programs to crack corporate security. Deep customization of builds: each play through is different as you create your own strategy using agents, items, augments and programs, and adapt to your surroundings. Randomly generated world: locations, threats, and loot are randomly generated so each playthrough is vastly different and you’ll never get complacent. Choose your own game mode: with 5 different game modes and extensive custom generation options, each player can 

In [79]:
# Lift the column-width limit only inside this block so the full text is printed.
with pd.option_context('display.max_colwidth', None):
    print(united_df.loc[united_df['Name'] == "Tom Clancy's Splinter Cell Chaos Theory®", 'description'])

9275    [NAME]: Tom Clancy's Splinter Cell Chaos Theory®, [DESC]: The year is 2008. Citywide blackouts ... stock exchange sabotage ... electronic hijacking of national defense systems ... this is information warfare. To prevent these attacks, operatives must infiltrate deep into hostile territory and aggressively collect critical intelligence, closer than ever to enemy soldiers. You are Sam Fisher, the NSA's most elite black-ops agent. To achieve your mission you will kill from close range, attack with your combat knife, shoot with the prototype Land Warrior rifle, and use radical suppression techniques such as the inverted neck break. Also take on cooperative multiplayer infiltration missions, where teamwork is the ultimate weapon. As the enemy evolves, so must you. Deadly agility - Incredibly complete and fluid variety of moves including athletic moves, stealth moves, stealth kills. Real weapons of tomorrow - The most complete arsenal of weapons and gadgets, from the knife to the exp