In [1]:
import urllib.request 
import re
import pandas as pd
from elasticsearch import Elasticsearch

In [None]:
import requests
# Elasticsearch must already be running (start bin/elasticsearch from the
# Elasticsearch directory in a terminal) before this cell is executed.
# A timeout is passed so the cell fails fast instead of hanging forever when
# the port accepts the connection but the server never answers.
res = requests.get('http://localhost:9200', timeout=10)

In [None]:
# Client handle for the local server on port 9200; the indexing and search
# cells below all go through this object.
es = Elasticsearch(hosts=[dict(host='localhost', port=9200)])

In [None]:
def test_ES(es):
    return es.ping()  # got True

In [None]:
# Print a one-line health message for the `es` client.
print('ES instance working' if test_ES(es) else 'ES instance not working')

In [None]:
def index_info(index_name):
    """Print document, shard and deleted-document counts for one index.

    Reads the ``docs.count``, ``docs.deleted`` and ``pri`` columns from the
    cat-indices API through the notebook-level ``es`` client and prints them
    in a small text report.

    Args:
        index_name: name of the index to describe.
    """
    raw_line = es.cat.indices(
        index=index_name,
        h=['docs.count', 'docs.deleted', 'pri'],
    )
    # Drop the last character (presumably the trailing newline) and split the
    # three single-space-separated columns.
    count, deleted, shards = raw_line[:-1].split(' ')
    report = """
      #### INDEX INFO #####
      index_name = {}
      doc_count = {}
      shard_count = {}
      deleted_doc_count = {}
      """
    print(report.format(index_name, count, shards, deleted))

In [2]:
# Load the raw Steam game records from a pickle file in the working directory.
# NOTE(review): unpickling can execute arbitrary code embedded in the file, so
# only load this pickle if it comes from a trusted source.
games = pd.read_pickle('final_data_raw.pickle')

In [3]:
# Per-game metadata table (presumably exported from SteamSpy, judging by the
# file name); joined onto `games` further down.
game_meta = pd.read_csv(filepath_or_buffer='steamspy_cleaned.csv')

In [4]:
# Parse the release dates into pandas datetimes, overwriting the column.
games['release_date'] = pd.to_datetime(games['release_date'])

In [11]:
# Copy of `games` keyed by the Steam application id, ready for an index join.
_games = games.set_index(keys='steam_appid')

In [9]:
# Copy of `game_meta` keyed by its `appid` column, ready for an index join.
_game_meta = game_meta.set_index(keys='appid')

In [13]:
# Index-on-index join (default inner join) that attaches the last and
# second-to-last metadata columns, in that order, to every game row.
merged_df = pd.merge(
    _games,
    _game_meta.iloc[:, [-1, -2]],
    left_index=True,
    right_index=True,
)

In [17]:
# Move the join key out of the index and back into an ordinary column.
merged_df = merged_df.reset_index()

In [25]:
# The column produced by reset_index is called 'index'; give it back its
# meaningful name.
merged_df = merged_df.rename(columns={'index': 'steam_appid'})

In [26]:
# Show the merged table (the rendered output elides the middle rows).
merged_df

Unnamed: 0,steam_appid,type,name,about_the_game,short_description,developers,publishers,categories,genres,release_date,popularity,rating
0,10,game,Counter-Strike,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,[Valve],[Valve],"[Multi-player, PvP, Online PvP, Shared/Split S...",[Action],2000-01-11,0.818182,175211
1,20,game,Team Fortress Classic,One of the most popular online action games of...,One of the most popular online action games of...,[Valve],[Valve],"[Multi-player, PvP, Online PvP, Shared/Split S...",[Action],1999-01-04,0.636364,4186
2,30,game,Day of Defeat,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,[Valve],[Valve],"[Multi-player, Valve Anti-Cheat enabled]",[Action],2003-01-05,0.727273,4233
3,40,game,Deathmatch Classic,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,[Valve],[Valve],"[Multi-player, PvP, Online PvP, Shared/Split S...",[Action],2001-01-06,0.727273,1372
4,50,game,Half-Life: Opposing Force,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,[Gearbox Software],[Valve],"[Single-player, Multi-player, Valve Anti-Cheat...",[Action],1999-01-11,0.727273,11073
...,...,...,...,...,...,...,...,...,...,...,...,...
49412,1691700,game,Football tournament,Football has united all the countries of the w...,Football has united all the countries of the w...,[Phoenixxx Games],[Phoenixxx Games],[Single-player],"[Casual, Indie, Simulation, Sports]",2021-02-07,0.000000,0
49413,1691760,game,Survival Escape Room,"Welcome to Survival Escape Room, You've been k...",You've been kidnapped and you only have so muc...,[Braden Frye],[Frye Games],[Single-player],[Indie],2021-07-23,0.000000,0
49414,1692480,game,Cavern Commandos,Inspired by classic platformers like Rick Dang...,Inspired by classic platformers like Rick Dang...,[Soiree Games],[Soiree Games],"[Single-player, Full controller support]","[Action, Indie]",2021-06-07,0.000000,0
49415,1692580,game,Hidden Shapes Black Skull - Jigsaw Puzzle Game,Disconnect yourself from the world with this g...,Relax with beautiful hand-drawn puzzles inspir...,[YAW Studios],[YAW Studios],"[Single-player, Steam Achievements]","[Casual, Indie]",2021-06-08,0.000000,0


In [33]:
# Preview the raw values that get indexed: column 0 plus columns 2 through 11
# (column 1 is skipped).
merged_df.values[:, [0, *range(2, 12)]]

array([[10, 'Counter-Strike',
        "Play the world's number 1 online action game. Engage in an incredibly realistic brand of terrorist warfare in this wildly popular team-based game. Ally with teammates to complete strategic missions. Take out enemy sites. Rescue hostages. Your role affects your team's success. Your team's success affects your role.",
        ..., Timestamp('2000-01-11 00:00:00'), 0.8181818181818182,
        175211],
       [20, 'Team Fortress Classic',
        'One of the most popular online action games of all time, Team Fortress Classic features over nine character classes -- from Medic to Spy to Demolition Man -- enlisted in a unique style of online team warfare. Each character class possesses unique weapons, items, and abilities, as teams compete online in a variety of game play modes.',
        ..., Timestamp('1999-01-04 00:00:00'), 0.6363636363636364, 4186],
       [30, 'Day of Defeat',
        'Enlist in an intense brand of Axis vs. Allied teamplay set in th

In [None]:
#es.indices.delete(index='test-index')

In [None]:
# Name of the index that keeps Elasticsearch's default (BM25) similarity.
index_name = 'steam_index_bm25'

# Settings and field mappings for the BM25 index. No `similarity` is set on
# any field, so the server default applies.
request_body_bm25 = {
    'settings': {
        'number_of_shards': 1,
        'number_of_replicas': 1,
    },
    'mappings': {
        'properties': {
            'app_id': {'type': 'integer'},
            # Documents are indexed and searched under 'title' (not 'name'),
            # so the mapping has to declare that same field.
            'title': {'type': 'text'},
            'short_description': {
                'type': 'text',
                'analyzer': 'english',
            },
            'about_the_game': {
                'type': 'text',
                'analyzer': 'english',
            },
            # The list-valued fields get a `.raw` keyword subfield so they can
            # be matched exactly in addition to full-text search.
            'developers': {
                'type': 'text',
                'fields': {'raw': {'type': 'keyword'}},
            },
            'publishers': {
                'type': 'text',
                'fields': {'raw': {'type': 'keyword'}},
            },
            'categories': {
                'type': 'text',
                'fields': {'raw': {'type': 'keyword'}},
            },
            'genres': {
                'type': 'text',
                'fields': {'raw': {'type': 'keyword'}},
            },
            'release_date': {'type': 'date'},
            'popularity': {'type': 'float'},
            'rating': {'type': 'integer'},
        }
    },
}

# Ask the server whether the index exists instead of treating *any* exception
# (including connection failures) as "index missing".
if es.indices.exists(index=index_name):
    print('index {} already exists'.format(index_name))
else:
    print('creating index {}'.format(index_name))
    es.indices.create(index=index_name, body=request_body_bm25)
    print('{} index created successfully'.format(index_name))

In [None]:
# Index every row of `merged_df` into `index_name`, using the row position as
# the document id. Columns are picked by position: 0 is the app id and 2..11
# are name through rating (column 1 is skipped).
# The first loop variable is named `app_id` so that it matches the name used
# when building the document (it was previously unpacked as `appid`, which
# made the document body raise a NameError).
for i, (app_id, name, about_the_game, short_description,
        developers, publishers, categories,
        genres, release_date, popularity, rating) in enumerate(
            merged_df.values[:, [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]]):
    doc_body = {
        'app_id': app_id,
        'title': name,
        'short_description': short_description,
        'about_the_game': about_the_game,
        'developers': developers,
        'publishers': publishers,
        'categories': categories,
        'genres': genres,
        'release_date': release_date,
        'popularity': popularity,
        'rating': rating,
    }
    es.index(index=index_name, body=doc_body, id=i)
print('{} index populated successfully'.format(index_name))

In [None]:
# The cat API returns text ending in a newline; strip it so the count does not
# break the sentence across two lines.
print('we have made an index called {} with {} documents'.format(
    index_name, es.cat.count(index=index_name, h=['count']).strip()))
index_info(index_name)

In [None]:
# Name of the index whose text fields are scored with a DFR similarity.
index_name = 'steam_index_dfr'

# Settings and field mappings for the DFR index. A custom similarity called
# 'dfr_similarity' is declared in the settings and attached to each text field.
request_body_dfr = {
    'settings': {
        'number_of_shards': 1,
        'number_of_replicas': 1,
        'index': {
            'similarity': {
                'dfr_similarity': {
                    'type': 'DFR',
                    'basic_model': 'g',
                    'after_effect': 'l',
                    'normalization': 'h2',
                    'normalization.h2.c': '3.0',
                }
            }
        },
    },
    'mappings': {
        'properties': {
            'app_id': {'type': 'integer'},
            'title': {
                'type': 'text',
                'similarity': 'dfr_similarity',
            },
            'short_description': {
                'type': 'text',
                'analyzer': 'english',
                'similarity': 'dfr_similarity',
            },
            'about_the_game': {
                'type': 'text',
                'analyzer': 'english',
                'similarity': 'dfr_similarity',
            },
            # Field names are plural ('developers', 'publishers') to match the
            # keys of the indexed documents; with the singular names these
            # mappings were never applied to any data.
            'developers': {
                'type': 'text',
                'similarity': 'dfr_similarity',
                'fields': {'raw': {'type': 'keyword'}},
            },
            'publishers': {
                'type': 'text',
                'similarity': 'dfr_similarity',
                'fields': {'raw': {'type': 'keyword'}},
            },
            'categories': {
                'type': 'text',
                'similarity': 'dfr_similarity',
                'fields': {'raw': {'type': 'keyword'}},
            },
            'genres': {
                'type': 'text',
                'similarity': 'dfr_similarity',
                'fields': {'raw': {'type': 'keyword'}},
            },
            'release_date': {'type': 'date'},
            'popularity': {'type': 'float'},
            'rating': {'type': 'integer'},
        }
    },
}

# Ask the server whether the index exists instead of treating *any* exception
# (including connection failures) as "index missing".
if es.indices.exists(index=index_name):
    print('index {} already exists'.format(index_name))
else:
    print('creating index {}'.format(index_name))
    es.indices.create(index=index_name, body=request_body_dfr)
    print('{} index created successfully'.format(index_name))

In [None]:
# Index every row of `merged_df` into `index_name`, using the row position as
# the document id. Columns are picked by position: 0 is the app id and 2..11
# are name through rating (column 1 is skipped).
# The first loop variable is named `app_id` so that it matches the name used
# when building the document (it was previously unpacked as `appid`, which
# made the document body raise a NameError).
for i, (app_id, name, about_the_game, short_description,
        developers, publishers, categories,
        genres, release_date, popularity, rating) in enumerate(
            merged_df.values[:, [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]]):
    doc_body = {
        'app_id': app_id,
        'title': name,
        'short_description': short_description,
        'about_the_game': about_the_game,
        'developers': developers,
        'publishers': publishers,
        'categories': categories,
        'genres': genres,
        'release_date': release_date,
        'popularity': popularity,
        'rating': rating,
    }
    es.index(index=index_name, body=doc_body, id=i)
print('{} index populated successfully'.format(index_name))

In [None]:
# The cat API returns text ending in a newline; strip it so the count does not
# break the sentence across two lines.
print('we have made an index called {} with {} documents'.format(
    index_name, es.cat.count(index=index_name, h=['count']).strip()))
index_info(index_name)

In [None]:
def search(explain=False):
    """Run the widget-driven search against ``index_name`` and print the hits.

    Reads its inputs from the notebook-level widgets: the query string from
    ``text``, the date bounds from ``my_result.result`` and the genre and
    category patterns from ``genres_drop`` / ``categories_drop``.

    Args:
        explain: when true, ask Elasticsearch for a scoring explanation and
            print it after each hit. Defaults to ``False``.
    """
    query = text.value
    # Must match the field names of the indexed documents ('developers' is
    # plural there).
    fields = ["title", "short_description", "about_the_game",
              "developers", "categories", "genres"]

    start_date = my_result.result[0]
    end_date = my_result.result[1]

    genre = genres_drop.value
    category = categories_drop.value

    query_body = {
        "query": {
            "bool": {
                # NOTE(review): the text query sits in `should` next to a
                # `must` clause, so it influences ranking but does not restrict
                # which documents match — confirm that this is intended.
                "should": [
                    {
                        "multi_match": {
                            "query": query,
                            "fields": fields
                        }
                    }
                ],
                "must": [
                    {
                        "range": {
                            "release_date": {
                                "gte": start_date,
                                "lte": end_date
                            }
                        }
                    }
                ],
                # One clause per wildcard: two 'wildcard' keys in a single dict
                # literal collapse into one, which silently dropped the genre
                # filter.
                "filter": [
                    {"wildcard": {"genres.raw": genre}},
                    {"wildcard": {"categories.raw": category}}
                ]
            }
        }
    }

    print('### RESULTS ####')
    results = es.search(index=index_name, body=query_body, explain=explain)['hits']['hits']
    for hit in results:
        source = hit['_source']
        print('title: {}, release date: {}, score: {}, categories: {}, genres: {}'.format(
            source['title'],
            source['release_date'],
            hit['_score'],
            source['categories'],
            source['genres']))
        # Print the explanation per hit; reading `hit` after the loop showed
        # only the last result and failed when there were no results.
        if explain:
            print('some info on results')
            print(hit['_explanation'])

In [None]:
# Distinct genre labels across all games, followed by '*' as the
# "match anything" choice for the dropdown. The previous loop appended to an
# undefined name (`genre_list`) and raised a NameError. Comprehensions also
# keep the loop variables out of the notebook namespace, and sorting makes the
# dropdown order deterministic.
genres_list = sorted({g for row in games.genres for g in row}) + ['*']

# Same construction for the category labels.
categories_list = sorted({c for row in games.categories for c in row}) + ['*']

In [None]:
import ipywidgets as widgets
import pandas as pd
from datetime import datetime
from ipywidgets import interactive
from IPython.display import display, Markdown, clear_output


# One slider option per calendar day between the earliest and latest release.
dates = pd.date_range(games.release_date.min(), games.release_date.max())

# Each option is a (label, value) pair. The backslashes in the label format are
# written as '\\' so they stay literal without relying on invalid escape
# sequences ('\%'), which newer Python versions warn about; the rendered label
# text is unchanged.
options = [(date.strftime(' %d\\%m\\%Y '), date) for date in dates]
# Start with the full range selected (first and last option).
index = (0, len(options) - 1)

selection_range_slider = widgets.SelectionRangeSlider(
    options=options,
    index=index,
    description='Dates',
    orientation='horizontal',
    layout={'width': '500px'}
)

# Identity callback handed to `interactive` so the slider selection is exposed
# as its result.
def return_dates(date_range):
    """Return the selected date range unchanged."""
    selected = date_range
    return selected

# Wrap the slider with `interactive`; `search()` later reads the selection from
# `my_result.result`.
my_result = interactive(
    return_dates,
    date_range=selection_range_slider,
)

# Free-text query box.
text = widgets.Text(
    placeholder='type query',
    description='Search',
    layout={'width': '500px'},
)

# Button that triggers the search, and the area the results are printed into.
button = widgets.Button(description='search')
out = widgets.Output()

def on_button_clicked(_):
    """Button callback: clear the output area, then run ``search()`` in it.

    Args:
        _: the argument passed by the button's click handler; unused.
    """
    # Route everything printed below into the `out` widget.
    with out:
        clear_output()
        search()


# Register the callback so that clicking `button` runs a search.
button.on_click(on_button_clicked)

# Dropdowns for the genre and category filters; '*' (match anything) is the
# initial selection in both.
genres_drop = widgets.Dropdown(description='genres',
                               options=genres_list, value='*')

# Label spelling corrected from 'catagories'.
categories_drop = widgets.Dropdown(description='categories',
                                   options=categories_list, value='*')

# Render the whole search form; a bare expression would also display a widget,
# but `display` shows all of them from one cell.
display(text, my_result, genres_drop, categories_drop, button, out)