In [1]:
import urllib.request 
import re
import pandas as pd
from elasticsearch import Elasticsearch

In [2]:
import requests
# Start the server first: run bin/elasticsearch from the Elasticsearch
# directory in a terminal before running this cell.
# The timeout keeps the cell from blocking indefinitely when the port accepts
# the connection but never answers; a refused connection still raises.
res = requests.get('http://localhost:9200', timeout=5)

In [3]:
# Client used by the cells below, pointed at localhost:9200
es = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

In [4]:
def test_ES(es):
    return es.ping()  # got True

In [5]:
# Report whether the server answered the ping
print('ES instance working' if test_ES(es) else 'ES instance not working')

ES instance working




In [6]:
def index_info(index_name):
    """Print document, shard and deleted-document counts for an index.

    Queries the cat-indices API through the module-level client ``es`` and
    prints a short report.

    Args:
        index_name: Name of the index to describe.

    Raises:
        ValueError: If the response does not consist of exactly three
            whitespace-separated values (for example when ``index_name``
            matches more than one index).
    """
    response = es.cat.indices(index=index_name, h=['docs.count', 'docs.deleted', 'pri'])
    # split() without an argument ignores the trailing newline and any run of
    # spaces between columns; split(' ') would yield empty fields on padded output.
    values = response.split()
    if len(values) != 3:
        raise ValueError(
            'expected docs.count, docs.deleted and pri for index {!r}, got {!r}'.format(
                index_name, response))
    count, deleted, shards = values
    print(
      """
      #### INDEX INFO #####
      index_name = {}
      doc_count = {}
      shard_count = {}
      deleted_doc_count = {}
      """.format(index_name, count, shards, deleted)
    )

In [7]:
# read data from steam data directory
# NOTE(review): unpickling can execute code embedded in the file, so only load
# this pickle when it comes from a trusted source.
games = pd.read_pickle('final_data_raw_V3.pickle')

In [8]:
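# Uncomment to delete an existing index before rebuilding it (this permanently removes its documents)
#es.indices.delete(index='steam_index_dfr')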

In [9]:
# Set the index name: the cells below create, fill and report on whichever
# index this global names, and search() reads it when it runs.
index_name = 'steam_index_bm25_final'

In [63]:
# BM25 (Default index)
# No custom similarity is configured here. Fields that carry a `raw` keyword
# sub-field can be matched exactly (the search filters use genres.raw and
# categories.raw).
request_body_bm25_final = {
    'settings': {
        'number_of_shards': 1,
        'number_of_replicas': 1,
    },
    'mappings': {
        'properties': {
            'app_id': {'type': 'integer'},
            'title': {'type': 'text'},
            'short_description': {'type': 'text', 'analyzer': 'english'},
            'about_the_game': {'type': 'text', 'analyzer': 'english'},
            'developers': {
                'type': 'text',
                'fields': {'raw': {'type': 'keyword'}}
            },
            'publishers': {
                'type': 'text',
                'fields': {'raw': {'type': 'keyword'}}
            },
            'categories': {
                'type': 'text',
                'fields': {'raw': {'type': 'keyword'}}
            },
            'genres': {
                'type': 'text',
                'fields': {'raw': {'type': 'keyword'}}
            },
            'is_released': {'type': 'keyword'},
            'release_date': {'type': 'date'},
            'popularity': {'type': 'float'},
            'rating': {'type': 'float'}
        }
    }
}

# Ask the server whether the index exists instead of catching every exception
# from indices.get(): a bare except would also swallow connection or
# authorisation failures and then attempt to create the index anyway.
if es.indices.exists(index=index_name):
    print('index {} already exists'.format(index_name))
else:
    print('creating index {}'.format(index_name))
    es.indices.create(index=index_name, body=request_body_bm25_final)

index steam_index_bm25_final already exists




In [None]:
# Index every game into `index_name`, one request per DataFrame row, using the
# row position as the document id.
# The first column of `games` is skipped ([:, 1:]) and the remaining twelve are
# unpacked by position, so this depends on the column order of the DataFrame.
# NOTE(review): confirm the columns really are in the order unpacked below.
for i, row in enumerate(games.values[:, 1:]):
    (name, app_id, about_the_game, short_description,
     developers, publishers, categories, genres,
     release_date, is_released, rating, popularity) = row
    doc_body = {
        'app_id': app_id,
        'title': name,
        'short_description': short_description,
        'about_the_game': about_the_game,
        'developers': developers,
        'publishers': publishers,
        'categories': categories,
        'genres': genres,
        'is_released': is_released,
        'release_date': release_date,
        'rating': rating,
        'popularity': popularity
    }
    # Keyword arguments: passing index/body positionally is deprecated in the
    # 7.x client and rejected by later versions.
    es.index(index=index_name, body=doc_body, id=i)

In [None]:
# Refresh first so documents indexed moments ago are included in the counts.
es.indices.refresh(index=index_name)
# The cat API returns text ending in a newline; strip it so the message stays on one line.
doc_count = es.cat.count(index=index_name, h=['count']).strip()
print('we have made an index called {} with {} documents'.format(index_name, doc_count))
index_info(index_name)

In [None]:
# Rebinds the global index name: the cells below, and search() (which reads
# index_name when it runs), target this index from here on.
index_name = 'steam_index_dfr_final'

In [None]:
# DFR index
# Declares a custom similarity named `dfr_similarity` and attaches it to every
# text field. The property names must match the keys of the documents that get
# indexed ('developers', 'publishers'); the singular names used previously left
# those two fields to dynamic mapping, without the custom similarity.
request_body_dfr = {
    'settings': {
        'number_of_shards': 1,
        'number_of_replicas': 1,
        'index': {
            'similarity': {
                'dfr_similarity': {
                    'type': 'DFR',
                    'basic_model': 'g',
                    'after_effect': 'l',
                    'normalization': 'h2',
                    'normalization.h2.c': '3.0'
                }
            }
        }
    },
    'mappings': {
        'properties': {
            'app_id': {'type': 'integer'},
            'title': {
                'type': 'text',
                'similarity': 'dfr_similarity'
            },
            'short_description': {
                'type': 'text',
                'analyzer': 'english',
                'similarity': 'dfr_similarity'
            },
            'about_the_game': {
                'type': 'text',
                'analyzer': 'english',
                'similarity': 'dfr_similarity'
            },
            'developers': {
                'type': 'text',
                'similarity': 'dfr_similarity',
                'fields': {'raw': {'type': 'keyword'}}
            },
            'publishers': {
                'type': 'text',
                'similarity': 'dfr_similarity',
                'fields': {'raw': {'type': 'keyword'}}
            },
            'categories': {
                'type': 'text',
                'similarity': 'dfr_similarity',
                'fields': {'raw': {'type': 'keyword'}}
            },
            'genres': {
                'type': 'text',
                'similarity': 'dfr_similarity',
                'fields': {'raw': {'type': 'keyword'}}
            },
            'is_released': {'type': 'keyword'},
            'release_date': {'type': 'date'},
            'popularity': {'type': 'float'},
            'rating': {'type': 'float'}
        }
    }
}

# Ask the server whether the index exists instead of catching every exception
# from indices.get(): a bare except would also swallow connection or
# authorisation failures and then attempt to create the index anyway.
if es.indices.exists(index=index_name):
    print('index {} already exists'.format(index_name))
else:
    print('creating index {}'.format(index_name))
    es.indices.create(index=index_name, body=request_body_dfr)

In [None]:
# indexing using DFR
# Index every game into `index_name`, one request per DataFrame row, using the
# row position as the document id.
# The first column of `games` is skipped ([:, 1:]) and the remaining twelve are
# unpacked by position, so this depends on the column order of the DataFrame.
# NOTE(review): confirm the columns really are in the order unpacked below.
for i, row in enumerate(games.values[:, 1:]):
    (name, app_id, about_the_game, short_description,
     developers, publishers, categories, genres,
     release_date, is_released, rating, popularity) = row
    doc_body = {
        'app_id': app_id,
        'title': name,
        'short_description': short_description,
        'about_the_game': about_the_game,
        'developers': developers,
        'publishers': publishers,
        'categories': categories,
        'genres': genres,
        'is_released': is_released,
        'release_date': release_date,
        'rating': rating,
        'popularity': popularity
    }
    # Keyword arguments: passing index/body positionally is deprecated in the
    # 7.x client and rejected by later versions.
    es.index(index=index_name, body=doc_body, id=i)

In [None]:
# Refresh first so documents indexed moments ago are included in the counts.
es.indices.refresh(index=index_name)
# The cat API returns text ending in a newline; strip it so the message stays on one line.
doc_count = es.cat.count(index=index_name, h=['count']).strip()
print('we have made an index called {} with {} documents'.format(index_name, doc_count))
index_info(index_name)

In [10]:
# Search function: builds the query body from the current widget values, runs
# it against `index_name` and prints the hits.

# Layout of one printed hit; filled positionally by _format_hit().
_HIT_TEMPLATE = (
    '\n'
    'GAME TITLE: {}, \n'
    'Release date: {}, \n'
    'Score: {:.4f}\n'
    'About: {:s}\n'
    'Developer(s): {}\n'
    'Publisher(s): {}\n'
    'Genre(s): {}\n'
    'Categories: {}\n'
    'Popularity: {:.2f}, Rating: {:.2f}\n***\n'
)


def _build_query_body(query, fields, start_date, end_date, is_released,
                      genre, category, popularity_factor, rating_factor):
    """Return the request body for one search from already-read widget values."""
    text_clause = {
        'multi_match': {
            'query': query,
            # 'type': 'most_fields',
            'fields': fields
        }
    }
    popularity_clause = {
        'function_score': {
            'field_value_factor': {
                'field': 'popularity',
                'factor': popularity_factor,
            }
        }
    }
    rating_clause = {
        'function_score': {
            'field_value_factor': {
                'field': 'rating',
                'factor': rating_factor,
                'modifier': 'log1p'
            }
        }
    }
    filters = [
        {'wildcard': {'is_released': is_released}},
        {'range': {'release_date': {'gte': start_date, 'lte': end_date}}},
        {'wildcard': {'genres.raw': genre}},
        {'wildcard': {'categories.raw': category}}
    ]
    return {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'should': [text_clause, popularity_clause, rating_clause],
                        'filter': filters,
                        # 'minimum_should_match': 0.2
                    }
                }
            }
        }
    }


def _format_hit(hit):
    """Render one search hit as the text block printed by search()."""
    source = hit['_source']
    return _HIT_TEMPLATE.format(
        source['title'],
        source['release_date'][:10],
        hit['_score'],
        source['about_the_game'],
        source['developers'], source['publishers'],
        source['genres'], source['categories'],
        source['popularity'], source['rating'],
    )


def search():
    """Run a search using the current widget values and print the hits.

    Reads the module-level widgets `text`, `date_slider`, `is_released_drop`,
    `genres_drop`, `categories_drop` and `check`, and queries the index named
    by the global `index_name` through the global client `es`.

    The free-text query is matched against several text fields. The release
    date range, released status, genre and category are applied as filters;
    the dropdown value '*' is passed to a wildcard filter. Ticking a checkbox
    sets a non-zero factor on the corresponding popularity / rating clause.

    NOTE(review): the text match sits in `should` next to `filter` clauses, so
    it is presumably optional for a document to be returned -- confirm that is
    the intended behaviour.
    """
    query = text.value
    # Field names must match the indexed document keys: 'developers', not 'developer'.
    fields = ['title', 'short_description', 'about_the_game',
              'developers', 'categories', 'genres']

    start_date = date_slider.result[0]
    end_date = date_slider.result[1]

    # check[0] is the popularity checkbox, check[1] the user-ratings checkbox.
    popularity_factor = 10 if check[0].value else 0
    rating_factor = 0.5 if check[1].value else 0

    query_body = _build_query_body(
        query, fields, start_date, end_date,
        is_released_drop.value, genres_drop.value, categories_drop.value,
        popularity_factor, rating_factor,
    )

    print('### SEARCH RESULTS ###')
    results = es.search(index=index_name, body=query_body)['hits']['hits']
    for hit in results:
        print(_format_hit(hit))

In [11]:
# Choices for the filter dropdowns: '*' first (it is sent to a wildcard filter
# by search()), followed by the sorted distinct values found in the data.
genre_list = ['*'] + sorted({g for row in games.genres for g in row})

category_list = ['*'] + sorted({c for row in games.categories for c in row})

released_list = ['*'] + sorted(games.is_released.unique())

In [19]:
import ipywidgets as widgets
from datetime import datetime
from ipywidgets import interactive, Button, HBox, VBox, Checkbox, HTML, Layout
from IPython.display import display, clear_output

header = HTML('<h2>Steam Game Search Engine</h2>', layout=Layout(left= '205px', height='40px'))

# One slider option per calendar day between the earliest and latest release date
dates = pd.date_range(games.release_date.min(), games.release_date.max())

# Each option is a (label, value) pair. The label format is a raw string
# because it contains backslashes: in a plain literal `\%` is an invalid escape
# sequence. The rendered labels are unchanged.
options = [(date.strftime(r' %d\%m\%Y '), date) for date in dates]
# Start with the full range selected (first and last day)
index = (0, len(options)-1)

selection_range_slider = widgets.SelectionRangeSlider(
    options=options,
    index=index,
    description='Release date:',
    orientation='horizontal',
    layout={'width': '603.5px'}
)

def return_dates(date_range):
    """Return the selected range unchanged; used as the callback for `interactive`."""
    return date_range

# Wrap the slider with `interactive`; search() reads the current selection
# from `date_slider.result`.
date_slider = interactive(return_dates, date_range = selection_range_slider)

# Free-text query box
text = widgets.Text(
    description='Search:',
    placeholder='Enter query',
    layout={'width': '603.5px'},
)

# Search button and the output area the results are printed into
button = Button(
    icon='search',
    layout={'left': '90px', 'width': '510px', 'height': '40px'},
)
out = widgets.Output()


def on_button_clicked(_):
    """Click handler: clear the output area and run search() inside it."""
    with out:
        clear_output()
        search()


# Register the handler so every click runs a new search
button.on_click(on_button_clicked)

# Filter dropdowns; each starts on '*'
is_released_drop = widgets.Dropdown(
    options=released_list,
    value='*',
    description='Un/released:',
    layout={'width': '603.5px'},
)

genres_drop = widgets.Dropdown(
    options=genre_list,
    value='*',
    description='Genre:',
)

categories_drop = widgets.Dropdown(
    options=category_list,
    value='*',
    description='Category:',
)

# One checkbox per ranking signal; search() reads them by position
# (check[0] for popularity, check[1] for user ratings).
words = ['Popularity', 'User ratings']
check = [Checkbox(False, description=label) for label in words]

checks_title = HTML('Influenced by:', layout=Layout(height='40px'))


# Render the form from top to bottom
display(
    header,
    text,
    date_slider,
    is_released_drop,
    HBox([genres_drop, categories_drop]),
    HBox([checks_title, check[0], check[1]]),
    button,
    out,
)

HTML(value='<h2>Steam Game Search Engine</h2>', layout=Layout(height='40px', left='205px'))

Text(value='', description='Search:', layout=Layout(width='603.5px'), placeholder='Enter query')

interactive(children=(SelectionRangeSlider(description='Release date:', index=(0, 15889), layout=Layout(width=…

Dropdown(description='Un/released:', layout=Layout(width='603.5px'), options=('*', 'released', 'unreleased'), …

HBox(children=(Dropdown(description='Genre:', options=('*', 'Accounting', 'Action', 'Adventure', 'Animation ',…

HBox(children=(HTML(value='Influenced by:', layout=Layout(height='40px')), Checkbox(value=False, description='…

Button(icon='search', layout=Layout(height='40px', left='90px', width='510px'), style=ButtonStyle())

Output()