In [41]:
import urllib.request 
import re
import pandas as pd
from pprint import pprint
from elasticsearch import Elasticsearch

In [2]:
import requests
# run bin/elasticsearch in elasticsearch directory in terminal before running this cell
res = requests.get('http://localhost:9200')

In [3]:
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

In [4]:
def test_ES(es):
    return es.ping()  # got True

In [5]:
if test_ES(es):
    print('ES instance working')
else:
    print('ES instance not working')

ES instance working


In [6]:
def index_info(index_name):
    count, deleted, shards, =  es.cat.indices(index=index_name, h=['docs.count', 'docs.deleted', 'pri'])[:-1].split(' ')
    print(
      """
      #### INDEX INFO #####
      index_name = {}
      doc_count = {}
      shard_count = {}
      deleted_doc_count = {}
      """.format(index_name, count, shards, deleted)
  )

In [7]:
# read data from steam data directory 
games = pd.read_pickle('final_data_raw.pickle')

In [9]:
game_meta = pd.read_csv('steamspy_cleaned_v2.csv')

In [10]:
games['release_date'] = pd.to_datetime(games.release_date)

In [11]:
_games = games.set_index('steam_appid')

In [12]:
_game_meta = game_meta.set_index('appid')

In [14]:
merged_df = _games.merge(_game_meta.iloc[:,[-1,-2]], left_index=True, right_index=True)

In [15]:
merged_df.reset_index(inplace=True)

In [16]:
merged_df.rename(columns={'index':'steam_appid'},inplace=True)

In [28]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49417 entries, 0 to 49416
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   steam_appid        49417 non-null  int64         
 1   type               49417 non-null  object        
 2   name               49417 non-null  object        
 3   about_the_game     49417 non-null  object        
 4   short_description  49417 non-null  object        
 5   developers         49417 non-null  object        
 6   publishers         49417 non-null  object        
 7   categories         49417 non-null  object        
 8   genres             49417 non-null  object        
 9   release_date       49417 non-null  datetime64[ns]
 10  popularity         49417 non-null  float64       
 11  rating             49417 non-null  float64       
dtypes: datetime64[ns](1), float64(2), int64(1), object(8)
memory usage: 4.5+ MB


In [27]:
null_check = merged_df.isna()
null_check.loc[null_check.values == True]

Unnamed: 0,steam_appid,type,name,about_the_game,short_description,developers,publishers,categories,genres,release_date,popularity,rating


In [44]:
#es.indices.delete(index='test-index')
# es.indices.delete(index='steam_index_bm25')
# es.indices.delete(index='steam_index_dfr')
# index_name = 'steam_index_bm25'


In [29]:
# set index name
index_name = 'steam_index_bm25'
# BM25 (Default index)
request_body_bm25 = {
      'settings': {
            'number_of_shards': 1,
            'number_of_replicas': 1
            },
      'mappings': {
            'properties': {
                  'app_id':{'type': 'integer'},
                  'name': {'type': 'text'},
                  'short_description': {
                        'type': 'text',
                        'analyzer': 'english'
                        },
                  'about_the_game': {
                        'type': 'text',
                        'analyzer': 'english'
                        },
                  'developers': {
                        "type": "text",
                        "fields": {
                              "raw": {
                                    "type": "keyword"
                                    }
                              }
                        },
                  'publishers': {
                        "type": "text",
                        "fields": {
                              "raw": {
                                    "type": "keyword"
                                    }
                              }
                        },
                  'categories': {
                        "type": "text",
                        "fields": {
                              "raw": {
                                    "type": "keyword"
                                    }
                              }
                        },
                  'genres': {
                        "type": "text",
                        "fields": {
                              "raw": {
                                    "type": "keyword"
                                    }
                              }
                        },
                  'release_date': {'type': 'date'},
                  'popularity':{'type':'float'},
                  'rating':{'type':'float'}
                  }
            }
      }

try:
    es.indices.get(index_name)
    print('index {} already exists'.format(index_name))
except:
    print('creating index {}'.format(index_name))
    es.indices.create(index_name, body=request_body_bm25)
    print('{} index created successfully'.format(index_name))

creating index steam_index_bm25
steam_index_bm25 index created successfully


In [30]:
merged_df.iloc[:,[0,2,3,4,5,6,7,8,9,10,11]].values

array([[10, 'Counter-Strike',
        "Play the world's number 1 online action game. Engage in an incredibly realistic brand of terrorist warfare in this wildly popular team-based game. Ally with teammates to complete strategic missions. Take out enemy sites. Rescue hostages. Your role affects your team's success. Your team's success affects your role.",
        ..., Timestamp('2000-01-11 00:00:00'), 0.8181818181818182,
        38.60699720970165],
       [20, 'Team Fortress Classic',
        'One of the most popular online action games of all time, Team Fortress Classic features over nine character classes -- from Medic to Spy to Demolition Man -- enlisted in a unique style of online team warfare. Each character class possesses unique weapons, items, and abilities, as teams compete online in a variety of game play modes.',
        ..., Timestamp('1999-01-04 00:00:00'), 0.6363636363636364,
        5.924705882352941],
       [30, 'Day of Defeat',
        'Enlist in an intense brand of Ax

In [31]:
for i, (appid, name, about_the_game, short_description,
       developers, publishers, categories, 
       genres, release_date, popularity, rating) in enumerate(merged_df.iloc[:,[0,2,3,4,5,6,7,8,9,10,11]].values):
    doc_body = {
              'app_id': appid,
              'title': name,
              'short_description': short_description,
              'about_the_game': about_the_game,
              'developers': developers,
              'publishers': publishers,
              'categories': categories,
              'genres': genres,
              'release_date': release_date,
              'popularity': popularity,
              'rating': rating
              }
    es.index(index_name, doc_body, id=i)
print('{} index populated successfully'.format(index_name))

steam_index_bm25 index populated successfully


In [32]:
print('we have made and index called {} with {} documents'.format(index_name, es.cat.count(index=index_name,h=['count'])))
index_info(index_name)

we have made and index called steam_index_bm25 with 49417
 documents

      #### INDEX INFO #####
      index_name = steam_index_bm25
      doc_count = 49417
      shard_count = 1
      deleted_doc_count = 0
      


In [42]:
index_name = 'steam_index_dfr'
#DFR index
request_body_dfr = {
    'settings': {
        'number_of_shards': 1,
        'number_of_replicas': 1,
        'index': {'similarity': {
            'dfr_similarity': {
                'type': 'DFR',
                'basic_model': 'g',
                'after_effect': 'l',
                'normalization': 'h2',
                'normalization.h2.c':'3.0'
                }
            }
        }
    },
    'mappings': {
        'properties': {
            'app_id':{'type': 'integer'},
            'title': {
                'type': 'text',
                'similarity': 'dfr_similarity'
                },
            'short_description': {
                'type': 'text',
                'analyzer': 'english',
                'similarity': 'dfr_similarity'
                },
            'about_the_game': {
                'type': 'text',
                'analyzer': 'english',
                'similarity': 'dfr_similarity'
                },
            'developer': {
                "type": "text",
                'similarity': 'dfr_similarity',
                "fields": {
                    "raw": {
                        "type": "keyword"
                        }
                    }
                },
            'publisher': {
                "type": "text",
                'similarity': 'dfr_similarity',
                "fields": {
                    "raw": {
                        "type": "keyword"
                        }
                    }
                },
            'categories': {
                "type": "text",
                'similarity': 'dfr_similarity',
                "fields": {
                    "raw": {
                        "type": "keyword"
                        }
                    }
                },
            'genres': {
                "type": "text",
                'similarity': 'dfr_similarity',
                "fields": {
                    "raw": {
                        "type": "keyword"
                        }
                    }
                },
            'release_date': {'type': 'date'},
            'popularity':{'type':'float'},
            'rating':{'type':'integer'}
        }
    }
}
try:
    es.indices.get(index_name)
    print('index {} already exists'.format(index_name))
except:
    print('creating index {}'.format(index_name))
    es.indices.create(index_name, body=request_body_dfr)
    print('{} index created successfully'.format(index_name))

creating index steam_index_dfr
steam_index_dfr index created successfully


In [None]:
for i, (appid, name, about_the_game, short_description,
       developers, publishers, categories, 
       genres, release_date, popularity, rating) in enumerate(merged_df.values[:,[0,2,3,4,5,6,7,8,9,10,11]]):
    doc_body = {
              'app_id': appid,
              'title': name,
              'short_description': short_description,
              'about_the_game': about_the_game,
              'developers': developers,
              'publishers': publishers,
              'categories': categories,
              'genres': genres,
              'release_date': release_date,
              'popularity': popularity,
              'rating': rating
              }
    es.index(index_name, doc_body, id=i)
print('{} index populated successfully'.format(index_name))

In [None]:
print('we have made and index called {} with {} documents'.format(index_name, es.cat.count(index=index_name,h=['count'])))
index_info(index_name)

In [53]:
query = 'dragon'
fields = ["title", "short_description", 'about_the_game', 'developer', 'categories', 'genres']

# start_date = my_result.result[0]
start_date = datetime('1995')
# # end_date = my_result.result[1]
end_date = str('2018')

# genre = genres_drop.value
# category = categories_drop.value

query_body = {
    "query": {
        'function_score':{
            'query':{
                "bool": {
                    "must": [{
                        "multi_match": {
                            "query": query,
                            "fields":  fields
                        }
                    }],
                    # "must": [{
                    #     'range': {
                    #         'release_date': {
                    #             'gte': start_date,
                    #             'lte': end_date
                    #         }
                    #     }
                    # }],
                    'should':[{
                        'function_score':{
                            'field_value_factor': {
                                'field': 'popularity',
                                'factor': 1,'missing':0
                                }
                            }
                        },{ 
                        'function_score': {
                            'field_value_factor': {
                                'field': 'rating',
                                'factor': 0.001,
                                # 'modifier':'square',
                                'missing': 0
                            }
                        },
                    }],
                    # 'filter':[{
                    #     'wildcard': {
                    #         'genres.raw': genre
                    #     },
                    #     'wildcard': {
                    #         'categories.raw': category
                    #     }
                    # }]
                }
            }
        }
    }
}

print('### RESULTS ####')
explain= False
results = es.search(index=index_name, body=query_body, explain=explain)['hits']['hits']
for hit in results:
    print(
        """
        title: {}, 
        score: {}
        popularity: {}
        rating: {}"""
        .format(hit['_source']['title'],
                # hit['_source']['release_date'],
                hit['_score'],
                hit['_source']['popularity'],
                hit['_source']['rating']
                )
        )
if explain:
    print('some info on results')
    pprint(dict(hit['_explanation']))

### RESULTS ####

        title: Dragon Age: Origins - Ultimate Edition, 
        score: 8.613654
        popularity: 0.5454545454545454
        rating: 10.1875

        title: Dragon Creek, 
        score: 8.513478
        popularity: 0.0
        rating: 16.5

        title: School of Dragons, 
        score: 8.30145
        popularity: 0.5454545454545454
        rating: 1.2547547547547548

        title: Dragonia, 
        score: 8.205552
        popularity: 0.3636363636363636
        rating: 5.449048152295632

        title: Liege Dragon, 
        score: 8.187167
        popularity: 0.2727272727272727
        rating: 0.0

        title: Dragon's Sin, 
        score: 8.112987
        popularity: 0.3636363636363636
        rating: 2.046052631578948

        title: Elmarion: Dragon's Princess, 
        score: 8.111111
        popularity: 0.1818181818181818
        rating: 5.0

        title: Dragons and Titans, 
        score: 8.088365
        popularity: 0.5454545454545454
        rat

***

***

In [None]:
genres_list = []
for genres in games.genres:
    for g in genres:
        genre_list.append(g)
        
genres_list = list(set(genre_list)) + ['*']   

categories_list = []
for categories in games.categories:
    for c in categories:
        categories_list.append(c)
        
categories_list = list(set(categories_list)) + ['*'] 

In [None]:
import ipywidgets as widgets
import pandas as pd
from datetime import datetime
from ipywidgets import interactive
from IPython.display import display, Markdown, clear_output


dates = pd.date_range(games.release_date.min(), games.release_date.max())

options = [(date.strftime(' %d\%m\%Y '), date) for date in dates]
index = (0, len(options)-1)

selection_range_slider = widgets.SelectionRangeSlider(
    options=options,
    index=index,
    description='Dates',
    orientation='horizontal',
    layout={'width': '500px'}
)

# Define any function
def return_dates(date_range):
    return date_range

# Create sliders using interactive
my_result = interactive(return_dates, date_range = selection_range_slider)

text = widgets.Text(
       placeholder = 'type query',
       description='Search',
        layout={'width': '500px'})

button = widgets.Button(description='search')
out = widgets.Output()

def on_button_clicked(_):
      # "linking function with output"
    with out:
      # what happens when we press the button
        clear_output()
        search()
    
# linking button and function together using a button's method
button.on_click(on_button_clicked)

genres_drop = widgets.Dropdown(description='genres',
                options= genres_list, value='*')

categories_drop = widgets.Dropdown(description='categories',
                                   options= categories_list, value='*')

# You can also view this in a notebook without using display.
display(text, my_result, genres_drop, categories_drop, button, out)