In [2]:
import pickle
import datetime
import json
import requests
import pandas as pd
import numpy as np
from scipy.sparse import hstack
from elasticsearch import Elasticsearch

In [3]:
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

In [12]:
df = pd.read_csv('ratings.csv')
df['datetime'] = df['timestamp'].map(lambda x: datetime.datetime.fromtimestamp(x))

df_ratings = df

In [4]:
user_cluster_map = pickle.load(open('user_cluster_map.pkl', 'rb'))
cluster_user_map = pickle.load(open('cluster_user_map.pkl', 'rb'))

In [5]:
q = 'star'
u = 15

In [6]:
query = {
        'query': {
            'match': {
                'title': q
            }
        },
        'sort': [{'_score': 'desc'}]
    }
res = es.search(index='movies', doc_type='movie', body=json.dumps(query))
hits = res['hits']['hits']

hits

[{'_index': 'movies',
  '_type': 'movie',
  '_id': '1750',
  '_score': 6.2658124,
  '_source': {'title': 'Star Kid (1997)',
   'genres': ['Adventure', 'Children', 'Fantasy', 'Sci-Fi']}},
 {'_index': 'movies',
  '_type': 'movie',
  '_id': '1613',
  '_score': 6.2658124,
  '_source': {'title': 'Star Maps (1997)', 'genres': ['Drama']}},
 {'_index': 'movies',
  '_type': 'movie',
  '_id': '800',
  '_score': 6.2658124,
  '_source': {'title': 'Lone Star (1996)',
   'genres': ['Drama', 'Mystery', 'Western']}},
 {'_index': 'movies',
  '_type': 'movie',
  '_id': '4757',
  '_score': 6.2658124,
  '_source': {'title': 'Rock Star (2001)',
   'genres': ['Comedy', 'Drama', 'Musical']}},
 {'_index': 'movies',
  '_type': 'movie',
  '_id': '26487',
  '_score': 6.2658124,
  '_source': {'title': 'Star 80 (1983)', 'genres': ['Drama']}},
 {'_index': 'movies',
  '_type': 'movie',
  '_id': '68358',
  '_score': 6.2658124,
  '_source': {'title': 'Star Trek (2009)',
   'genres': ['Action', 'Adventure', 'Sci-Fi', '

In [10]:
user_cluster = user_cluster_map[u]
other_users = cluster_user_map[user_cluster]

In [13]:
movies_to_rate = [x['_id'] for x in hits]
movies_to_rate

['1750',
 '1613',
 '800',
 '4757',
 '26487',
 '68358',
 '1410',
 '329',
 '2393',
 '5944']

In [15]:
kmeans_rating = {m: 0 for m in movies_to_rate}
for movie in movies_to_rate:
    r = 0
    r_count = 0
    for u in other_users:
        candidate_df = df_ratings[(df_ratings['userId'] == u) & (df_ratings['movieId'] == movie)]
        if not candidate_df.empty:
            r += candidate_df.iloc[0]['rating']
            r_count += 1
    if r_count != 0:
        r = r / r_count
        kmeans_rating[movie] = r

In [16]:
max_score = max(x['_score'] for x in hits)

for h in hits:
    h['_r'] = kmeans_rating[h['_id']]

def rating_rank_boost(rating):
    return rating / max_score

results = sorted(hits, key=lambda x: 0.6 * x['_score'] + 0.4 * rating_rank_boost(x['_r']), reverse=True)
results

[{'_index': 'movies',
  '_type': 'movie',
  '_id': '1750',
  '_score': 6.2658124,
  '_source': {'title': 'Star Kid (1997)',
   'genres': ['Adventure', 'Children', 'Fantasy', 'Sci-Fi']},
  '_r': 0},
 {'_index': 'movies',
  '_type': 'movie',
  '_id': '1613',
  '_score': 6.2658124,
  '_source': {'title': 'Star Maps (1997)', 'genres': ['Drama']},
  '_r': 0},
 {'_index': 'movies',
  '_type': 'movie',
  '_id': '800',
  '_score': 6.2658124,
  '_source': {'title': 'Lone Star (1996)',
   'genres': ['Drama', 'Mystery', 'Western']},
  '_r': 0},
 {'_index': 'movies',
  '_type': 'movie',
  '_id': '4757',
  '_score': 6.2658124,
  '_source': {'title': 'Rock Star (2001)',
   'genres': ['Comedy', 'Drama', 'Musical']},
  '_r': 0},
 {'_index': 'movies',
  '_type': 'movie',
  '_id': '26487',
  '_score': 6.2658124,
  '_source': {'title': 'Star 80 (1983)', 'genres': ['Drama']},
  '_r': 0},
 {'_index': 'movies',
  '_type': 'movie',
  '_id': '68358',
  '_score': 6.2658124,
  '_source': {'title': 'Star Trek (2