In [None]:
!pip install gensim
import gensim
import numpy as np

In [17]:
class ScoringService(object):
    model = None                # Where we keep the model when it's loaded

    @classmethod
    def get_model(cls):
        """Get the model object for this instance, loading it if it's not already loaded."""
        if cls.model == None:
            # load the gensim model
            w2v_model = gensim.models.Word2Vec.load("word2vec_2.model")
            # keep only the normalized vectors.
            # This saves memory but makes the model untrainable (read-only).
            w2v_model.init_sims(replace=True)
            # with open(os.path.join(model_path, 'decision-tree-model.pkl'), 'r') as inp:
            #     cls.model = pickle.load(inp)
            cls.model = w2v_model
        return cls.model

    @classmethod
    def predict(cls, input):
        """For the input, do the predictions and return them.

        Args:
            input (a pandas dataframe): The data on which to do the predictions. There will be
                one prediction per row in the dataframe"""

        clf = cls.get_model()

        def _aggregate_vectors(movies):
            # get the vector average of the movies in the input
            movie_vec = []
            for i in movies:
                try:
                    movie_vec.append(clf[i])
                except KeyError:
                    continue
            print(np.mean(movie_vec, axis=0))
            return np.mean(movie_vec, axis=0)[0]

        def _similar_movies(v, n = 6):
            # extract most similar movies for the input vector
            return clf.similar_by_vector(v, topn= n+1)[1:]

        input = [[x[0].lstrip("0")] for x in input] # remove leading zeroes
        recs = _similar_movies(_aggregate_vectors(input))
        return recs

In [18]:
model = ScoringService()
model.get_model()

<gensim.models.word2vec.Word2Vec at 0x7f275770e978>

In [19]:
# model.get_model()['110912']

In [23]:
movies = [['116282'], ['2042568'], ['1019452'], ['1403865'], ['190590'], ['138524'], ['335245'], ['477348'], ['887883'], ['101410']]

In [24]:
model.predict(movies)

[[ 1.98905244e-02  6.47159517e-02  2.27556601e-02  1.47032775e-02
   1.81291372e-01  5.07929772e-02 -3.90544422e-02 -6.70838654e-02
   1.61865614e-02  3.71484309e-02  7.22482353e-02  2.04833746e-02
  -3.71096991e-02 -4.96266335e-02  2.77836658e-02 -3.18300985e-02
  -4.82707620e-02 -3.56194600e-02  6.23947829e-02 -1.04187839e-02
   1.26012623e-01  7.68777505e-02  9.33110937e-02 -9.77029279e-02
  -6.96715200e-03  4.87725697e-02  1.81066498e-04  7.09821880e-02
   3.94708961e-02  1.28469259e-01 -3.54401618e-02 -9.36877355e-02
   1.68537274e-02 -1.43596549e-02  2.94621214e-02 -4.13775034e-02
  -1.51085958e-01  1.63829401e-01  5.99360652e-02  3.24331820e-02
  -8.74773264e-02 -7.46806711e-03  1.27705932e-01  6.73395097e-02
  -1.65784061e-02  6.18338399e-03 -1.68638304e-02  8.31355006e-02
   6.29530624e-02  2.14264821e-02  8.73977467e-02  2.45746002e-02
   1.84437111e-02  2.47583184e-02 -2.91985907e-02  1.33994464e-02
  -1.19390078e-01 -6.72250800e-03  1.27457529e-01  5.29603213e-02
  -1.19532



[('482633', 0.9210887551307678),
 ('1328913', 0.9206011891365051),
 ('468442', 0.919946551322937),
 ('1109499', 0.9166485667228699),
 ('1121964', 0.9153555631637573),
 ('1326954', 0.9153527021408081)]