In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from ast import literal_eval

In [3]:
# Build the CSV path with os.path.join so the separators match the host OS;
# the original hard-coded '\\' and therefore only resolved on Windows.
ds = pd.read_csv(os.path.join(os.getcwd(), 'sample data', 'sampledata.csv'))

In [4]:
# Columns whose cells are passed through literal_eval to turn the stored
# text back into Python objects.
features = ['keywords','genres']
for feature in features:
    ds[feature] = ds[feature].map(literal_eval)


In [5]:
def get_list(x, limit=50):
    """Return the unique ``name`` values found in a list of dicts.

    Parameters
    ----------
    x : object
        Expected to be a list whose elements can be indexed with ``'name'``.
        Any value that is not a list is treated as "no data".
    limit : int, optional
        Only the first ``limit`` names are kept before de-duplication.
        Defaults to 50, the value that used to be hard-coded.

    Returns
    -------
    list
        The distinct names in order of first appearance, or ``[]`` when
        ``x`` is not a list.

    Raises
    ------
    KeyError, TypeError
        If an element of ``x`` cannot be indexed with ``'name'``.
    """
    if not isinstance(x, list):
        return []
    names = [entry['name'] for entry in x][:limit]
    # dict.fromkeys removes duplicates while keeping first-seen order.
    # Going through set() produced an arbitrary order that can change between
    # interpreter runs, and the order matters downstream because the names are
    # joined into text that is vectorised with bigrams.
    return list(dict.fromkeys(names))


In [6]:
# Reduce every parsed cell in the feature columns to a plain list of names.
for feature in features:
    ds[feature] = ds[feature].map(get_list)


In [7]:
def create_key(x):
    """Return the row's genres followed by its keywords as one space-separated string.

    ``x`` must support ``x['genres']`` and ``x['keywords']``, each yielding an
    iterable of strings.
    """
    genre_text = ' '.join(x['genres'])
    keyword_text = ' '.join(x['keywords'])
    return '{} {}'.format(genre_text, keyword_text)
# Apply create_key to every row and store the resulting text in a 'key' column.
ds['key'] = ds.apply(create_key, axis='columns')

In [8]:

# min_df=1 keeps every term that occurs in at least one document, which is
# what the original min_df=0 amounted to; current scikit-learn parameter
# validation rejects an integer min_df below 1.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')

# Blank out missing text so the concatenation below never yields NaN.
# The result is assigned back to the frame: calling fillna(inplace=True) on a
# selected column is deprecated and does not update `ds` under pandas
# copy-on-write. 'genres' and 'keywords' hold lists produced by get_list at
# this point, so they contain no missing values and are not filled here.
text_columns = ['title', 'tagline', 'overview']
ds[text_columns] = ds[text_columns].fillna(' ')

# NOTE: this extends ds['key'] with itself as a starting point, so running the
# cell twice appends the text twice; re-run the create_key cell before it.
ds['key'] = ds['key'] + ' ' + ds['title'] + ' ' + ds['tagline'] + ' ' + ds['overview']

tfidf_matrix = tf.fit_transform(ds['key'])


In [9]:
# Called with a single matrix, cosine_similarity compares its rows with each
# other, giving the same square matrix as passing the matrix twice.
cos_similarities = cosine_similarity(tfidf_matrix)

In [10]:
# Neighbours stored per movie. The original slice took the 49 best-scoring
# rows and discarded the first one, i.e. it kept 48.
NUM_NEIGHBOURS = 48

# Work with row positions throughout: the similarity matrix is positional, so
# using index labels (as iterrows / ds['id'][i] did) is only correct while the
# frame still has its default 0..N-1 index.
movie_ids = ds['id'].to_numpy()

result_dict = {}
for pos, movie_id in enumerate(movie_ids):
    scores = cos_similarities[pos]
    ranked = scores.argsort()[::-1]  # highest similarity first
    # Drop the movie itself explicitly; assuming it always sorts first breaks
    # when another row ties with it on score.
    ranked = ranked[ranked != pos][:NUM_NEIGHBOURS]
    result_dict[movie_id] = [(scores[i], movie_ids[i]) for i in ranked]

In [11]:
def item(id):
    """Return the title of the first row in ``ds`` whose ``id`` column equals ``id``.

    Raises IndexError when no row matches.
    """
    matching_titles = ds.loc[ds['id'] == id, 'title']
    return matching_titles.tolist()[0]


In [12]:
def recommend(id, num=5):
    """Print a movie's title followed by the titles of its most similar movies.

    Parameters
    ----------
    id : hashable
        Movie id; must be a key of ``result_dict`` and resolvable by ``item``.
    num : int, optional
        Maximum number of recommendations to print. Defaults to 5, the value
        that used to be hard-coded.

    Returns
    -------
    None
        The titles are printed, not returned.
    """
    print("Movie "+ item(id) )
    print(' ')
    for score, movie_id in result_dict[id][:num]:
        # Entries with zero similarity are not worth recommending.
        if score > 0:
            print(item(movie_id))
            

In [13]:
# Sample from the ids that actually exist in the data. The original
# np.random.randint(1, len(cos_similarities)) assumed ids run 1..N and, since
# the upper bound is exclusive, could never pick the last one.
recommend(id=np.random.choice(ds['id'].to_numpy()))

Movie PCU
 
High School Musical 3: Senior Year
The Spectacular Now
College
Project X
The New Guy


In [14]:
# Sample from the ids that actually exist in the data. The original
# np.random.randint(1, len(cos_similarities)) assumed ids run 1..N and, since
# the upper bound is exclusive, could never pick the last one.
recommend(id=np.random.choice(ds['id'].to_numpy()))

Movie The Canyons
 
Love Letters
Notes on a Scandal
The Good Girl
Johnny Suede
Cruel Intentions


In [15]:
# Show the recommendations for the movie whose id is 1.
recommend(id=1)

Movie Avatar
 
Aliens
Moonraker
Alien³
Mission to Mars
Alien


In [16]:
titles = ds['title']
# Map each title to its row *position*. The similarity matrix and
# titles.iloc are both positional, so storing ds.index labels here was only
# correct while the frame kept its default 0..N-1 index.
indices = pd.Series(np.arange(len(ds)), index=ds['title'])
def get_recommendations(title, top_n=5):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in ``indices``.
    top_n : int, optional
        Number of recommendations to return. Defaults to 5, the value that
        used to be hard-coded.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` titles, most similar first, excluding the queried
        movie itself.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    print('Movie ' +title )
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions; use the
    # first occurrence instead of failing on the matrix lookup below.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    scores = cos_similarities[idx]
    # Stable descending sort, so tied scores keep their original row order.
    ranked = np.argsort(-scores, kind='stable')
    # Remove the queried movie by position rather than assuming it ranks first.
    ranked = ranked[ranked != idx][:top_n]
    return titles.iloc[ranked]


In [17]:
# Title-based lookup for the same movie; the returned Series is the cell output.
get_recommendations(title='Avatar')

Movie Avatar


2403             Aliens
1531          Moonraker
838              Alien³
373     Mission to Mars
3158              Alien
Name: title, dtype: object

In [None]:
Bibliography
Balabanovic, M. & Shoham, Y. (1997) FAB: Content-based collaborative recommendation. COMMUNICATIONS OF THE ACM, 40(3).
Emmanuel, M. & Deshpande, A.R. (2016) Context based Recommendation Methods: A Brief Review. International Journal of Computer Applications (0975 – 8887) International Conference on Cognitive Knowledge Engineering 2016.
Hapke, H., Howard, C. & Lane, H. (2019) Natural Language Processing in Action. Manning Publications.
Towards Data Science (Medium). (2019) Introduction to two approaches of content-based recommendation system [Online]. Available from: https://towardsdatascience.com/introduction-to-two-approaches-of-content-based-recommendation-system-fc797460c18c [Accessed 11 January 2020].
Vaidya, N. & Khachane, A.R. (2017) Recommender systems-the need of the ecommerce ERA. International Conference on Computing Methodologies and Communication (ICCMC), pp. 100–104. Available at: https://ieeexplore.ieee.org/document/8282616/keywords.
TFIDF. (2020) http://www.tfidf.com/ [Online]. Available from: http://www.tfidf.com/ [Accessed 12 January 2020].


