In [1]:
import pandas as pd
import requests
from  decouple import config
import ast
import time
import scipy.sparse as sp
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
def get_data(path='../all_movies_data.csv'):
    """Load the movie catalogue and lower-case the ``name`` column.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to ``'../all_movies_data.csv'``, the
        location that was previously hard-coded, so zero-argument calls
        behave exactly as before.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with every value in ``name`` converted to lower case.
    """
    movie_data = pd.read_csv(path)
    # results() lower-cases the query title, so normalise the stored titles too.
    movie_data['name'] = movie_data['name'].str.lower()
    return movie_data

In [3]:
# Load the catalogue and preview its first rows.
df=get_data()
df.head()

Unnamed: 0,tmdb_id,imdb_id,year,name,rating,description,directors,cast,genres
0,274.0,tt0102926,1991,the silence of the lambs,8.6,A young F.B.I. cadet must receive the help of ...,Jonathan Demme,"Jodie Foster, Anthony Hopkins, Lawrence A. Bon...","Crime, Drama, Thriller"
1,280.0,tt0103064,1991,terminator 2: judgment day,8.6,"A cyborg, identical to the one who failed to k...",James Cameron,"Arnold Schwarzenegger, Linda Hamilton, Edward ...","Action, Sci-Fi"
2,10020.0,tt0101414,1991,beauty and the beast,8.0,A prince cursed to spend his days as a hideous...,"Gary Trousdale, Kirk Wise","Paige O'Hara, Robby Benson, Jesse Corti, Rex E...","Animation, Family, Fantasy"
3,879.0,tt0102057,1991,hook,6.8,"When Captain James Hook kidnaps his children, ...",Steven Spielberg,"Dustin Hoffman, Robin Williams, Julia Roberts,...","Adventure, Comedy, Family"
4,8367.0,tt0102798,1991,robin hood: prince of thieves,6.9,Robin Hood decides to fight back as an outlaw ...,Kevin Reynolds,"Kevin Costner, Morgan Freeman, Mary Elizabeth ...","Action, Adventure, Drama"


In [4]:
# Number of rows and columns in the loaded frame.
df.shape

(9330, 9)

In [5]:
# Count missing values per column.
df.isna().sum()

tmdb_id        0
imdb_id        0
year           0
name           0
rating         0
description    0
directors      0
cast           0
genres         0
dtype: int64

In [6]:
# Drop every row that contains at least one missing value, then re-check the shape.
# The per-column counts shown earlier are all zero, so the shape is unchanged here.
df=df.dropna(how='any')
df.shape

(9330, 9)

In [7]:
# Write the frame to the current directory without the index column.
# NOTE(review): get_data() reads '../all_movies_data.csv', while this writes
# 'all_movie_data.csv' (different name and directory) — confirm that is intended.
df.to_csv('all_movie_data.csv',index=False)

In [8]:
def combine_data(data):
    """Collapse the text feature columns of ``data`` into one ``combine`` column.

    The identifier/numeric columns (``tmdb_id``, ``imdb_id``, ``name``,
    ``year``, ``rating``) are removed first. The first four remaining columns
    are then joined per row with ``','`` (missing values are skipped), and the
    four source text columns are dropped from the result.

    Parameters
    ----------
    data : pandas.DataFrame
        Movie catalogue containing the columns named above plus
        ``description``, ``directors``, ``cast`` and ``genres``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the ``combine`` column; ``data`` is not modified.
    """
    def _join_row(row):
        # Skip missing cells and stringify the rest before joining.
        return ','.join(row.dropna().astype(str))

    features = data.drop(columns=['tmdb_id', 'imdb_id', 'name', 'year', 'rating'])
    # Positional selection: the first four columns left after the drop.
    leading_columns = features.columns[0:4]
    features['combine'] = features[leading_columns].apply(_join_row, axis=1)
    return features.drop(columns=['description', 'directors', 'cast', 'genres'])

In [9]:
# Build the frame holding the joined 'combine' text for every movie in df.
combine_res=combine_data(df)

In [10]:
# Inspect the combined text of the row labelled 0.
combine_res.loc[0]

combine    A young F.B.I. cadet must receive the help of ...
Name: 0, dtype: object

In [11]:
def transform_data(data_combine, data, use_combined=False):
    """Compute the pairwise cosine-similarity matrix between movies.

    The previous implementation also built a bag-of-words matrix from
    ``data_combine['combine']``, stacked it with the TF-IDF matrix and computed
    a full dense similarity matrix from it, only to overwrite that result
    with the TF-IDF-only similarity on the next line. That discarded matrix is
    as large as the returned one, so it is no longer computed by default.

    Parameters
    ----------
    data_combine : pandas.DataFrame
        Frame with a ``combine`` text column (as produced by ``combine_data``).
        Only read when ``use_combined`` is true.
    data : pandas.DataFrame
        Frame with a ``description`` text column.
    use_combined : bool, optional
        ``False`` (default) returns the similarity of the TF-IDF description
        vectors, which is what this function has always returned.
        ``True`` returns the similarity of the horizontally stacked
        count + TF-IDF features instead.

    Returns
    -------
    numpy.ndarray
        Square matrix whose entry ``[i, j]`` is the cosine similarity between
        row ``i`` and row ``j`` of the input.
    """
    tfidf = TfidfVectorizer(stop_words='english', token_pattern=u'([a-zA-Z-/]{1,})')
    tfidf_matrix = tfidf.fit_transform(data['description'])

    if not use_combined:
        return cosine_similarity(tfidf_matrix)

    # Opt-in variant: bag-of-words over the combined text, stacked with TF-IDF.
    count = CountVectorizer(stop_words='english')
    count_matrix = count.fit_transform(data_combine['combine'])
    combine_sparse = sp.hstack([count_matrix, tfidf_matrix], format='csr')
    return cosine_similarity(combine_sparse, combine_sparse)

In [12]:
# Reuse combine_res (built from the same df in an earlier cell) instead of
# running combine_data(df) a second time.
transform_res=transform_data(combine_res,df)
transform_res

array([[1.        , 0.        , 0.01475334, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.01475334, 0.        , 1.        , ..., 0.        , 0.        ,
        0.04542394],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.03652816,
        0.03546562],
       [0.        , 0.        , 0.        , ..., 0.03652816, 1.        ,
        0.02517909],
       [0.        , 0.        , 0.04542394, ..., 0.03546562, 0.02517909,
        1.        ]])

In [13]:
def recommend_movies(title, data, combine, transform, top_n=11):
    """Return the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie name to look up in ``data['name']`` (exact match).
    data : pandas.DataFrame
        Catalogue with ``imdb_id``, ``tmdb_id``, ``year``, ``name`` and
        ``genres`` columns. Row *positions* must line up with the rows of
        ``transform``.
    combine : object
        Unused; kept so existing callers do not break.
    transform : sequence of sequences
        Square similarity matrix; ``transform[i][j]`` is the similarity
        between the movies at positions ``i`` and ``j`` of ``data``.
    top_n : int, optional
        Maximum number of recommendations to return. Defaults to 11, the
        number of rows the function has always returned.

    Returns
    -------
    pandas.DataFrame
        Columns ``imdb_id``, ``tmdb_id``, ``year``, ``title``, ``genres``,
        ordered from most to least similar.

    Raises
    ------
    KeyError
        If ``title`` does not appear in ``data['name']``.
    """
    # Work with row positions, not index labels: ``transform`` is positional,
    # so labels are only correct when ``data`` has a default RangeIndex.
    matches = [pos for pos, name in enumerate(data['name']) if name == title]
    if not matches:
        raise KeyError(title)
    # Several movies can share a title (remakes); use the first occurrence
    # rather than failing on an ambiguous multi-row lookup.
    index = matches[0]

    # Exclude the query movie explicitly instead of assuming it sorts first,
    # which is not guaranteed when another movie ties with a score of 1.0.
    sim_scores = [
        (pos, score)
        for pos, score in enumerate(transform[index])
        if pos != index
    ]
    sim_scores = sorted(sim_scores, key=lambda pair: pair[1], reverse=True)
    movie_indices = [pos for pos, _ in sim_scores[:top_n]]

    picked = data.iloc[movie_indices]
    return picked[['imdb_id', 'tmdb_id', 'year', 'name', 'genres']].rename(
        columns={'name': 'title'}
    )

In [14]:
def get_poster(id, timeout=10):
    """Return the full-size poster URL for a TMDB movie id.

    Parameters
    ----------
    id : int, float or str
        TMDB movie id. Whole-number floats (the catalogue stores ids such as
        ``272.0``) are sent as plain integers.
    timeout : float, optional
        Seconds to wait for the TMDB API before giving up, so one stalled
        request cannot hang the caller indefinitely.

    Returns
    -------
    str or None
        ``'https://image.tmdb.org/t/p/original'`` followed by the movie's
        ``poster_path``, or ``None`` when the response has no poster.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (unknown id, bad key, ...).
    """
    if isinstance(id, float) and id.is_integer():
        id = int(id)

    # Pass the key through ``params`` so requests handles the URL encoding.
    response = requests.get(
        'https://api.themoviedb.org/3/movie/{}'.format(id),
        params={'api_key': config('API_KEY')},
        timeout=timeout,
    )
    # Fail with a clear HTTP error instead of a KeyError on the error payload.
    response.raise_for_status()

    poster_path = response.json().get('poster_path')
    if not poster_path:
        # The field can be absent or null; concatenating it would raise.
        return None
    return 'https://image.tmdb.org/t/p/original' + poster_path

In [15]:
def results(movie_name):
    """Recommend movies similar to ``movie_name`` and attach poster URLs.

    Parameters
    ----------
    movie_name : str
        Title to look up; matched case-insensitively against the catalogue.

    Returns
    -------
    list of dict or str
        One record per recommendation with the keys ``imdb_id``, ``tmdb_id``,
        ``year``, ``title``, ``genres`` and ``poster``; or the string ``'NA'``
        when the title is not in the catalogue.
    """
    movie_name = movie_name.lower()
    movie_df = get_data()

    # Check membership first: building the similarity matrix is the expensive
    # step and is pointless for a title that is not in the catalogue.
    if movie_name not in movie_df['name'].unique():
        return 'NA'

    combine_result = combine_data(movie_df)
    transform_result = transform_data(combine_result, movie_df)
    recommendations = recommend_movies(movie_name, movie_df, combine_result, transform_result)

    # Fresh 0..n-1 index, matching what the former dict round trip produced.
    recommended = recommendations.reset_index(drop=True)
    recommended['poster'] = recommended['tmdb_id'].apply(get_poster)
    return recommended.to_dict(orient='records')

In [30]:
# Query with a mixed-case title; results() lower-cases it before the lookup.
res=results('The Dark Knight')
res

[{'imdb_id': 'tt0372784',
  'tmdb_id': 272.0,
  'year': 2005,
  'title': 'batman begins',
  'genres': 'Action, Crime, Drama',
  'poster': 'https://image.tmdb.org/t/p/original/8RW2runSEc34IwKN2D1aPcJd2UL.jpg'},
 {'imdb_id': 'tt1345836',
  'tmdb_id': 49026.0,
  'year': 2012,
  'title': 'the dark knight rises',
  'genres': 'Action, Crime, Drama',
  'poster': 'https://image.tmdb.org/t/p/original/85cWkCVftiVs0BVey6pxX8uNmLt.jpg'},
 {'imdb_id': 'tt0103776',
  'tmdb_id': 364.0,
  'year': 1992,
  'title': 'batman returns',
  'genres': 'Action, Crime, Fantasy',
  'poster': 'https://image.tmdb.org/t/p/original/jKBjeXM7iBBV9UkUcOXx3m7FSHY.jpg'},
 {'imdb_id': 'tt10314450',
  'tmdb_id': 601844.0,
  'year': 2020,
  'title': 'becky',
  'genres': 'Action, Crime, Drama',
  'poster': 'https://image.tmdb.org/t/p/original/9wqKF883Kn9c0SZV4ZmkIkFhBLh.jpg'},
 {'imdb_id': 'tt14402926',
  'tmdb_id': 736074.0,
  'year': 2021,
  'title': 'batman: the long halloween, part two',
  'genres': 'Animation, Action, Cr

In [31]:
# Print only the title of each recommendation, one per line.
for movie in res:
    print(movie['title'])

batman begins
the dark knight rises
batman returns
becky
batman: the long halloween, part two
batman ninja
batman: gotham by gaslight
kshana kshanam
batman & robin
the batman
batman: the killing joke


In [18]:
# Recommendations for a second example title.
results('interstellar')

[{'imdb_id': 'tt0397313',
  'tmdb_id': 9036.0,
  'year': 2006,
  'title': 'eight below',
  'genres': 'Adventure, Drama, Family',
  'poster': 'https://image.tmdb.org/t/p/original/xAKEG2CemdK5M86GidaAE5nRWTW.jpg'},
 {'imdb_id': 'tt1213663',
  'tmdb_id': 107985.0,
  'year': 2013,
  'title': "the world's end",
  'genres': 'Action, Comedy, Sci-Fi',
  'poster': 'https://image.tmdb.org/t/p/original/kpglnOBYmKn0AkkWDzGxzKHDbds.jpg'},
 {'imdb_id': 'tt0120738',
  'tmdb_id': 2157.0,
  'year': 1998,
  'title': 'lost in space',
  'genres': 'Action, Adventure, Family',
  'poster': 'https://image.tmdb.org/t/p/original/4miEpZmUOMqV8P0T6oq5HVBiVHw.jpg'},
 {'imdb_id': 'tt0838221',
  'tmdb_id': 4538.0,
  'year': 2007,
  'title': 'the darjeeling limited',
  'genres': 'Adventure, Comedy, Drama',
  'poster': 'https://image.tmdb.org/t/p/original/oSW5OVXTulaIXcoNwJAp5YEKpbP.jpg'},
 {'imdb_id': 'tt0120913',
  'tmdb_id': 7450.0,
  'year': 2000,
  'title': 'titan a.e.',
  'genres': 'Animation, Action, Adventure'

### TMDB API: recommend movies and test the accuracy of my model against the actual results

In [None]:
# Build the TMDB "similar movies" request URL (first page, en-US) for one movie.
# NOTE(review): 19995 is a hard-coded TMDB movie id — presumably the reference
# title for the comparison; confirm which movie it is and give it a named constant.
url=('https://api.themoviedb.org/3/movie/{}/similar?api_key={}&language=en-US&page=1'
.format(19995,config('API_KEY')))

In [None]:
# Call the endpoint and decode the JSON response body.
resp=requests.get(url).json()

In [None]:
# Print the title of every entry under the response's 'results' key.
for movie in resp['results']:
    print(movie['title'])

The Iron Giant
The X Files: I Want to Believe
DragonHeart
The 6th Day
Species
Universal Soldier
Mad Max Beyond Thunderdome
Cannibal Holocaust
Cannibal Ferox
Antz
The Tree of Life
The Reader
Alien³
Alien Resurrection
The Faculty
The Longest Day
The Gods Must Be Crazy II
Legend
The Deer Hunter
Midway
