In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Reading ratings file
ratings = pd.read_csv('ratings.dat', sep='::', encoding='latin-1', names=['user_id', 'movie_id', 'rating', 'timestamp'])

# Reading users file
users = pd.read_csv('users.dat', sep = '::', encoding='latin-1', names=['user_id', 'gender', 'age_desc', 'occ_desc', 'zipcode'])

# Reading movies file
movies = pd.read_csv('movies.dat', sep='::', encoding='latin-1', names=['movie_id', 'title', 'genres'])

  import sys
  # Remove the CWD from sys.path while we load stuff.
  del sys.path[0]


In [2]:
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [3]:
ratings = ratings.drop(['timestamp'], axis=1)
ratings.head()

Unnamed: 0,user_id,movie_id,rating
0,1,1193,5
1,1,661,3
2,1,914,3
3,1,3408,4
4,1,2355,5


In [6]:
# Break up the big genre string into a string array
movies['genres'] = movies['genres'].str.split('|')
# Convert genres to string value
movies['genres'] = movies['genres'].fillna("").astype('str')

In [7]:
# VECTORIZE!! Convert the words into vectors
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(movies['genres'])
tfidf_matrix.shape

(3883, 127)

In [8]:
# Conversion for cosine similarity
from sklearn.metrics.pairwise import linear_kernel
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim[:4, :4]

array([[1.        , 0.14193614, 0.09010857, 0.1056164 ],
       [0.14193614, 1.        , 0.        , 0.        ],
       [0.09010857, 0.        , 1.        , 0.1719888 ],
       [0.1056164 , 0.        , 0.1719888 , 1.        ]])

In [73]:
# Build a 1-dimensional array with movie titles
titles = movies['title']
indices = pd.Series(movies.index, index=movies['title'])

# Function that gets movie recommendations based on the cosine similarity score of movie genres
def genre_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:21]
    movie_indices = [i[0] for i in sim_scores]
    mv = titles.iloc[movie_indices]
    return mv


In [492]:
# let's test it!
genre_recommendations('Indiana Jones and the Temple of Doom (1984)').head(20)

31                            Twelve Monkeys (1995)
1233                                   8 1/2 (1963)
1286      Butch Cassidy and the Sundance Kid (1969)
1543                            Wild America (1997)
2048    Indiana Jones and the Temple of Doom (1984)
3011                         Liberty Heights (1999)
3224                                   Trois (2000)
3402                        Inherit the Wind (1960)
3434                        Murphy's Romance (1985)
3631                        Running Man, The (1987)
3087                        Blood on the Sun (1945)
1607                 Telling Lies in America (1997)
2603                    Run Silent, Run Deep (1958)
3689                        Golden Bowl, The (2000)
1375                         Raising Arizona (1987)
3632                                 Starman (1984)
1807                         Still Breathing (1997)
673                               Alphaville (1965)
1181                     Princess Bride, The (1987)
1188        

In [16]:
# Let's make things look nice, shall we?
metadata = pd.read_csv('data/movies_metadata.csv')
metadata.head(2)

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0


In [17]:
image_data = metadata[['imdb_id', 'poster_path']]
image_data.head()

Unnamed: 0,imdb_id,poster_path
0,tt0114709,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg
1,tt0113497,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg
2,tt0113228,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg
3,tt0114885,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg
4,tt0113041,/e64sOI48hQXyru7naBFyssKFxVd.jpg


In [18]:
links = pd.read_csv("data/links.csv")
links.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [19]:
links = links[['movieId', 'imdbId']]

In [20]:
image_data = image_data[~ image_data.imdb_id.isnull()]

In [21]:
def app(x):
    try:
        return int(x[2:])
    except ValueError:
        print(x)

In [22]:
# Clean and format image data
image_data['imdbId'] = image_data.imdb_id.apply(app)
image_data = image_data[~ image_data.imdbId.isnull()]
image_data.imdbId = image_data.imdbId.astype(int)
image_data = image_data[['imdbId', 'poster_path']]

0
0
0


Unnamed: 0,imdbId,poster_path
0,114709,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg
1,113497,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg
2,113228,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg
3,114885,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg
4,113041,/e64sOI48hQXyru7naBFyssKFxVd.jpg


In [23]:
# Merge data on the imdbId
posters = pd.merge(image_data, links, on='imdbId', how='left')
posters[['movie_id', 'poster_path']] = posters[['movieId', 'poster_path']]
posters = posters[~ posters.movieId.isnull()]
posters.movieId = posters.movieId.astype(int)
posters.head()

Unnamed: 0,imdbId,poster_path,movieId,movie_id
0,114709,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,1,1.0
1,113497,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,2,2.0
2,113228,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,3,3.0
3,114885,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,4,4.0
4,113041,/e64sOI48hQXyru7naBFyssKFxVd.jpg,5,5.0


In [24]:
# Append the data to the movies df
movies = pd.merge(movies, posters, on='movie_id', how='left')
movies.head()

Unnamed: 0,movie_id,title,genres,imdbId,poster_path,movieId
0,1,Toy Story (1995),"['Animation', ""Children's"", 'Comedy']",114709.0,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,1.0
1,2,Jumanji (1995),"['Adventure', ""Children's"", 'Fantasy']",113497.0,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,2.0
2,3,Grumpier Old Men (1995),"['Comedy', 'Romance']",113228.0,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,3.0
3,4,Waiting to Exhale (1995),"['Comedy', 'Drama']",114885.0,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,4.0
4,5,Father of the Bride Part II (1995),['Comedy'],113041.0,/e64sOI48hQXyru7naBFyssKFxVd.jpg,5.0


In [25]:
# Clean it up a bit
movies = movies.drop(['movieId'], axis=1)

Unnamed: 0,movie_id,title,genres,imdbId,poster_path
0,1,Toy Story (1995),"['Animation', ""Children's"", 'Comedy']",114709.0,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg
1,2,Jumanji (1995),"['Adventure', ""Children's"", 'Fantasy']",113497.0,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg
2,3,Grumpier Old Men (1995),"['Comedy', 'Romance']",113228.0,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg
3,4,Waiting to Exhale (1995),"['Comedy', 'Drama']",114885.0,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg
4,5,Father of the Bride Part II (1995),['Comedy'],113041.0,/e64sOI48hQXyru7naBFyssKFxVd.jpg


In [493]:
# Grab a random sample of movies - this function will be used later in the website to display 6 random titles on the index page
movies.sample(20)

Unnamed: 0,movie_id,title,genres,imdbId,poster_path
3865,3933,"Killer Shrews, The (1959)","['Horror', 'Sci-Fi']",52969.0,/gCUmIRMjG6uYTl5DyyAYi6czpOq.jpg
696,704,"Quest, The (1996)","['Action', 'Adventure']",117420.0,/z05SPecyjaryEedxX9noKQdqIFH.jpg
1992,2059,"Parent Trap, The (1998)","[""Children's"", 'Drama']",120783.0,/7vR4e3ga3mdSEpYe8kMeUv3mgND.jpg
2812,2879,Operation Condor (Feiying gaiwak) (1990),"['Action', 'Adventure', 'Comedy']",99558.0,/cU3UPFvOG1Jq7dVE0gmMGnvVboH.jpg
3114,3181,Titus (1999),['Drama'],120866.0,/82mCeQoeT2C7h7vR6Ot9MQotE1R.jpg
2725,2792,Airplane II: The Sequel (1982),['Comedy'],83530.0,/pjI6j5sVTxJXuxnr2JM2FgvyFXS.jpg
596,600,Love and a .45 (1994),['Thriller'],110395.0,/6ziFdFPGPa3iysyLgJbjrzLLpgw.jpg
2821,2888,Drive Me Crazy (1999),"['Comedy', 'Romance']",164114.0,/ljg0qL7RPUaP8Td6jQ1yOPVSodK.jpg
1122,1136,Monty Python and the Holy Grail (1974),['Comedy'],71853.0,/jRx1nDUA4sb2NEKQaukXKBz6pCf.jpg
3684,3751,Chicken Run (2000),"['Animation', ""Children's"", 'Comedy']",120630.0,/z0MafJgUnVyVbczicYMkPKKHkBi.jpg


In [26]:
from IPython.display import HTML
from IPython.display import display

def display_recommendations(df):

    images = ''
    for ref in df.poster_path:
            if ref != '':
                link = 'http://image.tmdb.org/t/p/w185/' + ref
                images += "<img style='width: 120px; margin: 0px; \
                  float: left; border: 1px solid black;' src='%s' />" \
              % link
    display(HTML(images))

In [31]:
# Create a dataframe from the recommendations
rec_df = genre_recommendations('Good Will Hunting (1997)').head(8).to_frame()

In [33]:
rec_df.head()

Unnamed: 0,title
25,Othello (1995)
26,Now and Then (1995)
29,Shanghai Triad (Yao a yao yao dao waipo qiao) ...
30,Dangerous Minds (1995)
35,Dead Man Walking (1995)


In [36]:
cols = ['title']
display_df = rec_df.join(movies.set_index(cols), on=cols)

In [101]:
display_df.head()

Unnamed: 0,title,movie_id,genres,imdbId,poster_path
25,Othello (1995),26,['Drama'],114057.0,/qM0BXEQjmnAzlkDZ0tYmV6twqMX.jpg
26,Now and Then (1995),27,['Drama'],114011.0,/wD6rLdD2Ix3u9YLgE3Do8GyCHoz.jpg
29,Shanghai Triad (Yao a yao yao dao waipo qiao) ...,30,['Drama'],115012.0,/qcoOCoN7viOhboGwhYXyApdDuiq.jpg
30,Dangerous Minds (1995),31,['Drama'],112792.0,/y5Jee3QmYOlpqfaPPbfvtdVc5wj.jpg
35,Dead Man Walking (1995),36,['Drama'],112818.0,/y19uRkAHXOHLeySuBVMqnvVnsrA.jpg


In [39]:
print('Here are some titles that our algorithm recommends for you based on your search query:')
display_recommendations(display_df)

Here are some titles that our algorithm recommends for you based on your search query:


In [None]:
# titles for easy copypasta testing

# Terminator 2: Judgment Day (1991)
# Star Wars: Episode IV - A New Hope (1977)
# Ace Ventura: When Nature Calls (1995)
# Mortal Kombat (1995)
# Dangerous Minds (1995)
# Clueless (1995)
# Casino (1995)
# GoldenEye (1995)
# Phantom of the Opera, The (1943)
# Airheads (1994)
# Homeward Bound: The Incredible Journey (1993)
# Airplane! (1980)
# Office Space (1999)
# Fight Club (1999)
# Meet Joe Black (1998)
# Death Becomes Her (1992)
# Schindler's List (1993)
# Misery (1990)
# Blues Brothers, The (1980)
# Lethal Weapon 3 (1992)

In [156]:
# Testing combined functionality
def movie_bot(title):
    titles = movies['title']
    indices = pd.Series(movies.index, index=movies['title'])

    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:21]
    movie_indices = [i[0] for i in sim_scores]
    mv = titles.iloc[movie_indices].head(12).to_frame()
    cols = ['title']
    temp_df = mv.join(movies.set_index(cols), on=cols)
    images = ''
    for ref in temp_df.poster_path:
        if ref != '':
            link = 'http://image.tmdb.org/t/p/w185/' + ref
            images += "<img style='width: 220px; height: 300px; margin: 0px; \
                float: left; border: 1px solid black;' src='%s' />" \
            % link
    return print(f'Based on your search query of {title}, here are some recommended titles:'), display(HTML(images))

In [167]:
movie_bot("Lethal Weapon 3 (1992)")

Based on your search query of Lethal Weapon 3 (1992), here are some recommended titles:


(None, None)

In [187]:
def movie_bot_web(title):
    titles = movies['title']
    indices = pd.Series(movies.index, index=movies['title'])

    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:21]
    movie_indices = [i[0] for i in sim_scores]
    mv = titles.iloc[movie_indices].head(12).to_frame()
    cols = ['title']
    temp_df = mv.join(movies.set_index(cols), on=cols)
    images = []
    for ref in temp_df.poster_path:
        if ref != '':
            link = 'http://image.tmdb.org/t/p/w185/' + ref
            images.append("<img style='width: 220px; height: 300px; margin: 0px; float: left; border: 1px solid black;' src='%s'/>" % link)
    return str(images[0:12])

In [188]:
movie_bot_web("Lethal Weapon 3 (1992)")

'["<img style=\'width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;\' src=\'http://image.tmdb.org/t/p/w185//3OrapxFNr12AhRxupQ8Z2jP1D13.jpg\' />", "<img style=\'width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;\' src=\'http://image.tmdb.org/t/p/w185//xwiZZv8jSPx4F1vvXoJthBY6FRb.jpg\' />", "<img style=\'width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;\' src=\'http://image.tmdb.org/t/p/w185//cPl88Kh44fo005rYrdIxm62GQCb.jpg\' />", "<img style=\'width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;\' src=\'http://image.tmdb.org/t/p/w185//6uYYGXYZluoxloY7uIpFkvKwq8z.jpg\' />", "<img style=\'width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;\' src=\'http://image.tmdb.org/t/p/w185//tyujnQl6topN3O9lPnGMpzGsYQM.jpg\' />", "<img style=\'width: 220px; height: 300px; margin: 0p

In [215]:
def movie_bot_2(title):
    titles = movies['title']
    indices = pd.Series(movies.index, index=movies['title'])

    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:21]
    movie_indices = [i[0] for i in sim_scores]
    mv = titles.iloc[movie_indices].head(12).to_frame()
    cols = ['title']
    temp_df = mv.join(movies.set_index(cols), on=cols)
    images = ''
    for ref in temp_df.poster_path:
        if ref != '':
            link = 'http://image.tmdb.org/t/p/w185/' + ref
            images += "<img style='width: 220px; height: 300px; margin: 0px; \
                float: left; border: 1px solid black;' src='%s' />" \
            % link
    return print(f'<h2>Based on your search of {title}, here are some movies that we recommend:</h2><br>{images}')

In [216]:
movie_bot_2("Lethal Weapon 3 (1992)")

<h2>Based on your search of Lethal Weapon 3 (1992), here are some movies that we recommend:</h2><br><img style='width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185//3OrapxFNr12AhRxupQ8Z2jP1D13.jpg' /><img style='width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185//xwiZZv8jSPx4F1vvXoJthBY6FRb.jpg' /><img style='width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185//cPl88Kh44fo005rYrdIxm62GQCb.jpg' /><img style='width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185//6uYYGXYZluoxloY7uIpFkvKwq8z.jpg' /><img style='width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185//tyujnQl6topN3O9lPnGMpzGsYQM.jp

In [268]:
def six_random_movies():
    rando_df = movies.sample(6)
    images = ''
    for ref in rando_df.poster_path:
        if ref != '':
            link = 'http://image.tmdb.org/t/p/w185/' + ref
            images += "<div class='col-md-2'><a href='#'><img style='width: 220px; height: 300px; margin: 0px; \
                float: left; border: 1px solid black;' src='%s' /></div>" \
            % link 
            
    return print(f'{images}')

In [269]:
six_random_movies()

<div class='col-md-2'><a href='#'><img style='width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185//y7AxKPPCtZyrGQKvxzn5zTQ8wFm.jpg' /></div><div class='col-md-2'><a href='#'><img style='width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185//sASN1VnJxWosdzp4mH40P47Xhhz.jpg' /></div><div class='col-md-2'><a href='#'><img style='width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185//eKpxCQllaktjfqR11ITbYstcHmD.jpg' /></div><div class='col-md-2'><a href='#'><img style='width: 220px; height: 300px; margin: 0px;                 float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185//7NdKNNrONwbmCOJgTO4wYjWzQyW.jpg' /></div><div class='col-md-2'><a href='#'><img style='width: 220px; height: 300px; margin: 0px;                 float: left;

In [326]:
def six_movies():
    rando_df = movies.sample(6)
    url = ''
    for x in rando_df:
        title = rando_df["title"]
        link = 'http://image.tmdb.org/t/p/w185/' + rando_df.poster_path
        url += "<div class='col-md-2'><a href='" + title + "'><img style='width: 220px; height: 300px; margin: 0px; \
                float: left; border: 1px solid black;' src='" + link + "'/></div>"
    link2 = repr(url)
    return link2

In [327]:
six_movies()

"2168    <div class='col-md-2'><a href='One Man's Hero ...\n3856    <div class='col-md-2'><a href='Pajama Party (1...\n2800    <div class='col-md-2'><a href='Fright Night (1...\n3708    <div class='col-md-2'><a href='Make Mine Music...\n3634    <div class='col-md-2'><a href='Alien Nation (1...\n388     <div class='col-md-2'><a href='Secret Adventur...\ndtype: object"

In [291]:
rd = movies.sample(6)

In [484]:
# Generate a df with 6 movies, selected at random, and then format the response to be placed directly into the webpage using js
def six_movies():
    rando_df = movies.sample(6)
    title = []
    url = []
    div_open = "<div class='col-md-2'><a href='"
    img_open = "'><img class='border' style='width: 220px; height: 300px; margin: 0px; float: left; border: 1px solid black;' src='"
    div_close = "'></a></div>"
    for x in rando_df.title:
        title.append(x)
    for y in rando_df.poster_path:
        url.append("http://image.tmdb.org/t/p/w185" + y)
    
    return (f"{div_open}{title[0]}{img_open}{url[0]}{div_close} \
                   {div_open}{title[1]}{img_open}{url[1]}{div_close} \
                   {div_open}{title[2]}{img_open}{url[2]}{div_close} \
                   {div_open}{title[3]}{img_open}{url[3]}{div_close} \
                   {div_open}{title[4]}{img_open}{url[4]}{div_close} \
                   {div_open}{title[5]}{img_open}{url[5]}{div_close}")

In [485]:
six_movies()

"<div class='col-md-2'><a href='Midnight Run (1988)'><img class='border' style='width: 220px; height: 300px; margin: 0px; float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185/pVUfLM9z2H2rxB4Til8YvjUjnp.jpg'></a></div>                    <div class='col-md-2'><a href='Raining Stones (1993)'><img class='border' style='width: 220px; height: 300px; margin: 0px; float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185/eWUZT0vdQZLyeKpWtwheGY23TdW.jpg'></a></div>                    <div class='col-md-2'><a href='Year of Living Dangerously (1982)'><img class='border' style='width: 220px; height: 300px; margin: 0px; float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185/3fBuFmNLTDKuL3QOlgeIvLdekJn.jpg'></a></div>                    <div class='col-md-2'><a href='Innocent Sleep, The (1995)'><img class='border' style='width: 220px; height: 300px; margin: 0px; float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185/kmdu

In [480]:
# The brains of the operation. Generate 12 recommendations using cosine similarity. 
# Then, format the response to be placed directly into the webpage
# NOTE: This will later be changed in the Flask app, as the html will be hard coded into the .html file and query the Flask app
def movie_bot_final(title):
    titles = movies['title']
    indices = pd.Series(movies.index, index=movies['title'])
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:21]
    movie_indices = [i[0] for i in sim_scores]
    mv = titles.iloc[movie_indices].head(12).to_frame()
    cols = ['title']
    temp_df = mv.join(movies.set_index(cols), on=cols)
    moviename = []
    url = []
    div_open = "<div class='col-md-2'><a href='"
    img_open = "'><img class='border' style='width: 220px; height: 300px; margin: 15px 0 0 0; float: left; border: 1px solid black;' src='"
    div_close = "'></a></div>"
    for film in temp_df.title:
        moviename.append(film)
    for poster in temp_df.poster_path:
        url.append("http://image.tmdb.org/t/p/w185" + poster)
    
    return (f'<h3>Based on your search of {title}, here are some movies that you might find interesting:</h3> \
                   {div_open}{moviename[0]}{img_open}{url[0]}{div_close} \
                   {div_open}{moviename[1]}{img_open}{url[1]}{div_close} \
                   {div_open}{moviename[2]}{img_open}{url[2]}{div_close} \
                   {div_open}{moviename[3]}{img_open}{url[3]}{div_close} \
                   {div_open}{moviename[4]}{img_open}{url[4]}{div_close} \
                   {div_open}{moviename[5]}{img_open}{url[5]}{div_close} \
                   {div_open}{moviename[6]}{img_open}{url[6]}{div_close} \
                   {div_open}{moviename[7]}{img_open}{url[7]}{div_close} \
                   {div_open}{moviename[8]}{img_open}{url[8]}{div_close} \
                   {div_open}{moviename[9]}{img_open}{url[9]}{div_close} \
                   {div_open}{moviename[10]}{img_open}{url[10]}{div_close} \
                   {div_open}{moviename[11]}{img_open}{url[11]}{div_close}')    

In [486]:
movie_bot_final("Midnight Run (1988)")

"<h3>Based on your search of Midnight Run (1988), here are some movies that you might find interesting:</h3>                    <div class='col-md-2'><a href='Leaving Las Vegas (1995)'><img class='border' style='width: 220px; height: 300px; margin: 15px 0 0 0; float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185/37qHRJxnSh5YkuaN9FgfNnMl3Tj.jpg'></a></div>                    <div class='col-md-2'><a href='Carrington (1995)'><img class='border' style='width: 220px; height: 300px; margin: 15px 0 0 0; float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185/a7w6rPdTBgWAx6lnhwHemPx9pEw.jpg'></a></div>                    <div class='col-md-2'><a href='How to Make an American Quilt (1995)'><img class='border' style='width: 220px; height: 300px; margin: 15px 0 0 0; float: left; border: 1px solid black;' src='http://image.tmdb.org/t/p/w185/wnWAZHbjX5Wj6hyb0gGmFdMADZk.jpg'></a></div>                    <div class='col-md-2'><a href='When Night Is Falling (

In [489]:
# Export cleaned and formatted movies df to a csv to be used by the Flask app
movies.to_csv(index=False, path_or_buf='moovees.csv')

In [490]:
movies.title.count()

3885

In [491]:
movies.poster_path.count()

3802