In [1]:
import flask

In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Create the Flask application; HTML templates are looked up in the 'templates' folder.
app = flask.Flask(__name__, template_folder='templates')

In [4]:
# Load the movie table from a CSV file located relative to the working directory.
df = pd.read_csv("movie_dataset.csv")

In [5]:
# Every movie title as a plain Python list, in row order.
# Series.tolist() is positional, so it does not depend on the frame having a
# 0..n-1 index the way a per-element df['title'][i] lookup does.
all_titles = df['title'].tolist()

In [6]:
# Text columns whose missing values are blanked out before they are combined.
features = ['keywords','cast','genres','director']

In [7]:
def combine_features(row):
    """Return one string holding a row's keywords, cast, genres and director.

    The four values are read from ``row`` by key, in that order, and joined
    with a single space between neighbours. Each value must be a string.
    """
    parts = (row['keywords'], row['cast'], row['genres'], row['director'])
    return " ".join(parts)

In [8]:
# Replace NaNs in all feature columns with an empty string in one assignment.
df[features] = df[features].fillna('')

In [9]:
# Call combine_features() on every row (axis=1) and store the resulting string
# in the "combined_features" column.
df["combined_features"] = df.apply(combine_features,axis=1)

In [10]:
# Spot-check: show the combined feature string built for the first row.
df.iloc[0].combined_features

'culture clash future space war space colony society Sam Worthington Zoe Saldana Sigourney Weaver Stephen Lang Michelle Rodriguez Action Adventure Fantasy Science Fiction James Cameron'

In [11]:
# Turn each movie's combined text into a row of token counts.
cv = CountVectorizer() # new vectorizer with default settings
count_matrix = cv.fit_transform(df["combined_features"]) # learn the vocabulary and build the count matrix in one step
# Lookup from movie title to the frame's index label for that row.
# NOTE(review): a title that occurs more than once makes indices[title] return
# several labels instead of one — confirm whether the dataset has duplicates.
indices = pd.Series(df.index, index=df['title'])

In [12]:
# Pairwise cosine similarity between all rows of the count matrix (row i vs. row j).
cosine_sim = cosine_similarity(count_matrix)

In [13]:
# Renumber the rows 0..n-1. drop=True discards the previous index instead of
# inserting it as an extra column, so re-running this cell leaves the frame
# unchanged rather than adding another index column each time.
df = df.reset_index(drop=True)

In [14]:
def get_recommendations(title, top_n=10):
    """Return the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie title; must be a key of the module-level ``indices`` lookup.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` for the recommended movies, most similar first, with
        the columns title, overview, director, cast, vote_average, genres
        and homepage.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # A title that occurs more than once yields a Series of row numbers;
    # use the first occurrence so the row lookup below stays one-dimensional.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Reuse the module-level similarity matrix instead of recomputing the
    # full pairwise matrix on every call.
    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

    # Exclude the query movie by position rather than assuming it sorts first:
    # a movie with empty features has similarity 0 to itself, and an identical
    # twin can tie with it at the top.
    movie_indices = [pos for pos, _ in ranked if pos != idx][:top_n]

    columns = ['title', 'overview', 'director', 'cast', 'vote_average', 'genres', 'homepage']
    return df[columns].iloc[movie_indices]


In [None]:
# Set up the main route
@app.route('/', methods=['GET', 'POST'])
def main():
    """Render the search form, or the recommendations for a submitted title.

    Any request that is not a POST (GET, plus the HEAD requests Flask routes
    to a GET view automatically) is answered with ``index.html``. A POST reads
    the ``movie_name`` form field and renders ``positive.html`` with the
    recommended movies, or ``negative.html`` when no matching title is found.
    """
    if flask.request.method != 'POST':
        return flask.render_template('index.html')

    # .get() keeps a missing form field from aborting the request with a 400;
    # an absent or blank name is handled like any other unknown title.
    raw_name = flask.request.form.get('movie_name', '').strip()
    titled_name = raw_name.title()

    m_name = None
    if raw_name:
        if titled_name in all_titles:
            m_name = titled_name
        else:
            # str.title() capitalises every word ("Lord Of The Rings"), which
            # misses real titles containing lower-case words or apostrophes,
            # so fall back to a case-insensitive comparison.
            wanted = raw_name.casefold()
            m_name = next(
                (t for t in all_titles if isinstance(t, str) and t.casefold() == wanted),
                None,
            )

    if m_name is None:
        return flask.render_template('negative.html', name=titled_name)

    result_final = get_recommendations(m_name)
    # Pull each column out by name; positional indexing such as row[0] on a
    # label-indexed Series is deprecated in pandas.
    return flask.render_template(
        'positive.html',
        movie_names=result_final['title'].tolist(),
        movie_date=result_final['overview'].tolist(),
        movie_director=result_final['director'].tolist(),
        movie_cast=result_final['cast'].tolist(),
        movie_rating=result_final['vote_average'].tolist(),
        movie_genres=result_final['genres'].tolist(),
        movie_links=result_final['homepage'].tolist(),
        search_name=m_name,
    )

        
# Start Flask's built-in server with its default settings when this module runs
# as __main__. The call keeps serving requests until it is interrupted.
if __name__ == '__main__':
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [04/Jul/2020 14:47:45] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Jul/2020 14:47:48] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [04/Jul/2020 14:48:01] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/Jul/2020 14:48:13] "POST / HTTP/1.1" 200 -
