# Cleaning the data

In [1]:
import ast
import pandas as pd
import numpy as np

def cleanmoviedb():    #cleaning and preparing the database
  movies = pd.read_csv('tmdb_5000_movies.csv')
  credits = pd.read_csv('tmdb_5000_credits.csv')
  movies= pd.merge(movies, credits, on ='title')
  movies = movies[['movie_id','title','overview','genres','keywords','cast','crew','vote_average','popularity']]
  movies.dropna(inplace=True)

  def clean(tobecleaned):
    cleanlist = []
    for item in ast.literal_eval(tobecleaned):
      cleanlist.append(item['name'])
    return cleanlist

  movies['genres'] = movies['genres'].apply(clean)
  movies['keywords'] = movies['keywords'].apply(clean)

  def top3clean(tobecleaned): #cast-returns only top 3 cast member's names
    cleanlist = []
    counter = 0
    for item in ast.literal_eval(tobecleaned):
      if counter!= 3:
        cleanlist.append(item['name'])
        counter+=1
      else:
        break
    return cleanlist

  movies['cast'] = movies['cast'].apply(top3clean)

  def fetchdirector(tobecleaned):
    cleanlist = []
    for item in ast.literal_eval(tobecleaned):
      if item['job'] == 'Director' :
        cleanlist.append(item['name'])
        break
    return cleanlist

  movies['crew'] = movies['crew'].apply(fetchdirector)
  movies['overview'] = movies['overview'].apply(lambda x: x.split())
  movies['genres'] = movies['genres'].apply(lambda x:[i.replace(" ","") for i in x])
  movies['keywords'] = movies['keywords'].apply(lambda x:[i.replace(" ","") for i in x])
  movies['cast'] = movies['cast'].apply(lambda x:[i.replace(" ","") for i in x])
  movies['crew'] = movies['crew'].apply(lambda x:[i.replace(" ","") for i in x])

  movies.to_csv('model_ready_movies_db.csv')
  return movies    #has id, title, keywords, overview, genre, cast, crew - cleaned in proper list format

model_movies_df = cleanmoviedb()
model_movies_df.head()

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,vote_average,popularity
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron],7.2,150.437577
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski],6.9,139.082615
2,206647,Spectre,"[A, cryptic, message, from, Bond’s, past, send...","[Action, Adventure, Crime]","[spy, basedonnovel, secretagent, sequel, mi6, ...","[DanielCraig, ChristophWaltz, LéaSeydoux]",[SamMendes],6.3,107.376788
3,49026,The Dark Knight Rises,"[Following, the, death, of, District, Attorney...","[Action, Crime, Drama, Thriller]","[dccomics, crimefighter, terrorist, secretiden...","[ChristianBale, MichaelCaine, GaryOldman]",[ChristopherNolan],7.6,112.31295
4,49529,John Carter,"[John, Carter, is, a, war-weary,, former, mili...","[Action, Adventure, ScienceFiction]","[basedonnovel, mars, medallion, spacetravel, p...","[TaylorKitsch, LynnCollins, SamanthaMorton]",[AndrewStanton],6.1,43.926995


In [1]:
import pandas as pd
import numpy as np
model_movies_df = pd.read_csv("../5000 records/model_ready_movies_db.csv")

In [2]:
print(model_movies_df.iloc[0]['genres'])
print(type(model_movies_df.iloc[0]['genres']))
print(type(model_movies_df.iloc[0]['genres'][0]))

['Action', 'Adventure', 'Fantasy', 'ScienceFiction']
<class 'str'>
<class 'str'>


In [5]:
model_movies_df.columns

Index(['Unnamed: 0', 'movie_id', 'title', 'overview', 'genres', 'keywords',
       'cast', 'crew', 'vote_average', 'popularity', 'tags'],
      dtype='object')

In [3]:
import ast
model_movies_df['genres'] = model_movies_df['genres'].apply(ast.literal_eval)
model_movies_df['overview'] = model_movies_df['overview'].apply(ast.literal_eval)
#model_movies_df['IMAGE INFO'] = model_movies_df['IMAGE INFO'].apply(ast.literal_eval)
model_movies_df['keywords'] = model_movies_df['keywords'].apply(ast.literal_eval)
model_movies_df['cast'] = model_movies_df['cast'].apply(ast.literal_eval)
model_movies_df['crew'] = model_movies_df['crew'].apply(ast.literal_eval)

In [3]:
model_movies_df.dtypes

Unnamed: 0        int64
movie_id          int64
title            object
overview         object
genres           object
keywords         object
cast             object
crew             object
vote_average    float64
popularity      float64
dtype: object

# Single Movie input by user

In [4]:
#Single movie: 
def singlemovie(model_movies_df,moviename,headnum,flag):
  if flag == 'multiple':
    model_movies_df['tags'] = model_movies_df['genres'] + model_movies_df['keywords']
  if flag == 'single':
    model_movies_df['tags'] = model_movies_df['overview'] + model_movies_df['genres'] + model_movies_df['keywords'] + model_movies_df['cast'] + model_movies_df['crew']
  
  df = model_movies_df[['movie_id','title','tags','vote_average','popularity']]
  print(df.iloc[0]['tags'])
  print(df.head())
  
  '''print("type", df.dtypes)
  
  mylist = df.iloc[0]['tags']
  print("type of mylist", type(mylist))
  print("type of mylist[0]", type(mylist[0]))'''
  
  df['tags'] = df['tags'].apply(lambda x: " ".join(x))
  df['tags'] = df['tags'].apply(lambda x: x.lower())

  #as we will see list of feature names we see - 'activity', 'activities' and such words are separate words
  #To solve this problem, we do the following import - nltk (natural language processing library) and run the code: 
  #nltk has a root word for similar words (ex: danc for dancing, dance, danced etc)
  import nltk
  from nltk.stem.porter import PorterStemmer
  psobj = PorterStemmer()
  def stem(text):
    temp = []
    for i in text.split():
      temp.append(psobj.stem(i))
    return " ".join(temp)

  df['tags'] = df['tags'].apply(stem)
  #till now df has movie_id, title, tags(as stemmed words) and vote_average

  from sklearn.feature_extraction.text import CountVectorizer
  cvobj = CountVectorizer(max_features=6000, stop_words='english')
  vectors = cvobj.fit_transform(df['tags']).toarray() 
  #cvobj is a sci-py sparse matrix which we convert to a numpy array

  #now we calculate the distance of every movie, with every other movie 
  #higher the distance, lesser similarity
  #we do not calculate Euclid's distance, instead do the cosine distance method - angle between the two vectors
  #scikit learn's function - cosinesimilarity
  from sklearn.metrics.pairwise import cosine_similarity
  similarity = cosine_similarity(vectors)

  movie_index = df[df['title']==moviename].index[0]    #boolean masking
  distances = similarity[movie_index]

  '''
  make a list of id,similarity,vote_average and make it a dict and iterate over it directly
  movieidlist = [12,34,345,758,5,3455]
  distance = [0.2,0.33,1,0.8,0.1,0.1]
  movieimdblist = [7,6,8,9,6,8]
  combinedlist = zip(distance, movieimdblist)
  opdict = dict(zip(movieidlist,combinedlist))
  print(opdict)
  finaldict = sorted(opdict.items(), reverse=True, key=lambda x: (x[1][0],x[1][1]))
  print(finaldict)
  '''
  recommendedlist = sorted(list(enumerate(distances)),reverse=True, key=lambda x:x[1])[1:headnum]
  #enumerate with a list - list of the movie_ids of the dataframe

  imdbrecommendedlist = []

  for i in recommendedlist: #add the imdb rating for every element selected. 
    i = list(i)
    i.append(df.iloc[i[0]]['vote_average'])   #adding imdb rating
    i[0] = df.iloc[i[0]]['movie_id']
    imdbrecommendedlist.append(i)


  '''
  print("Rec list",recommendedlist)
  print("__________________________________")
  print("Imdb rec list",imdbrecommendedlist)
  print(imdbrecommendedlist)
  '''

  roundimdblist = []
  sortedimdblist = []
  for i in imdbrecommendedlist:
    roundimdblist.append([i[0],round(i[1],2),i[2]])
  sortedimdblist = sorted(roundimdblist, reverse = True, key = lambda x: (x[1],x[2]))
  
  return sortedimdblist


suggested_for_one_movie = singlemovie(model_movies_df,'Spider-Man 2',20,'single')
#print(suggested_for_one_movie)

for i in suggested_for_one_movie:
  print("Title:", " ".join(model_movies_df[model_movies_df['movie_id']==i[0]].title.tolist()), "\nIMDb Rating:", i[2],"\nSimilarity", i[1])


#returns a list of recommended movies, the elements of the list are the (movie_id, similarity and imdb rating)
  
  

['In', 'the', '22nd', 'century,', 'a', 'paraplegic', 'Marine', 'is', 'dispatched', 'to', 'the', 'moon', 'Pandora', 'on', 'a', 'unique', 'mission,', 'but', 'becomes', 'torn', 'between', 'following', 'orders', 'and', 'protecting', 'an', 'alien', 'civilization.', 'Action', 'Adventure', 'Fantasy', 'ScienceFiction', 'cultureclash', 'future', 'spacewar', 'spacecolony', 'society', 'spacetravel', 'futuristic', 'romance', 'space', 'alien', 'tribe', 'alienplanet', 'cgi', 'marine', 'soldier', 'battle', 'loveaffair', 'antiwar', 'powerrelations', 'mindandsoul', '3d', 'SamWorthington', 'ZoeSaldana', 'SigourneyWeaver', 'JamesCameron']
   movie_id                                     title  \
0     19995                                    Avatar   
1       285  Pirates of the Caribbean: At World's End   
2    206647                                   Spectre   
3     49026                     The Dark Knight Rises   
4     49529                               John Carter   

                             

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: " ".join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: x.lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(stem)


Title: Spider-Man 3 
IMDb Rating: 5.9 
Similarity 0.51
Title: Spider-Man 
IMDb Rating: 6.8 
Similarity 0.4
Title: The Amazing Spider-Man 
IMDb Rating: 6.5 
Similarity 0.35
Title: Iron Man 2 
IMDb Rating: 6.6 
Similarity 0.28
Title: Superman 
IMDb Rating: 6.9 
Similarity 0.27
Title: Deadpool 
IMDb Rating: 7.4 
Similarity 0.26
Title: Batman Batman 
IMDb Rating: 7.0 
Similarity 0.26
Title: Ant-Man 
IMDb Rating: 7.0 
Similarity 0.26
Title: All Is Lost 
IMDb Rating: 6.6 
Similarity 0.26
Title: The Amazing Spider-Man 2 
IMDb Rating: 6.5 
Similarity 0.26
Title: Hancock 
IMDb Rating: 6.2 
Similarity 0.26
Title: Dungeons & Dragons: Wrath of the Dragon God 
IMDb Rating: 4.8 
Similarity 0.26
Title: The Legend of Hercules 
IMDb Rating: 4.4 
Similarity 0.26
Title: Big Fish 
IMDb Rating: 7.6 
Similarity 0.25
Title: Batman Batman 
IMDb Rating: 7.0 
Similarity 0.25
Title: Griff the Invisible 
IMDb Rating: 6.1 
Similarity 0.25
Title: In the Name of the King: A Dungeon Siege Tale 
IMDb Rating: 4.1 
Simi

# Multiple movies input by user

In [5]:
#retrieve the tags(genre, overview, keywords) of all the movies and combine into one
#then we perform the stemming of the tags

import nltk
from nltk.corpus import wordnet
nltk.download('wordnet')

def synonym(word):
  synonyms = []
  for syn in wordnet.synsets(word):
    for l in syn.lemmas():
      synonyms.append(l.name())
  return synonyms

from nltk.stem.porter import PorterStemmer
psobj = PorterStemmer()
def stem(text):
  temp = []
  for i in text.split():
      temp.append(psobj.stem(i))
  return " ".join(temp)


def functolist(lst):
  return " ".join(lst)


def rec(df,multiplemovielist):
  flag='multiple'
  firstlevellist = []
  '''creating the first movie list'''
  for i in multiplemovielist: 
    firstlevellist.extend(singlemovie(model_movies_df,i,40,flag))

  '''creating the tag_of_users_input_list and storing the ids of the original movies entered by user'''
  movie_id_of_users_input_list = []   #movie_ids of the movies entered by the user
  tag_of_users_input_list = []
  for movie in multiplemovielist:
    idofmovie = df[df['title']==movie]['movie_id'].item()
    movie_id_of_users_input_list.append(idofmovie)
    tag_of_users_input_list.append(stem(" ".join(df[df['movie_id']==idofmovie]['tags'].item())))   
    #the above list has n elements, where the user has entered n movies, and every element is the tag 
    #of a particular movie
  #print(movie_id_of_users_input_list)
  #print(tag_of_users_input_list)




  '''storing the movie ids of the ovies in firstlevellist'''
  movie_id_of_firstlevellist_movies = []
  for i in firstlevellist:
    movie_id_of_firstlevellist_movies.append(i[0])


  
  new_df = df[ df['movie_id'].isin(movie_id_of_firstlevellist_movies)] #indices of the firstlevellist.
  new_df['tags'] = new_df['tags'].apply(functolist)
  new_df['tags'] = new_df['tags'].apply(stem)

  #print(new_df['title'])
  #print(counters) 


  counterobjlist = []
  newmovielist = []

  for indicer in range(len(new_df)):     #for our example of two movies: 78 records, i.e 0 to 77
    counters = np.zeros((len(tag_of_users_input_list),1))
    for i in new_df.iloc[indicer]['tags'].split(): #for each word in every movie's tag (movies from new_df)
      for j in range(len(tag_of_users_input_list)):    
        #the above for loop is to check if any word of the current movie's tag, 
        #belongs to the tags of the movies entered by the user.
        if (stem(i) in tag_of_users_input_list[j].split()) or (stem(" ".join(synonym(i))) in tag_of_users_input_list[j].split()):
          counters[j]+=1

    if all(x>=1 for x in counters):
      newmovielist.append([new_df.iloc[indicer]['title'], new_df.iloc[indicer]['vote_average']]) #combine with ratings
      counterobjlist.append(counters)


  print(sorted(newmovielist, reverse=True, key= lambda x: x[1])) #sort by ratings
  #print(counterobjlist)

  
watchlist = ['Rush Hour 2', 'The Notebook']
#rec(model_movies_df,watchlist)

rec(model_movies_df,['Nerve', 'The Notebook']) 

# nerve - mystery, adventure, crime ; based on novel, technology, internet, hacking, teenager, newyork city, game, adaption, dare,
# cellphone-video,based on yound adult novel, smartphone, taking a risk

#notebook - romance, drama; poem, river, sadness, fight, love of one's life, dementia, class, secret love, tears, candle, mail-box

#spider-man 2 - action, adventure, fantasy; dual identity, love of one's life, pizza boy, marvel comic, sequal super hero, doctor, scientist,
# tenticle, death, super-villain


#spider-man 3 - fantasy, action, adventure - [1,1,7] - 9
#dragon blade - action, drama, adventure - [1,1,2] - 4
#winter's tale - drama, fantasy, mystery, romance -[2,2,1] - 5
#mississippi mermaid- crime, drama, romance - [1,3,1] - 5
#The Mortal Instruments: City of Bones - action, adv, drama, mystery, romance, fantasy - [3,2,3] - 8
#Beyond Borders - drama, romance, adventure, war - [1,3,2] - 6
#Hackers- action, comedy - [4,1,1] - 6
#wicker park - drama, mystery, romance, thriller - [1,3,1] - 5
#young sherlock holmes - action, adventure, crime, drama, family, mystery, thriller - [3,1,2] - 6
#beastly - drama, fantasy, romance - [3,2,1] - 6
#The Three Burials of Melquiades Estrada - adventure, crime, mystery, drama, western - [3,1,1] - 5
#In the Name of the King III - action, adventure, drama, fantasy - [1,1,3] - 5



[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


['Action', 'Adventure', 'Fantasy', 'ScienceFiction', 'cultureclash', 'future', 'spacewar', 'spacecolony', 'society', 'spacetravel', 'futuristic', 'romance', 'space', 'alien', 'tribe', 'alienplanet', 'cgi', 'marine', 'soldier', 'battle', 'loveaffair', 'antiwar', 'powerrelations', 'mindandsoul', '3d']
   movie_id                                     title  \
0     19995                                    Avatar   
1       285  Pirates of the Caribbean: At World's End   
2    206647                                   Spectre   
3     49026                     The Dark Knight Rises   
4     49529                               John Carter   

                                                tags  vote_average  popularity  
0  [Action, Adventure, Fantasy, ScienceFiction, c...           7.2  150.437577  
1  [Adventure, Fantasy, Action, ocean, drugabuse,...           6.9  139.082615  
2  [Action, Adventure, Crime, spy, basedonnovel, ...           6.3  107.376788  
3  [Action, Crime, Drama, Thrill

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: " ".join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: x.lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(stem)


['Action', 'Adventure', 'Fantasy', 'ScienceFiction', 'cultureclash', 'future', 'spacewar', 'spacecolony', 'society', 'spacetravel', 'futuristic', 'romance', 'space', 'alien', 'tribe', 'alienplanet', 'cgi', 'marine', 'soldier', 'battle', 'loveaffair', 'antiwar', 'powerrelations', 'mindandsoul', '3d']
   movie_id                                     title  \
0     19995                                    Avatar   
1       285  Pirates of the Caribbean: At World's End   
2    206647                                   Spectre   
3     49026                     The Dark Knight Rises   
4     49529                               John Carter   

                                                tags  vote_average  popularity  
0  [Action, Adventure, Fantasy, ScienceFiction, c...           7.2  150.437577  
1  [Adventure, Fantasy, Action, ocean, drugabuse,...           6.9  139.082615  
2  [Action, Adventure, Crime, spy, basedonnovel, ...           6.3  107.376788  
3  [Action, Crime, Drama, Thrill

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: " ".join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: x.lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(stem)
A value is trying to be set on a copy of a slice from a DataFrame

[['The Fault in Our Stars', 7.6], ['Brigham City', 7.3], ['The Girl with the Dragon Tattoo', 7.2], ['The Three Burials of Melquiades Estrada', 7.0], ['Red Riding: In the Year of Our Lord 1974', 7.0], ['Beyond Borders', 6.7], ['Wicker Park', 6.7], ['Young Sherlock Holmes', 6.7], ['Mississippi Mermaid', 6.7], ['Kill the Messenger', 6.6], ['Inherent Vice', 6.5], ['The Mortal Instruments: City of Bones', 6.2], ['Hackers', 6.2], ['A Walk Among the Tombstones', 6.2], ['Secret in Their Eyes', 6.2], ['Paper Towns', 6.1], ["Winter's Tale", 6.0], ['Beastly', 6.0], ['City By The Sea', 5.7], ['Slow Burn', 5.5], ['Broken Horses', 5.0]]


# Trail and Error Zone

In [None]:
l1= [3,2,3,4,2,4,1,2,10]
l1.extend([1,2,3,4])
l1.extend([5,6,7,8])
l1

[3, 2, 3, 4, 2, 4, 1, 2, 10, 1, 2, 3, 4, 5, 6, 7, 8]

In [None]:
model_movies_df.iloc[[1,2,3]]

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,vote_average,tags
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski],6.9,"[Captain, Barbossa,, long, believed, to, be, d..."
2,206647,Spectre,"[A, cryptic, message, from, Bond’s, past, send...","[Action, Adventure, Crime]","[spy, basedonnovel, secretagent, sequel, mi6, ...","[DanielCraig, ChristophWaltz, LéaSeydoux]",[SamMendes],6.3,"[A, cryptic, message, from, Bond’s, past, send..."
3,49026,The Dark Knight Rises,"[Following, the, death, of, District, Attorney...","[Action, Crime, Drama, Thriller]","[dccomics, crimefighter, terrorist, secretiden...","[ChristianBale, MichaelCaine, GaryOldman]",[ChristopherNolan],7.6,"[Following, the, death, of, District, Attorney..."


In [None]:
model_movies_df.iloc[2173]

movie_id                                                   328387
title                                                       Nerve
overview        [Industrious, high, school, senior,, Vee, Delm...
genres                                [Mystery, Adventure, Crime]
keywords        [basedonnovel, technology, internet, hacking, ...
cast                        [EmmaRoberts, DaveFranco, EmilyMeade]
crew                                                 [HenryJoost]
vote_average                                                  7.1
tags            [Industrious, high, school, senior,, Vee, Delm...
Name: 2173, dtype: object

In [None]:
model_movies_df.loc[1].title

"Pirates of the Caribbean: At World's End"

In [None]:
print(model_movies_df[model_movies_df['title']=='Nerve'])

      movie_id  title  ...          crew vote_average
2173    328387  Nerve  ...  [HenryJoost]          7.1

[1 rows x 8 columns]


In [None]:
savedf = model_movies_df.loc[model_movies_df['title']=='Nerve']

In [None]:
indexx = savedf.index

for i in savedf.loc[indexx].title:
  print(i,"\n")

Nerve 



####https://www.geeksforgeeks.org/python-pandas-series-item/

In [None]:
indexx = savedf.index
savedf.loc[indexx].title.item()

'Nerve'

In [None]:
indexx = savedf.index
somelist=[]
for i in savedf.loc[indexx].title:
  somelist.append(i)

print(somelist)

['Nerve']


In [None]:
model_movies_df.get_value(model_movies_df['title']=='Nerve','title')

AttributeError: ignored

In [None]:
model_movies_df[model_movies_df['title']=='Nerve']['title']

("('2173', ' ', '   Nerve\\nName: title, dtype: object')", '', '')

In [None]:
stem(" ".join(model_movies_df.iloc[2173]['tags']))


'industri high school senior, vee delmonico, ha had it with live life on the sidelines. when pressur by friend to join the popular onlin game nerve, vee decid to sign up for just one dare in what seem like harmless fun. but as she find herself caught up in the thrill of the adrenaline-fuel competit partner with a mysteri stranger, the game begin to take a sinist turn with increasingli danger acts, lead her into a high stake final that will determin her entir future. mysteri adventur crime basedonnovel technolog internet hack teenag newyorkc game adapt dare cellphonevideo basedonyoungadultnovel smartphon takingarisk emmarobert davefranco emilymead henryjoost'

In [None]:
model_movies_df[model_movies_df.index[0].isin([4425,89,183,4734])]

NameError: ignored

In [None]:
new =[]
new.append(model_movies_df[model_movies_df['movie_id'] == 2662].title)

In [None]:
for i in new:
  print(i)

3365    House of 1000 Corpses
Name: title, dtype: object


In [None]:
firstlevellist = []
for i in ['Nerve','The Notebook']: 
  firstlevellist.extend(singlemovie(model_movies_df,i))


print(firstlevellist)
print(firstlevellist[0])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


[[101179, 0.27431885848479565, 5.7], [80215, 0.24823978809252792, 7.4], [82690, 0.23932844901698533, 7.1], [101299, 0.22606462930422794, 7.4], [64688, 0.22233428588149948, 6.7], [18501, 0.2084200422875625, 5.6], [51130, 0.19215378456610457, 7.0], [157386, 0.1894661866862684, 6.8], [10069, 0.1889072881627786, 5.3]]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


[[4251, 0.30151134457776363, 7.4], [51384, 0.29268470350248177, 6.2], [293863, 0.29012942659282975, 7.4], [3060, 0.2849014411490949, 7.0], [1651, 0.2829125760578691, 6.7], [14608, 0.2825289656088067, 6.4], [41469, 0.26382242650554316, 6.0], [11889, 0.2611164839335468, 6.2], [597, 0.2528558164964056, 7.5]]
[[101179, 0.27, 5.7], [80215, 0.25, 7.4], [82690, 0.24, 7.1], [101299, 0.23, 7.4], [64688, 0.22, 6.7], [18501, 0.21, 5.6], [51130, 0.19, 7.0], [157386, 0.19, 6.8], [10069, 0.19, 5.3], [4251, 0.3, 7.4], [293863, 0.29, 7.4], [51384, 0.29, 6.2], [3060, 0.28, 7.0], [1651, 0.28, 6.7], [14608, 0.28, 6.4], [11889, 0.26, 6.2], [41469, 0.26, 6.0], [597, 0.25, 7.5]]
[101179, 0.27, 5.7]


In [None]:
import nltk
from nltk.stem.porter import PorterStemmer
psobj = PorterStemmer()
def stem(lst):
  temp = []
  for i in lst:
    temp.append(psobj.stem(i))
  return " ".join(temp)


df = model_movies_df.head(3)
print(df['tags'].apply(stem))


0    In the 22nd century, a parapleg marin is dispa...
1    captain barbossa, long believ to be dead, ha c...
2    A cryptic messag from bond’ past send him on a...
Name: tags, dtype: object


In [None]:
import nltk
from nltk.stem.porter import PorterStemmer
psobj = PorterStemmer()
def stem(text):
  temp = []
  for i in text.split():
    temp.append(psobj.stem(i))
  return " ".join(temp) 


df1 = model_movies_df.head(3)


df1['tags'] = [['this','is','tag1'],['this','is','tag2'],['this','is','tag3']]
print(df1)

#print("Type: ", type(df.tags))

def functolist(lst):
  return " ".join(lst)

df1['tags'] = df1['tags'].apply(functolist)
#print(df1)
df1['tags'] = df1['tags'].apply(stem)
print(df1)

#print(df.head().tags)")

   movie_id  ...              tags
0     19995  ...  [this, is, tag1]
1       285  ...  [this, is, tag2]
2    206647  ...  [this, is, tag3]

[3 rows x 9 columns]
   movie_id  ...         tags
0     19995  ...  thi is tag1
1       285  ...  thi is tag2
2    206647  ...  thi is tag3

[3 rows x 9 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
model_movies_df

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,vote_average
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron],7.2
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski],6.9
2,206647,Spectre,"[A, cryptic, message, from, Bond’s, past, send...","[Action, Adventure, Crime]","[spy, basedonnovel, secretagent, sequel, mi6, ...","[DanielCraig, ChristophWaltz, LéaSeydoux]",[SamMendes],6.3
3,49026,The Dark Knight Rises,"[Following, the, death, of, District, Attorney...","[Action, Crime, Drama, Thriller]","[dccomics, crimefighter, terrorist, secretiden...","[ChristianBale, MichaelCaine, GaryOldman]",[ChristopherNolan],7.6
4,49529,John Carter,"[John, Carter, is, a, war-weary,, former, mili...","[Action, Adventure, ScienceFiction]","[basedonnovel, mars, medallion, spacetravel, p...","[TaylorKitsch, LynnCollins, SamanthaMorton]",[AndrewStanton],6.1
...,...,...,...,...,...,...,...,...
4804,9367,El Mariachi,"[El, Mariachi, just, wants, to, play, his, gui...","[Action, Crime, Thriller]","[unitedstates–mexicobarrier, legs, arms, paper...","[CarlosGallardo, JaimedeHoyos, PeterMarquardt]",[RobertRodriguez],6.6
4805,72766,Newlyweds,"[A, newlywed, couple's, honeymoon, is, upended...","[Comedy, Romance]",[],"[EdwardBurns, KerryBishé, MarshaDietlein]",[EdwardBurns],5.9
4806,231617,"Signed, Sealed, Delivered","[""Signed,, Sealed,, Delivered"", introduces, a,...","[Comedy, Drama, Romance, TVMovie]","[date, loveatfirstsight, narration, investigat...","[EricMabius, KristinBooth, CrystalLowe]",[ScottSmith],7.0
4807,126186,Shanghai Calling,"[When, ambitious, New, York, attorney, Sam, is...",[],[],"[DanielHenney, ElizaCoupe, BillPaxton]",[DanielHsia],5.7


In [None]:
!pip install wordnet


Collecting wordnet
  Downloading wordnet-0.0.1b2.tar.gz (8.8 kB)
Collecting colorama==0.3.9
  Downloading colorama-0.3.9-py2.py3-none-any.whl (20 kB)
Building wheels for collected packages: wordnet
  Building wheel for wordnet (setup.py) ... [?25l[?25hdone
  Created wheel for wordnet: filename=wordnet-0.0.1b2-py3-none-any.whl size=10520 sha256=25fe168f7e6aa4999e374f76316d001a8bfca4e6761ad27c346f234d6bbf2f45
  Stored in directory: /root/.cache/pip/wheels/8c/6d/01/fbf1762cd6428569ea48474660f9cfe029307cd6c0f2abcdec
Successfully built wordnet
Installing collected packages: colorama, wordnet
Successfully installed colorama-0.3.9 wordnet-0.0.1b2


In [None]:
import nltk

In [None]:
from nltk.corpus import wordnet 
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [None]:
wordnet.synsets('action')

[Synset('action.n.01'),
 Synset('action.n.02'),
 Synset('military_action.n.01'),
 Synset('natural_process.n.01'),
 Synset('action.n.05'),
 Synset('action.n.06'),
 Synset('action.n.07'),
 Synset('legal_action.n.01'),
 Synset('action.n.09'),
 Synset('action.n.10'),
 Synset('action.v.01'),
 Synset('carry_through.v.01')]

In [None]:
nltk.download('wordnet')


def synn(word):
  synonyms = []
  for syn in wordnet.synsets(word):
    for l in syn.lemmas():
      synonyms.append(l.name())
  return synonyms

synnn = synn("1980s")
print(synnn)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
['eighties', '1980s']


In [None]:
wordnet.synsets('town')

[Synset('town.n.01'),
 Synset('town.n.02'),
 Synset('township.n.01'),
 Synset('town.n.04')]

In [None]:
townsyn = synn('town')
print(townsyn)

['town', 'town', 'townspeople', 'townsfolk', 'township', 'town', 'Town', 'Ithiel_Town']


In [None]:
synn('girl')

['girl',
 'miss',
 'missy',
 'young_lady',
 'young_woman',
 'fille',
 'female_child',
 'girl',
 'little_girl',
 'daughter',
 'girl',
 'girlfriend',
 'girl',
 'lady_friend',
 'girl']

In [None]:
for indicer in range(len(model_movies_df.head(3))):
  print(model_movies_df.iloc[[indicer]])

   movie_id  ...                                               tags
0     19995  ...  [In, the, 22nd, century,, a, paraplegic, Marin...

[1 rows x 9 columns]
   movie_id  ...                                               tags
1       285  ...  [Captain, Barbossa,, long, believed, to, be, d...

[1 rows x 9 columns]
   movie_id  ...                                               tags
2    206647  ...  [A, cryptic, message, from, Bond’s, past, send...

[1 rows x 9 columns]


In [None]:
print(model_movies_df.iloc[[0]]['tags'])
print(type(model_movies_df.iloc[[0]]['tags']))

0    [In, the, 22nd, century,, a, paraplegic, Marin...
Name: tags, dtype: object
<class 'pandas.core.series.Series'>


In [None]:
#creating a list of numpy array objects? 
numpyobjlist = [] 
import numpy as np 

for i in range(3):
  arrayy = np.full((2,3),1)
  numpyobjlist.append(arrayy)

for i in range(3):
  print(numpyobjlist[i])
  print("\n\n")

[[1 1 1]
 [1 1 1]]



[[1 1 1]
 [1 1 1]]



[[1 1 1]
 [1 1 1]]





In [None]:
newinput = []
inputt = "A small town girl is caught between dead-end jobs. A high-profile, successful man becomes wheelchair bound following an accident. The man decides his life is not worth living until the girl is hired for six months to be his new caretaker. Worlds apart and trapped together by circumstance, the two get off to a rocky start. But the girl becomes determined to prove to the man that life is worth living and as they embark on a series of adventures together, each finds their world changing in ways neither of them could begin to imagine. Drama, Romance. England based on novel depression small town wheelchair caretaker caregiver disabled twenty something woman director romantic drama dead end job accident"
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')
from nltk.tokenize import word_tokenize
text_tokens = word_tokenize(inputt)

all_stopwords = stopwords.words('english')
all_stopwords.extend(['a','A','an','An','the','The'])

newinput = [x for x in text_tokens if x not in all_stopwords]


print(stem(" ".join(newinput)))


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
small town girl caught dead-end job . high-profil , success man becom wheelchair bound follow accid . man decid life worth live girl hire six month new caretak . world apart trap togeth circumst , two get rocki start . but girl becom determin prove man life worth live embark seri adventur togeth , find world chang way neither could begin imagin . drama , romanc . england base novel depress small town wheelchair caretak caregiv disabl twenti someth woman director romant drama dead end job accid


In [None]:
'''
Rush Hour 3 - action, comedy, crime, thriller
Rush Hour 2 - action, comedy, crime, thriller
Ganster Squad - crime, drama, action, thriller
Edge of Darkness - crime, drama, mystery, thriller
The International - drama, thriller, crime
Run All Night - action, crime, drama, mystery, thriller
Dream House - drama, thriller, mystery
The Adventures of Ford Fairlane - action, comedy, thriller, crime, mystery
The Bone Collector - drama, mystery, thriller, crime
The Pledge - drama, mystery, thriller, crime
Kiss of Death - action, crime, drama, thriller
Midnight in the Garden of Good and Evil - crime, drama, mystery, thriller
Blood Ties - thriller, crime, drama
Faster - crime, drama, action, thriller
Zulu - crime, drama, thriller
Slow Burn - mystery, crime, drama, thriller
The Son of No One - drama, thriller, crime
Broken Horses - thriller, mystery, drama, crime
The Caveman's Valentine - drama, mystery, thriller
Stoker - drama, horror, thriller
The Life Before Her Eyes - thriller, drama, mystery
Trash - adventure, crime, drama, thriller
Raising Cain - drama, horror, thriller, crime
Red Riding: In the Year of Our Lord 1974 - thriller, mystery, crime, drama
Clay Pigeons - comedy, crime, drama, thriller
Betty Fisher and Other Stories - drama, thriller, crime, foreign
In Too Deep - drama, action, thriller, crime
Goddess of Love - mystery, drama, thriller, horror
Novocaine - comedy, crime, thriller
Zero Effect - comedy, crime, mystery, thriller
Kill the Messenger - thiller, crime, drama, mystery
Easy Money - drama, thriller, crime
Harper - action, drama, thriller, crime, mystery
Freeze Frame - thriller, drama, crime
Nine Dead - crime, drama, horror, thriller
N-secure - drama, crime, thriller
The Blue Room - drama, crime, thriller
Brigham City - crime, drama, mystery, thriller
The Sound and The Shadow - thriller, comedy, mystery
Adulterers - thriller, crime, drama
Dutch Kills - thriller, crime, drama
Compliance - thriller, crime, drama
'''

In [7]:

#retrieve the tags(genre, overview, keywords) of all the movies and combine into one
#then we perform the stemming of the tags

import nltk
from nltk.corpus import wordnet
nltk.download('wordnet')

def synonym(word):
  synonyms = []
  for syn in wordnet.synsets(word):
    for l in syn.lemmas():
      synonyms.append(l.name())
  return synonyms

from nltk.stem.porter import PorterStemmer
psobj = PorterStemmer()
def stem(text):
  temp = []
  for i in text.split():
      temp.append(psobj.stem(i))
  return " ".join(temp)


def functolist(lst):
  return " ".join(lst)


def rec(df,multiplemovielist):
  flag='multiple'
  firstlevellist = []
  for i in multiplemovielist: 
    firstlevellist.extend(singlemovie(model_movies_df,i,40,flag))
  movie_id_of_users_input_list = []   #movie_ids of the movies entered by the user
  tag_of_users_input_list = []
  for movie in multiplemovielist:
    idofmovie = df[df['title']==movie]['movie_id'].item()
    movie_id_of_users_input_list.append(idofmovie)
    tag_of_users_input_list.append(stem(" ".join(df[df['movie_id']==idofmovie]['tags'].item())))   
  movie_id_of_firstlevellist_movies = []
  for i in firstlevellist:
    movie_id_of_firstlevellist_movies.append(i[0])
  new_df = df[ df['movie_id'].isin(movie_id_of_firstlevellist_movies)] #indices of the firstlevellist.
  new_df['tags'] = new_df['tags'].apply(functolist)
  new_df['tags'] = new_df['tags'].apply(stem)

  counterobjlist = []
  newmovielist = []

  for indicer in range(len(new_df)):     #for our example of two movies: 78 records, i.e 0 to 77
    counters = np.zeros((len(tag_of_users_input_list),1))
    for i in new_df.iloc[indicer]['tags'].split(): #for each word in every movie's tag (movies from new_df)
      for j in range(len(tag_of_users_input_list)):    
        #the above for loop is to check if any word of the current movie's tag, 
        #belongs to the tags of the movies entered by the user.
        if (stem(i) in tag_of_users_input_list[j].split()) or (stem(" ".join(synonym(i))) in tag_of_users_input_list[j].split()):
          counters[j]+=1

    if all(x>=1 for x in counters):
      newmovielist.append([new_df.iloc[indicer]['title'], new_df.iloc[indicer]['vote_average']])
      counterobjlist.append(counters)


  return sorted(newmovielist, reverse=True, key= lambda x: x[1])

sorted_movie_list = rec(model_movies_df,['Urban Legend','Snatch','LOL','Devil']) 
print(sorted_movie_list)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: " ".join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: x.lower())


['Action', 'Adventure', 'Fantasy', 'ScienceFiction', 'cultureclash', 'future', 'spacewar', 'spacecolony', 'society', 'spacetravel', 'futuristic', 'romance', 'space', 'alien', 'tribe', 'alienplanet', 'cgi', 'marine', 'soldier', 'battle', 'loveaffair', 'antiwar', 'powerrelations', 'mindandsoul', '3d']
   movie_id                                     title  \
0     19995                                    Avatar   
1       285  Pirates of the Caribbean: At World's End   
2    206647                                   Spectre   
3     49026                     The Dark Knight Rises   
4     49529                               John Carter   

                                                tags  vote_average  popularity  
0  [Action, Adventure, Fantasy, ScienceFiction, c...           7.2  150.437577  
1  [Adventure, Fantasy, Action, ocean, drugabuse,...           6.9  139.082615  
2  [Action, Adventure, Crime, spy, basedonnovel, ...           6.3  107.376788  
3  [Action, Crime, Drama, Thrill

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(stem)


['Action', 'Adventure', 'Fantasy', 'ScienceFiction', 'cultureclash', 'future', 'spacewar', 'spacecolony', 'society', 'spacetravel', 'futuristic', 'romance', 'space', 'alien', 'tribe', 'alienplanet', 'cgi', 'marine', 'soldier', 'battle', 'loveaffair', 'antiwar', 'powerrelations', 'mindandsoul', '3d']
   movie_id                                     title  \
0     19995                                    Avatar   
1       285  Pirates of the Caribbean: At World's End   
2    206647                                   Spectre   
3     49026                     The Dark Knight Rises   
4     49529                               John Carter   

                                                tags  vote_average  popularity  
0  [Action, Adventure, Fantasy, ScienceFiction, c...           7.2  150.437577  
1  [Adventure, Fantasy, Action, ocean, drugabuse,...           6.9  139.082615  
2  [Action, Adventure, Crime, spy, basedonnovel, ...           6.3  107.376788  
3  [Action, Crime, Drama, Thrill

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: " ".join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: x.lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(stem)


['Action', 'Adventure', 'Fantasy', 'ScienceFiction', 'cultureclash', 'future', 'spacewar', 'spacecolony', 'society', 'spacetravel', 'futuristic', 'romance', 'space', 'alien', 'tribe', 'alienplanet', 'cgi', 'marine', 'soldier', 'battle', 'loveaffair', 'antiwar', 'powerrelations', 'mindandsoul', '3d']
   movie_id                                     title  \
0     19995                                    Avatar   
1       285  Pirates of the Caribbean: At World's End   
2    206647                                   Spectre   
3     49026                     The Dark Knight Rises   
4     49529                               John Carter   

                                                tags  vote_average  popularity  
0  [Action, Adventure, Fantasy, ScienceFiction, c...           7.2  150.437577  
1  [Adventure, Fantasy, Action, ocean, drugabuse,...           6.9  139.082615  
2  [Action, Adventure, Crime, spy, basedonnovel, ...           6.3  107.376788  
3  [Action, Crime, Drama, Thrill

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: " ".join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: x.lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(stem)


['Action', 'Adventure', 'Fantasy', 'ScienceFiction', 'cultureclash', 'future', 'spacewar', 'spacecolony', 'society', 'spacetravel', 'futuristic', 'romance', 'space', 'alien', 'tribe', 'alienplanet', 'cgi', 'marine', 'soldier', 'battle', 'loveaffair', 'antiwar', 'powerrelations', 'mindandsoul', '3d']
   movie_id                                     title  \
0     19995                                    Avatar   
1       285  Pirates of the Caribbean: At World's End   
2    206647                                   Spectre   
3     49026                     The Dark Knight Rises   
4     49529                               John Carter   

                                                tags  vote_average  popularity  
0  [Action, Adventure, Fantasy, ScienceFiction, c...           7.2  150.437577  
1  [Adventure, Fantasy, Action, ocean, drugabuse,...           6.9  139.082615  
2  [Action, Adventure, Crime, spy, basedonnovel, ...           6.3  107.376788  
3  [Action, Crime, Drama, Thrill

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: " ".join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(lambda x: x.lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tags'] = df['tags'].apply(stem)
A value is trying to be set on a copy of a slice from a DataFrame

[['Brigham City', 7.3], ['Red Riding: In the Year of Our Lord 1974', 7.0], ['Betty Fisher and Other Stories', 7.0], ['Zulu', 6.7], ['Stoker', 6.5], ['Easy Money', 6.5], ['The Swindle', 6.4], ['Severance', 6.4], ['Compliance', 6.3], ['In Too Deep', 6.2], ['Goddess of Love', 6.2], ['Faster', 6.1], ['The International', 6.0], ['Blood Ties', 6.0], ['Raising Cain', 5.9], ['Some Guy Who Kills People', 5.7], ['I Know What You Did Last Summer', 5.6], ['White Noise 2: The Light', 5.6], ['The Whole Ten Yards', 5.5], ['Slow Burn', 5.5], ['Nine Dead', 5.2], ['Adulterers', 5.2], ['Broken Horses', 5.0], ['We Have Your Husband', 5.0], ['The Son of No One', 4.8], ['Thr3e', 4.8], ['N-Secure', 4.3], ['Amnesiac', 4.1], ['#Horror', 3.3], ['Windsor Drive', 2.0], ['The Sound and the Shadow', 0.0], ['Dutch Kills', 0.0], ['Sanctuary: Quite a Conundrum', 0.0]]
