In [14]:
#MC906 - Introducao a Inteligencia Artificial
#Construindo um sistema de recomendacao de filmes

#Recomendacao baseada em conteudo

import pandas
from pandas.plotting import scatter_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import linear_kernel

movies = pandas.read_csv('ml-latest-small/movies.csv')
print(movies.head(10))

tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(movies['genres'])

cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim

titles = movies['title']
indices = pandas.Series(movies.index, index=movies['title'])

#Funcao que calcula recomendacoes de filme com base no score de similaridade de cossenos entre generos
#OBS: ha filmes identicos com mais de uma entrada no dataset (ex: 'Saturn 3 (1980)'). Nesse caso, o metodo nao funciona
def genre_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:21]
    #print(sim_scores)
    movie_indices = [i[0] for i in sim_scores]
    return titles.iloc[movie_indices]

   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   
5        6                         Heat (1995)   
6        7                      Sabrina (1995)   
7        8                 Tom and Huck (1995)   
8        9                 Sudden Death (1995)   
9       10                    GoldenEye (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
5                        Action|Crime|Thriller  
6                               Comedy|Romance  
7                           Adventure|Children  
8       

In [11]:
#Testando a validade do recomendador com sequels
print(genre_recommendations('Free Willy (1993)').head(5)) #aventura, infantil, drama
print(genre_recommendations('Free Willy 2: The Adventure Home (1995)').head(5)) #aventura, infantil, drama 

print(genre_recommendations('Karate Kid, The (1984)').head(5)) #drama 
print(genre_recommendations('Karate Kid, Part II, The (1986)').head(5)) #acao, aventura, drama
print(genre_recommendations('Karate Kid, Part III, The (1989)').head(5)) #acao, aventura, infantil, drama
print(genre_recommendations('Next Karate Kid, The (1994)').head(5)) #acao, infantil, romance
print(genre_recommendations('Karate Kid, The (2010)').head(5)) #acao, infantil, drama

365                               Black Beauty (1994)
396                                 Free Willy (1993)
498                                      Andre (1994)
773     Homeward Bound: The Incredible Journey (1993)
1170                                     Buddy (1997)
Name: title, dtype: object
365                               Black Beauty (1994)
396                                 Free Willy (1993)
498                                      Andre (1994)
773     Homeward Bound: The Incredible Journey (1993)
1170                                     Buddy (1997)
Name: title, dtype: object
25                     Othello (1995)
30             Dangerous Minds (1995)
36    Cry, the Beloved Country (1995)
39                 Restoration (1995)
50                     Georgia (1995)
Name: title, dtype: object
986                                  Ben-Hur (1959)
1331               Man in the Iron Mask, The (1998)
1488                 Poseidon Adventure, The (1972)
1494    Seven Samurai (Shichinin 

In [12]:
#Testando para diferentes generos
print(genre_recommendations('Mulan (1998)').head(5)) #aventura, animacao, infantil, comedia, drama, musical, romance
print(genre_recommendations('Life Is Beautiful (La Vita è bella) (1997)').head(5)) #drama, romance, guerra, comedia
print(genre_recommendations('Sound of Music, The (1965)').head(5)) #musical, romance
print(genre_recommendations('Shining, The (1980)').head(5)) #terror
print(genre_recommendations('Paranormal Activity 3 (2011)').head(5)) #terror
print(genre_recommendations('10 Things I Hate About You (1999)').head(5)) #comedia, romance

6230                         High School Musical (2006)
5160                                     Shrek 2 (2004)
8349    Ernest & Célestine (Ernest et Célestine) (2012)
44                                    Pocahontas (1995)
618                 Hunchback of Notre Dame, The (1996)
Name: title, dtype: object
1730           Life Is Beautiful (La Vita è bella) (1997)
2262                  Train of Life (Train de vie) (1998)
6296    Tiger and the Snow, The (La tigre e la neve) (...
6624    I Served the King of England (Obsluhoval jsem ...
346                            Colonel Chabert, Le (1994)
Name: title, dtype: object
792                            Sound of Music, The (1965)
4405                          From Justin to Kelly (2003)
5432                                 Easter Parade (1948)
168     Umbrellas of Cherbourg, The (Parapluies de Che...
827                                  Dirty Dancing (1987)
Name: title, dtype: object
188                                  Castle Freak (1995)
59

In [13]:
#Personalizando para um usuario
ratings = pandas.read_csv('ml-latest-small/ratings.csv')
print("Informe ID do usuario a quem se deseja fazer recomendacao (numero de 1 a 610): ")
user = int(input())

#Seleciona classificacoes em que o usuario deu 5 estrelas
ratings_by_user = ratings['userId'] == user 
ratings_by_user = ratings[ratings_by_user]
fivestar_ratings_by_user = ratings_by_user['rating'] == 5.0
fivestar_ratings_by_user = ratings_by_user[fivestar_ratings_by_user]

#para cada filme classificado com 5 estrelas, o recomendador fornece uma lista de 5 filmes similares
for row in range(fivestar_ratings_by_user.shape[0]): 
	movie = fivestar_ratings_by_user.iat[row, 1] #id do filme cujo titulo queremos extrair
	pos = movies.index[movies['movieId'] == movie].tolist()[0] #posicao na linha de movies.csv onde esse id se encontra
	name = movies.iat[pos, 1] #titulo do filme
	print("Porque o usuario de ID " + str(user) + " gostou de " + name + ", recomendamos tambem:")
	print(genre_recommendations(name).head(5))
	print("\n")

Informe ID do usuario a quem se deseja fazer recomendacao (numero de 1 a 610): 
2
Porque o usuario de ID 2 gostou de Step Brothers (2008), recomendamos tambem:
17                        Four Rooms (1995)
18    Ace Ventura: When Nature Calls (1995)
58                          Bio-Dome (1996)
61                            Friday (1995)
79                       Black Sheep (1996)
Name: title, dtype: object


Porque o usuario de ID 2 gostou de Inside Job (2010), recomendamos tambem:
87     Heidi Fleiss: Hollywood Madam (1995)
96                           Catwalk (1996)
101            Anne Frank Remembered (1995)
110                   Jupiter's Wife (1994)
114                  Man of the Year (1995)
Name: title, dtype: object


Porque o usuario de ID 2 gostou de Warrior (2011), recomendamos tambem:
25                     Othello (1995)
30             Dangerous Minds (1995)
36    Cry, the Beloved Country (1995)
39                 Restoration (1995)
50                     Georgia (1995)
Name: