# Cap. 6 - Análise Prescritiva

## Sistemas de Recomendação

Kaggle: https://www.kaggle.com/datasets/harshitshankhdhar/imdb-dataset-of-top-1000-movies-and-tv-shows/data

In [1]:
# Importações
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import NearestNeighbors

In [2]:
# Data loading and preprocessing
def prepare_data(csv_path='imdb_top_1000.csv'):
    """Load the IMDB table and encode the features used to compare movies.

    Parameters
    ----------
    csv_path : str or path-like, default 'imdb_top_1000.csv'
        Location of the CSV file. It must contain the columns 'Genre',
        'Director', 'Star1' and 'Star2'.

    Returns
    -------
    imdb : pandas.DataFrame
        The table exactly as read from ``csv_path``.
    imdb_filtered_transf
        Matrix returned by ``ColumnTransformer.fit_transform`` for the four
        feature columns: one row per row of ``imdb``, in the same order.
    """
    imdb = pd.read_csv(csv_path)

    # Keep only the columns that describe a movie for similarity purposes
    keep_cols = ['Genre', 'Director', 'Star1', 'Star2']
    imdb_filtered = imdb.loc[:, keep_cols]

    # Split the columns by dtype through the public API. Anything that is not
    # numeric is treated as categorical, so the split does not depend on which
    # dtype pandas uses to store text.
    numeric_cols = imdb_filtered.select_dtypes(include='number').columns.tolist()
    categorical_cols = imdb_filtered.select_dtypes(exclude='number').columns.tolist()

    # Numeric variables: standardise
    numeric_pipe = Pipeline([('scaler', StandardScaler())])

    # Categorical variables: one-hot encode, keeping every category.
    # drop='first' is meant for linear models; in a similarity search it turns
    # the dropped category into all zeros, so two movies sharing it would get
    # no credit for the match.
    categorical_pipe = Pipeline([('encoder', OneHotEncoder())])

    col_transf = ColumnTransformer([
        ('numeric', numeric_pipe, numeric_cols),
        ('categoric', categorical_pipe, categorical_cols)])

    imdb_filtered_transf = col_transf.fit_transform(imdb_filtered)

    return imdb, imdb_filtered_transf

In [3]:
# Movie recommendation
def get_movie_recommendations(movie_name, imdb, imdb_filtered_transf):
    """Return the movies most similar to the first title containing ``movie_name``.

    Parameters
    ----------
    movie_name : str
        Text to look for in ``imdb["Series_Title"]``. It is matched
        case-insensitively as a literal substring (not as a regex).
    imdb : pandas.DataFrame
        Movie table with a "Series_Title" column.
    imdb_filtered_transf : array-like or sparse matrix
        Feature matrix with one row per row of ``imdb``, in the same order.

    Returns
    -------
    pandas.DataFrame
        Rows of ``imdb`` for the nearest neighbours (cosine distance) of the
        matched movie, in the order returned by ``kneighbors``. The matched
        movie itself is never included; at most 4 rows are returned.

    Raises
    ------
    IndexError
        If no title contains ``movie_name``.
    """
    # Look the movie up. regex=False keeps characters such as "(" or "[" in the
    # user's text literal; na=False keeps missing titles out of the mask.
    title_mask = imdb["Series_Title"].str.lower().str.contains(
        movie_name.lower(), regex=False, na=False
    )
    match_positions = title_mask.to_numpy(dtype=bool).nonzero()[0]
    if len(match_positions) == 0:
        raise IndexError(f"No title in imdb['Series_Title'] contains {movie_name!r}")

    # Work with the row *position*: the feature matrix is positional, so this
    # stays correct even when imdb does not carry a default 0..n-1 index.
    movie_pos = int(match_positions[0])

    # Never ask for more neighbours than there are rows
    n_neighbors = min(5, imdb_filtered_transf.shape[0])
    nneighbors = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine').fit(imdb_filtered_transf)

    # A one-row slice is already 2-D and works for dense and sparse matrices alike
    movie_data = imdb_filtered_transf[movie_pos:movie_pos + 1]
    _, ind = nneighbors.kneighbors(movie_data)

    # Recommendations: drop the query itself wherever it ranks (ties between
    # identical feature rows can push it out of first place), then keep the
    # same count as before, i.e. one fewer than the neighbours requested.
    recommended_positions = [int(pos) for pos in ind[0] if int(pos) != movie_pos]
    recommended_movies = imdb.iloc[recommended_positions[:n_neighbors - 1]]
    return recommended_movies


In [6]:
# Running the application
imdb, imdb_filtered_transf = prepare_data()

movie_name = input("Escreva o filme que você gostou: ")
recommendations = get_movie_recommendations(movie_name, imdb, imdb_filtered_transf)

# Show which title was matched. The match is literal (regex=False) and
# NaN-safe (na=False), so input containing characters such as "(" or "[" is
# not interpreted as a regular expression and missing titles cannot break
# the boolean mask.
title_matches = imdb["Series_Title"].str.lower().str.contains(
    movie_name.lower(), regex=False, na=False
)
print("\nFilme: ", imdb.loc[title_matches, "Series_Title"].iloc[0])
print("="*80)
print("Recomendações")
print("="*80)
# Last expression of the cell: rendered by the notebook as a table
recommendations[['Series_Title', 'Genre']]


Escreva o filme que você gostou: the incredibles

Filme:  The Incredibles
Recomendações


Unnamed: 0,Series_Title,Genre
891,Incredibles 2,"Animation, Action, Adventure"
389,The Iron Giant,"Animation, Action, Adventure"
58,Spider-Man: Into the Spider-Verse,"Animation, Action, Adventure"
367,Ratatouille,"Animation, Adventure, Comedy"
