# Application de recommandation de films

Vous devrez élaborer une application capable de retourner 5 recommandations de films similaires et intéressants pour l'utilisateur, à partir d’une requête d’un nom de film. 

In [60]:
# Standard library
import os
from urllib.parse import quote_plus

# Third-party: data handling and configuration
import numpy as np
import pandas as pd
from dotenv import load_dotenv

# Third-party: database connection
from sqlalchemy import create_engine

# Third-party: collaborative filtering (surprise)
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise.model_selection import train_test_split

In [80]:
# Connect to the database with SQLAlchemy.
# The connection settings are read from environment variables; load_dotenv()
# loads them from a local .env file when one is present.
load_dotenv()

# NOTE(review): by default load_dotenv() does not override variables that are
# already set in the process environment, and some systems pre-define a
# user-name variable — confirm "username" resolves to the database user.
username = os.getenv("username")
password = os.getenv("password")
host = os.getenv("host")
port = os.getenv("port")
dbname = os.getenv("dbname")

# Fail early with a readable message instead of building a URL that contains
# the literal text "None". An empty string (e.g. a blank password) is allowed.
missing_vars = [
    var_name
    for var_name, var_value in (
        ("username", username),
        ("password", password),
        ("host", host),
        ("port", port),
        ("dbname", dbname),
    )
    if var_value is None
]
if missing_vars:
    raise RuntimeError(
        "Missing database settings in the environment/.env file: "
        + ", ".join(missing_vars)
    )

# User name and password are URL-escaped so that characters such as '@', ':'
# or '/' inside the credentials cannot be mistaken for URL delimiters.
connection_string = (
    f"mysql+mysqlconnector://{quote_plus(username)}:{quote_plus(password)}"
    f"@{host}:{port}/{dbname}"
)
engine = create_engine(connection_string)

In [81]:
# Load every row and column of input_table into a DataFrame, then preview the
# last ten rows and report the (rows, columns) shape.
df = pd.read_sql_query(sql="SELECT * FROM input_table", con=engine)
display(df.tail(n=10))
print(df.shape)

Unnamed: 0,movie_title,actor_1_name,genres,imdb_score,director_name,duration,language,country
3645,Smiling Fish & Goat on Fire,Derick Martini,Comedy|Romance,7.6,Kevin Jordan,90.0,English,USA
3646,Clerks,Jason Mewes,Comedy,7.8,Kevin Smith,102.0,English,USA
3647,In the Company of Men,Stacy Edwards,Comedy|Drama,7.3,Neil LaBute,97.0,English,Canada
3648,Slacker,Tommy Pallotta,Comedy|Drama,7.1,Richard Linklater,100.0,English,USA
3649,Pink Flamingos,Divine,Comedy|Crime|Horror,6.1,John Waters,108.0,English,USA
3650,Clean,Maggie Cheung,Drama|Music|Romance,6.9,Olivier Assayas,110.0,French,France
3651,The Circle,Fereshteh Sadre Orafaiy,Drama,7.5,Jafar Panahi,90.0,Persian,Iran
3652,Primer,Shane Carruth,Drama|Sci-Fi|Thriller,7.0,Shane Carruth,77.0,English,USA
3653,El Mariachi,Carlos Gallardo,Action|Crime|Drama|Romance|Thriller,6.9,Robert Rodriguez,81.0,Spanish,USA
3654,My Date with Drew,John August,Documentary,6.6,Jon Gunn,90.0,English,USA


(3655, 8)


In [82]:
# Show the column labels of the loaded table.
df.columns

Index(['movie_title', 'actor_1_name', 'genres', 'imdb_score', 'director_name',
       'duration', 'language', 'country'],
      dtype='object')

In [83]:
# Show the data type pandas assigned to each column.
df.dtypes

movie_title       object
actor_1_name      object
genres            object
imdb_score       float64
director_name     object
duration         float64
language          object
country           object
dtype: object

In [84]:
# Remove any trailing non-breaking spaces (\xa0) from the titles; the title
# lookups in this notebook compare strings exactly.
stripped_titles = df['movie_title'].str.rstrip('\xa0')
df['movie_title'] = stripped_titles

In [85]:
# Select the reference movie explicitly so this cell runs on a fresh kernel
# instead of relying on an `input_movie` variable left over from earlier work.
input_movie = df[df['movie_title'] == "Spectre"]

# Descriptive columns of that movie, displayed as a NumPy array.
features = input_movie[['actor_1_name', 'genres', 'imdb_score', 'director_name', 'duration', 'language', 'country']]
features.to_numpy()

array([['Christoph Waltz', 'Action|Adventure|Thriller', 6.8,
        'Sam Mendes', 148.0, 'English', 'UK']], dtype=object)

In [86]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler

In [91]:
def get_recommendations(input_title, top_n=5):
    """Return the titles of the movies most similar to ``input_title``.

    Every row of the notebook-level ``df`` is turned into a feature vector:
    TF-IDF weights computed over the concatenated ``genres``, ``actor_1_name``,
    ``director_name``, ``language`` and ``country`` strings, followed by the
    min-max scaled ``imdb_score`` and ``duration``. Candidates are ranked by
    cosine similarity with the vector of the first row whose ``movie_title``
    equals ``input_title``.

    Parameters
    ----------
    input_title : str
        Title to look up; it must match ``df['movie_title']`` exactly.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` entries of ``df['movie_title']``, most similar first.
        Rows carrying the queried title are never returned.

    Raises
    ------
    IndexError
        If no row of ``df`` has ``movie_title == input_title``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Work with row positions (not index labels) so the lookup stays correct
    # even if df does not carry a default 0..n-1 index.
    title_mask = (df['movie_title'] == input_title).to_numpy(dtype=bool)
    matching_positions = np.flatnonzero(title_mask)
    if matching_positions.size == 0:
        # Checked before any vectorisation so an unknown title fails fast.
        raise IndexError(f"No movie titled {input_title!r} was found in df['movie_title']")
    query_pos = matching_positions[0]

    # Missing text values become empty strings; the vectoriser rejects NaN.
    text = df[['genres', 'actor_1_name', 'director_name', 'language', 'country']].fillna('').astype(str)
    documents = (
        text['genres'] + " " + text['actor_1_name'] + " " + text['director_name']
        + " " + text['language'] + " " + text['country']
    )
    tfidf_matrix = TfidfVectorizer().fit_transform(documents)

    # Bring both numeric columns onto a common 0-1 scale.
    numerical_features = MinMaxScaler().fit_transform(df[['imdb_score', 'duration']])

    combined_features = np.hstack([tfidf_matrix.toarray(), numerical_features])

    # Only the query row's similarities are needed, not the full N x N matrix.
    query_vector = combined_features[query_pos:query_pos + 1]
    scores = cosine_similarity(query_vector, combined_features)[0]

    # Exclude the query movie (and duplicate rows with the same title)
    # explicitly rather than assuming it ranks first.
    candidate_positions = np.flatnonzero(~title_mask)
    # Stable sort: among equal scores, rows keep their original order.
    order = np.argsort(-scores[candidate_positions], kind='stable')[:top_n]

    return df['movie_title'].iloc[candidate_positions[order]]

In [92]:
# Query the recommender with a movie title and print the titles it returns.
top_5_similar_movies = get_recommendations(input_title="Spectre")
print(top_5_similar_movies)

61      The Legend of Tarzan
29                   Skyfall
2769                Big Eyes
2327         American Beauty
434        Road to Perdition
Name: movie_title, dtype: object
