In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer
import streamlit as st

In [3]:
# Quick look at the raw movie metadata file.
df = pd.read_csv("movies.csv")
df

Unnamed: 0,title,movieId,director_name,actor_2_name,genres,actor_1_name,movie_title,actor_3_name,plot_keywords,movie_imdb_link
0,Lion of the Desert,5196,Moustapha Akkad,Rod Steiger,Biography|Drama|History|War,Oliver Reed,Lion of the Desert,John Gielgud,arab|general|libya|muslim|mussolini,http://www.imdb.com/title/tt0081059/?ref_=fn_t...
1,Commando,153496,Mark L. Lester,Vernon Wells,Action|Adventure|Thriller,Bill Duke,Commando,Rae Dawn Chong,body count|dictator|jumping from an airplane|k...,http://www.imdb.com/title/tt0088944/?ref_=fn_t...
2,Commando,6664,Mark L. Lester,Vernon Wells,Action|Adventure|Thriller,Bill Duke,Commando,Rae Dawn Chong,body count|dictator|jumping from an airplane|k...,http://www.imdb.com/title/tt0088944/?ref_=fn_t...
3,Commando,189687,Mark L. Lester,Vernon Wells,Action|Adventure|Thriller,Bill Duke,Commando,Rae Dawn Chong,body count|dictator|jumping from an airplane|k...,http://www.imdb.com/title/tt0088944/?ref_=fn_t...
4,The Thin Red Line,166237,Terrence Malick,Miranda Otto,Drama|War,Nick Stahl,The Thin Red Line,Dash Mihok,battle|hill|jungle|multiple perspectives|tropi...,http://www.imdb.com/title/tt0120863/?ref_=fn_t...
...,...,...,...,...,...,...,...,...,...,...
4612,Mars Needs Moms,85261,Simon Wells,Dan Fogler,Action|Adventure|Animation|Comedy|Family|Sci-Fi,Elisabeth Harnois,Mars Needs Moms,Tom Everett Scott,boy|martian|rescue|robot|sexism,http://www.imdb.com/title/tt1305591/?ref_=fn_t...
4613,Robots,32031,Chris Wedge,Drew Carey,Adventure|Animation|Comedy|Family|Sci-Fi,Jim Broadbent,Robots,Paula Abdul,actor voicing multiple characters|box office h...,http://www.imdb.com/title/tt0358082/?ref_=fn_t...
4614,Aqua Teen Hunger Force Colon Movie Film for Th...,52462,Matt Maiellaro,Fred Armisen,Action|Adventure|Animation|Comedy|Fantasy|Sci-Fi,Tina Fey,Aqua Teen Hunger Force Colon Movie Film for Th...,Chris Kattan,based on cult tv series|critically acclaimed|e...,http://www.imdb.com/title/tt0455326/?ref_=fn_t...
4615,Enchanted,56152,Kevin Lima,Teala Dunn,Animation|Comedy|Family|Fantasy|Musical|Romance,Jeff Bennett,Enchanted,Fred Tatasciore,fairy tale|lawyer|love|new york|prince,http://www.imdb.com/title/tt0461770/?ref_=fn_t...


In [4]:
# Quick look at the raw ratings file (this rebinds the name `df`).
df = pd.read_csv('ratings.csv')
df

Unnamed: 0,userId,movieId,rating,timestamp
0,18560,516,5.0,1145303288
1,122553,261,5.0,839352591
2,154994,266,5.0,941588514
3,135201,3594,5.0,1012191276
4,116535,1291,5.0,1359679804
...,...,...,...,...
13009875,136211,1196,3.5,1376254388
13009876,10570,112556,3.5,1494152642
13009877,115716,2990,3.5,1056487869
13009878,121473,3246,3.5,1497227343


In [None]:
## TRAITEMENT, NETTOYAGE ET PROCESSING DES DONNÉES

In [None]:
##  Chargement et preprocessing des données

In [2]:

def traitement(ratings_path='ratings.csv', movies_path='movies.csv'):
    """Load the ratings and the movie catalogue, and build content features.

    Parameters
    ----------
    ratings_path : str or path-like, default 'ratings.csv'
        CSV file holding the ratings; read with ``pd.read_csv``.
    movies_path : str or path-like, default 'movies.csv'
        CSV file holding the movies. It must provide the columns
        ``movie_title``, ``genres`` ('|'-separated string),
        ``director_name``, ``actor_1_name``, ``actor_2_name`` and
        ``actor_3_name``.

    Returns
    -------
    ratings_df : pandas.DataFrame
        The ratings exactly as read from ``ratings_path``.
    movies_df : pandas.DataFrame
        One row per distinct ``movie_title`` (first occurrence kept), with
        ``genres`` turned into a list of labels and the index renumbered
        0..n-1.
    features : pandas.DataFrame
        One-hot encoded genres, director and the three actor columns,
        row-aligned with ``movies_df``.
    """
    ratings_df = pd.read_csv(ratings_path)
    movies_df = pd.read_csv(movies_path)

    # Renumber the rows after dropping duplicates: otherwise the index keeps
    # gaps and a row label no longer equals the row position, which breaks
    # any positional lookup into `features` or a similarity matrix.
    movies_df = (
        movies_df
        .drop_duplicates(subset='movie_title', keep='first')
        .reset_index(drop=True)
    )

    # A missing genre string is still NaN after .str.split and cannot be
    # iterated by MultiLabelBinarizer, so it becomes an empty label list.
    genre_lists = movies_df['genres'].str.split('|').apply(
        lambda labels: labels if isinstance(labels, list) else []
    )
    # .assign builds a new frame instead of writing into the filtered one
    # (avoids SettingWithCopyWarning).
    movies_df = movies_df.assign(genres=genre_lists)

    mlb = MultiLabelBinarizer()
    genres_encoded = pd.DataFrame(mlb.fit_transform(movies_df['genres']),
                                  columns=mlb.classes_,
                                  index=movies_df.index)

    # Merge all content characteristics into a single DataFrame. Column names
    # may repeat across blocks when the same person appears in several roles.
    features = pd.concat([
        genres_encoded,
        pd.get_dummies(movies_df['director_name']),
        pd.get_dummies(movies_df['actor_1_name']),
        pd.get_dummies(movies_df['actor_2_name']),
        pd.get_dummies(movies_df['actor_3_name'])
    ], axis=1)

    return ratings_df, movies_df, features

In [7]:
#### RECOMMANDATIONS COLLABORATIVES

In [8]:

def recos_collaboratives(movie_title, movies_df, ratings_df, n_recommandations=5):
    """Recommend movies rated similarly to ``movie_title`` (item-item).

    Parameters
    ----------
    movie_title : str
        Title to look up in ``movies_df['movie_title']``; the first matching
        row provides the ``movieId``.
    movies_df : pandas.DataFrame
        Catalogue with at least ``movie_title``, ``movieId`` and ``genres``.
    ratings_df : pandas.DataFrame
        Ratings with at least ``userId``, ``movieId`` and ``rating``.
    n_recommandations : int, default 5
        Maximum number of distinct movie ids to recommend.

    Returns
    -------
    pandas.DataFrame
        Columns ``movie_title`` and ``genres`` of the recommended catalogue
        rows, ordered from most to least similar.

    Raises
    ------
    IndexError
        If no row of ``movies_df`` has this title.
    KeyError
        If the movie has no rating in ``ratings_df``.
    """
    title_matches = movies_df.loc[movies_df['movie_title'] == movie_title, 'movieId']
    if title_matches.empty:
        raise IndexError(f"No movie titled {movie_title!r} in movies_df")
    movie_id = title_matches.iloc[0]

    # User x movie matrix; an absent rating counts as 0.
    # NOTE: this is a dense frame rebuilt on every call.
    ratings_matrix = pd.pivot_table(ratings_df, values='rating',
                                    index='userId', columns='movieId', fill_value=0)
    if movie_id not in ratings_matrix.columns:
        raise KeyError(f"Movie {movie_title!r} (movieId={movie_id}) has no ratings")

    # Only the query movie's similarities are needed, so compare its rating
    # vector against every movie instead of building the full movie x movie
    # matrix.
    movie_vectors = ratings_matrix.T
    scores = cosine_similarity(movie_vectors.loc[[movie_id]], movie_vectors)[0]

    # Remove the query movie by id: relying on it being sorted first is wrong
    # when another movie ties with a similarity of 1.
    similar_scores = pd.Series(scores, index=movie_vectors.index).drop(movie_id)

    # Ignore rated movies that are absent from the catalogue, otherwise they
    # use up recommendation slots and fewer rows than requested come back.
    similar_scores = similar_scores[similar_scores.index.isin(movies_df['movieId'])]

    ranked = similar_scores.sort_values(ascending=False, kind='stable')
    top_ids = ranked.index[:max(n_recommandations, 0)]

    # isin() alone returns rows in catalogue order; re-sort them by rank so
    # the most similar movie comes first.
    rank_by_id = pd.Series(range(len(top_ids)), index=top_ids)
    picked = movies_df[movies_df['movieId'].isin(top_ids)]
    picked = picked.sort_values('movieId', key=lambda ids: ids.map(rank_by_id),
                                kind='stable')
    return picked[['movie_title', 'genres']]

In [None]:
## RECOMMANDATIONS SUR LE CONTENU

In [9]:
def recos_contenus(movie_title, movies_df, content_features, n_recommandations=5):
    """Recommend the movies whose content features are closest to a movie.

    Parameters
    ----------
    movie_title : str
        Title to look up in ``movies_df['movie_title']``; the first matching
        row is used.
    movies_df : pandas.DataFrame
        Catalogue with at least ``movie_title`` and ``genres``.
    content_features : DataFrame or 2-D array-like
        One feature row per row of ``movies_df``, in the same order.
    n_recommandations : int, default 5
        Maximum number of movies to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``movie_title`` and ``genres`` of the most similar movies,
        ordered from most to least similar (the query movie is excluded).

    Raises
    ------
    IndexError
        If no row of ``movies_df`` has this title.
    ValueError
        If ``content_features`` and ``movies_df`` have different row counts.
    """
    # Work with the row *position*, not the index label: after duplicates are
    # dropped the labels of movies_df can have gaps, so a label is not a valid
    # offset into the similarity scores nor a valid argument for .iloc.
    title_hits = np.flatnonzero((movies_df['movie_title'] == movie_title).to_numpy())
    if title_hits.size == 0:
        raise IndexError(f"No movie titled {movie_title!r} in movies_df")
    movie_pos = int(title_hits[0])

    if hasattr(content_features, 'shape'):
        n_feature_rows = content_features.shape[0]
    else:
        n_feature_rows = len(content_features)
    if n_feature_rows != len(movies_df):
        raise ValueError(
            f"content_features has {n_feature_rows} rows but movies_df has "
            f"{len(movies_df)}"
        )

    # Only the query row is compared against all movies; the full
    # movie x movie matrix is never needed.
    feature_rows = getattr(content_features, 'iloc', content_features)
    query_row = feature_rows[movie_pos:movie_pos + 1]
    scores = cosine_similarity(query_row, content_features)[0]

    # Positional Series (0..n-1). Drop the query movie explicitly: skipping
    # the first sorted entry is wrong when another movie has identical
    # features and ties with it.
    similar_scores = pd.Series(scores).drop(movie_pos)
    ranked = similar_scores.sort_values(ascending=False, kind='stable')
    top_positions = ranked.index[:max(n_recommandations, 0)]

    recommandations = movies_df.iloc[top_positions][['movie_title', 'genres']]
    return recommandations