# <center><span style='background:yellow'> Réalisez une application de recommandation de contenu</span></center>
# <center><span style='background:yellow'>Chaîne de traitements IA bout-en-bout pour Content-Based</span></center>

---




In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so that files
# stored on the Drive can be read through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Chaîne de traitements bout en bout

In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
from sklearn.metrics.pairwise import cosine_similarity

# Root folder of the dataset on the mounted Drive. File names are appended to
# this string directly (no path joining), so the trailing slash is required.
data_path = "/content/drive/MyDrive/projet9/data/news-portal-user-interactions-by-globocom/"

###############################################################################################
###############################################################################################

# Load the embeddings file and the metadata file
def read_data(embeddings_filename, metadata_filename):
  """Load the article embeddings (pickle) and the article metadata (CSV).

  Both file names are appended directly to the module-level ``data_path``.
  The shape of each loaded object is printed as a progress trace.

  Args:
    embeddings_filename: name of the pickle file, relative to ``data_path``.
    metadata_filename: name of the CSV file, relative to ``data_path``.

  Returns:
    A ``(embeddings, metadata)`` tuple: the unpickled object and the
    DataFrame parsed from the CSV, in that order.
  """
  print("Reading articles embeddings and metadata files ...")

  # NOTE: unpickling can execute arbitrary code -- only load trusted files.
  embeddings = pd.read_pickle(data_path + embeddings_filename)
  print("Embeddings matrix shape", embeddings.shape)

  metadata = pd.read_csv(data_path + metadata_filename)
  print("Articles metadata shape", metadata.shape)

  loaded = (embeddings, metadata)
  return loaded


###############################################################################################
###############################################################################################

# Read every clicks file and merge them into one DataFrame
def merge_clicks_data():
  """Read all CSV files under ``data_path + 'clicks/clicks/'`` and stack them.

  Files are read in sorted name order so the row order of the result is
  reproducible (``os.listdir`` returns entries in arbitrary order). Each
  file keeps its own row index; the indices are not renumbered.

  Returns:
    A DataFrame holding the rows of every file, or an empty DataFrame when
    the directory contains no files.
  """
  print("Reading and merging clicks files ...")
  clicks_path = data_path + 'clicks/clicks/'
  filenames = sorted(os.listdir(clicks_path))
  print("Files number :", len(filenames))

  # Read everything first and concatenate once: calling pd.concat inside the
  # loop re-copies all previously accumulated rows for every new file.
  frames = [pd.read_csv(clicks_path + filename) for filename in filenames]

  # pd.concat raises on an empty list, so keep the empty-directory result.
  all_clicks_df = pd.concat(frames) if frames else pd.DataFrame([])

  print("All users clicks shape", all_clicks_df.shape)

  return all_clicks_df

###############################################################################################
###############################################################################################

# Return the content-based recommendations for a given user_id
def content_based_filtering(user_id, embeddings_matrix, articles_metadata, users_data, n_recommendations = 5):
  """Recommend the articles most similar to one the user already clicked.

  One article is drawn at random from the user's click history (the "seed")
  and every row of ``embeddings_matrix`` is ranked by cosine similarity to
  the seed's embedding.

  Args:
    user_id: value matched against the ``user_id`` column of ``users_data``.
    embeddings_matrix: 2-D NumPy array with one embedding per row. The clicked
      article id is used directly as a row index, so row ``i`` is assumed to
      belong to article id ``i`` -- TODO confirm against the dataset.
    articles_metadata: not used by the computation; kept so that existing
      callers passing it keep working.
    users_data: DataFrame with ``user_id`` and ``click_article_id`` columns.
    n_recommendations: maximum number of indices to return.

  Returns:
    1-D NumPy array of row indices into ``embeddings_matrix``, most similar
    first. The seed article itself is never part of the result.

  Raises:
    ValueError: if ``users_data`` holds no click for ``user_id``.
  """
  print("Preparing recommendations ...")
  # Articles the user has already clicked
  is_user_row = users_data['user_id'] == user_id
  clicked_article_ids = users_data.loc[is_user_row, 'click_article_id'].to_numpy()
  if clicked_article_ids.size == 0:
    raise ValueError(f"No clicks found for user_id {user_id}")

  # Pick one of them at random as the seed of the recommendation
  seed_article_id = np.random.choice(clicked_article_ids, size=1, replace=False)[0]

  # Cosine similarity between the seed and EVERY row of the full matrix.
  # The seed row is deliberately not removed before ranking: deleting it
  # shifts all later rows down by one, so the returned positions would no
  # longer match the rows of ``embeddings_matrix``.
  print("Cosine similarity ...")
  seed_embedding = embeddings_matrix[seed_article_id]
  row_norms = np.linalg.norm(embeddings_matrix, axis=1)
  denominators = row_norms * row_norms[seed_article_id]
  # All-zero embeddings get similarity 0 instead of a division by zero
  denominators[denominators == 0] = 1.0
  similarities = (embeddings_matrix @ seed_embedding) / denominators

  # Rank from most to least similar, then drop the seed from the ranking
  ranked_indices = np.argsort(similarities)[::-1]
  top_indices = ranked_indices[ranked_indices != seed_article_id][:n_recommendations]

  # Row indices of the recommended articles
  return top_indices

###############################################################################################
###############################################################################################

# Final end-to-end function
def recommendations_function(user_id, embeddings_filename, metadata_filename, n_recommendations = 5):
  """Load every input from disk and compute recommendations for ``user_id``.

  Chains ``read_data``, ``merge_clicks_data`` and ``content_based_filtering``.

  Args:
    user_id: user to compute recommendations for.
    embeddings_filename: embeddings file name, forwarded to ``read_data``.
    metadata_filename: metadata file name, forwarded to ``read_data``.
    n_recommendations: forwarded to ``content_based_filtering``.

  Returns:
    Whatever ``content_based_filtering`` returns for these inputs.
  """
  embeddings, metadata = read_data(embeddings_filename, metadata_filename)
  clicks = merge_clicks_data()
  return content_based_filtering(
      user_id,
      embeddings_matrix=embeddings,
      articles_metadata=metadata,
      users_data=clicks,
      n_recommendations=n_recommendations,
  )

### Test de la fonction

In [3]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

@interact(user_id=widgets.IntSlider(min=0, max= 322_896, value = 160_000), n_recommendations = fixed(5))
def recommendations_function(user_id, embeddings_filename= "articles_embeddings.pickle", metadata_filename="articles_metadata.csv", n_recommendations = 5):
  """Interactive variant: return the metadata rows of the recommended articles.

  The ``interact`` decorator binds ``user_id`` to an integer slider and pins
  ``n_recommendations`` to 5.

  NOTE(review): the embeddings, the metadata and all click files are read
  again on every invocation of this function; consider loading them once
  outside of it if the widget feels slow.

  Args:
    user_id: user to compute recommendations for.
    embeddings_filename: embeddings file name, forwarded to ``read_data``.
    metadata_filename: metadata file name, forwarded to ``read_data``.
    n_recommendations: forwarded to ``content_based_filtering``.

  Returns:
    The rows of the metadata DataFrame selected by position from the result
    of ``content_based_filtering``, with the last column left out.
  """
  embeddings, metadata = read_data(embeddings_filename, metadata_filename)
  clicks = merge_clicks_data()

  recommended_rows = content_based_filtering(
      user_id,
      embeddings_matrix=embeddings,
      articles_metadata=metadata,
      users_data=clicks,
      n_recommendations=n_recommendations,
  )

  # Positional row lookup; ``:-1`` keeps every column except the last one
  return metadata.iloc[recommended_rows, :-1]

interactive(children=(IntSlider(value=160000, description='user_id', max=322896), Text(value='articles_embeddi…