### Running Streamlit from Colab
+ streamlit
+ pyngrok

In [1]:
%%capture
!pip install streamlit
!pip install pyngrok==4.1.1

In [2]:
# Mount Google Drive at /content/drive so that the CSV files that app.py reads
# from /content/drive/MyDrive/... are available to the Streamlit process.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import webbrowser
import re
from sklearn.impute import KNNImputer

### Data loading
### The CSV files are stored on Google Drive (mounted under /content/drive).
DATA_DIR = '/content/drive/MyDrive/Data cinéma'

rating_imp = pd.read_csv(DATA_DIR + '/rating_imp.csv')
movies_metadata_filtre = pd.read_csv(DATA_DIR + '/movies_metadata_filtre.csv')
# Similarity matrix; it is indexed below with the row positions stored in `indices`.
cosine_sim = pd.read_csv(DATA_DIR + '/cosine_sim.csv')
cosine_sim = np.array(cosine_sim)
# title -> row label of movies_metadata_filtre
indices = pd.Series(movies_metadata_filtre.index, index=movies_metadata_filtre['title'])

# Film catalogue shown to the user, most popular first.
info_film = movies_metadata_filtre.sort_values('popularity', ascending=False)[['movieId','title','release_date','vote_count', 'vote_average', 'popularity']]
selection = info_film[['title']]
st.title('Algorithme de recommandation de film')

film = st.multiselect('Choix des films que vous voulez noter', selection)

# One slider per selected film; `movieId[i]` and `note[i]` describe `film[i]`.
note = []
movieId = []
for titre in film:
  matching_ids = info_film.loc[info_film['title'] == titre, 'movieId']
  # Take a scalar id, not the whole Series: several films can share one title,
  # and info_film is sorted by popularity, so the first match is the most popular.
  movieId.append(int(matching_ids.iloc[0]))
  note.append(st.slider("Choisir la note du film : " + str(titre),1,5))

# Ratings of the interactive user, in the same long format as `rating_imp`.
# userId 0 identifies that user -- assumes no userId 0 exists in rating_imp (TODO confirm).
note_ind = pd.DataFrame(np.column_stack([movieId, note]), columns = ['movieId', 'rating_imp'])
note_ind['userId'] = 0
note_ind = note_ind[['movieId','rating_imp','userId']]

def get_recommendations_imputation(note_ind, n_neighbors=20, user_id=0):
  """Estimate the ratings of one user for the films they have not rated (collaborative filtering).

  The user's ratings are appended to the module-level `rating_imp` table, the
  result is pivoted into a user x movie matrix, and the missing cells are
  filled with a KNN imputer.

  Parameters
  ----------
  note_ind : DataFrame with columns 'movieId', 'rating_imp', 'userId'
      Ratings entered by the user.
  n_neighbors : int, default 20
      Number of neighbouring users used by the imputer.
  user_id : default 0
      The 'userId' value that identifies the user's rows in `note_ind`.

  Returns
  -------
  DataFrame with columns 'movieId' and 'note_estim': one row per film that
  `user_id` has not rated, ordered by movieId. Empty when `user_id` has no rating.
  """
  all_ratings = pd.concat([note_ind, rating_imp])
  ratings_matrix = all_ratings.pivot(index='userId', columns='movieId', values='rating_imp')
  # KNNImputer drops columns that contain no observed value by default; remove
  # them up front so the imputed array and the column labels keep the same width.
  ratings_matrix = ratings_matrix.dropna(axis=1, how='all')

  if user_id not in ratings_matrix.index:
    # Nothing was rated: there is no row to impute for this user.
    return pd.DataFrame({'movieId': pd.Series(dtype='int64'),
                         'note_estim': pd.Series(dtype='float64')})

  imputer = KNNImputer(n_neighbors=n_neighbors, weights="uniform")
  filled = pd.DataFrame(imputer.fit_transform(ratings_matrix),
                        index=ratings_matrix.index,
                        columns=ratings_matrix.columns)

  # Only the requested user's row is needed, so read it directly instead of
  # melting the whole matrix and merging it back against every rating.
  already_rated = ratings_matrix.loc[user_id].notna()
  unseen = filled.loc[user_id][~already_rated]
  return unseen.rename('note_estim').rename_axis('movieId').reset_index()

def get_recommendations_text_minning(note_ind, n_candidates=500):
  """Rank films by their rating-weighted similarity to the films the user rated (content-based).

  Each rated film contributes its row of the module-level `cosine_sim` matrix,
  weighted by the rating given; the weighted rows are averaged into one score per film.

  Parameters
  ----------
  note_ind : DataFrame with columns 'movieId', 'rating_imp', 'userId'
      Ratings entered by the user.
  n_candidates : int, default 500
      The best `n_candidates + number of rated films` scores are kept before
      the rated films themselves are removed.

  Returns
  -------
  DataFrame with columns 'movieId' and 'similarity', best score first, without
  the rated films. Empty when none of the rated films is found in `info_film`.
  """
  rated = note_ind.merge(info_film, on='movieId')
  if rated.empty:
    # No rated film: the weighted average would divide by zero.
    return movies_metadata_filtre[['movieId']].iloc[[]].assign(similarity=np.array([], dtype=float))

  # Positions of the rated films; a title shared by several films yields several positions.
  idx = indices.loc[rated['title'].tolist()]
  # Read the ratings from the merged frame and re-align them on the looked-up
  # titles, so every similarity row is paired with the rating of its own film.
  weights = rated.drop_duplicates('title').set_index('title')['rating_imp']
  note = weights.reindex(idx.index).to_numpy()

  scores = note @ cosine_sim[idx.to_numpy()] / note.sum()

  # Stable descending order: equal scores keep their original relative order.
  ranked = np.argsort(-scores, kind='stable')[:n_candidates + len(idx)]
  already_rated = set(idx.tolist())
  movie_indices = [pos for pos in ranked.tolist() if pos not in already_rated]

  return movies_metadata_filtre[['movieId']].iloc[movie_indices].assign(similarity=scores[movie_indices])

from urllib.parse import quote_plus


def _write_table(frame):
  """Render `frame` on the page as an HTML table with its index column hidden."""
  styler = frame.style
  # Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
  # Styler.hide(axis="index") is its replacement. Fall back for older pandas.
  if hasattr(styler, "hide"):
    styler = styler.hide(axis="index")
  else:
    styler = styler.hide_index()
  st.write(styler.to_html(), unsafe_allow_html=True)


# Run the three recommenders on demand and display their rankings.
if(st.button("Lancer l'algo de recommandation : ")):
  if note_ind.empty:
    # Without any rating there is nothing to impute or to weight.
    st.warning("Veuillez sélectionner et noter au moins un film avant de lancer l'algorithme.")
  else:
    concat_rating_imputation = get_recommendations_imputation(note_ind)
    concat_rating_text_minning = get_recommendations_text_minning(note_ind)

    # Hybrid score: content similarity weighted by the squared estimated rating.
    base_hybride = concat_rating_text_minning.merge(concat_rating_imputation)
    base_hybride = base_hybride.assign(hybride=base_hybride['similarity']*base_hybride['note_estim']**2)

    film_N = note_ind.merge(info_film,on='movieId').sort_values("rating_imp",ascending=False)[['title','release_date','rating_imp','vote_count', 'vote_average', 'popularity']]
    film_M1 = concat_rating_imputation.merge(info_film,on='movieId').sort_values("note_estim",ascending=False)[['title','release_date','note_estim','vote_count', 'vote_average', 'popularity']]
    film_M2 = concat_rating_text_minning.merge(info_film,on='movieId').sort_values("similarity",ascending=False)[['title','release_date','similarity','vote_count', 'vote_average', 'popularity']]
    film_MH = base_hybride.merge(info_film,on='movieId').sort_values("hybride",ascending=False)[['title','release_date','hybride', 'note_estim','similarity']]

    if not film_MH.empty:
      # Offer the Google search as a link on the page: webbrowser.open_new_tab()
      # would act on the machine running Streamlit (the Colab VM), not on the
      # visitor's browser. quote_plus() URL-encodes the whole title.
      url = "https://www.google.com/search?q=" + quote_plus(str(film_MH['title'].iloc[0]))
      st.markdown("[Rechercher le film le mieux classé sur Google](" + url + ")")

    st.write('\ntop des films conseillés non visionnés : Méthode Hybride: \n')
    _write_table(film_MH.head(8))

    st.write('liste des films notés : \n')
    _write_table(film_N)

    st.write('\ntop des films conseillés non visionnés : Méthode collaborative: \n')
    _write_table(film_M1.head(5))

    st.write('\ntop des films conseillés non visionnés : Méthode Basée sur le contenu: \n')
    _write_table(film_M2.head(5))

 

Writing app.py


In [4]:
# The ngrok authtoken is a credential and must never be stored in the notebook.
# It is read from the NGROK_AUTHTOKEN environment variable, falling back to a
# hidden interactive prompt.
# NOTE: the token that used to be hard-coded in this cell has been exposed and
# should be revoked from the ngrok dashboard.
import os
from getpass import getpass

from pyngrok import ngrok

ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: "))

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [5]:
from pyngrok import ngrok

# Launch the Streamlit app as a background shell job, discarding its output.
!streamlit run app.py&>/dev/null&
# Open an ngrok tunnel to local port 8501 -- presumably the port Streamlit
# serves on by default; confirm if the app is started with another port.
public_url = ngrok.connect(port='8501')
# Last expression of the cell: displays the public URL of the tunnel.
public_url

'http://4a77-34-125-60-112.ngrok.io'

In [6]:
# !ngrok
# !pgrep streamlit

In [7]:
# !kill <PID>   # stop the app: replace <PID> with the id printed by `pgrep streamlit`