### Running Streamlit from Colab
+ streamlit
+ pyngrok

In [1]:
%%capture
!pip install streamlit
!pip install pyngrok==4.1.1

In [2]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/

Mounted at /content/drive
/content/drive/MyDrive


In [3]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np

# Load the links table (its movieId / imdbId columns are used further down
# to join the ratings with the movie metadata).
links_csv = '/content/drive/MyDrive/Data cinéma/links_small.csv'
links = pd.read_csv(links_csv)
def format_imdb(x):
    """Build an IMDb-style identifier from a numeric id.

    The number is left-padded with zeros to a minimum width of 7 and
    prefixed with 'tt' (e.g. 114709 -> 'tt0114709').
    """
    padded = format(x, '07')
    return 'tt' + padded
# Add the 'tt'-prefixed identifier to the links table so it can be joined
# with the metadata on `imdb_id`.
links['imdb_id'] = links['imdbId'].transform(format_imdb)

# Only the columns needed by the app are read from the metadata file.
metadata_columns = ['title','imdb_id','release_date','popularity','vote_average','vote_count']
movies_metadata = pd.read_csv(
    '/content/drive/MyDrive/Data cinéma/movies_metadata.csv',
    low_memory=False,
    usecols=metadata_columns,
).drop_duplicates()

# Keep the movies whose vote_count is at or above the 90th percentile.
vote_count_cutoff = movies_metadata['vote_count'].quantile(0.9)
enough_votes = movies_metadata['vote_count'] >= vote_count_cutoff
movies_metadata_small = movies_metadata.loc[enough_votes].copy()
movies_metadata_small['popularity'] = movies_metadata_small['popularity'].astype(float)

# Load the user ratings.
ratings_small = pd.read_csv('/content/drive/MyDrive/Data cinéma/ratings_small.csv',low_memory=False)

# Per-movie mean rating and number of ratings, indexed by movieId.
info_movies = ratings_small.groupby('movieId')['rating'].agg(moyenne='mean', nb_note='count')

# Keep the movies whose number of ratings is at or above the 90th percentile.
rating_count_cutoff = info_movies['nb_note'].quantile(0.9)
info_movies_filtre = info_movies.loc[info_movies['nb_note'] >= rating_count_cutoff]
frequently_rated = pd.DataFrame(info_movies_filtre.index)

# Attach movieId to the metadata through the links table, then restrict the
# metadata to the frequently rated movies.
metadata_with_ids = movies_metadata_small.merge(links[['movieId','imdb_id']], on='imdb_id')
movies_metadata_filtre = frequently_rated.merge(metadata_with_ids, on='movieId')

# Ratings of the retained movies only, ordered by user then movie.
ratings_filtre = (
    movies_metadata_filtre[['movieId']]
    .merge(ratings_small, on='movieId')[['movieId','userId','rating']]
    .sort_values(['userId','movieId'])
)

from sklearn.impute import KNNImputer

# Streamlit re-executes this script from the top on every widget interaction,
# so the (costly) KNN imputation is memoised and only recomputed when its input
# changes. `st.cache_data` is the current caching API; older Streamlit releases
# only provide `st.cache`, hence the fallback.
_memoize = getattr(st, 'cache_data', None) or st.cache


@_memoize
def impute_base_ratings(ratings):
    """Fill the missing (user, movie) ratings with a KNN imputer.

    Parameters
    ----------
    ratings : pandas.DataFrame
        Long-format ratings with the columns 'userId', 'movieId' and 'rating'
        (at most one row per user/movie pair, as required by `pivot`).

    Returns
    -------
    pandas.DataFrame
        Long-format frame with the columns 'userId', 'movieId' and
        'rating_imp', holding one value for every user/movie combination.
    """
    # users in rows, movies in columns, NaN where a user did not rate a movie
    rating_pivot = ratings.pivot(index='userId', columns='movieId', values='rating')
    imputer = KNNImputer(n_neighbors=30, weights="uniform")
    rating_pivot_imp = pd.DataFrame(imputer.fit_transform(rating_pivot), columns=rating_pivot.columns)
    # fit_transform returns a bare array: put the user ids back before melting
    rating_pivot_imp['userId'] = rating_pivot.index
    return rating_pivot_imp.melt(id_vars=['userId'], var_name='movieId', value_name='rating_imp')


rating_imp = impute_base_ratings(ratings_filtre)


# Catalogue used by the UI, most popular movies first; `selection` (titles
# only) provides the options of the multiselect widget.
catalogue_columns = ['movieId','title','release_date','vote_count', 'vote_average', 'popularity']
info_film = movies_metadata_filtre[catalogue_columns].sort_values('popularity', ascending=False)
selection = info_film[['title']]

st.title('Algo de recommandation film K plus proche voisin')

film = st.multiselect('Choix des films que vous voulez noter', selection)

# Resolve each title to exactly one movieId. Several movies can share a title;
# `info_film` is sorted by decreasing popularity, so keeping the first
# occurrence picks the most popular one. (Calling int() on the filtered Series,
# as done previously, fails as soon as a title matches more than one row.)
title_to_movie_id = info_film.drop_duplicates('title').set_index('title')['movieId']

# One slider per selected movie. A movie is only asked once, so that the
# (userId, movieId) pairs stay unique for the pivot done at recommendation time.
movieId = []
note = []
for titre in film:
  movie_id = int(title_to_movie_id.loc[titre])
  if movie_id in movieId:
    continue
  movieId.append(movie_id)
  note.append(st.slider("Choisir la note du film : " + str(titre),1,5))

# Ratings entered by the visitor, stored under the reserved userId 0 with the
# same column names as the imputed base ratings.
note_ind = pd.DataFrame({'movieId': movieId, 'rating_imp': note}, dtype='int64')
note_ind['userId'] = 0
note_ind = note_ind[['movieId','rating_imp','userId']]

if(st.button("Lancer l'algo de recommandation : ")):
  if note_ind.empty:
    # Without any rating, user 0 has no row in the matrix: nothing can be imputed.
    st.warning("Veuillez sélectionner et noter au moins un film avant de lancer l'algo.")
  else:
    # Stack the visitor's ratings (userId 0) on the imputed base ratings and
    # rebuild the user x movie matrix; the movies the visitor did not rate are NaN.
    concat_rating = pd.concat([note_ind, rating_imp])
    concat_rating_pivot = concat_rating.pivot(index='userId', columns='movieId', values='rating_imp')

    # Second KNN pass (20 neighbours) to estimate the visitor's missing ratings.
    imputer = KNNImputer(n_neighbors=20, weights="uniform")
    concat_rating_pivot_imp = pd.DataFrame(imputer.fit_transform(concat_rating_pivot), columns=concat_rating_pivot.columns)
    concat_rating_pivot_imp['userId'] = concat_rating_pivot.index
    concat_rating_imp = concat_rating_pivot_imp.melt(id_vars=['userId'], var_name='movieId', value_name='rating_imp_2')

    # Keep the visitor's estimated ratings and drop the movies they rated
    # themselves, leaving only unseen movies as candidates.
    concat_rating_imp = concat_rating_imp[concat_rating_imp['userId'] == 0]
    deja_note = concat_rating_imp['movieId'].isin(note_ind['movieId'])
    concat_rating_imp_fin = concat_rating_imp.loc[~deja_note, ['movieId','userId','rating_imp_2']]

    st.write('liste des films notés : \n')
    st.write(note_ind.merge(info_film,on='movieId').sort_values("rating_imp",ascending=False)[['title','release_date','rating_imp','vote_count', 'vote_average', 'popularity']])

    # The 8 unseen movies with the highest estimated rating.
    st.write('\ntop des films conseillés non visionnés : \n')
    st.write(concat_rating_imp_fin.merge(info_film,on='movieId').sort_values("rating_imp_2",ascending=False).head(8)[['title','release_date','rating_imp_2','vote_count', 'vote_average', 'popularity']])

Writing app.py


In [4]:
!ngrok authtoken 2692uSvWIPHIb3mFMR5FTGkQyqC_65cGzN5yvCaYHmyAnUNA3

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [5]:
from pyngrok import ngrok

# Start the Streamlit server for app.py as a background shell job, with its
# standard output and error streams redirected to /dev/null.
!streamlit run app.py&>/dev/null&
# Open an ngrok tunnel to local port 8501 and display the public URL.
# NOTE(review): 8501 is presumably the port Streamlit listens on by default —
# confirm if the server is started with a custom port.
public_url = ngrok.connect(port='8501')
public_url

'http://e76d-34-86-125-197.ngrok.io'

In [6]:
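# Cleanup helpers - uncomment and run the one you need:
# !kill 904            # stop a process by PID (904 came from an earlier session; look up the current PID with pgrep)
# !ngrok               # call the ngrok command-line tool directly
# !pgrep streamlit     # list the PID(s) of the running Streamlit server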