# Prerequisite

- Create an account on ngrok (https://ngrok.com/). ngrok provides a real-time web UI where you can inspect all HTTP traffic running over your tunnels.

- Get Authentication Token (https://dashboard.ngrok.com/get-started/your-authtoken)

- The dashboard shows the token as part of a command of the form `./ngrok authtoken xxxxxxxxxxxxxxxxxxxxxxxxxx`; the token itself is the string after `authtoken`.

- Use the auth token to connect your Ngrok account.

# Install packages

Install streamlit and pyngrok

In [None]:
!pip install streamlit

In [None]:
!pip install pyngrok

# 1. Libraries

In [26]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= '0.20'

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
    IS_COLAB = True
except Exception:
    IS_COLAB = False

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= '2.0'

if not tf.config.list_physical_devices('GPU'):
    print('No GPU was detected. LSTMs and CNNs can be very slow without a GPU.')
    if IS_COLAB:
        print('Go to Runtime > Change runtime and select a GPU hardware accelerator.')

# Common imports
import os
import shutil
import itertools
import glob
import numpy as np
import matplotlib.image as mpimg
import pandas as pd

import string
import pickle

from tqdm import tqdm
from time import time
from PIL import Image


# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# mpl.rc('axes', labelsize=14)
# mpl.rc('xtick', labelsize=12)
# mpl.rc('ytick', labelsize=12)

No GPU was detected. LSTMs and CNNs can be very slow without a GPU.
Go to Runtime > Change runtime and select a GPU hardware accelerator.


In [27]:
# streamlit
import streamlit as st 
import streamlit.components.v1 as stc

# ngrok
from pyngrok import ngrok

In [28]:
# For text mining
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# 2. Connect to Google Drive

In [29]:
# Check if NVIDIA GPU is enabled
# !nvidia-smi

In [30]:
# from google.colab import drive
# drive.mount('/content/gdrive')
# !ls

In [31]:
# %cd /content/gdrive/My Drive/
# !ls

In [32]:
! pwd

/content


# 3. Paths to Data Directories

In [33]:
DATA_PATH = '/content/sample_data/'

# 4. Read data

In [34]:
data_file = os.path.join(DATA_PATH, "courses_cleaned_data.csv")
df = pd.read_csv(data_file)
df.head(4)

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,duration,duration_unit,published_timestamp,subject,year,cleaned_title
0,1070968,Ultimate Investment Banking Course,https://www.udemy.com/ultimate-investment-bank...,True,200,2147,23,51,All Levels,1.5 hours,90,mins,2017-01-18T20:58:58Z,Business Finance,2017,ultimate investment banking course
1,1113822,Complete GST Course & Certification - Grow You...,https://www.udemy.com/goods-and-services-tax/,True,75,2792,923,274,All Levels,39 hours,2340,mins,2017-03-09T16:34:20Z,Business Finance,2017,complete gst course certification grow ca p...
2,1006314,Financial Modeling for Business Analysts and C...,https://www.udemy.com/financial-modeling-for-b...,True,45,2174,74,51,Intermediate Level,2.5 hours,150,mins,2016-12-19T19:26:30Z,Business Finance,2016,financial modeling business analysts consult...
3,1210588,Beginner to Pro - Financial Analysis in Excel ...,https://www.udemy.com/complete-excel-finance-c...,True,95,2451,11,36,All Levels,3 hours,180,mins,2017-05-30T20:07:24Z,Business Finance,2017,beginner pro financial analysis excel


In [35]:
df.columns

Index(['course_id', 'course_title', 'url', 'is_paid', 'price',
       'num_subscribers', 'num_reviews', 'num_lectures', 'level',
       'content_duration', 'duration', 'duration_unit', 'published_timestamp',
       'subject', 'year', 'cleaned_title'],
      dtype='object')

# 5. Write code to file

 - Use the `%%writefile` cell magic to save a cell's code to a file.
 - `%%writefile <filename.py>` writes the contents of the current cell to `<filename.py>`, overwriting the file if it already exists.

In [116]:
%%writefile app.py
import os
import pandas as pd 
import numpy as np

import streamlit as st 
import streamlit.components.v1 as stc

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Page-level Streamlit settings: wide layout, sidebar expanded on load,
# an ice-cube emoji as the page icon, and an empty page title.
st.set_page_config(
    page_title="",
    page_icon="🧊",
    layout="wide",
    initial_sidebar_state="expanded")


def load_data(data_path):
  """
  Read the CSV found at `data_path` and hand it back as a pandas DataFrame.
  """
  courses = pd.read_csv(data_path)
  return courses


def similarity_cosine_matrix(X):
  """
  Build the pairwise cosine-similarity matrix for a collection of texts.

  The texts in `X` are turned into token-count vectors with a default
  CountVectorizer, and the cosine similarity between every pair of those
  vectors is returned.
  """
  vectorizer = CountVectorizer()
  token_counts = vectorizer.fit_transform(X)
  return cosine_similarity(token_counts)




@st.cache  # This function will be cached
def recommend_courses(df, title, cosine_similarity_scores, n_recommendations = 10, verbose = False):
  """
  Find the courses most similar to the course named `title`.

  Parameters
  ----------
  df : DataFrame with a 'course_title' column. Row i of `df` must correspond
      to row/column i of `cosine_similarity_scores`.
  title : exact course title to look up. If several rows share the title,
      the first one is used.
  cosine_similarity_scores : square pairwise similarity matrix
      (n_courses x n_courses).
  n_recommendations : maximum number of rows to return.
  verbose : when True, print the details of the looked-up course.

  Returns
  -------
  A new DataFrame (the input is not modified) holding the most similar
  rows, best match first, with an extra "cosine_similarity_scores" column
  (scores rounded to 3 decimals). The queried course itself is excluded.

  Raises
  ------
  KeyError if no row has exactly this `title`.
  """
  # Map course_title -> row *position* (not index label), so the lookup stays
  # consistent with the positional indexing of the matrix and df.iloc even
  # when df does not carry a default RangeIndex.
  title_to_pos = pd.Series(np.arange(len(df)), index=df['course_title'])
  # Keep only the first occurrence of each title so the lookup below always
  # yields a single position.
  title_to_pos = title_to_pos[~title_to_pos.index.duplicated(keep='first')]

  # Position of the requested course (KeyError if the title is unknown)
  course_pos = int(title_to_pos[title])

  # Details of the course in consideration
  if verbose :
    print("Details of the course (id = ", course_pos, "):")
    print(df.iloc[course_pos])
    print()

  # Similarity of every course to the requested one
  row_scores = np.round(np.asarray(cosine_similarity_scores)[course_pos], 3)

  # Best match first; the stable sort keeps the original row order among ties
  order = np.argsort(-row_scores, kind="stable")

  # Drop the course itself explicitly: it is not guaranteed to sort first
  # when another course ties with it at the top score.
  order = order[order != course_pos]
  order = order[:n_recommendations]

  # Copy before adding the score column so the caller's frame is untouched
  recommended_courses = df.iloc[order].copy()
  recommended_courses["cosine_similarity_scores"] = row_scores[order]

  return recommended_courses




@st.cache  # This function will be cached
def search_term_if_not_found(term, df):
  """
  Return the rows of `df` whose 'course_title' contains `term`.

  `term` is matched as a literal, case-sensitive substring rather than as a
  regular expression, so input such as "C++" or "(beginner" cannot raise a
  regex error. Rows with a missing title never match.
  """
  title_matches = df['course_title'].str.contains(term, regex=False, na=False)
  return df[title_matches]





# HTML card used to render one recommended course. It is filled in with
# str.format(); the five positional `{}` placeholders are, in order:
# course title, similarity score, course URL, price, number of subscribers.
widget = """
<div style="width:100%;height:50%;margin:1px;padding:6px;position:relative;border-radius:2px;border-bottom-right-radius: 6px;
box-shadow:0 0 5px 5px #ccc; background-color: #F1F5F5;
  border-left: 0px solid #6c6c6c;">

<h4>{}</h4>
<p style="color:black;"><span style="color:black;">📈Similarity Score: </span>{}</p>
<p style="color:black;"><span style="color:black;">🔗</span><a href="{}" target="_blank">Link</a></p>
<p style="color:black;"><span style="color:black;">💲Price: $</span>{}</p>
<p style="color:black;"><span style="color:black;">🧑‍🎓👨🏽‍🎓 Subscribers: </span>{}</p>

</div>
"""




def main():
  """
  Build the Streamlit UI of the course recommender.

  A sidebar menu switches between three pages:
    - "Home": the most-subscribed courses, plus a search by subject and
      title substring.
    - "Recommend Courses": courses similar to an exactly-named course; if
      the title cannot be resolved, courses whose title contains the query
      are shown as suggestions instead.
    - "About": a one-line description of the app.
  """
  import html  # stdlib; escapes values interpolated into the HTML card

  st.title("Hoola: Course Recommender")

  menu = ["Home", "Recommend Courses", "About"]

  choice = st.sidebar.selectbox("Menu", menu)

  df = load_data("/content/sample_data/courses_cleaned_data.csv")

  if choice == "Home":
    st.subheader("Most popular courses")

    # Show courses
    show_cols = ['course_title', 'price', 'num_subscribers', 'num_lectures', 'level', 'content_duration', 'year','subject']
    n_shown = st.selectbox("Number of courses to show:", [5,10,20,30, 50, 100, 200, 500, len(df)])
    st.dataframe(df[show_cols].sort_values(by='num_subscribers', ascending=False).head(n_shown))


    # Find courses
    st.subheader("Find your course")
    find_cols = ['course_title', 'price', 'num_subscribers',
          'num_lectures', 'level', 'content_duration', 'year']

    cat = st.selectbox("Select subject:", ['Business Finance', 'Graphic Design', 'Musical Instruments', 'Web Development'])
    query = st.text_input("Search:")
    nf_shown = st.selectbox("Number of courses to show:", [5,10,20,30,100])

    if st.button("Submit"):
      # Match the query as a literal substring: it is free-form user input,
      # and regex metacharacters (e.g. "C++") would otherwise raise.
      # Rows with a missing title never match.
      title_matches = df['course_title'].str.contains(query, regex=False, na=False)
      found_df = df.loc[(df["subject"] == cat) & title_matches, find_cols].sort_values(by='num_subscribers', ascending=False)
      st.dataframe(found_df.head(nf_shown))



  elif choice == "Recommend Courses":
    st.subheader("Course recommendation")

    cosine_similarity_scores = similarity_cosine_matrix(df['cleaned_title'])

    query = st.text_input("Search:")

    n_rec = st.number_input("Number of recommended courses:", 3, 30, 5)
    if st.button("Submit"):
      results = None

      # A blank query can never be an exact title, so skip the lookup.
      if query.strip():
        try:
          results = recommend_courses(df, query, cosine_similarity_scores, n_recommendations = n_rec)
        except (KeyError, ValueError):
          # KeyError: no course has exactly this title.
          # ValueError: the title could not be resolved to a single course.
          # Only the lookup is guarded, so rendering errors below are not
          # misreported as "No courses found".
          results = None

      if results is None:
        st.warning("No courses found! Please enter the exact title of the course.")
        st.info("Below are some suggestions:")
        sugg_cols = ['course_title', 'price', 'num_lectures', 'level', 'content_duration', 'year','subject']
        result_df = search_term_if_not_found(query, df)
        st.dataframe(result_df[sugg_cols])
      else:
        # Show as JSON
        with st.expander("Results as JSON"):
          results_json = results.to_dict('index')
          st.write(results_json)

        for _, row in results.iterrows():
          # Title and URL come from the data file and end up inside HTML
          # markup / an href attribute, so escape them first.
          rec_title   = html.escape(str(row["course_title"]))
          rec_score   = row["cosine_similarity_scores"]
          rec_url     = html.escape(str(row["url"]), quote=True)
          rec_price   = row["price"]
          rec_num_sub = row["num_subscribers"]

          stc.html(widget.format(rec_title, rec_score, rec_url, rec_price, rec_num_sub),height=350)

  else:
    st.subheader("About")
    st.text("Hoola: Course Recommender with Python, Pandas, Cosine Similarity and Streamlit")



# Build the UI only when this file is executed as a script (not when imported).
if __name__ == '__main__':
  main()

Overwriting app.py


In [37]:
%%writefile test_app.py
import streamlit as st 
import streamlit.components.v1 as stc

def main():
  """
  Minimal two-page Streamlit app used to check that Streamlit runs from Colab.

  Shows a title and subheader, then a sidebar menu that switches between a
  "Home" page and an "About" page.
  """
  st.title("Title")
  st.subheader("Run streamlit from colab")

  pages = ["Home", "About"]
  selected_page = st.sidebar.selectbox("Menu", pages)

  # Anything other than 'Home' is the About page
  if selected_page != 'Home':
    st.subheader("About")
    st.text("Streamlit & Colab")
  else:
    st.subheader("Streamlit From Colab")



# Build the UI only when this file is executed as a script (not when imported).
if __name__ == '__main__':
  main()

Overwriting test_app.py


In [38]:
# Check if the test_app.py exists
!pwd
!ls

/content
app.py	sample_data  test_app.py


# Run the app on ngrok

Connect to ngrok

In [39]:
# SECURITY: never paste a real authtoken into the notebook -- it is saved with
# the file and leaks to anyone who can read it. The token that was previously
# hardcoded in this cell should be revoked from the ngrok dashboard.
import os
from getpass import getpass

# Prefer the NGROK_AUTH_TOKEN environment variable; otherwise prompt for the
# token without echoing it into the notebook output.
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok authtoken: ")

# Register the token through pyngrok instead of a shell command, so the
# secret never appears in the cell source.
ngrok.set_auth_token(ngrok_auth_token)

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [40]:
# Run the processes in the background.
# !streamlit run test_app.py &>/dev/null&
# !streamlit run --server.port 80 test_app.py &>/dev/null&
!streamlit run --server.port 80 app.py &>/dev/null&

In [41]:
! pgrep streamlit

235
334


Use pyngrok to create a tunnel, passing in the port that Streamlit is serving on (i.e. 80).

In [42]:
# Set up a tunnel to the port Streamlit is serving on. This must match the
# --server.port value passed to `streamlit run`.
STREAMLIT_PORT = 80

# Pass the port positionally.
# NOTE(review): in pyngrok >= 5 the first parameter is named `addr`; a `port=`
# keyword is not used as the tunnel address there and appears to have worked
# only because the default address is also 80 -- confirm against the installed
# pyngrok version.
pub_url = ngrok.connect(STREAMLIT_PORT)
pub_url

2021-08-26 15:06:21.131 INFO    pyngrok.ngrok: Opening tunnel named: http-80-a4cb5cae-56a3-4d15-ae39-bf236870c133
2021-08-26 15:06:21.136 INFO    pyngrok.process.ngrok: t=2021-08-26T15:06:21+0000 lvl=info msg=start pg=/api/tunnels id=42142bec922d4709
2021-08-26 15:06:21.286 INFO    pyngrok.process.ngrok: t=2021-08-26T15:06:21+0000 lvl=info msg="started tunnel" obj=tunnels name="http-80-a4cb5cae-56a3-4d15-ae39-bf236870c133 (http)" addr=http://localhost:80 url=http://d886-34-125-124-115.ngrok.io
2021-08-26 15:06:21.289 INFO    pyngrok.process.ngrok: t=2021-08-26T15:06:21+0000 lvl=info msg="started tunnel" obj=tunnels name=http-80-a4cb5cae-56a3-4d15-ae39-bf236870c133 addr=http://localhost:80 url=https://d886-34-125-124-115.ngrok.io
2021-08-26 15:06:21.291 INFO    pyngrok.process.ngrok: t=2021-08-26T15:06:21+0000 lvl=info msg=end pg=/api/tunnels id=42142bec922d4709 status=201 dur=150.242111ms
2021-08-26 15:06:21.297 INFO    pyngrok.process.ngrok: t=2021-08-26T15:06:21+0000 lvl=info msg=sta

<NgrokTunnel: "http://d886-34-125-124-115.ngrok.io" -> "http://localhost:80">

A public URL (https://*******.ngrok.io) will be created, and your app will be reachable at that address.

# Terminate the App

In [117]:
# !ps -eaf | grep streamlit
!pgrep streamlit

235
334


In [118]:
# Kill the processes
# ! kill <id>

In [119]:
# Disconnect ngrok
ngrok.kill()

2021-08-26 17:14:01.095 INFO    pyngrok.process: Killing ngrok process: 239
