# PyCaret 2 NLP Example
This notebook was created using PyCaret 2.0. Last updated: 28-07-2020

In [None]:
# Report the installed PyCaret version (this notebook was written against PyCaret 2.0)
from pycaret.utils import version
version()

# 1. Loading Dataset

In [3]:
# Fetch the sample 'kiva' dataset bundled with PyCaret's dataset helper.
# The preview table under this cell comes from get_data itself: the cell ends
# in an assignment, so nothing else is displayed.
from pycaret.datasets import get_data
data = get_data(dataset='kiva')

Unnamed: 0,country,en,gender,loan_amount,nonpayment,sector,status
0,Dominican Republic,"""Banco Esperanza"" is a group of 10 women looki...",F,1225,partner,Retail,0
1,Dominican Republic,"""Caminemos Hacia Adelante"" or ""Walking Forward...",F,1975,lender,Clothing,0
2,Dominican Republic,"""Creciendo Por La Union"" is a group of 10 peop...",F,2175,partner,Clothing,0
3,Dominican Republic,"""Cristo Vive"" (""Christ lives"" is a group of 10...",F,1425,partner,Clothing,0
4,Dominican Republic,"""Cristo Vive"" is a large group of 35 people, 2...",F,4025,partner,Food,0


# 2. Initialize Setup

In [4]:
# Import only the pycaret.nlp functions this notebook calls (instead of `import *`),
# so every name used in later cells can be traced back to its source module.
from pycaret.nlp import (
    setup,
    models,
    create_model,
    assign_model,
    plot_model,
    evaluate_model,
    get_logs,
)

# Initialise the NLP experiment on the free-text `en` column of the kiva data.
# log_experiment / log_plots turn on experiment tracking under the name 'kiva1';
# the runs recorded this way are what get_logs() lists later in the notebook.
nlp1 = setup(data, target = 'en', session_id=123, log_experiment=True, log_plots = True, experiment_name='kiva1')

Description,Value
session_id,123
Documents,6818
Vocab Size,10671
Custom Stopwords,False


# 3. Create Model

In [5]:
# Show the available topic models; the ID column holds the strings passed to create_model() below
models()

Unnamed: 0_level_0,Name,Reference
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
lda,Latent Dirichlet Allocation,gensim/models/ldamodel
lsi,Latent Semantic Indexing,gensim/models/lsimodel
hdp,Hierarchical Dirichlet Process,gensim/models/hdpmodel
rp,Random Projections,gensim/models/rpmodel
nmf,Non-Negative Matrix Factorization,sklearn.decomposition.NMF


In [6]:
# Train a Latent Dirichlet Allocation topic model, leaving the number of topics at the library default
lda = create_model(model='lda')

In [7]:
# Train a Non-Negative Matrix Factorization model with an explicit topic count of six
nmf = create_model(model='nmf', num_topics=6)

# 4. Assign Labels

In [8]:
# Score every document with the trained LDA model, then preview the first rows.
# The result keeps the original columns and adds per-topic weights plus the dominant topic.
lda_results = assign_model(model=lda)
lda_results.head()

Unnamed: 0,country,en,gender,loan_amount,nonpayment,sector,status,Topic_0,Topic_1,Topic_2,Topic_3,Dominant_Topic,Perc_Dominant_Topic
0,Dominican Republic,group woman look receive small loan take small...,F,1225,partner,Retail,0,0.443424,0.17063,0.001865,0.384082,Topic 0,0.44
1,Dominican Republic,walk forward group entrepreneur seek second lo...,F,1975,lender,Clothing,0,0.335674,0.416064,0.001612,0.24665,Topic 1,0.42
2,Dominican Republic,group people hope start business group look re...,F,2175,partner,Clothing,0,0.568597,0.153035,0.002369,0.275999,Topic 0,0.57
3,Dominican Republic,live group woman look receive first loan young...,F,1425,partner,Clothing,0,0.285612,0.225983,0.001858,0.486547,Topic 3,0.49
4,Dominican Republic,vive large group people hope take loan many se...,F,4025,partner,Food,0,0.383666,0.345023,0.001839,0.269472,Topic 0,0.38


# 5. Analyze Model

In [9]:
# Draw the default plot for the LDA model (no `plot` argument given)
plot_model(model=lda)

In [10]:
# Bigram plot for the LDA model
plot_model(model=lda, plot='bigram')

In [11]:
# t-SNE plot for the LDA model
plot_model(model=lda, plot='tsne')

In [12]:
# Open the interactive widget that switches between the available plot types for the LDA model
evaluate_model(model=lda)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Frequency Plot', 'freque…

# 6. MLflow UI

In [14]:
# Display the logged experiment runs as a table (one row per run: setup and each create_model call)
get_logs()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.TT,params.beta_loss,params.shuffle,params.alpha,...,tags.USI,tags.Run ID,tags.mlflow.runName,tags.mlflow.source.type,tags.Run Time,tags.Source,tags.URI,tags.Size KB,tags.mlflow.user,tags.mlflow.source.name
0,2a5b8e7686ee4cd8aeb8a13eb377b9c9,8,FINISHED,file:///C:/Users/moezs/pycaret-demo-td/mlruns/...,2020-07-29 13:04:08.223000+00:00,2020-07-29 13:04:08.403000+00:00,0.98,frobenius,False,0.0,...,f26d,2a5b8e7686ee4cd8aeb8a13eb377b9c9,Non-Negative Matrix Factorization,LOCAL,1.0,create_model,e8df776e,240.69,moezs,C:\Users\moezs\Anaconda3\envs\pycaret-nightly-...
1,da7d3fc7b69b420a81b3347f34c4e18e,8,FINISHED,file:///C:/Users/moezs/pycaret-demo-td/mlruns/...,2020-07-29 13:04:06.700000+00:00,2020-07-29 13:04:07.179000+00:00,30.77,,,[1.5272782 2.6973162 0.12972674 1.8319774 ],...,f26d,da7d3fc7b69b420a81b3347f34c4e18e,Latent Dirichlet Allocation,LOCAL,30.79,create_model,69b3642c,764.39,moezs,C:\Users\moezs\Anaconda3\envs\pycaret-nightly-...
2,62e8f0b83bcb4241a696071d6081f353,8,FINISHED,file:///C:/Users/moezs/pycaret-demo-td/mlruns/...,2020-07-29 13:03:23.819000+00:00,2020-07-29 13:03:35.842000+00:00,,,,,...,f26d,62e8f0b83bcb4241a696071d6081f353,Session Initialized f26d,LOCAL,78.52,setup,49d40e54,,moezs,C:\Users\moezs\Anaconda3\envs\pycaret-nightly-...


In [13]:
# Start the MLflow tracking UI via a shell command.
# NOTE(review): the saved output of this cell reports that the server failed to start;
# the cause is not visible here — re-run and confirm before sharing the notebook.
!mlflow ui

Running the mlflow server failed. Please see the logs above for details.


# End
Thank you. For more information and tutorials on PyCaret, please visit https://www.pycaret.org