<a href="https://colab.research.google.com/github/karaage0703/stable-diffusion-colab-tools/blob/main/005_prompt_analytics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prompt Analytics


Reference:
- https://github.com/takapy0210/nlplot
- https://speakerdeck.com/takapy/zi-ran-yan-yu-ke-shi-hua-raiburari-nlplot-nogoshao-jie

# Setup
Install nlplot

In [None]:
!pip install -qq nlplot

Import libraries

In [None]:
import nlplot
import pandas as pd
import plotly
from plotly.subplots import make_subplots
from plotly.offline import iplot
import matplotlib.pyplot as plt

Download csv file

In [None]:
!wget https://raw.githubusercontent.com/karaage0703/stable-diffusion-colab-tools/main/data/prompt_list.csv

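Note: To analyze your own CSV instead of the sample data:

Uncomment and run the following cell to upload your own CSV file. The read step below loads `prompt_list.csv`, so name your file accordingly or edit that path.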

In [None]:
# Optional step: uncomment the two lines below to upload a CSV from your machine.
# NOTE(review): google.colab is presumably only importable inside Colab — confirm
# before running this notebook elsewhere.
# from google.colab import files
# uploaded = files.upload()

Read the CSV file

In [None]:
# Load the prompt list from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer='prompt_list.csv')
df.head(n=5)

## Analytics

### Analytics setup

In [None]:
# The target column should hold either lists of tokens or
# space-separated strings.
npt = nlplot.NLPlot(
    df,
    target_col='prompt_list',
)

In [None]:
# Let nlplot compute the stopword list (top_n=8, min_freq=0); the bare
# expression on the last line displays it as the cell output.
stopwords = npt.get_stopword(
    top_n=8,
    min_freq=0,
)
stopwords

### N-gram bar chart

uni-gram

In [None]:
# Bar chart of the 20 most frequent single words, using the stopword list
# computed earlier.
fig_unigram = npt.bar_ngram(
    # what to count
    ngram=1,
    top_n=20,
    stopwords=stopwords,
    # labels
    title='uni-gram',
    xaxis_label='word_count',
    yaxis_label='word',
    # figure layout
    width=800,
    height=1100,
    color=None,
    horizon=True,
    # output options
    verbose=False,
    save=False,
)
fig_unigram.show()

bi-gram

In [None]:
# Bar chart of the 20 most frequent word pairs, using the stopword list
# computed earlier.
fig_bigram = npt.bar_ngram(
    # what to count
    ngram=2,
    top_n=20,
    stopwords=stopwords,
    # labels
    title='bi-gram',
    xaxis_label='word_count',
    yaxis_label='word',
    # figure layout
    width=800,
    height=1100,
    color=None,
    horizon=True,
    # output options
    verbose=False,
    save=False,
)
fig_bigram.show()

### N-gram tree Map

In [None]:
# Tree map over the top 50 uni-grams, using the stopword list computed earlier.
fig_treemap = npt.treemap(
    # what to count
    ngram=1,
    top_n=50,
    stopwords=stopwords,
    # labels and figure size
    title='Tree map',
    width=1300,
    height=600,
    # output options
    verbose=False,
    save=False,
)
fig_treemap.show()

### Histogram of the word count

In [None]:
# Histogram of word counts; bins and color are passed as None.
fig_histgram = npt.word_distribution(
    # labels
    title='word distribution',
    xaxis_label='count',
    yaxis_label='',
    # figure layout
    width=1000,
    height=500,
    template='plotly',
    color=None,
    bins=None,
    # output options
    save=False,
)
fig_histgram.show()

### wordcloud

In [None]:
# Generate the word cloud image with nlplot, using the stopword list computed
# earlier.
fig_wc = npt.wordcloud(
    # content
    max_words=100,
    stopwords=stopwords,
    # appearance
    width=1000,
    height=600,
    max_font_size=100,
    colormap='tab20_r',
    mask_file=None,
    # output options
    save=False,
)

# Render the returned image with matplotlib, hiding the axes.
plt.figure(figsize=(15, 25))
plt.imshow(fig_wc, interpolation="bilinear")
plt.axis("off")
plt.show()

### co-occurrence networks

In [None]:
# Build the co-occurrence graph first. This step reports how many nodes and
# edges will be plotted, e.g.:
# >> node_size:70, edge_size:166
# If those numbers are very large, plotting takes a long time; raise
# min_edge_frequency to thin the graph out.
npt.build_graph(
    stopwords=stopwords,
    min_edge_frequency=1,
)

fig_co_network = npt.co_network(
    # labels
    title='Co-occurrence network',
    # node styling
    node_size='adjacency_frequency',
    sizing=100,
    color_palette='hls',
    # figure size
    width=1100,
    height=700,
    # output options
    save=False,
)
iplot(fig_co_network)

### sunburst chart

In [None]:
# Sunburst chart drawn by nlplot.
# NOTE(review): presumably relies on the graph built by npt.build_graph in the
# co-occurrence cell — confirm before running this cell on its own.
fig_sunburst = npt.sunburst(
    # labels
    title='sunburst chart',
    # coloring
    colorscale=True,
    color_continuous_scale='Oryel',
    # figure size
    width=1000,
    height=800,
    # output options
    save=False,
)
fig_sunburst.show()

In [None]:
# Extra: the DataFrames behind the co-occurrence network are available on npt.
# Show the first rows and the shape of the node table, then of the edge table.
display(
    npt.node_df.head(),
    npt.node_df.shape,
    npt.edge_df.head(),
    npt.edge_df.shape,
)