# **Dynamic Topic Modeling Viewer**

This notebook visualizes the evolution of the top seven topics generated from 939,452 scientific articles covering the period 1990 - 2019. The graph shows how the probabilities of the most relevant words (top 100) generated by each topic change over time.

In [None]:
#Install libraries
!pip install matplotlib pandas numpy ipywidgets

### **Import libraries and load data**

In [3]:
%matplotlib inline
import pandas as pd
from matplotlib import pyplot
import numpy as np
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

#Word probabilities per topic and year; the first CSV column becomes the row index.
#The code below reads the 'topic', 'ano' (year), 'prob' and 'word' columns.
word_topics = pd.read_csv(
    'word_distribution_topics_top100.csv',
    index_col=0,
)

### **Functions**

In [4]:
#Returns the words associated with a selected topic
def filterWordsTopic(df, topic):
  """Return the distinct words of one topic as a sorted NumPy array.

  df must provide the columns 'topic' and 'word'; rows whose 'topic' equals
  `topic` are kept and their 'word' values are de-duplicated with np.unique.
  """
  belongs_to_topic = df['topic'] == topic
  topic_words = df.loc[belongs_to_topic, 'word']
  return np.unique(topic_words)

#Generates the graph of the selected words and topics
def graph_topic_distribution(df, topic_id = 0, words = ('',), start_year = 1990, end_year = 2019):
  """Plot the yearly probability of each selected word within one topic.

  Parameters:
    df: DataFrame with the columns 'topic', 'ano' (year), 'prob' and 'word'.
    topic_id: value compared against df['topic'] to pick the topic.
    words: iterable of words; one curve is drawn per word. The default is an
      immutable tuple so it cannot be altered between calls.
    start_year, end_year: inclusive bounds of the plotted period.

  The figure is displayed with pyplot.show(); nothing is returned.
  """
  fig, ax = pyplot.subplots(figsize=(20, 8))

  #One x tick per year of the selected period
  years = np.arange(start_year, end_year + 1)
  ax.set_xticks(years)
  ax.grid(linestyle='--', color='silver')

  #Topic and period do not depend on the word, so filter them only once
  in_period = df.loc[(df['topic'] == topic_id) & (df['ano'] <= end_year) & (df['ano'] >= start_year)]
  year_axis = pd.DataFrame({'ano': years})

  curves = []
  labels = []
  for w in words:
    word_rows = in_period.loc[in_period['word'] == w, ['ano', 'prob']]

    #Avoid joining discontinuous points: the left merge keeps every year of
    #the period in order and leaves 'prob' as NaN where the word has no row,
    #so the line is broken at those years instead of bridging them
    curve = year_axis.merge(word_rows, on='ano', how='left')

    #Generate a curve for each selected word
    line, = ax.plot(curve['ano'], curve['prob'], marker='o', linestyle='solid')
    curves.append(line)
    labels.append(w)

  ax.set_ylabel('Probability')
  ax.set_xlabel('Years')
  #Only build a legend when at least one curve was drawn
  if curves:
    ax.legend(curves, labels, loc='upper left')
  pyplot.show()

#Redraw the graph with the options currently selected in the controls
def set_params_graph(period, topic, words):
  """Forward the widget values to graph_topic_distribution.

  period is indexed as (start_year, end_year); topic is passed as topic_id and
  words as the words to plot. The data always comes from word_topics.
  """
  first_year = period[0]
  last_year = period[1]
  graph_topic_distribution(
    df=word_topics,
    topic_id=topic,
    words=words,
    start_year=first_year,
    end_year=last_year,
  )

### **Generate widgets**

1. Run cell to generate widgets
2. Select the date range
3. Select a topic
4. Select word(s)

Hold down the shift or control key to select more than one word at a time

Every time you change the selected topic, the set of words associated with it is loaded

If a word disappears for an interval of time, it does not necessarily mean that it was not used; it may simply not have been used enough to appear in the top 100.


In [5]:
####**********************Period slider control*************************###

#Every selectable year; the slider starts with the whole range selected
period = list(range(1990, 2020))
selectionPeriod = widgets.SelectionRangeSlider(
    options=period,
    index=(0, len(period) - 1),
    description='Period (Years)',
    disabled=False,
    layout={'width': '500px'},
    style={'description_width': 'initial'}
)

####**********************Topic selector control*************************###
#Human-readable topic descriptions; the position in this list is the topic id
topic_labels = [
    'Security, access control, encryption scheme, cryptography, cloud computing',
    'Computational complexity, optimization, graph partition, numerical analysis, combinatorics',
    'Data-exchange, semantics',
    'Computer vision, object detection, recognition, video tracking',
    'Interactive computing, human-computer interaction, human-centered design, business management systems',
    'Artificial intelligence, machine learning, clustering',
    'Energy efficiency, power systems, network communication',
]
#(label, value) pairs used as the Dropdown options
topics = [(label, topic_id) for topic_id, label in enumerate(topic_labels)]

selectionTopic = widgets.Dropdown(
    options=topics,
    value=topics[0][1],
    description='Topic:',
    disabled=False
)

#Allows the word set of the word selector control to change when the topic changes
def on_change(change):
  """Load the words of the newly selected topic into the word selector.

  `change` is the notification dict passed by `observe`; its 'new' entry is the
  selected topic value, used here as an index into `set_words`.
  """
  #Defensive guard: ignore anything that is not an update of the 'value' trait
  if change['type'] != 'change' or change['name'] != 'value':
    return
  selectionWords.options = set_words[change['new']]

#Subscribe to the 'value' trait only, so other trait updates never reach the callback
selectionTopic.observe(on_change, names='value')

####**********************Word selector control*************************###
#Set of words associated with each topic, built in the same order as `topics`
#(on_change indexes this list with the selected topic value)
set_words = [filterWordsTopic(word_topics, topic_id) for _label, topic_id in topics]

selectionWords = widgets.SelectMultiple(
    options=set_words[0],
    rows=10,
    description='Words',
    disabled=False
)


####**********************Generate controls*************************###
#The trailing semicolon keeps the cell from echoing the repr of set_params_graph
widgets.interact(
    set_params_graph,
    period = selectionPeriod,
    topic = selectionTopic,
    words = selectionWords
);

interactive(children=(SelectionRangeSlider(description='Period (Years)', index=(0, 29), layout=Layout(width='5…

<function __main__.set_params_graph(period, topic, words)>