In [1]:
import pandas as pd
from utils import get_path

from plotly import express as px, io as pio
# Route pandas' built-in `.plot` calls through Plotly instead of the default backend.
pd.options.plotting.backend = 'plotly'
# Default renderer used by `fig.show()`: the `plotly_mimetype` and
# `notebook_connected` renderers combined with "+".
pio.renderers.default = 'plotly_mimetype+notebook_connected'

# Description

Concepts describing the main topics of a publication (note: automatically derived from the publication text using machine learning).

[Concepts from Dimensions](https://docs.dimensions.ai/dsl/datasource-publications.html#publications-concepts-scores-long-desc)

# Data source

## Load table

In [2]:
# Locate the concepts CSV through the project helper `get_path`
# (presumably maps a dataset name and a file name to a path - confirm in utils).
path_cncpt = get_path('INOVA_COVID', 'concepts.csv')
# Columns used further down: ano, tags, concept, relevance, count.
df_cncpt = pd.read_csv(path_cncpt)
# Bare last expression so the notebook renders a preview of the frame.
df_cncpt

Unnamed: 0,ano,tags,concept,relevance,count
0,2020,"North,National,World",prior treatment experience,0.571,1
1,2020,"North,National,World",preclinical evaluation,3.787,8
2,2020,"North,National,World",RNA-dependent RNA polymerase,36.987,79
3,2020,"North,National,World",cytotoxicity,31.684,85
4,2020,"North,National,World",GAD scores,0.331,1
...,...,...,...,...,...
3783043,2021,"South_North,Shared_L,Inter_Blocs,International...",Data Analytics Architecture,0.721,1
3783044,2021,"South_North,Shared_L,Inter_Blocs,International...",multimedia systems,0.663,1
3783045,2021,"South_North,Shared_L,Inter_Blocs,International...",traffic videos,0.658,1
3783046,2021,"South_North,Shared_L,Inter_Blocs,International...",study tips,0.003,1


## Tags

In [3]:
# Number of rows per distinct `tags` string, most frequent first.
df_cncpt['tags'].value_counts()

North,National,World                                    1484578
South,National,World                                     964534
Norths,Intra_Bloc,International,World                    468983
South_L,Bloc_L,Inter_Blocs,International,World           222955
South_North,Shared_L,Inter_Blocs,International,World     202430
North_L,Bloc_L,Inter_Blocs,International,World           201483
North_South,Shared_L,Inter_Blocs,International,World     122037
Souths,Intra_Bloc,International,World                    116048
Name: tags, dtype: int64

# Concepts per tag

In [4]:
def select_df(df, tag, anos=[2020, 2021], col_value=None, top=15):
  """Rank the concepts of a single tag and return the best-ranked rows.

  Rows of ``df`` are kept when their ``ano`` is one of ``anos`` and their
  comma-separated ``tags`` string contains ``tag`` as a whole entry. The
  remaining rows are summed per concept, a ``values`` column is derived from
  the sums, and the ``top`` rows with the largest ``values`` are returned.

  Parameters
  ----------
  df : pandas.DataFrame
    Table with ``ano``, ``tags``, ``concept``, ``relevance`` and ``count``
    columns.
  tag : str
    Tag to select, e.g. ``'World'``. Leading/trailing commas and spaces are
    ignored when matching, so the selector ``'North,'`` selects exactly the
    ``North`` entry. The ``tags`` column of the result carries ``tag`` as
    passed.
  anos : scalar or list-like, default [2020, 2021]
    Year or years to keep.
  col_value : {'relevance', 'count'} or None, default None
    Summed column to rank by. Any other value ranks by the product of the
    summed ``relevance`` and the summed ``count``.
  top : int, default 15
    Maximum number of rows returned.

  Returns
  -------
  pandas.DataFrame
    One row per concept, sorted by ``values`` in descending order, with a
    fresh 0..n-1 index.
  """
  # Accept any list-like (list, tuple, array, Series) as well as a scalar.
  # The default list is only read, never mutated, so sharing it is safe.
  anos = list(anos) if pd.api.types.is_list_like(anos) else [anos]

  # Match the tag as a whole comma-separated entry. A plain substring search
  # lets 'North,' also hit 'South_North,...' (and 'South,' hit
  # 'North_South,...'), mixing unrelated groups into the selection.
  token = tag.strip(', ')
  padded_tags = ',' + df['tags'] + ','
  has_tag = padded_tags.str.contains(',' + token + ',', regex=False, na=False)
  # Filter on the frame that was passed in, not on a notebook-level global.
  in_years = df['ano'].isin(anos)

  df_select = df[in_years & has_tag].copy()
  df_select['tags'] = tag
  df_select = df_select.drop(columns=['ano'])
  df_select = df_select.groupby(
    ['tags', 'concept'], as_index=False
  ).sum(numeric_only=True)

  if col_value in {'relevance', 'count'}:
    values = df_select[col_value]
  else:
    values = df_select['relevance'] * df_select['count']
  df_select['values'] = values

  df_select = df_select.sort_values(
    by='values', ascending=False
  ).reset_index(drop=True)
  return df_select[:top]

## World

### 2020

In [5]:
# Top concepts for the 'World' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'World', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: World (2020)',
)
fig.show()

### 2021

In [6]:
# Top concepts for the 'World' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'World', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: World (2021)',
)
fig.show()

### 2020 & 2021

In [7]:
# Top concepts for the 'World' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'World')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: World (2020 & 2021)',
)
fig.show()

## National

### 2020

In [8]:
# Top concepts for the 'National' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'National', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: National (2020)',
)
fig.show()

### 2021

In [9]:
# Top concepts for the 'National' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'National', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: National (2021)',
)
fig.show()

### 2020 & 2021

In [10]:
# Top concepts for the 'National' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'National')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: National (2020 & 2021)',
)
fig.show()

## International

### 2020

In [11]:
# Top concepts for the 'International' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'International', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: International (2020)',
)
fig.show()

### 2021

In [12]:
# Top concepts for the 'International' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'International', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: International (2021)',
)
fig.show()

### 2020 & 2021

In [13]:
# Top concepts for the 'International' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'International')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: International (2020 & 2021)',
)
fig.show()

## Intra_Bloc

### 2020

In [14]:
# Top concepts for the 'Intra_Bloc' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Intra_Bloc', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Intra_Bloc (2020)',
)
fig.show()

### 2021

In [15]:
# Top concepts for the 'Intra_Bloc' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Intra_Bloc', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Intra_Bloc (2021)',
)
fig.show()

### 2020 & 2021

In [16]:
# Top concepts for the 'Intra_Bloc' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'Intra_Bloc')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Intra_Bloc (2020 & 2021)',
)
fig.show()

## Inter_Blocs

### 2020

In [17]:
# Top concepts for the 'Inter_Blocs' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Inter_Blocs', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Inter_Blocs (2020)',
)
fig.show()

### 2021

In [18]:
# Top concepts for the 'Inter_Blocs' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Inter_Blocs', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Inter_Blocs (2021)',
)
fig.show()

### 2020 & 2021

In [19]:
# Top concepts for the 'Inter_Blocs' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'Inter_Blocs')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Inter_Blocs (2020 & 2021)',
)
fig.show()

## Shared_L

### 2020

In [20]:
# Top concepts for the 'Shared_L' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Shared_L', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Shared_L (2020)',
)
fig.show()

### 2021

In [21]:
# Top concepts for the 'Shared_L' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Shared_L', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Shared_L (2021)',
)
fig.show()

### 2020 & 2021

In [22]:
# Top concepts for the 'Shared_L' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'Shared_L')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Shared_L (2020 & 2021)',
)
fig.show()

## Bloc_L

### 2020

In [23]:
# Top concepts for the 'Bloc_L' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Bloc_L', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Bloc_L (2020)',
)
fig.show()

### 2021

In [24]:
# Top concepts for the 'Bloc_L' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Bloc_L', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Bloc_L (2021)',
)
fig.show()

### 2020 & 2021

In [25]:
# Top concepts for the 'Bloc_L' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'Bloc_L')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Bloc_L (2020 & 2021)',
)
fig.show()

## North_L

### 2020

In [26]:
# Top concepts for the 'North_L' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'North_L', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: North_L (2020)',
)
fig.show()

### 2021

In [27]:
# Top concepts for the 'North_L' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'North_L', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: North_L (2021)',
)
fig.show()

### 2020 & 2021

In [28]:
# Top concepts for the 'North_L' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'North_L')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: North_L (2020 & 2021)',
)
fig.show()

## South_L

### 2020

In [29]:
# Top concepts for the 'South_L' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'South_L', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: South_L (2020)',
)
fig.show()

### 2021

In [30]:
# Top concepts for the 'South_L' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'South_L', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: South_L (2021)',
)
fig.show()

### 2020 & 2021

In [31]:
# Top concepts for the 'South_L' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'South_L')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: South_L (2020 & 2021)',
)
fig.show()

## North_South

### 2020

In [32]:
# Top concepts for the 'North_South' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'North_South', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: North_South (2020)',
)
fig.show()

### 2021

In [33]:
# Top concepts for the 'North_South' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'North_South', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: North_South (2021)',
)
fig.show()

### 2020 & 2021

In [34]:
# Top concepts for the 'North_South' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'North_South')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: North_South (2020 & 2021)',
)
fig.show()

## South_North

### 2020

In [35]:
# Top concepts for the 'South_North' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'South_North', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: South_North (2020)',
)
fig.show()

### 2021

In [36]:
# Top concepts for the 'South_North' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'South_North', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: South_North (2021)',
)
fig.show()

### 2020 & 2021

In [37]:
# Top concepts for the 'South_North' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'South_North')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: South_North (2020 & 2021)',
)
fig.show()

## Norths

### 2020

In [38]:
# Top concepts for the 'Norths' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Norths', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Norths (2020)',
)
fig.show()

### 2021

In [39]:
# Top concepts for the 'Norths' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Norths', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Norths (2021)',
)
fig.show()

### 2020 & 2021

In [40]:
# Top concepts for the 'Norths' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'Norths')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Norths (2020 & 2021)',
)
fig.show()

## Souths

### 2020

In [41]:
# Top concepts for the 'Souths' tag in 2020; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Souths', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Souths (2020)',
)
fig.show()

### 2021

In [42]:
# Top concepts for the 'Souths' tag in 2021; slice sizes come from select_df's `values` column.
df_select = select_df(df_cncpt, 'Souths', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Souths (2021)',
)
fig.show()

### 2020 & 2021

In [43]:
# Top concepts for the 'Souths' tag over select_df's default years (2020 and 2021).
df_select = select_df(df_cncpt, 'Souths')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: Souths (2020 & 2021)',
)
fig.show()

## North

### 2020

In [44]:
# Top concepts for the plain North tag in 2020; 'North,' (with the trailing
# comma) is the selector this notebook uses for it.
df_select = select_df(df_cncpt, 'North,', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: North (2020)',
)
fig.show()

### 2021

In [45]:
# Top concepts for the plain North tag in 2021; 'North,' (with the trailing
# comma) is the selector this notebook uses for it.
df_select = select_df(df_cncpt, 'North,', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: North (2021)',
)
fig.show()

### 2020 & 2021

In [46]:
# Top concepts for the plain North tag over select_df's default years
# (2020 and 2021); 'North,' is the selector this notebook uses for it.
df_select = select_df(df_cncpt, 'North,')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: North (2020 & 2021)',
)
fig.show()

## South

### 2020

In [47]:
# Top concepts for the plain South tag in 2020; 'South,' (with the trailing
# comma) is the selector this notebook uses for it.
df_select = select_df(df_cncpt, 'South,', 2020)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: South (2020)',
)
fig.show()

### 2021

In [48]:
# Top concepts for the plain South tag in 2021; 'South,' (with the trailing
# comma) is the selector this notebook uses for it.
df_select = select_df(df_cncpt, 'South,', 2021)
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: South (2021)',
)
fig.show()

### 2020 & 2021

In [49]:
# Top concepts for the plain South tag over select_df's default years
# (2020 and 2021); 'South,' is the selector this notebook uses for it.
df_select = select_df(df_cncpt, 'South,')
fig = px.pie(
  df_select, values='values', names='concept',
  title='Top concepts: South (2020 & 2021)',
)
fig.show()