# Diversity macro analysis

We track the evolution of diversity under various parameters and explore its
potential drivers.

## Preamble

In [None]:
# Execute the shared notebook preamble script in this kernel's namespace.
# NOTE(review): presumably this is where `pd` and `project_dir` (used below)
# come from, since neither is imported in this notebook — confirm.
%run ../notebook_preamble.ipy
# Turn off Jedi-based tab completion in IPython.
%config Completer.use_jedi = False


# Silence pandas' chained-assignment (SettingWithCopy) warnings for the session.
pd.options.mode.chained_assignment = None 

In [None]:
# Uncomment and run if you want to save figures
# driv = altair_visualisation_setup()

In [None]:
import altair as alt
import random
import logging
import networkx as nx
import yaml

from narrowing_ai_research.utils.list_utils import *
from narrowing_ai_research.utils.altair_utils import *
from narrowing_ai_research.transformers.networks import *
from narrowing_ai_research.paper.s4_diversity_macro import *
from narrowing_ai_research.utils.read_utils import *
from narrowing_ai_research.paper.s3_org_eda import create_paper_dates_dict, paper_orgs_processing
from narrowing_ai_research.transformers.diversity import Diversity

# Lift Altair's limit on the number of rows a chart's dataset may contain.
alt.data_transformers.disable_max_rows()

## Read data

### Metadata

In [None]:
# Parse the paper-level configuration file, then keep only the diversity
# parameters declared for section 4.
config_path = f"{project_dir}/paper_config.yaml"
with open(config_path, 'r') as infile:
    paper_config = yaml.safe_load(infile)

div_params = paper_config['section_4']['div_params']

### Data

In [None]:
# Load the two inputs of the analysis through the project helper.
# NOTE(review): `papers` is later accessed via its 'is_ai', 'year' and
# 'article_id' columns; the structure of `topic_mix` is not visible here — confirm.
papers, topic_mix = read_process_data()

### Historical analysis

In [None]:
# Build a year -> {article_id, ...} lookup restricted to papers flagged as AI
ai_papers = papers.loc[papers['is_ai'].eq(True)]
year_ids = ai_papers.groupby('year')['article_id'].apply(set)

In [None]:
# Yearly diversity results, computed by the project helper from the topic mix,
# the per-year sets of AI paper ids and the configured diversity parameters.
yearly_diversity_norm = year_diversity_results(
    topic_mix,
    year_ids,
    div_params,
)

In [None]:
# Chart the yearly diversity results; save=False presumably skips writing the
# figure to disk — confirm against the helper.
# NOTE(review): this helper is called with `save=` while
# make_chart_distribution_centrality below is called with `saving=` — verify
# that both keyword names match their function signatures.
make_chart_diversity_evol(yearly_diversity_norm,save=False)

#### Interpreting measures of diversity

##### Balance and Stirling Rao

What is the share of topical activity accounted for by topics in different positions of the distribution?

What is their mean distance to the other topics?

In [None]:
# Derive the two inputs of the chart below from the topic mix and the per-year
# AI paper ids.
# NOTE(review): going by the names and the questions above, presumably topic
# activity shares in long format and a centrality ranking of topics — confirm.
shares_long, centrality_ranked_all = extract_distribution_centrality(topic_mix,year_ids) 

In [None]:
# Chart the distribution shares and centrality ranking; saving=False presumably
# skips writing the figure to disk — confirm against the helper.
# NOTE(review): the keyword here is `saving=`, whereas make_chart_diversity_evol
# above is called with `save=` — verify both match their function signatures.
make_chart_distribution_centrality(shares_long,centrality_ranked_all,saving=False)