In [2]:
import numpy as np
import pandas as pd

from config import read_config
from data import prepare_data
from plotting import (get_cluster_fill_data, get_spec_fill_data,
                      get_colors, plot_treemap, plot_multiverse,
                      plot_caterpillar, plot_sample_size, plot_cluster_size,
                      plot_spec_tiles, plot_cluster_tiles, plot_inferential,
                      plot_p_hist)
from specs import generate_specs
from user_data import preprocess_data

In [3]:
# Enable IPython's autoreload extension so that edits to the imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules before executing each cell.
%autoreload 2

## Constants

Set the paths to your **data**, **config**, **specs**, and **bootstrap** files, and choose whether to load or preprocess the data, whether to load or generate the specs, and whether to load or generate the bootstrap data.

In [8]:
# Active dataset. TITLE is embedded in every derived file name built below.
TITLE = "R2D2_3"
DIR = "../data/R2D2-Meta-Analysis"
DATA_PATH = f"{DIR}/R2D2.csv"

# Alternative dataset: uncomment these three lines (and comment out the three
# assignments above) to switch.
# TITLE = "Chernobyl"
# DIR = "../data/Chernobyl-Meta-Analysis"
# DATA_PATH = f"{DIR}/Chernobyl.rda"

# Derived file locations; all live in DIR and are keyed by TITLE.
PP_DATA_PATH = f"{DIR}/data_{TITLE}.csv"
CONFIG_PATH = f"{DIR}/config_{TITLE}.json"
SPECS_PATH = f"{DIR}/specs_{TITLE}.csv"
BOOT_PATH = f"{DIR}/boot_{TITLE}.csv"
# Flags: False reads the existing file from the matching path above,
# True runs the corresponding preprocess/generate step instead.
PREPROCESS_DATA = False # Load or preprocess data
GENERATE_SPECS = False # Load or generate specs
GENERATE_BOOTDATA = False # Load or generate boot data

## Preprocess Data

Preprocess the dataset. The preprocessed data will be saved in a `.csv` file with the given title, prefixed by `data_`. The prefix is required for the Plotly Dashboard to work properly.

In [9]:
# Either run the preprocessing step on the raw file or read the already
# preprocessed CSV, depending on the PREPROCESS_DATA flag.
ma_data = (
    preprocess_data(DATA_PATH, title=TITLE)
    if PREPROCESS_DATA
    else pd.read_csv(PP_DATA_PATH)
)
print(f"Data Shape: {ma_data.shape}")
ma_data.head()

Data Shape: (31, 16)


Unnamed: 0,Study_name,publ_yr,publ_yr_recoded,sex,age_group,sample,race,method,published_estimate,N,r,r_se,z,z_se,z_var,r_var
0,Manning (2003),2003,1,men,adults,healthy,white,direct,yes,50,0.29,0.133598,0.298566,0.145865,0.021277,0.017848
1,Latourelle (2008),2008,6,men,adults,healthy,white,image,no,35,0.0,0.176777,0.0,0.176777,0.03125,0.03125
2,Latourelle (2008),2008,6,women,adults,healthy,white,image,no,72,0.0,0.120386,0.0,0.120386,0.014493,0.014493
3,Mas (2009),2009,7,men,adults,healthy,white,image,no,72,-0.0685,0.119821,-0.068607,0.120386,0.014493,0.014357
4,Mas (2009),2009,7,men,adults,clinical,white,image,no,63,0.0021,0.129099,0.0021,0.129099,0.016667,0.016667


Prepare the preprocessed dataset for meta-analysis. This adds **cluster** and **effect IDs**, sets datatypes, etc. For details, consult the documentation of `prepare_data`.

In [10]:
# Read the analysis configuration from CONFIG_PATH.
config = read_config(path=CONFIG_PATH)
# Prepare the preprocessed table using the column mapping stored under
# config["colmap"]; in the recorded output this adds the `c_id` and `e_id` columns.
data = prepare_data(config["colmap"], data=ma_data)
print(f"Data Shape: {data.shape}")
data.head()

Data Shape: (31, 18)


Unnamed: 0,c_id,Study_name,e_id,publ_yr,publ_yr_recoded,sex,age_group,sample,race,method,published_estimate,N,r,r_se,z,z_se,z_var,r_var
0,1,Manning (2003),1,2003,1,men,adults,healthy,white,direct,yes,50,0.29,0.133598,0.298566,0.145865,0.021277,0.017848
1,2,Latourelle (2008),2,2008,6,men,adults,healthy,white,image,no,35,0.0,0.176777,0.0,0.176777,0.03125,0.03125
2,2,Latourelle (2008),3,2008,6,women,adults,healthy,white,image,no,72,0.0,0.120386,0.0,0.120386,0.014493,0.014493
3,3,Mas (2009),4,2009,7,men,adults,healthy,white,image,no,72,-0.0685,0.119821,-0.068607,0.120386,0.014493,0.014357
4,3,Mas (2009),5,2009,7,men,adults,clinical,white,image,no,63,0.0021,0.129099,0.0021,0.129099,0.016667,0.016667


## Specifications

Generate specifications, or load them from the given `SPECS_PATH`. If specifications are generated, they will be saved in a `.csv` file at the given `SPECS_PATH`.

In [11]:
# Read the saved specifications from SPECS_PATH unless regeneration was requested.
if not GENERATE_SPECS:
    specs = pd.read_csv(SPECS_PATH)
else:
    specs = generate_specs(
        data,
        config["which_lists"],
        config["how_lists"],
        config["colmap"],
        config["k_min"],
        config["level"],
        SPECS_PATH,
    )
print(specs.shape)
specs.head()

(340, 20)


Unnamed: 0,sex,method,age_group,sample,race,published_estimate,effect,ma_method,test,mean,lb,ub,p,k,set,set_es,kc,full_set,rank,ci
0,men,image,adults,healthy,white,no,z,REML,t-test,-0.046836,-0.864575,0.838899,0.719751,2,23,24,2,0,1,1.703474
1,men,image,adults,healthy,white,no,z,REML,z-test,-0.046836,-0.237284,0.147079,0.637611,2,23,24,2,0,2,0.384363
2,men,image,adults,healthy,white,no,z,ML,t-test,-0.046836,-0.864575,0.838899,0.719751,2,23,24,2,0,3,1.703474
3,men,image,adults,healthy,white,no,z,ML,z-test,-0.046836,-0.237284,0.147079,0.637611,2,23,24,2,0,4,0.384363
4,men,image,adults,all_sample,white,no,z,ML,t-test,-0.028613,-0.351993,0.30087,0.75128,3,23,245,2,0,5,0.652863


## Bootstrap Data

Generate bootstrap data, or load it from the given `BOOT_PATH`. If bootstrap data is generated, it will be saved in a `.csv` file at the given `BOOT_PATH`.

In [12]:
# Either generate the bootstrap data or read it from BOOT_PATH, depending on
# the GENERATE_BOOTDATA flag.
# NOTE(review): `generate_boot_data` does not appear in the import cell at the
# top of this notebook, so this branch would raise NameError when
# GENERATE_BOOTDATA is True unless the name is brought into scope elsewhere —
# confirm which module provides it and add the import.
if GENERATE_BOOTDATA:
    boot_data = generate_boot_data(
        specs,
        config["n_boot_iter"],
        data,
        config["colmap"],
        config["level"],
        BOOT_PATH
    )
else:
    boot_data = pd.read_csv(BOOT_PATH)
print(boot_data.shape)
boot_data.head()

(340, 4)


Unnamed: 0,rank,obs,boot_lb,boot_ub
0,1,-0.046836,-0.209924,-0.021769
1,2,-0.046836,-0.209924,-0.021769
2,3,-0.046836,-0.209924,-0.019433
3,4,-0.046836,-0.209924,-0.019433
4,5,-0.028613,-0.171824,-0.01313


## Plotting

Prepare **cluster-** and **specification-** fill data for the respective tile maps, and the list of colors that constitute the color scheme.

In [13]:
# Fill data for the cluster tile map.
cluster_fill_data = get_cluster_fill_data(data, specs, config["colmap"])

# Fill data for the specification tile map.
spec_fill_data = get_spec_fill_data(
    config["n_which"], config["which_lists"],
    config["n_how"], config["how_lists"],
    specs,
)

# Count the distinct values found across all entries of spec_fill_data and
# request that many colors.
fill_levels = np.unique(list(spec_fill_data.values())).size
colors = get_colors(fill_levels)

Get important variables for multiverse plots.

In [14]:
# Number of rows in the specifications table.
n_total_specs = specs.shape[0]
# Range of k: from the configured minimum up to the largest k among the specs.
k_range = [config["k_min"], max(specs["k"])]
# Plot labels and figure title, both taken from the configuration.
labels, title = config["labels"], config["title"]

### Treemap

Treemap of the meta-analytic dataset. It visualizes each study and its reported effect sizes, with the colors indicating the sample size `N` (hot colors for low, cold colors for high sample sizes). If a study reports multiple effect sizes, the size of its tile corresponds to the number of reported effect sizes, and the tile's color indicates the average sample size of the reported effects.

In [15]:
# Build the treemap from the prepared data, the configured title and the
# column mapping, then display it.
treemap = plot_treemap(data, config["title"], config["colmap"])
treemap.show()

### Inferential Specification Plot

In [16]:
# Build the inferential specification plot from the bootstrap data and display it.
fig_inferential = plot_inferential(boot_data, title, n_total_specs)
fig_inferential.show()

### p-Value Histogram

In [17]:
# Build the p-value histogram from the specifications table and display it.
fig_p_hist = plot_p_hist(specs, title, n_total_specs)
fig_p_hist.show()

### Multiverse

In [21]:
# Assemble the combined multiverse figure from the specifications table, the
# cluster and specification fill data, the labels and the color list.
fig = plot_multiverse(
    specs,
    n_total_specs,
    k_range,
    cluster_fill_data,
    spec_fill_data,
    labels,
    colors,
    config["level"],
    title
)
fig.show()
# Optional export: uncomment to write the figure to a PDF named after the title.
# (`title` is used here because nesting double quotes inside a double-quoted
# f-string is a syntax error before Python 3.12.)
# fig.write_image(f"{title}.pdf")

Plot the individual components of the multiverse separately.

In [28]:
# Build the cluster tile map from the specifications and the cluster fill data, then display it.
fig_cluster_tiles = plot_cluster_tiles(specs, cluster_fill_data, n_total_specs, title)
fig_cluster_tiles.show()

In [23]:
# Build the caterpillar plot from the specifications table and display it.
fig_caterpillar = plot_caterpillar(specs, n_total_specs, colors, k_range, title)
fig_caterpillar.show()

In [24]:
# Build the cluster-size plot from the specifications table and display it.
fig_cluster_size = plot_cluster_size(specs, k_range, n_total_specs, title)
fig_cluster_size.show()

In [25]:
# Build the sample-size plot from the specifications table and display it.
fig_sample_size = plot_sample_size(specs, k_range, n_total_specs, title)
fig_sample_size.show()

In [26]:
# Build the specification tile map from the specifications and the
# specification fill data, then display it.
fig_spec_tiles = plot_spec_tiles(specs, n_total_specs, spec_fill_data, labels, colors, k_range, title)
fig_spec_tiles.show()