In [None]:
#| hide
from reproduce_work.core import *
from reproduce_work.build import *
from reproduce_work.dynamic import *

In [1]:
# NOTE(review): looks like a leftover kernel sanity check unrelated to the
# documentation that follows — confirm and consider removing this cell.
2+2

4

# reproduce-work

> A Docker-based framework to facilitate cross-platform computational reproducibility in scientific publishing.


### Main features:

- ✅ Computational reproducibility across all major operating systems and platforms
- ✅ A simple and intuitive API that fits into your existing workflow
- ✅ Verifiable, traceable, reproducible results in scientific documents
- ✅ Fancy PDF outputs

### Roadmap features:

- 🔳 Fancy HTML outputs
- 🔳 Data validation, versioning, and provenance tracking
- 🔳 Support for more target publishers and document formats
- 🔳 Deep linking to verified open data and code

## Current support

### Workflows
As of v0.0.1, reproduce.work has support for development/analysis workflows based on:

- Python
- Jupyter notebooks
- R, RStudio
- Julia
- pandoc

### Target publishers and document formats
Primary outputs are currently PDF and HTML documents, with support for:

- LaTeX
- Jupyter notebooks
- RMarkdown
- pandoc markdown

## Install

```sh
pip install reproduce_work
```

## How to use

### Getting started

In [None]:
import os
from pathlib import Path

# Display the path of the `nbs` folder under the current working directory.
print(Path.cwd() / 'nbs')

In [None]:
# List the contents of the `nbs` folder under the current working directory.
os.listdir(Path.cwd() / 'nbs')

In [None]:
import os
from pathlib import Path

# Switch the working directory to the `nbs` folder inside the user's home
# directory, then list what it contains.
home_dir = Path.home()
this_dir = home_dir / 'nbs'
os.chdir(this_dir)
os.listdir(this_dir)

In [2]:
import reproduce_work
from reproduce_work.core import generate_config,read_base_config
from reproduce_work.dynamic import publish_variable,publish_data,publish_file,register_notebook

Setting reproduce.work config dir to ./reproduce
Setting reproduce.work config dir to ./reproduce


In [None]:
# NOTE(review): looks like a leftover scratch cell with no role in the
# walkthrough — confirm and consider removing it.
2+2

Generate a configuration at the beginning of your project

In [3]:
# Project-level settings handed to generate_config.
config_inputs = {
    'authors': {
        'author1': {
            'name': 'Alex P. Miller',
            'email': 'alex.miller@marshall.usc.edu',
            'affiliation': 'USC Marshall School of Business',
        },
    },
    'nbdev_project': True,
    'dev_image_tag': 'reproduce-work-dev',
    # 'verbose': True,  # enables print statements whenever data are updated/saved
}

generate_config(inputs=config_inputs)

Successfully generated reproduce.work configuration at reproduce/config.toml


In [4]:
# NOTE(review): the saved output of this cell is `KeyError: 'notebooks'`, so the
# call currently fails as recorded — TODO confirm what the generated config is
# expected to contain and fix before publishing this notebook.
register_notebook('index.ipynb')

KeyError: 'notebooks'

Run code to analyze data and generate figures

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import linregress

%config InlineBackend.figure_formats = ['svg']
plt.rcParams['figure.figsize'] = [6, 5]

random_state = np.random.RandomState(552)

# Generate data
x = random_state.normal(size=100)
y = 0.5*x + random_state.normal(scale=10, size=100)

# Plot data with seaborn
sns.set_style('white')

# compute slope and stats
slope, intercept, r_value, p_value, std_err = linregress(x, y)

# plot x-y scatter data
x_y_plot = sns.scatterplot(x=x, y=y)

# plot best fit line
x_y_plot.plot(x, intercept + slope*x, color='black')

# Compute p-value of slope 
# render p-value with 3 decimal places and no scientific notation
p_value_str = f'{p_value:.3f}'

# add p-value and slope to plot
x_y_plot.text(x=4.5, y=25, s='β')
x_y_plot.text(x=4.75, y=20, s=f'p-value: {p_value_str}')
x_y_plot.text(x=4.75, y=15, s=f'slope: {slope:.3f}')
x_y_plot.text(x=4.75, y=10, s=f'intercept: {intercept:.3f}')

# Set title
x_y_plot.set_title('Reproducibly Simulated Data')

# save figure
plt.savefig('reproducible_plot.svg', bbox_inches='tight')

There are three primary types of objects you can publish:

- named variables
    - defined via: `reproduce_work.publish_variable`
    - Designed to be used to facilitate production and review of reproducible results
- data assignments
    - defined via: `reproduce_work.publish_data`
    - Designed to standardize data management and facilitate data validation
- file assignments
    - defined via: `reproduce_work.publish_file`
    - Designed to standardize file management and facilitate file validation, allowing for reproducible workflows for arbitrarily complex data structures



In [None]:
# Publish the formatted p-value string under the name "p_value_str".
publish_variable(
    p_value_str,
    "p_value_str",
    metadata=dict(
        description='The p-value of the coefficient on the slope of the linear regression line.',
    ),
)

In [None]:
# Publish the simulated x array under the name "x", with descriptive metadata.
publish_data(
    x,
    "x",
    metadata=dict(
        description='The simulated X data',
        units='kilograms',
    ),
)

In [None]:
# Publish the simulated y array under the name "y", with descriptive metadata.
publish_data(
    y,
    "y",
    metadata=dict(
        description='The simulated Y data',
        units='meters',
    ),
)

In [None]:
# Publish the saved figure file together with a short description.
publish_file(
    'reproducible_plot.svg',
    metadata=dict(description='A plot of X vs Y'),
)

In [None]:
# Look up the path stored under repro -> files -> dynamic in the base config
# and print that file's full contents.
base_config = read_base_config()
dynamic_path = base_config['repro']['files']['dynamic']
with open(dynamic_path, 'r') as dynamic_file:
    dynamic_contents = dynamic_file.read()
print(dynamic_contents)

Decide which data, figures, and files to save for publication.

In [None]:
#| hide
# Run nbdev's export step for this project.
import nbdev
nbdev.nbdev_export()