# Sciris demo

This file demonstrates some of the main features of Sciris. For more information, please see the tutorials:

http://docs.sciris.org/tutorials

## Getting started

First, let's make sure Sciris is installed:

In [None]:
%pip install sciris

In [None]:
# Let's import everything we'll need later
import numpy as np
import pandas as pd
import sciris as sc
import pylab as pl # Equivalent to "import matplotlib.pyplot as plt", but easier to type!

## Array operations

In [None]:
minval = 0.9 # Threshold: the cells below look for the entries of data that exceed it
data = np.random.rand(50) # 50 random samples (np.random.rand draws uniformly from [0, 1))

In [None]:
# Without Sciris: plain NumPy for the search, manual string formatting for the output
inds = np.flatnonzero(data > minval)
print(f'The indices over {minval} were {", ".join(map(str, inds))}.')

In [None]:
# With Sciris: the counterpart of the plain-NumPy cell above, with less boilerplate
inds = sc.findinds(data>minval) # Stands in for np.nonzero(...)[0]
print(f'The indices over {minval} were {sc.strjoin(inds)}.') # strjoin stands in for the manual ", ".join(...)

## Containers

In [None]:
# Create an objdict
data = sc.objdict(a=[1,2,3], b=[4,5,6]) # Entries are supplied as keyword arguments
print(data)

In [None]:
# Demonstrate the objdict: flexible options for indexing
assert data.a == data['a'] # Access by attribute matches access by key...
assert data['a'] == data[0] # ...which matches access by integer position
assert data[:].sum() == 21 # You can sum a dict!
for i, key, value in data.enumitems():
    print(f'Item {i} is named "{key}" and has value {value}')

In [None]:
# Create some data
dates = pd.date_range('2022-01-01', '2022-02-28') # Daily dates covering January and February 2022
values = 1e6*np.random.randn(len(dates))**2 # One non-negative value per date (sized from dates, not a hard-coded day count)
outliers = values > 2*values.mean() # Boolean mask: values more than twice the mean

In [None]:
# Plot without Sciris
data = pd.DataFrame({'x': dates, 'y': values, 'outliers': outliers}) # Assemble the three columns into a dataframe
pl.scatter(data['x'], data['y'], c=data['outliers']); # Vanilla Matplotlib!

In [None]:
# Plot with Sciris: same scatter plot as the previous cell, followed by formatting helpers
sc.options(jupyter=True) # Use higher-resolution plotting
data = sc.dataframe(x=dates, y=values, outliers=outliers) # Shortcut to pd.DataFrame
pl.scatter(data.x, data.y, c=data.outliers) # Vanilla Matplotlib! Points are colored by the outlier flag
sc.dateformatter() # Format a date axis nicely
sc.commaticks() # Convert the y-axis to use commas
sc.boxoff() # Turn off the box around the plot
sc.setylim(); # Set y-limit (tight by default); the trailing semicolon suppresses the cell's text output

## Saving and loading

In [None]:
class Sim:
    """
    Toy simulation: cumulative sums of cubed Gaussian noise, one series per trial.

    Args:
        days (int): number of time steps in each series
        trials (int): number of independent series

    Attributes:
        x: time axis, np.arange(days); None until run() has been called
        y: results with shape (days, trials); None until run() has been called
    """

    def __init__(self, days, trials):
        self.days = days
        self.trials = trials
        self.x = None # Results are declared here so every instance has them, even before run()
        self.y = None

    def run(self):
        """ Generate (or regenerate) the results; each call draws new random numbers """
        self.x = np.arange(self.days)
        self.y = np.cumsum(np.random.randn(self.days, self.trials)**3, axis=0) # Accumulate along the days axis

    def plot(self):
        """ Plot every trial against the time axis; run() must have been called first """
        if self.y is None: # Fail with a clear message rather than an obscure error from deep inside the plotting call
            raise AttributeError('Sim has no results to plot yet; call run() first')
        with pl.style.context('sciris.fancy'): # Custom plot style
            pl.plot(self.x, self.y, alpha=0.6)

sim = Sim(days=100, trials=10) # 10 trials of 100 days each
sim.run() # Generate the results
sim.plot() # Plot all trials against the day index

In [None]:
# Save
sc.save('my-sim.obj', sim) # Save any Python object to disk

# Load and plot
new_sim = sc.load('my-sim.obj') # Load any Python object
new_sim.plot() # Plotted straight after loading, without calling run() again

In [None]:
# The loaded object can be used just like the original: re-run it and plot again
new_sim.run()
new_sim.plot()

## Parallelization

In [None]:
# Define the function to parallelize
def func(scale, x_offset, y_offset):
    """ Return an objdict with the scale and 100 random (x, y) points spread around the offsets """
    np.random.seed(scale) # Seed with the scale, so a given scale always produces the same points
    # Keyword arguments are evaluated left to right, so x is drawn before y
    return sc.objdict( # Note the use of objdict to create a convenient container
        scale=scale,
        x=x_offset+scale*np.random.randn(100),
        y=y_offset+scale*np.random.randn(100),
    )

# Offsets shared by every run
x_offset, y_offset = 5, 10

# One run per scale; reverse order is easier to see when plotted
scales = [40, 30, 20, 10]

In [None]:
# Run in parallel without Sciris
import concurrent.futures

def helper_func(args): # We can only pass a single argument
    return func(*args)

# Construct arguments: one (scale, x_offset, y_offset) tuple per run
arglist = [(scale, x_offset, y_offset) for scale in scales]

# NOTE(review): per the concurrent.futures docs, worker processes must be able to
# import __main__, so this may fail in interactive sessions on platforms that spawn
# rather than fork -- confirm on your platform
with concurrent.futures.ProcessPoolExecutor() as executor:
    results1 = list(executor.map(helper_func, arglist)) # Actually run; results come back in input order

In [None]:
# Run in parallel with Sciris: counterpart of the manual executor cell above
results = sc.parallelize(func, scales, x_offset=x_offset, y_offset=y_offset) # scales is iterated over; the offsets are passed as keyword arguments

In [None]:
# Plot each result as its own labeled scatter series
for data in results:
    pl.scatter(data.x, data.y, alpha=0.5, label=f'Scale {data.scale}')
pl.legend(); # Without a legend, the labels set above would never be displayed

## Plotting

(As promised! 😁)

In [None]:
# Create the data
raw = np.random.rand(20,20) - 0.2 # 20x20 grid of values in [-0.2, 0.8)

# Smooth it
smooth = sc.gauss2d(raw, scale=2)

# Plot and time it
with sc.timer(): # Everything inside this block is timed
    fig = pl.figure(figsize=(8,8))
    sc.bar3d(smooth, fig=fig, cmap='orangeblue') # Draw into the figure created above
    sc.figlayout()
    sc.savefig('bars.png') # This file is read back by sc.loadmetadata() in the next section
    pl.show()

## Odds & ends

In [None]:
# Show that metadata was saved in the figure (bars.png was written by sc.savefig() in the plotting cell)
sc.loadmetadata('bars.png')

In [None]:
# We can also get system metadata (as the last expression in the cell, the result is displayed)
sc.metadata()

In [None]:
# Check current CPU load (as the last expression in the cell, the result is displayed)
sc.cpuload()

In [None]:
# Check current CPU performance on a single core (compare with parallel=True in the next cell)
sc.benchmark()

In [None]:
# Check current CPU performance on all cores
sc.benchmark(parallel=True)

In [None]:
# Quickly download two URLs in parallel
# The dict keys label the results; presumably save=False returns the page contents instead of writing files (the next cell treats them as strings)
data = sc.download(dict(Python='https://python.org', R='https://r-project.org'), save=False)

In [None]:
# Let's check something: how often does a word appear on each downloaded page?
word = 'diversity' # Keep this lowercase, since each page is lowercased before counting
for site,html in data.items():
    print(f'{site}: {html.lower().count(word)}') # Case-insensitive count of the word