## Setup
Runs a bash command (using iPython %bash magic) to install all required dependencies

All packages (alongside their corresponding versions) can be found in the setup directory, specifically `requirements.txt`

The cell below assumes a Unix / Mac OS, however if that's not the case the following command can be used from the project's root dir to install all required dependencies

`pip3 install --no-warn-script-location --progress-bar pretty --quiet -r ./setup/requirements.txt `

In [1]:
%%bash
yes | pip3 install --no-warn-script-location --progress-bar pretty --quiet -r ./setup/requirements.txt

Couldn't find program: 'bash'


## Imports
Contains import statements for all the required libraries and frameworks

In [2]:
import bokeh
import plotnine
import altair
import numpy as np
from pprint import pprint

from generators import (
    scatter as scatter_generator,
    bar as bar_generator,
    line as line_generator,
    contour as contour_generator,
    kd as kd_generator,
    histogram as histogram_generator,
    errorbar as errorbar_generator,
    bubble as bubble_generator,
    area as area_generator,
    box as box_generator,
)

from styles import (
    scatter as scatter_styles,
    bar as bar_styles,
    line as line_styles,
    contour as contour_styles,
    kd as kd_styles,
    histogram as histogram_styles,
    errorbar as errorbar_styles,
    bubble as bubble_styles,
    area as area_styles,
    box as box_styles,
)

## Define Hyperparameters
Specifies the maximum number of graphs to be generated

Additionally, the types of libraries and plots can be specified as well

In [3]:
# number of graphs to be generated
max_num_graphs = 35

# set of libraries generating graphs
libraries = [
    'bokeh',
    'altair',
    'plotnine'
]

# outer dict keys (followed by inner dict keys)
generator_key, styles_key, graph_key = 'generator', 'styles', 'graphs'
library_key, data_key, style_key = 'library', 'data', 'style'

# set of plots being generated
graphs = {
    'scatter': { 
        generator_key: scatter_generator.generate_data,
        styles_key: scatter_styles,
    },
    'bar': {
        generator_key: bar_generator.generate_data,
        styles_key: bar_styles,
    },
    'line': {
        generator_key: line_generator.generate_data,
        styles_key: line_styles,
    },
    'contour': {
        generator_key: contour_generator.generate_data,
        styles_key: contour_styles,
    },
    'kd': {
        generator_key: kd_generator.generate_data,
        styles_key: kd_styles,
    },
    'histogram': {
        generator_key: histogram_generator.generate_data,
        styles_key: histogram_styles,
    },
    'errorbar': {
        generator_key: errorbar_generator.generate_data,
        styles_key: errorbar_styles,
    },
    'bubble': {
        generator_key: bubble_generator.generate_data,
        styles_key: bubble_styles,
    },
    'area': {
        generator_key: area_generator.generate_data,
        styles_key: area_styles,
    },
    'box': {
        generator_key: box_generator.generate_data,
        styles_key: box_styles,
    },
}

## Data Separation
Generates a dict representing the number of graphs that need to be created for each library/graph pair

The dictionary keys represent `(library, graph)` where the value represents the number of graphs to be generated

In [4]:
import random

def split_number_evenly(n, n_arrays):
    values = np.linspace(n, 0, n_arrays+1).astype(int)
    return [value - values[i+1] for i, value in enumerate(values[:-1])]

def split_number_randomly(n, n_arrays):
    number, numbers = n, []
    while (number > 0):
        random_number = round(random.random()*number)
        numbers.append(random_number)
        number -= random_number
    numbers.extend([0] * (n_arrays - len(numbers)))
    return random.sample(numbers, len(numbers))

def generate_occurences_dict(
    num_graphs,
    libraries,
    graphs,
    equal_library_distribution=True,
    equal_graph_distribution=True,
):
    # dict for storing the number of graphs per library / graph to generate
    # (e.g. occurences['bokeh', 'bar'] might return a value of 5)
    occurences, library_occurences, graph_occurences = {}, [], []  
    # split library occurences (1-d array)
    split_library_func = split_number_evenly if (equal_library_distribution) else split_number_randomly
    library_occurences = split_library_func(num_graphs, len(libraries))
    # split graph occurences (1-d array)
    split_graph_func =  split_number_evenly if equal_graph_distribution else split_number_randomly
    graph_occurences = [split_graph_func(num_graphs_per_library, len(graphs)) for num_graphs_per_library in library_occurences]
    # add values generated above to occurence dict
    for i, num_graphs_per_library in enumerate(graph_occurences):
        for j, num_graph_occurences in enumerate(num_graphs_per_library):
            library, graph = libraries[i], graphs[j]
            occurences[library, graph] = num_graph_occurences
    return occurences

# generate graph occurence dictionary
graph_names = list(graphs.keys())
occurences = generate_occurences_dict(100, libraries, graph_names)
pprint(occurences)

{('altair', 'area'): 3,
 ('altair', 'bar'): 3,
 ('altair', 'box'): 3,
 ('altair', 'bubble'): 3,
 ('altair', 'contour'): 4,
 ('altair', 'errorbar'): 4,
 ('altair', 'histogram'): 3,
 ('altair', 'kd'): 3,
 ('altair', 'line'): 3,
 ('altair', 'scatter'): 4,
 ('bokeh', 'area'): 3,
 ('bokeh', 'bar'): 3,
 ('bokeh', 'box'): 3,
 ('bokeh', 'bubble'): 4,
 ('bokeh', 'contour'): 3,
 ('bokeh', 'errorbar'): 3,
 ('bokeh', 'histogram'): 4,
 ('bokeh', 'kd'): 3,
 ('bokeh', 'line'): 4,
 ('bokeh', 'scatter'): 4,
 ('plotnine', 'area'): 3,
 ('plotnine', 'bar'): 3,
 ('plotnine', 'box'): 3,
 ('plotnine', 'bubble'): 3,
 ('plotnine', 'contour'): 4,
 ('plotnine', 'errorbar'): 4,
 ('plotnine', 'histogram'): 3,
 ('plotnine', 'kd'): 3,
 ('plotnine', 'line'): 3,
 ('plotnine', 'scatter'): 4}


## Data Generation
Generates the corresponding data based on graph type (i.e. `generate_bar()`)

Data is stored in a generated_graphs object (i.e. an `X` attribute in said graph object)

In [5]:
# generates an X value (and optional y value) based on graph type
def generate_data(graph):
    data_generator = graphs[graph][generator_key]
    data = data_generator()
    return data

# append tuples in the format (library, (X, y)) to each corresponding graph type
# e.g. graphs['bar']['data'] might equal [('bokeh', ([1, 2, 3], ['a', 'b', 'c'])]
generated_graphs = graphs.copy()
for (library, graph) in occurences:
    data = generate_data(graph)
    generated_graphs[graph].setdefault(graph_key, []).append({
        library_key: library,
        data_key: data,
    })

pprint(generated_graphs)

{'area': {'generator': <function generate_data at 0x000001B33AB23940>,
          'graphs': [{'data': [], 'library': 'bokeh'},
                     {'data': [], 'library': 'altair'},
                     {'data': [], 'library': 'plotnine'}],
          'styles': <module 'styles.area' from 'c:\\Users\\micha\\Desktop\\random-chart-generator\\styles\\area.py'>},
 'bar': {'generator': <function generate_data at 0x000001B33AB1E9D0>,
         'graphs': [{'data': (array([-5.        , -3.33333333, -1.66666667,  0.        ,  1.66666667,
        3.33333333,  5.        ]),
                              array([ 7.7099185 , 14.91004357, 17.22025834, 17.56896568, 18.68025764,
       19.15131334, 45.76113219])),
                     'library': 'bokeh'},
                    {'data': (array([-5.  , -3.75, -2.5 , -1.25,  0.  ,  1.25,  2.5 ,  3.75,  5.  ]),
                              [7.241917055977886,
                               23.397101997920792,
                               8.540849666307022,


## Data Stylization
Generates styles for a given library-graph pair (e.g. `generate_style(library, graph)`)

Calls the `generate_bokeh_style`, `generate_altair_style`, and `generate_plotnine_style` functions for each style module dynamically

The stylization code for each graph can be found in the `styles` dir respectively (i.e. `styles/bar.py`)

In [6]:
for (graph, graph_object) in generated_graphs.items():
    styles_module = graph_object[styles_key]
    for graph_content in graph_object[graph_key]:
        # retrieves the generate_LIBRARY_style function based on the graphs library
        # and then appends the generated style to the same object
        library = graph_content[library_key]
        style_function = 'generate_{0}_style'.format(library)
        style_generator = getattr(styles_module, style_function)
        graph_content[style_key] = style_generator()

pprint(generated_graphs)

{'area': {'generator': <function generate_data at 0x000001B33AB23940>,
          'graphs': [{'data': [], 'library': 'bokeh', 'style': {}},
                     {'data': [], 'library': 'altair', 'style': {}},
                     {'data': [], 'library': 'plotnine', 'style': {}}],
          'styles': <module 'styles.area' from 'c:\\Users\\micha\\Desktop\\random-chart-generator\\styles\\area.py'>},
 'bar': {'generator': <function generate_data at 0x000001B33AB1E9D0>,
         'graphs': [{'data': (array([-5.        , -3.33333333, -1.66666667,  0.        ,  1.66666667,
        3.33333333,  5.        ]),
                              array([ 7.7099185 , 14.91004357, 17.22025834, 17.56896568, 18.68025764,
       19.15131334, 45.76113219])),
                     'library': 'bokeh',
                     'style': {}},
                    {'data': (array([-5.  , -3.75, -2.5 , -1.25,  0.  ,  1.25,  2.5 ,  3.75,  5.  ]),
                              [7.241917055977886,
                            

## Define Hyperparameters
1. Define hyperparameters such as number of total graphs, what types of libraries and graphs to be used, etc.
## Data Separation
1. Split the dataset up by library / graph type
1. Allow for distribution flags (so not entirely random)
    - `library_distribution` and `graph_distribution` flags

## Data Generation
1. Generate the corresponding data (i.e. `generate_bar()`)
1. Attach data to graph object (i.e. add `X` attribute to graph object)

# Style Generation
1. Chart Stylization (i.e. each library will be in charge of their own stylization)

<br />

## TODO:
- Add in contour generation functions
- Add in area plot generation functions
- Create chart generation process (below)
- Add in chart styles

<br />
<hr />

## Chart Generation
1. Generate graph (i.e. generate graphs using `generate_graph(library, graph)`)

In [None]:
"""
def generate_bokeh(graph):
    # set theme
    theme = pick_random_theme()
    curdoc().theme = theme
    # generate graph
    generate_bokeh_graph[graph](data)

def generate_plotnine():
    # set theme
    theme = pick_random_theme()
    theme_set(theme)
    # generate graph
    generate_plotnine_graph[graph](data)
"""