## Imports
Contains import statements for all the required libraries and frameworks

In [2]:
import random
import numpy as np
from pprint import pprint
from importlib import import_module

## Define Hyperparameters
Specifies the maximum number of graphs to be generated

Additionally, the types of libraries and plots can be specified as well

In [3]:
# number of graphs to be generated
max_num_graphs = 35

# set of libraries generating graphs
libraries = [
    'bokeh',
    'altair',
    'plotnine',
]

# list of all potential graph types
graph_types = [
    'scatter',
    'bar',
    'line',
    'contour',
    'kd',
    'histogram',
    'errorbar',
    'bubble',
    'area',
    'box',
]

# generate dict for each possible graph type
# (e.g. { 'scatter': { 'generator': function, 'stylizer': module, ... })
def map_graph_to_object(graph):
    return (graph, {
        'generator': import_module('generators.{graph}'.format(graph=graph)).generate_data,
        'stylizer': import_module('styles.{graph}'.format(graph=graph)),
        'creator': import_module('creators.{graph}'.format(graph=graph)),
    })

graphs = dict(map(map_graph_to_object, graph_types))
pprint(graphs)

{'area': {'creator': <module 'creators.area' from '/Users/seniorfluffie/Desktop/random-graph-generator/creators/area.py'>,
          'generator': <function generate_data at 0x7f88209789d0>,
          'stylizer': <module 'styles.area' from '/Users/seniorfluffie/Desktop/random-graph-generator/styles/area.py'>},
 'bar': {'creator': <module 'creators.bar' from '/Users/seniorfluffie/Desktop/random-graph-generator/creators/bar.py'>,
         'generator': <function generate_data at 0x7f88209710d0>,
         'stylizer': <module 'styles.bar' from '/Users/seniorfluffie/Desktop/random-graph-generator/styles/bar.py'>},
 'box': {'creator': <module 'creators.box' from '/Users/seniorfluffie/Desktop/random-graph-generator/creators/box.py'>,
         'generator': <function generate_data at 0x7f8820978f70>,
         'stylizer': <module 'styles.box' from '/Users/seniorfluffie/Desktop/random-graph-generator/styles/box.py'>},
 'bubble': {'creator': <module 'creators.bubble' from '/Users/seniorfluffie/Deskt

## Data Separation
Generates a dict representing the number of graphs that need to be created for each library/graph pair

The dictionary keys represent `(library, graph)` where the value represents the number of graphs to be generated

In [4]:
def split_number_evenly(n, n_arrays):
    values = np.linspace(n, 0, n_arrays+1).astype(int)
    return [value - values[i+1] for i, value in enumerate(values[:-1])]

def split_number_randomly(n, n_arrays):
    number, numbers = n, []
    while (number > 0):
        random_number = round(random.random()*number)
        numbers.append(random_number)
        number -= random_number
    numbers.extend([0] * (n_arrays - len(numbers)))
    return random.sample(numbers, len(numbers))

def generate_occurences_dict(
    num_graphs,
    libraries,
    graphs,
    equal_library_distribution=True,
    equal_graph_distribution=True,
):
    # dict for storing the number of graphs per library / graph to generate
    # (e.g. occurences['bokeh', 'bar'] might return a value of 5)
    occurences, library_occurences, graph_occurences = {}, [], []  
    # split library occurences (1-d array)
    split_library_func = split_number_evenly if (equal_library_distribution) else split_number_randomly
    library_occurences = split_library_func(num_graphs, len(libraries))
    # split graph occurences (1-d array)
    split_graph_func =  split_number_evenly if equal_graph_distribution else split_number_randomly
    graph_occurences = [split_graph_func(num_graphs_per_library, len(graphs)) for num_graphs_per_library in library_occurences]
    # add values generated above to occurence dict
    for i, num_graphs_per_library in enumerate(graph_occurences):
        for j, num_graph_occurences in enumerate(num_graphs_per_library):
            library, graph = libraries[i], graphs[j]
            occurences[library, graph] = num_graph_occurences
    return occurences

# generate graph occurence dictionary
graph_names = list(graphs.keys())
occurences = generate_occurences_dict(100, libraries, graph_names)
pprint(occurences)

{('altair', 'area'): 3,
 ('altair', 'bar'): 3,
 ('altair', 'box'): 3,
 ('altair', 'bubble'): 3,
 ('altair', 'contour'): 4,
 ('altair', 'errorbar'): 4,
 ('altair', 'histogram'): 3,
 ('altair', 'kd'): 3,
 ('altair', 'line'): 3,
 ('altair', 'scatter'): 4,
 ('bokeh', 'area'): 3,
 ('bokeh', 'bar'): 3,
 ('bokeh', 'box'): 3,
 ('bokeh', 'bubble'): 4,
 ('bokeh', 'contour'): 3,
 ('bokeh', 'errorbar'): 3,
 ('bokeh', 'histogram'): 4,
 ('bokeh', 'kd'): 3,
 ('bokeh', 'line'): 4,
 ('bokeh', 'scatter'): 4,
 ('plotnine', 'area'): 3,
 ('plotnine', 'bar'): 3,
 ('plotnine', 'box'): 3,
 ('plotnine', 'bubble'): 3,
 ('plotnine', 'contour'): 4,
 ('plotnine', 'errorbar'): 4,
 ('plotnine', 'histogram'): 3,
 ('plotnine', 'kd'): 3,
 ('plotnine', 'line'): 3,
 ('plotnine', 'scatter'): 4}


## Data Generation
Generates the corresponding data based on graph type (i.e. `generate_bar()`)

Data is stored in a generated_graphs object (i.e. an `X` attribute in said graph object)

In [5]:
# generates an X value (and optional y value) based on graph type
def generate_data(graph):
    data_generator = graphs[graph]['generator']
    data = data_generator()
    return data

# append tuples in the format (library, (X, y)) to each corresponding graph type
# e.g. graphs['bar']['data'] might equal [('bokeh', ([1, 2, 3], ['a', 'b', 'c'])]
generated_graphs = graphs.copy()
for (library, graph) in occurences:
    data = generate_data(graph)
    generated_graphs[graph].setdefault('graphs', []).append({
        'library': library,
        'data': data,
    })

pprint(generated_graphs)

{'area': {'creator': <module 'creators.area' from '/Users/seniorfluffie/Desktop/random-graph-generator/creators/area.py'>,
          'generator': <function generate_data at 0x7f88209789d0>,
          'graphs': [{'data': [], 'library': 'bokeh'},
                     {'data': [], 'library': 'altair'},
                     {'data': [], 'library': 'plotnine'}],
          'stylizer': <module 'styles.area' from '/Users/seniorfluffie/Desktop/random-graph-generator/styles/area.py'>},
 'bar': {'creator': <module 'creators.bar' from '/Users/seniorfluffie/Desktop/random-graph-generator/creators/bar.py'>,
         'generator': <function generate_data at 0x7f88209710d0>,
         'graphs': [{'data': (array([-5.        , -3.57142857, -2.14285714, -0.71428571,  0.71428571,
        2.14285714,  3.57142857,  5.        ]),
                              array([11.47855489, 12.85630823, 13.81172895, 18.72866844, 20.45106039,
       37.34561899, 40.56910311, 45.17459222])),
                     'library': 

## Data Stylization
Generates styles for a given library-graph pair (e.g. `generate_style(library, graph)`)

Calls the `generate_bokeh_style`, `generate_altair_style`, and `generate_plotnine_style` functions for each style module dynamically

The stylization code for each graph can be found in the `styles` dir respectively (i.e. `styles/bar.py`)

In [6]:
# def test(module_name, function_name):
#     return None

for (graph, graph_object) in generated_graphs.items():
    styles_module = graph_object['stylizer']
    for graph_content in graph_object['graphs']:
        # retrieves the generate_LIBRARY_style function based on the graphs
        # library and then appends the generated style to the same object
        library = graph_content['library']
        style_function = 'generate_{0}_style'.format(library)
        style_generator = getattr(styles_module, style_function)
        graph_content['styles'] = style_generator()

pprint(generated_graphs)

{'area': {'creator': <module 'creators.area' from '/Users/seniorfluffie/Desktop/random-graph-generator/creators/area.py'>,
          'generator': <function generate_data at 0x7f88209789d0>,
          'graphs': [{'data': [], 'library': 'bokeh', 'styles': {}},
                     {'data': [], 'library': 'altair', 'styles': {}},
                     {'data': [], 'library': 'plotnine', 'styles': {}}],
          'stylizer': <module 'styles.area' from '/Users/seniorfluffie/Desktop/random-graph-generator/styles/area.py'>},
 'bar': {'creator': <module 'creators.bar' from '/Users/seniorfluffie/Desktop/random-graph-generator/creators/bar.py'>,
         'generator': <function generate_data at 0x7f88209710d0>,
         'graphs': [{'data': (array([-5.        , -3.57142857, -2.14285714, -0.71428571,  0.71428571,
        2.14285714,  3.57142857,  5.        ]),
                              array([11.47855489, 12.85630823, 13.81172895, 18.72866844, 20.45106039,
       37.34561899, 40.56910311, 45.174

## Graph Creation
Uses the generated data points and styles to create the respective graphs

Additional flags can be used to specify the output file path and file type

In [7]:
for (graph, graph_object) in generated_graphs.items():
    creator_module = graph_object['creator']
    for graph_content in graph_object['graphs']:
        # retrieves the create_LIBRARY_graph function based on the graphs
        # library and then appends the created graph to the same object
        library = graph_content['library']
        create_function = 'create_{0}_graph'.format(library)
        graph_creator = getattr(creator_module, create_function)
        graph_content['graph'] = graph_creator(graph_content)
        # export content to filepath
        utils_module = import_module('creators.utils')
        export_function = 'create_{0}_graph'.format(library)
        exporter = getattr(utils_module, export_function)
        file_path = utils_module.generate_file_path(library, graph_content, 'TODO')
        exporter(graph_content, file_path)
        

TypeError: create_bokeh_graph() takes 0 positional arguments but 1 was given

<hr/>
<br/>

## Define Hyperparameters
1. Define hyperparameters such as number of total graphs, what types of libraries and graphs to be used, etc.
## Data Separation
1. Split the dataset up by library / graph type
1. Allow for distribution flags (so not entirely random)
    - `library_distribution` and `graph_distribution` flags

## Data Generation
1. Generate the corresponding data (i.e. `generate_bar()`)
1. Attach data to graph object (i.e. add `X` attribute to graph object)

# Style Generation
1. Chart Stylization (i.e. each library will be in charge of their own stylization)

# Chart Generation
1. Chart Generation

<br />

## TODO:
- Add in export code
- Decide whether to function-ize the stylization / creation code
- Add in documentation (both in Notebook and README)

Small Work Break
- Add in theming / stylization options

## POSSIBLE TODOS:
- Possibly batching (if not enough resources to run pipeline for large number of graphs)

In [None]:
"""
def generate_bokeh(graph):
    # set theme
    theme = pick_random_theme()
    curdoc().theme = theme
    # generate graph
    generate_bokeh_graph[graph](data)

def generate_plotnine():
    # set theme
    theme = pick_random_theme()
    theme_set(theme)
    # generate graph
    generate_plotnine_graph[graph](data)
"""

'\ndef generate_bokeh(graph):\n    # set theme\n    theme = pick_random_theme()\n    curdoc().theme = theme\n    # generate graph\n    generate_bokeh_graph[graph](data)\n\ndef generate_plotnine():\n    # set theme\n    theme = pick_random_theme()\n    theme_set(theme)\n    # generate graph\n    generate_plotnine_graph[graph](data)\n'