# Layout optimisation

This example uses the same data as in the [US energy consumption example](us-energy-consumption.ipynb) to demonstrate node order and position optimisation. 

In [1]:
from attr import evolve
import pandas as pd
from floweaver import *

Load the data and set up the Sankey Diagram Definition, as in the previous example:

In [2]:
# Load the flow data. The second CSV is passed as `dim_process_filename`;
# presumably it holds the per-process attributes (e.g. the `type` column that
# the process-group selections query) -- confirm against the data files.
dataset = Dataset.from_csv(
    "us-energy-consumption.csv",
    dim_process_filename="us-energy-consumption-processes.csv",
)

In [3]:
# Energy sources, in the order used both for the 'sources' partition and for
# the hidden labels of the 'direct_use' waypoint.
sources = [
    'Solar',
    'Nuclear',
    'Hydro',
    'Wind',
    'Geothermal',
    'Natural_Gas',
    'Coal',
    'Biomass',
    'Petroleum',
]

# End-use sectors, used to partition the 'uses' process group.
uses = [
    'Residential',
    'Commercial',
    'Industrial',
    'Transportation',
]

In [4]:
# Nodes of the diagram, keyed by the ids that `ordering` and `bundles` refer to.
nodes = {
    # Process groups selected by a query on the process `type`, split into one
    # node per process name.
    'sources': ProcessGroup(
        'type == "source"',
        partition=Partition.Simple('process', sources),
        title='Sources',
    ),
    # Process groups selected by an explicit list of process ids.
    'imports': ProcessGroup(['Net_Electricity_Import'], title='Net electricity imports'),
    'electricity': ProcessGroup(['Electricity_Generation'], title='Electricity Generation'),
    'uses': ProcessGroup(
        'type == "use"',
        partition=Partition.Simple('process', uses),
    ),

    'energy_services': ProcessGroup(['Energy_Services'], title='Energy services'),
    'rejected': ProcessGroup(['Rejected_Energy'], title='Rejected energy'),

    # Waypoint partitioned by flow source, one group per entry of `sources`.
    'direct_use': Waypoint(
        Partition.Simple(
            'source',
            # This is a hack to hide the labels of the partition, there should
            # be a better way: each group is labelled with a different number
            # of spaces (none for the first source), so the labels are all
            # distinct but contain no visible text.
            [(' ' * position, [source]) for position, source in enumerate(sources)],
        )
    ),
}

# Placement of the node ids from `nodes`: one entry per layer, and each layer
# holds three lists (presumably vertical bands, top to bottom -- confirm
# against the floweaver documentation). Only 'imports' sits outside the
# middle list.
ordering = [
    [
        [],
        ['sources'],
        [],
    ],
    [
        ['imports'],
        ['electricity', 'direct_use'],
        [],
    ],
    [
        [],
        ['uses'],
        [],
    ],
    [
        [],
        ['rejected', 'energy_services'],
        [],
    ],
]

# Bundles list the connections to draw, each given as a pair of node ids from
# `nodes` (first argument -> second argument).
bundles = [
    # Sources feeding electricity generation.
    Bundle('sources', 'electricity'),
    # Sources feeding the end uses, routed via the 'direct_use' waypoint.
    Bundle('sources', 'uses', waypoints=['direct_use']),
    # Electricity (generated and imported) delivered to the end uses.
    Bundle('electricity', 'uses'),
    Bundle('imports', 'uses'),
    # What leaves the end uses: energy services and rejected energy.
    Bundle('uses', 'energy_services'),
    Bundle('uses', 'rejected'),
    # Rejected energy coming directly from electricity generation.
    Bundle('electricity', 'rejected'),
]

In [5]:
# Colours for the entries of `sources`, in the same order as that list.
_source_colours = {
    'Solar': 'gold',
    'Nuclear': 'red',
    'Hydro': 'blue',
    'Wind': 'purple',
    'Geothermal': 'brown',
    'Natural_Gas': 'steelblue',
    'Coal': 'black',
    'Biomass': 'lightgreen',
    'Petroleum': 'green',
}

# Colours for the remaining keys (presumably the other values of the flow
# `type` partition -- confirm against the dataset).
_other_colours = {
    'Electricity': 'orange',
    'Rejected energy': 'lightgrey',
    'Energy services': 'dimgrey',
}

# Colour lookup passed to `weave`; key order is sources first, then the rest.
palette = {**_source_colours, **_other_colours}

In [6]:
# Assemble the diagram definition from the nodes, bundles and ordering defined
# above; flows are partitioned by the dataset's 'type' dimension.
sdd = SankeyDefinition(
    nodes,
    bundles,
    ordering,
    flow_partition=dataset.partition('type'),
)

# Apply the definition to the dataset, colouring with `palette`.
sankey_data = weave(sdd, dataset, palette=palette)

This is the default, un-optimised layout:

In [7]:
# Render the un-optimised diagram; the call is the cell's last expression so
# the widget is displayed as the cell output.
sankey_data.to_widget(
    width=700,
    height=450,
    margins={'left': 100, 'right': 120},
)

SankeyWidget(groups=[{'id': 'sources', 'type': 'process', 'title': 'Sources', 'nodes': ['sources^Solar', 'sour…

Optimise the node ordering:

In [8]:
# Produce a copy of the diagram data with the node order optimised; the result
# is kept under a new name so the original `sankey_data` stays available.
# NOTE(review): the recorded output of this cell is a bare generator repr,
# which looks like stray debug output from inside the library call -- confirm.
sankey_data_evolved = optimise_node_order(sankey_data, group_nodes=True)

<generator object optimise_node_order_model.<locals>.<genexpr> at 0x000001887C534900>


In [9]:
# Render the diagram with the optimised node order, using the same size and
# margins as the un-optimised view. With `debugging=True` the recorded output
# is a VBox wrapping the SankeyWidget rather than the bare widget.
sankey_data_evolved.to_widget(
    width=700,
    height=450,
    margins={'left': 100, 'right': 120},
    debugging=True,
)

VBox(children=(SankeyWidget(groups=[{'id': 'sources', 'type': 'process', 'title': 'Sources', 'nodes': ['source…

Optimise the node positions to make flows as straight as possible:

In [10]:
# Compute an optimised node-position layout for the re-ordered diagram and
# hand it to the widget through its `layout` argument.
sankey_data_evolved.to_widget(
    layout=optimise_node_positions(
        sankey_data_evolved,
        scale=1.5,
    ),
)

SankeyWidget(groups=[{'id': 'sources', 'type': 'process', 'title': 'Sources', 'nodes': ['sources^Solar', 'sour…

In [11]:
# Collect every link of the re-ordered diagram as a (source, target) pair of
# node ids.
edges = [(link.source, link.target) for link in sankey_data_evolved.links]

# Leave the list as the cell's last expression instead of print()-ing it:
# Jupyter then pretty-prints it wrapped over several lines, rather than as a
# single very long line of text.
edges

[('sources^Solar', 'electricity^*'), ('sources^Solar', 'direct_use^'), ('sources^Nuclear', 'electricity^*'), ('sources^Hydro', 'electricity^*'), ('sources^Wind', 'electricity^*'), ('sources^Geothermal', 'electricity^*'), ('sources^Geothermal', 'direct_use^    '), ('sources^Natural_Gas', 'electricity^*'), ('sources^Natural_Gas', 'direct_use^     '), ('sources^Coal', 'electricity^*'), ('sources^Coal', 'direct_use^      '), ('sources^Biomass', 'electricity^*'), ('sources^Biomass', 'direct_use^       '), ('sources^Petroleum', 'electricity^*'), ('sources^Petroleum', 'direct_use^        '), ('imports^*', 'uses^Residential'), ('electricity^*', 'uses^Commercial'), ('electricity^*', 'uses^Industrial'), ('electricity^*', 'uses^Residential'), ('electricity^*', 'uses^Transportation'), ('electricity^*', '__electricity_rejected_2^*'), ('direct_use^', 'uses^Commercial'), ('direct_use^', 'uses^Industrial'), ('direct_use^', 'uses^Residential'), ('direct_use^    ', 'uses^Commercial'), ('direct_use^    '

In [12]:
# Show the node ordering stored on the optimised diagram data, so it can be
# compared with the hand-written `ordering` given to the SankeyDefinition.
sankey_data_evolved.ordering

Ordering( ; sources^Petroleum, sources^Biomass, sources^Natural_Gas, sources^Solar, sources^Geothermal, sources^Coal, sources^Wind, sources^Hydro, sources^Nuclear;  | imports^*; direct_use^        , direct_use^       , direct_use^     , direct_use^, direct_use^      , direct_use^    , electricity^*;  | ; uses^Transportation, uses^Industrial, uses^Commercial, uses^Residential, __electricity_rejected_2^*;  | ; energy_services^*, rejected^*;  )