## Set Spark Configuration

In [None]:
%%configure
{
    // Spark session settings. Valid parameters are listed at https://github.com/cloudera/livy#request-body.
    // Each setting is read from the named pipeline activity parameter; "defaultValue" applies when none is supplied.
    "driverMemory": {
        "activityParameterName": "driverMemoryParameter",
        "defaultValue": "28g"
    }, // Recommended values: ["28g", "56g", "112g", "224g", "400g", "472g"]
    "driverCores": {
        "activityParameterName": "driverCoresParameter",
        "defaultValue": 4
    }, // Recommended values: [4, 8, 16, 32, 64, 80]
    "executorMemory": {
        "activityParameterName": "executorMemoryParameter",
        "defaultValue": "28g"
    },
    "executorCores": {
        "activityParameterName": "executorCoresParameter",
        "defaultValue": 4
    },
    "numExecutors": {
        "activityParameterName": "numExecutorsParameter",
        "defaultValue": 1
    }
}

## Creating Global Views 

In [None]:
# Placeholder for creating global views so they can be accessed by all notebooks

# Calling the framework

In [12]:
from geniepackage import GenieProcessExecution

# Parameters

The following parameters can be passed through a Synapse/ADF pipeline
- state -> Denotes whether the framework should start from the beginning or restart from where it failed. Takes one of two values: "start" or "restart". \<String>
- pipeline -> Name of the pipeline to be run in the framework. The name is case-sensitive and should be present in the metadata table. \<String>

In [13]:
# Values that a Synapse/ADF pipeline can pass in
pipeline = "demo1_pipeline"
state = "start"

# -- NOT required
# runID and triggerType are set by the pipeline itself; the user does not need to pass them
runID = triggerType = ""

- threads -> The maximum number of notebooks that can run concurrently in a pipeline or sub-pipeline. By default, the value is set to the number of executor cores available. \<integer>


In [14]:
# By default, threads = (number of registered executors) * (CPU count reported by os.cpu_count()).
# "threads" can be overridden with a fixed number depending on requirements.
# NOTE(review): os.cpu_count() is evaluated where this cell runs (presumably the driver),
# so it only matches the executor core count when driver and executors are the same size - confirm.
import os

# getExecutorMemoryStatus() has one entry per block manager, including the driver, hence the "- 1".
executor_count = sc._jsc.sc().getExecutorMemoryStatus().keySet().size() - 1

# os.cpu_count() may return None, and no executors may be registered yet;
# never hand the framework a thread count below 1.
threads = max(1, executor_count * (os.cpu_count() or 1))

## Instantiating and Executing the framework

In [16]:
# Create the framework object
genie_obj = GenieProcessExecution.Genie(
    spark,
    mssparkutils.notebook,
    threads,
)

In [22]:
# Run the framework for the selected pipeline in the requested state ("start" or "restart")
genie_obj.execute(state, pipeline)

### View Metadata

In [19]:
%%sql
-- Metadata rows whose pipeline name contains "demo1" or "demo2"
SELECT *
FROM genie.metadata
WHERE pipeline LIKE '%demo1%'
   OR pipeline LIKE '%demo2%'

### Visualizing the Graph

In [6]:
# If graphviz and pydot are not listed in the pool's requirements.yml file, run the following commands; otherwise proceed to the next cell
# conda install -y graphviz 
# pip install pydot
# pip install graphviz

In [20]:
# import graphviz
# import pydot
# from IPython.display import display

# genie_obj.visualizeGraph(pipeline)

In [None]:
# End the notebook run. mssparkutils.notebook.exit expects the exit value as its argument;
# an empty string is passed because this notebook has no result to hand back to the caller.
mssparkutils.notebook.exit("")