<img width="8%" alt="Naas.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Naas.png" style="border-radius: 15%">

# Pipeline

**Tags:** #naas #pipeline #jupyter #notebook #dataanalysis #workflow #streamline

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel)

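**Description:** This notebook chains the content engine notebooks (LinkedIn posts stats, Google Sheets content database, Plotly analytics, plugin, notification and an optional custom notebook) into a single pipeline and runs it.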

## Input

### Import libraries

In [None]:
from naas.pipeline import (
    Pipeline,
    NotebookStep,
    End,
    ParallelStep,
    DummyStep
)
from datetime import date
import os
import glob
import naas
import naas_data_product

### Setup variables

In [None]:
# Storage
datalake_dir = os.path.join("/", "home", "ftp", "abi", "outputs")

# Entity
entity_index = "0"
# Every entity setting below is read from the same storage folder, so build its path once.
# NOTE(review): `pload` is not imported in this notebook's import cell — presumably it is
# made available by `naas_data_product`; confirm before running on a fresh kernel.
entity_data_dir = os.path.join(datalake_dir, "entities", entity_index)
entity_name = pload(entity_data_dir, "entity_name") or ""
emails = pload(entity_data_dir, "emails") or []
linkedin_url = pload(entity_data_dir, "linkedin_url") or ""
# LinkedIn credentials are read from naas secrets rather than hardcoded.
li_at = naas.secret.get("LINKEDIN_LI_AT")
JSESSIONID = naas.secret.get("LINKEDIN_JSESSIONID")
entity_dir = pload(entity_data_dir, "entity_dir") or ""

# Google Sheets
spreadsheet_url = pload(entity_data_dir, "abi_spreadsheet") or ""
sheet_content = "CONTENT"

# Engine
engine_name = "content-engine"
assistant_name = "📲 Content Assistant"
# Optional custom notebook; its presence is checked when the steps are declared.
custom_pipeline_path = os.path.join(
    naas_data_product.MODELS_PATH,
    engine_name,
    "custom",
    "__pipeline__.ipynb",
)

## Model

### Create output directory by date

In [None]:
# Outputs are grouped per entity, per engine and per run date (ISO format, YYYY-MM-DD).
run_date = date.today().isoformat()
engine_dir = os.path.join(entity_dir, engine_name, run_date)
print("✅ Output directory:", engine_dir)

### Setup notebooks
This section declares all the notebooks as pipeline steps and creates the unique IDs that will be used in the pipeline.

In [None]:
# Folder containing the engine's core domain notebooks used by most steps below.
domain_dir = os.path.join(naas_data_product.MODELS_PATH, engine_name, "core", "domain")

# LinkedIn step: receives the LinkedIn credentials, the entity URL and the output directory.
extraction = NotebookStep(
    name="LinkedIn",
    notebook_path=os.path.join(domain_dir, "LinkedIn_Get_entity_posts_stats.ipynb"),
    parameters=dict(
        li_at=li_at,
        JSESSIONID=JSESSIONID,
        linkedin_url=linkedin_url,
        output_dir=engine_dir,
    ),
)

# Content step: reads from and writes to the dated engine directory, targeting the content sheet.
cleaning = NotebookStep(
    name="Content",
    notebook_path=os.path.join(domain_dir, "Google_Sheets_Send_content_db_to_spreadsheet.ipynb"),
    parameters=dict(
        input_dir=engine_dir,
        entity_name=entity_name,
        spreadsheet_url=spreadsheet_url,
        sheet_name=sheet_content,
        output_dir=engine_dir,
    ),
)

# Analytics step: same inputs as the content step, plus the LinkedIn URL.
analytics = NotebookStep(
    name="Analytics",
    notebook_path=os.path.join(domain_dir, "Plotly_Create_analytics.ipynb"),
    parameters=dict(
        input_dir=engine_dir,
        entity_name=entity_name,
        spreadsheet_url=spreadsheet_url,
        sheet_name=sheet_content,
        linkedin_url=linkedin_url,
        output_dir=engine_dir,
    ),
)

# Plugin step: the plugin notebook sits at the engine root, not under core/domain.
plugin = NotebookStep(
    name="Plugin",
    notebook_path=os.path.join(naas_data_product.MODELS_PATH, engine_name, "__plugin__.ipynb"),
    parameters=dict(
        input_dir=engine_dir,
        spreadsheet_url=spreadsheet_url,
        sheet_name=sheet_content,
        name=assistant_name,
        entity_name=entity_name,
    ),
)

# Notification step: receives the recipients loaded from the entity settings.
email = NotebookStep(
    name="Notification",
    notebook_path=os.path.join(domain_dir, "Naas_Send_notification.ipynb"),
    parameters=dict(
        input_dir=engine_dir,
        spreadsheet_url=spreadsheet_url,
        sheet_name=sheet_content,
        datalake_dir=datalake_dir,
        entity_name=entity_name,
        email_to=emails,
    ),
)

# Optional custom step: run the engine's custom pipeline notebook only when it exists.
# The path that is checked is the same one handed to the step, so the existence test
# and the executed notebook can never point at different files.
if os.path.exists(custom_pipeline_path):
    custom = NotebookStep(
        name="Custom",
        notebook_path=custom_pipeline_path,
        parameters={}
    )
else:
    # No custom notebook: use a placeholder so the pipeline chain keeps the same shape.
    custom = DummyStep("Custom")

## Output

### Run pipeline

In [None]:
# Execution artefacts are written under the dated engine directory.
executions_dir = os.path.join(engine_dir, "pipeline_executions")

pipeline = Pipeline()

# Steps are chained with `>>` in the order listed and the chain is closed with End().
pipeline >> extraction >> cleaning >> analytics >> plugin >> email >> custom >> End()

pipeline.run(outputs_path=executions_dir)