<img width="8%" alt="Naas.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Naas.png" style="border-radius: 15%">

# Growth Pipeline

**Tags:** #naas #pipeline #jupyter #notebook #dataanalysis #workflow #streamline

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel)

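**Description:** This notebook chains the growth engine notebooks (LinkedIn, Interactions, Growth, People, Organizations, Contacts, Analytics, Plugin, Notification and an optional Custom step) into a single pipeline and runs it.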

## Input

### Import libraries

In [None]:
from naas.pipeline import (
    Pipeline,
    NotebookStep,
    End,
    ParallelStep,
    DummyStep
)
from datetime import date
import os
import glob
import naas
import naas_data_product

### Setup variables

In [None]:
# Storage: datalake root and Naas API token, both read from Naas secrets.
datalake_dir = naas.secret.get("ABI_DATALAKE_DIR")
api_key = naas.secret.get("NAAS_API_TOKEN")

# Entity: every entity setting is loaded with `pload` from the same
# datalake folder, so the folder path is built once.
# NOTE(review): `pload` is not defined or imported in this notebook's
# visible cells — presumably provided by the environment; confirm.
entity_index = "0"
entity_path = os.path.join(datalake_dir, "entities", entity_index)
entity_name = pload(entity_path, "entity_name") or ""
emails = pload(entity_path, "emails") or []
linkedin_url = pload(entity_path, "linkedin_url") or ""
# LinkedIn session cookies, read from Naas secrets.
li_at = naas.secret.get("LINKEDIN_LI_AT")
JSESSIONID = naas.secret.get("LINKEDIN_JSESSIONID")
entity_dir = pload(entity_path, "entity_dir") or ""

# Google Sheets: spreadsheet URL and the sheet (tab) names passed to the steps.
spreadsheet_url = pload(entity_path, "abi_spreadsheet") or ""
sheet_posts = "POSTS"
sheet_interaction = "INTERACTIONS"
sheet_growth = "GROWTH"
sheet_people = "PEOPLE"
sheet_people_organizations = "ORGANIZATIONS"
sheet_contacts = "CONTACTS"

# Engine: names used to build paths and to label the plugin.
engine_name = "growth-engine"
assistant_name = "Growth Assistant"
# Optional user-provided notebook; a "Custom" step runs it only if it exists.
custom_pipeline_path = os.path.join(
    naas_data_product.MODELS_PATH,
    engine_name,
    "custom",
    "__pipeline__.ipynb",
)

## Model

### Set environment variables

In [None]:
# Export the API token and LinkedIn cookies as environment variables
# (presumably read by the notebooks executed downstream — confirm).
# Keys are set in the same order as before; a non-string value (e.g. a
# missing secret) still raises TypeError at the offending key.
os.environ.update(
    {
        "NAAS_API_TOKEN": api_key,
        "LINKEDIN_LI_AT": li_at,
        "LINKEDIN_JSESSIONID": JSESSIONID,
    }
)

### Build output directory paths by date

In [None]:
# Dated output folder for this run: <entity_dir>/<engine_name>/<YYYY-MM-DD>.
run_date = date.today().isoformat()
engine_dir = os.path.join(entity_dir, engine_name, run_date)
print("✅ Output directory:", engine_dir)

# Pipeline execution outputs go under the dated folder in production,
# and under the engine's models folder otherwise.
if naas.is_production():
    pipeline_dir = os.path.join(engine_dir, "pipeline_executions")
else:
    pipeline_dir = os.path.join(
        naas_data_product.MODELS_PATH, engine_name, "pipeline_executions"
    )
print("✅ Pipeline directory:", pipeline_dir)

### Setup notebooks
This section declares each notebook as a pipeline step and creates the unique IDs that will be used in the pipeline.

In [None]:
# Folder holding the engine's core domain notebooks; every step below
# except the plugin points at a notebook inside it.
domain_dir = os.path.join(naas_data_product.MODELS_PATH, engine_name, "core", "domain")

# Step 1: LinkedIn interactions notebook (receives the posts sheet and cookies).
linkedin = NotebookStep(
    name="LinkedIn",
    notebook_path=os.path.join(domain_dir, "LinkedIn_Get_interactions_from_posts.ipynb"),
    parameters=dict(
        entity_name=entity_name,
        spreadsheet_url=spreadsheet_url,
        sheet_posts=sheet_posts,
        li_at=li_at,
        JSESSIONID=JSESSIONID,
        output_dir=engine_dir,
    ),
)

# Step 2: interactions database update.
interactions = NotebookStep(
    name="Interactions",
    notebook_path=os.path.join(domain_dir, "Google_Sheets_Update_interactions_db.ipynb"),
    parameters=dict(
        entity_dir=entity_dir,
        input_dir=engine_dir,
        output_dir=engine_dir,
        spreadsheet_url=spreadsheet_url,
        sheet_interaction=sheet_interaction,
    ),
)

# Step 3: growth database update.
growth = NotebookStep(
    name="Growth",
    notebook_path=os.path.join(domain_dir, "Google_Sheets_Update_growth_db.ipynb"),
    parameters=dict(
        input_dir=engine_dir,
        output_dir=engine_dir,
        spreadsheet_url=spreadsheet_url,
        sheet_growth=sheet_growth,
    ),
)

# Step 4: people database update.
people = NotebookStep(
    name="People",
    notebook_path=os.path.join(domain_dir, "Google_Sheets_Update_people_db.ipynb"),
    parameters=dict(
        input_dir=engine_dir,
        api_key=api_key,
        li_at=li_at,
        JSESSIONID=JSESSIONID,
        spreadsheet_url=spreadsheet_url,
        sheet_people=sheet_people,
        output_dir=engine_dir,
        datalake_dir=datalake_dir,
    ),
)

# Step 5: organizations database update (also receives the people sheet name).
organizations = NotebookStep(
    name="Organizations",
    notebook_path=os.path.join(domain_dir, "Google_Sheets_Update_organizations_db.ipynb"),
    parameters=dict(
        input_dir=engine_dir,
        api_key=api_key,
        li_at=li_at,
        JSESSIONID=JSESSIONID,
        spreadsheet_url=spreadsheet_url,
        sheet_people_organizations=sheet_people_organizations,
        sheet_people=sheet_people,
        output_dir=engine_dir,
        datalake_dir=datalake_dir,
    ),
)

# Step 6: contacts view update.
contacts = NotebookStep(
    name="Contacts",
    notebook_path=os.path.join(domain_dir, "Google_Sheets_Update_contacts_view.ipynb"),
    parameters=dict(
        input_dir=engine_dir,
        api_key=api_key,
        spreadsheet_url=spreadsheet_url,
        sheet_contacts=sheet_contacts,
        entity_name=entity_name,
        linkedin_url=linkedin_url,
        datalake_dir=datalake_dir,
        output_dir=engine_dir,
    ),
)

# Step 7: cohort analytics notebook, fed with the growth sheet.
analytics = NotebookStep(
    name="Analytics",
    notebook_path=os.path.join(domain_dir, "Plotly_Follow_leads_by_cohorts.ipynb"),
    parameters=dict(
        input_dir=engine_dir,
        entity_name=entity_name,
        spreadsheet_url=spreadsheet_url,
        sheet_name=sheet_growth,
        linkedin_url=linkedin_url,
        output_dir=engine_dir,
    ),
)

# Step 8: plugin notebook; it lives at the engine root and writes to the
# entity's "plugins" folder instead of the dated output folder.
plugin = NotebookStep(
    name="Plugin",
    notebook_path=os.path.join(naas_data_product.MODELS_PATH, engine_name, "__plugin__.ipynb"),
    parameters=dict(
        input_dir=engine_dir,
        spreadsheet_url=spreadsheet_url,
        sheet_name=sheet_growth,
        name=assistant_name,
        entity_name=entity_name,
        output_dir=os.path.join(entity_dir, "plugins"),
    ),
)

# Step 9: notification notebook, given the entity's email list.
email = NotebookStep(
    name="Notification",
    notebook_path=os.path.join(domain_dir, "Naas_Send_leads_generation_notification.ipynb"),
    parameters=dict(
        entity_dir=entity_dir,
        entity_name=entity_name,
        input_dir=engine_dir,
        spreadsheet_url=spreadsheet_url,
        sheet_name=sheet_growth,
        output_dir=engine_dir,
        email_to=emails,
    ),
)

# Optional "Custom" step: run the user-provided pipeline notebook when it
# exists, otherwise insert a placeholder so the chain keeps the same shape.
if os.path.exists(custom_pipeline_path):
    custom = NotebookStep(
        name="Custom",
        # Reuse the path that was just checked instead of rebuilding it,
        # so the existence test and the executed notebook cannot diverge.
        notebook_path=custom_pipeline_path,
        parameters={
            "datalake_dir": datalake_dir,
            "api_key": api_key,
            "entity_name": entity_name,
            "entity_dir": entity_dir,
            "spreadsheet_url": spreadsheet_url,
        },
    )
else:
    custom = DummyStep("Custom")

## Output

### Run pipeline

In [None]:
pipeline = Pipeline()

# Chain the steps sequentially, in execution order, and terminate with End().
(
    pipeline
    >> linkedin
    >> interactions
    >> growth
    >> people
    >> organizations
    >> contacts
    >> analytics
    >> plugin
    >> email
    >> custom
    >> End()
)

# Execute the pipeline; its outputs are written under `pipeline_dir`.
pipeline.run(outputs_path=pipeline_dir)