<img width="8%" alt="Naas.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Naas.png" style="border-radius: 15%">

# Pipeline

**Tags:** #naas #pipeline #jupyter #notebook #dataanalysis #workflow #streamline

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel)

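**Description:** This notebook builds, runs and schedules a pipeline of notebooks: it gets LinkedIn interactions, updates the interactions and growth databases in Google Sheets, creates analytics, generates a plugin and sends an email notification.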

## Input

### Import libraries

In [1]:
from naas.pipeline import (
    Pipeline,
    DummyStep,
    DummyErrorStep,
    NotebookStep,
    End,
    ParallelStep,
)
from datetime import date
import os
import glob
import naas
import naas_data_product

✅ utils file '/home/ftp/abi/utils/data.ipynb' successfully loaded.
✅ utils file '/home/ftp/abi/utils/naas_chat_plugin.ipynb' successfully loaded.
✅ utils file '/home/ftp/abi/utils/naas_lab.ipynb' successfully loaded.
✅ utils file '/home/ftp/abi/utils/openai.ipynb' successfully loaded.


### Setup variables

In [2]:
# Datalake: root folder; the dated output directory of each run is created under it
datalake_dir = os.path.join("/", "home", "ftp", "abi", "outputs")

# Google Sheets: the URL is read from the naas secret "ABI_SPREADSHEET"; the placeholder
# string is used instead whenever the secret lookup returns a falsy value.
# NOTE(review): the placeholder is presumably not a usable URL — replace it or set the secret.
spreadsheet_url = naas.secret.get("ABI_SPREADSHEET") or "YOUR_GOOGLE_SPREADSHEET_URL"
# Sheet names passed as "sheet_name" to the extraction, interactions and growth steps respectively
sheet_content = "CONTENT"
sheet_interaction = "INTERACTIONS"
sheet_growth = "GROWTH"

# LinkedIn: both values are read from naas secrets, with a placeholder string as fallback
li_at = naas.secret.get("LINKEDIN_LI_AT") or "YOUR_LINKEDIN_LI_AT" # example format: AQFAzQN_PLPR4wAAAXc-FCKmgiMit5FLdY1af3-2
JSESSIONID = naas.secret.get("LINKEDIN_JSESSIONID") or "YOUR_LINKEDIN_JSESSIONID" # example format: ajax:8379907400220387585
linkedin_url = "https://www.linkedin.com/in/jeremyravenel/"  # profile URL passed to the analytics step, format: "https://www.linkedin.com/in/XXXXXX/"

# Notification: recipients passed to the email notification step
email_to = ["jeremy@naas.ai", "florent@naas.ai"]

# Scheduler: cron expression given to naas.scheduler.add ("30 8 * * *" = every day at 08:30)
cron = "30 8 * * *"

## Model

### Create output directory by date

In [3]:
# Dated output folder for this run: <datalake_dir>/growth-engine/<YYYY-MM-DD>
output_dir = os.path.join(datalake_dir, "growth-engine", date.today().isoformat())
# Report the dated folder itself (the original printed the datalake root by mistake),
# so the log shows where this run's files actually go.
print("✅ Output directory:", output_dir)

✅ Output directory: /home/ftp/abi/outputs


### Setup notebooks
This section declares every notebook as a pipeline step and creates the unique IDs that will be used in the pipeline.

In [4]:
# Each NotebookStep pairs a template notebook with the parameters passed to it.
# The steps are only declared here; their execution order is set when the pipeline is chained.

# LinkedIn extraction: receives the spreadsheet, the content sheet and the LinkedIn cookies.
extraction = NotebookStep(
    name="Get LinkedIn interactions",
    notebook_path="pipeline_templates/LinkedIn_Get_interactions_from_posts.ipynb",
    parameters=dict(
        spreadsheet_url=spreadsheet_url,
        sheet_name=sheet_content,
        li_at=li_at,
        JSESSIONID=JSESSIONID,
        output_dir=output_dir,
    ),
)

# Interactions database update: reads from and writes to the dated output folder.
cleaning1 = NotebookStep(
    name="Update BDD Interactions",
    notebook_path="pipeline_templates/Google_Sheets_Update_interactions_db.ipynb",
    parameters=dict(
        input_dir=output_dir,
        output_dir=output_dir,
        spreadsheet_url=spreadsheet_url,
        sheet_name=sheet_interaction,
    ),
)

# Growth database update: same folders, targets the growth sheet.
cleaning2 = NotebookStep(
    name="Update BDD Growth",
    notebook_path="pipeline_templates/Google_Sheets_Update_growth_db.ipynb",
    parameters=dict(
        input_dir=output_dir,
        output_dir=output_dir,
        spreadsheet_url=spreadsheet_url,
        sheet_name=sheet_growth,
    ),
)

# Analytics: receives the dated folder and the LinkedIn profile URL.
analytics = NotebookStep(
    name="Create analytics",
    notebook_path="pipeline_templates/Plotly_Create_analytics.ipynb",
    parameters=dict(
        input_dir=output_dir,
        output_dir=output_dir,
        linkedin_url=linkedin_url,
    ),
)

# Plugin generation: the only step whose notebook sits next to this one rather than in pipeline_templates/.
plugin = NotebookStep(
    name="Generate plugin",
    notebook_path="__plugin__.ipynb",
    parameters=dict(input_dir=output_dir),
)

# Email notification: receives the dated folder, the datalake root and the recipients.
email = NotebookStep(
    name="Send Email notification",
    notebook_path="pipeline_templates/Naas_Send_notification.ipynb",
    parameters=dict(
        input_dir=output_dir,
        datalake_dir=datalake_dir,
        email_to=email_to,
    ),
)

## Output

### Run pipeline

In [None]:
# Chain the declared steps in execution order, terminated by End().
pipeline = Pipeline()
(
    pipeline
    >> extraction
    >> cleaning1
    >> cleaning2
    >> analytics
    >> plugin
    >> email
    >> End()
)

# Execution outputs of the run go to a sub-folder of today's output directory.
executions_path = os.path.join(output_dir, "pipeline_executions")
pipeline.run(outputs_path=executions_path)

### Add dependencies

In [None]:
# Notebooks to register as naas dependencies: every pipeline template,
# every utils notebook under naas_data_product.UTILS_PATH, and the plugin notebook.
dependency_files = [
    *glob.glob("pipeline_templates/*.ipynb"),
    *glob.glob(f"{naas_data_product.UTILS_PATH}/*.ipynb"),
    "__plugin__.ipynb",
]

for notebook_file in dependency_files:
    naas.dependency.add(notebook_file)

### Add scheduler

In [None]:
# Register the schedule with the `cron` expression defined in "Setup variables" ("30 8 * * *").
# NOTE(review): no path is passed, so this presumably schedules the current notebook — confirm in the naas scheduler docs.
naas.scheduler.add(cron=cron)

# Uncomment the line below to delete your automation
# naas.scheduler.delete()