<img width="8%" alt="Naas.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Naas.png" style="border-radius: 15%">

# Pipeline

**Tags:** #naas #pipeline #jupyter #notebook #dataanalysis #workflow #streamline

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel)

**Description:** This notebook builds a pipeline of notebook steps, runs it, and schedules it to run automatically.

## Input

### Import libraries

In [None]:
import os
from datetime import date

import naas
from naas.pipeline import (
    Pipeline,
    DummyStep,
    DummyErrorStep,
    NotebookStep,
    End,
    ParallelStep,
)

### Setup variables

In [None]:
# Datalake: root folder under which this pipeline stores its outputs
# (absolute path /home/ftp/abi/outputs).
datalake_dir = os.path.join("/", "home", "ftp", "abi", "outputs")

# LinkedIn: both cookies are looked up in the Naas secret store first; if the
# lookup returns a falsy value, the "YOUR_..." placeholder string is used
# instead and must be replaced by hand before running.
li_at = naas.secret.get("LINKEDIN_LI_AT") or "YOUR_LINKEDIN_LI_AT"  # example: AQFAzQN_PLPR4wAAAXc-FCKmgiMit5FLdY1af3-2
JSESSIONID = naas.secret.get("LINKEDIN_JSESSIONID") or "YOUR_LINKEDIN_JSESSIONID"  # example: ajax:8379907400220387585
# Profile URL passed to the extraction notebook.
linkedin_url = "https://www.linkedin.com/in/jeremyravenel/"  # example: "https://www.linkedin.com/in/XXXXXX/"

# Google Sheets
# NOTE(review): spreadsheet_url and sheet_name are not referenced by any step
# visible in this notebook (the "Send data to Google Sheets" step is a DummyStep).
spreadsheet_url = "https://docs.google.com/spreadsheets/d/1GDVJxBajYx7VR48yKGnSvMsEjzXvqI94hlmPXI5qEp8/edit#gid=0"
sheet_name = "CONTENT"

# Notification
# NOTE(review): email_to is not referenced by any step visible in this notebook
# (the "Send Email notification" step is a DummyStep).
email_to = "jeremy@naas.ai"

# Scheduler: 5-field cron expression (minute 0, hour 8, every day).
# Timezone depends on the scheduler configuration - TODO confirm.
cron = "0 8 * * *"

## Model

### Build output directory path by date

In [None]:
# Dated output folder for this run: <datalake_dir>/content-engine/<YYYY-MM-DD>.
output_dir = os.path.join(datalake_dir, "content-engine", date.today().isoformat())
# Print the dated folder itself (not the datalake root) so the message matches
# the path that is actually handed to the pipeline steps.
print("✅ Output directory:", output_dir)

### Setup notebooks
This section declares all the notebooks (steps) and creates the unique IDs that will be used in the pipeline.

In [None]:
# Extraction: the only step backed by a real notebook. The LinkedIn cookies,
# the target profile URL and the dated output folder are passed as parameters.
extraction = NotebookStep(
    name="Get LinkedIn posts",
    notebook_path="pipeline_templates/LinkedIn_Get_profile_posts_stats.ipynb",
    parameters=dict(
        li_at=li_at,
        JSESSIONID=JSESSIONID,
        linkedin_url=linkedin_url,
        output_dir=output_dir,
    ),
)

# Remaining stages are declared as DummyStep objects identified by their name.
cleaning = DummyStep("Send data to Google Sheets")
analytics = DummyStep("Create analytics")
distribution1 = DummyStep("Send Email notification")
distribution2 = DummyStep("Generate plugin")

## Output

### Run pipeline

In [None]:
# Start from an empty pipeline object.
pipeline = Pipeline()

# Wire the steps left to right: extraction, cleaning, analytics, then the two
# distribution steps (given together as a list), and finally the End() step.
(
    pipeline
    >> extraction
    >> cleaning
    >> analytics
    >> [distribution1, distribution2]
    >> End()
)

# Run the pipeline; outputs_path points at the "pipeline_executions" folder
# inside the dated output directory.
pipeline.run(outputs_path=os.path.join(output_dir, "pipeline_executions"))

### Add scheduler

In [None]:
# Add a Naas scheduler entry using the cron expression from the variables cell.
# NOTE(review): presumably this schedules the current notebook - confirm
# against the Naas scheduler documentation.
naas.scheduler.add(cron=cron)

# Uncomment the line below to delete your automation
# naas.scheduler.delete()