<img width="8%" alt="Content" src="https://naasai-public.s3.eu-west-3.amazonaws.com/abi-demo/content_creation.png" style="border-radius: 15%">

# Content Pipeline

**Tags:** #naas #pipeline #jupyter #notebook #dataanalysis #workflow #streamline

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel)

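**Description:** This notebook builds and runs the content engine pipeline, chaining the LinkedIn extraction, posts database, analytics, plugin, notification and optional custom notebooks.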

## Input

### Import libraries

In [None]:
from datetime import date
import os
import glob
import naas_data_product
import naas_python

### Setup variables

In [None]:
# Storage
# The datalake root and the Naas API token are both read from Naas secrets.
datalake_dir = naas_python.secret.get("ABI_DATALAKE_DIR").value
api_key = naas_python.secret.get("NAAS_API_TOKEN").value

# Entity
# Every entity attribute is read through `pload` from the same folder,
# <datalake_dir>/entities/<entity_index>, so that path is built only once.
# A falsy result from `pload` is replaced by an empty default ("" or []).
entity_index = "0"
_entity_path = os.path.join(datalake_dir, "entities", entity_index)
entity_name = pload(_entity_path, "entity_name") or ""
emails = pload(_entity_path, "emails") or []
linkedin_url = pload(_entity_path, "linkedin_url") or ""
# LinkedIn cookies: the second secret name is only looked up when the value
# stored under the first one is falsy.
li_at = (
    naas_python.secret.get("li_at").value
    or naas_python.secret.get("LINKEDIN_LI_AT").value
)
JSESSIONID = (
    naas_python.secret.get("JSESSIONID").value
    or naas_python.secret.get("LINKEDIN_JSESSIONID").value
)
entity_dir = pload(_entity_path, "entity_dir") or ""

# Google Sheets
spreadsheet_url = pload(_entity_path, "abi_spreadsheet") or ""
sheet_posts = "POSTS"

# Engine
engine_name = "content-engine"
assistant_name = "Content Assistant"
# NOTE(review): presumably a look-back window in days (negative = past);
# confirm against LinkedIn_Get_entity_posts.ipynb.
days_start = -7
# Optional custom notebook; it is only added to the pipeline when this file exists.
custom_pipeline_path = os.path.join(
    naas_data_product.MODELS_PATH,
    engine_name,
    "custom",
    "__pipeline__.ipynb",
)

## Model

### Set environ

In [None]:
# Export the credentials as environment variables for the current process.
# The mapping is applied in insertion order, i.e. the same order as three
# separate assignments would be.
os.environ.update(
    {
        "NAAS_API_TOKEN": api_key,
        "LINKEDIN_LI_AT": li_at,
        "LINKEDIN_JSESSIONID": JSESSIONID,
    }
)

### Define output directories by date

In [None]:
# Build both paths first, then report them.
# str(date) yields the ISO format (YYYY-MM-DD), giving one engine folder per day.
engine_dir = os.path.join(entity_dir, engine_name, str(date.today()))
pipeline_dir = os.path.join(engine_dir, "pipeline_executions")

print("✅ Output directory:", engine_dir)
print("✅ Pipeline directory:", pipeline_dir)

### Setup notebooks
This section declares all the notebooks and creates the unique IDs that will be used in the pipeline.

In [None]:
def _domain_notebook(filename):
    """Return the path of a notebook located in the engine's core/domain folder."""
    return os.path.join(
        naas_data_product.MODELS_PATH, engine_name, "core", "domain", filename
    )


# Step 1: LinkedIn extraction, run with the LinkedIn cookies and profile URL.
extraction = NotebookStep(
    name="LinkedIn",
    notebook_path=_domain_notebook("LinkedIn_Get_entity_posts.ipynb"),
    parameters={
        "li_at": li_at,
        "JSESSIONID": JSESSIONID,
        "linkedin_url": linkedin_url,
        "days_start": days_start,
        "output_dir": engine_dir,
    },
)

# Step 2: posts database, reading from and writing to the dated engine folder.
cleaning = NotebookStep(
    name="Content",
    notebook_path=_domain_notebook("Content_Create_Posts_database.ipynb"),
    parameters={
        "input_dir": engine_dir,
        "entity_dir": entity_dir,
        "entity_name": entity_name,
        "spreadsheet_url": spreadsheet_url,
        "sheet_name": sheet_posts,
        "output_dir": engine_dir,
    },
)

# Step 3: analytics notebook.
analytics = NotebookStep(
    name="Analytics",
    notebook_path=_domain_notebook("Plotly_Follow_reach_by_day.ipynb"),
    parameters={
        "input_dir": engine_dir,
        "entity_name": entity_name,
        "spreadsheet_url": spreadsheet_url,
        "sheet_name": sheet_posts,
        "linkedin_url": linkedin_url,
        "output_dir": engine_dir,
    },
)

# Step 4: plugin notebook; it lives at the engine root and outputs to <entity_dir>/plugins.
plugin = NotebookStep(
    name="Plugin",
    notebook_path=os.path.join(
        naas_data_product.MODELS_PATH, engine_name, "__plugin__.ipynb"
    ),
    parameters={
        "input_dir": engine_dir,
        "spreadsheet_url": spreadsheet_url,
        "sheet_name": sheet_posts,
        "name": assistant_name,
        "entity_name": entity_name,
        "output_dir": os.path.join(entity_dir, "plugins"),
    },
)

# Step 5: notification notebook, which receives the entity's e-mail list as "email_to".
email = NotebookStep(
    name="Notification",
    notebook_path=_domain_notebook("Naas_Send_content_performance_notification.ipynb"),
    parameters={
        "entity_dir": entity_dir,
        "entity_name": entity_name,
        "input_dir": engine_dir,
        "spreadsheet_url": spreadsheet_url,
        "sheet_name": sheet_posts,
        "output_dir": engine_dir,
        "email_to": emails,
    },
)

# Step 6: optional custom notebook. When the file is absent, a DummyStep
# placeholder with the same name is used so `custom` is always defined.
custom = (
    NotebookStep(
        name="Custom",
        notebook_path=custom_pipeline_path,
        parameters={
            "datalake_dir": datalake_dir,
            "api_key": api_key,
            "entity_name": entity_name,
            "entity_dir": entity_dir,
            "spreadsheet_url": spreadsheet_url,
        },
    )
    if os.path.exists(custom_pipeline_path)
    else DummyStep("Custom")
)

## Output

### Run pipeline

In [None]:
# Start from a fresh pipeline object.
pipeline = Pipeline()

# Chain the steps with `>>` in the order listed: extraction, cleaning,
# analytics, plugin, email, custom, then the End() marker.
# `custom` is either the custom notebook step or a DummyStep placeholder.
pipeline >> extraction >> cleaning >> analytics >> plugin >> email >> custom >> End()

# Run the pipeline, passing the dated "pipeline_executions" folder as outputs_path.
# NOTE(review): presumably the executed notebooks are written there; confirm
# against the pipeline library.
pipeline.run(outputs_path=pipeline_dir)