<img width="8%" alt="LinkedIn.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/LinkedIn.png" style="border-radius: 15%">

# LinkedIn - Get entity posts

**Tags:** #linkedin #profile #company #post #stats #naas_drivers #content #automation

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Description:** This notebook fetches your profile's or company's post data from LinkedIn.


<div class="alert alert-info" role="info" style="margin: 10px">
<b>Disclaimer:</b><br>
This code is in no way affiliated with, authorized, maintained, sponsored or endorsed by LinkedIn or any of its affiliates or subsidiaries. It uses an independent and unofficial API. Use at your own risk.

This project violates LinkedIn's User Agreement Section 8.2, and because of this, LinkedIn may (and will) temporarily or permanently ban your account. We are not responsible for your account being banned.
<br>
</div>

## Input

### Import libraries

In [None]:
from naas_drivers import linkedin
import naas_python
import os
from datetime import date, timedelta, datetime
import pandas as pd
import naas_data_product

### Setup variables
**Inputs**
- `entity_index`: Entity index.
- `entity_dir`: Entity directory.
- `linkedin_url`: LinkedIn URL.
- `li_at`: Cookie used to authenticate Members and API clients.
- `JSESSIONID`: Cookie used for Cross Site Request Forgery (CSRF) protection and URL signature validation.
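- `days_start`: Offset in days, relative to the Monday of the current week, of the oldest publication date to fetch (e.g. `-7` fetches posts published since the Monday of the previous week).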
- `force_update`: Boolean to force update.

**Outputs**
- `output_dir`: Output directory
- `file_name`: Name of the file to be saved locally.

In [None]:
# Inputs
entity_index = "0"

# Both entity settings are loaded from the same per-entity outputs folder,
# so the folder path is built once and reused.
entity_store = os.path.join(naas_data_product.OUTPUTS_PATH, "entities", entity_index)
entity_dir = sm.pload(entity_store, "entity_dir")
linkedin_url = sm.pload(entity_store, "linkedin_url")

# LinkedIn cookies: the short secret name is tried first; the LINKEDIN_*
# secret is only looked up when the first value is falsy.
li_at = (
    naas_python.secret.get("li_at").value
    or naas_python.secret.get("LINKEDIN_LI_AT").value
)
JSESSIONID = (
    naas_python.secret.get("JSESSIONID").value
    or naas_python.secret.get("LINKEDIN_JSESSIONID").value
)

# Fetch window and cache override
days_start = -7
force_update = False

# Outputs
# One output folder per entity and per calendar day (ISO date).
output_dir = os.path.join(
    entity_dir,
    "content-engine",
    date.today().isoformat(),
)
file_name = "linkedin_posts"

## Model

### Get posts from LinkedIn

In [None]:
def get_posts(
    li_at: str,
    JSESSIONID: str,
    linkedin_url: str,
    days_start: int = -7,
    force_update: bool = False,
):
    """Fetch an entity's LinkedIn posts one by one until a limit date is reached.

    Posts are requested one at a time (``limit=1``) from the profile feed when
    ``linkedin_url`` contains ``/in/`` and from the company feed when it
    contains ``/company/``. Fetching stops as soon as a post older than the
    limit date is returned, which assumes the feed yields the most recent
    posts first (TODO confirm against the naas_drivers API).

    Args:
        li_at: LinkedIn ``li_at`` cookie passed to ``linkedin.connect``.
        JSESSIONID: LinkedIn ``JSESSIONID`` cookie passed to ``linkedin.connect``.
        linkedin_url: Profile (``/in/``) or company (``/company/``) URL.
        days_start: Offset in days from the Monday of the current week giving
            the oldest publication date to keep. When it is not an ``int``
            (e.g. ``None``) no date limit is applied and the feed is read
            until it is exhausted.
        force_update: Accepted for interface compatibility; not used here
            (the caller decides whether to call this function at all).

    Returns:
        A DataFrame with one row per post, in the order returned by the feed
        and with a fresh 0..n-1 index. Empty when no post was collected.

    Raises:
        ValueError: If ``linkedin_url`` is neither a profile nor a company URL.
        Exception: Any fetch error whose ``response.status_code`` is 302 is
            re-raised (presumably an authentication redirect caused by
            expired cookies; verify). Other fetch errors are printed and end
            the pagination, returning what has been collected so far.

    Note:
        ``TIMEZONE`` is read from the notebook's global namespace; it is not
        defined in the cells visible here.
    """
    # Resolve the entity type once; anything else cannot be paginated.
    if "/in/" in linkedin_url:
        is_profile = True
        date_format = "%Y-%m-%d %H:%M:%S%z"
    elif "/company/" in linkedin_url:
        is_profile = False
        date_format = "%Y-%m-%d %H:%M:%S"
    else:
        raise ValueError(
            f"Unsupported LinkedIn URL (expected '/in/' or '/company/'): {linkedin_url}"
        )

    # Init
    date_start = None
    if isinstance(days_start, int):
        # Take "today" and the weekday from the same timezone-aware instant so
        # the two cannot disagree around midnight.
        now = datetime.now(TIMEZONE)
        date_start = now.date() - timedelta(days=now.weekday() - days_start)
    print(f"⚠️ Limit Date: {date_start}")

    # The connection does not change between pages: build it once.
    client = linkedin.connect(li_at, JSESSIONID)
    feed = client.profile if is_profile else client.company

    # Get posts
    frames = []
    start = 0
    pagination_token = None
    while True:
        try:
            # Requests Naas LinkedIn API
            if is_profile:
                tmp_df = feed.get_posts_feed(
                    linkedin_url,
                    pagination_token=pagination_token,
                    limit=1,
                    sleep=False
                )
            else:
                tmp_df = feed.get_posts_feed(
                    linkedin_url,
                    start=start,
                    limit=1,
                    sleep=False
                )
            # No row returned: the feed is exhausted.
            if tmp_df is None or len(tmp_df) == 0:
                break
            row = tmp_df.iloc[0]
            title = row["TITLE"]
            published_date = row["PUBLISHED_DATE"]
            post_url = row["POST_URL"]
            next_token = row["PAGINATION_TOKEN"] if is_profile else None
            published_on = datetime.strptime(published_date, date_format).date()
        except Exception as e:
            print(e)
            # Not every exception carries an HTTP response: look it up safely.
            status_code = getattr(getattr(e, "response", None), "status_code", None)
            if status_code == 302:
                raise
            # Stop here rather than going on with stale or undefined values
            # from a previous iteration.
            break

        # Stop at the first post published before the limit date
        if date_start is not None and date_start > published_on:
            break

        # Keep the post and move to the next one
        print(f"{len(frames) + 1} - ✅ '{title}' published on {published_date} ({post_url})")
        frames.append(tmp_df)
        if is_profile:
            pagination_token = next_token
        else:
            start += 1

    # Concatenate once at the end instead of growing a DataFrame in the loop.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames).reset_index(drop=True)

# Load posts from pickle file (today's cached result, if any)
df_posts = sm.pload(output_dir, file_name)

# Get posts from LinkedIn when nothing is cached or a refresh is forced
cache_missing = df_posts is None
if force_update or cache_missing:
    df_posts = get_posts(
        li_at=li_at,
        JSESSIONID=JSESSIONID,
        linkedin_url=linkedin_url,
        days_start=days_start,
        force_update=force_update,
    )

# Summary line, then a one-row preview as the cell output
print('✍️ Posts:', len(df_posts))
df_posts.head(1)

## Output

### Save data

In [None]:
# Save df_posts under output_dir with the name file_name.
# NOTE(review): `sm` is not defined in the cells visible here; pdump is
# presumably the write counterpart of the sm.pload call used to load the
# cached posts — confirm.
sm.pdump(output_dir, df_posts, file_name)