<img width="8%" alt="LinkedIn.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/LinkedIn.png" style="border-radius: 15%">

# LinkedIn - Get entity posts stats

**Tags:** #linkedin #profile #company #post #stats #naas_drivers #content #automation #pickle

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Description:** This notebook fetches your profile's or company's post statistics from LinkedIn and stores them in a pickle file.


<div class="alert alert-info" role="info" style="margin: 10px">
<b>Disclaimer:</b><br>
This code is in no way affiliated with, authorized, maintained, sponsored or endorsed by LinkedIn or any of its affiliates or subsidiaries. It uses an independent and unofficial API. Use at your own risk.

This project violates LinkedIn's User Agreement Section 8.2, and because of this, LinkedIn may temporarily or permanently ban your account. We are not responsible for your account being banned.
<br>
</div>

## Input

### Import libraries

In [None]:
from naas_drivers import linkedin
import naas
import os
from datetime import date, timedelta, datetime
import pandas as pd
import naas_data_product

### Setup variables
**Inputs**
- `entity_dir`: This variable represents the entity directory.
- `linkedin_url`: This variable represents the LinkedIn profile (`/in/`) or company (`/company/`) URL.
- `li_at`: Cookie used to authenticate Members and API clients.
- `JSESSIONID`: Cookie used for Cross Site Request Forgery (CSRF) protection and URL signature validation.
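- `limit`: Oldest publication date to fetch; retrieval stops at the first post published before this date. Defaults to the Monday of the previous week.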
- `force_update`: Set to `True` to fetch posts from LinkedIn again even if a saved pickle file already exists.

**Outputs**
- `output_dir`: Output directory
- `file_name`: Name of the pickle file saved in `output_dir`.

In [None]:
# Inputs
# Both entity settings are loaded from the same location (entity "0"), so build the path once.
entity_store = os.path.join(naas_data_product.OUTPUTS_PATH, "entities", "0")
entity_dir = pload(entity_store, "entity_dir")
linkedin_url = pload(entity_store, "linkedin_url")

# Credentials: an environment variable takes precedence over the naas secret.
li_at = os.environ.get("LINKEDIN_LI_AT") or naas.secret.get("LINKEDIN_LI_AT")
JSESSIONID = os.environ.get("LINKEDIN_JSESSIONID") or naas.secret.get("LINKEDIN_JSESSIONID")

# Look back to the Monday of the previous week: days elapsed since this week's Monday, plus 7.
days_back = datetime.now(TIMEZONE).weekday() + 7
limit = date.today() - timedelta(days=days_back)
force_update = False

# Outputs
# One folder per run date inside the entity's "content-engine" directory.
output_dir = os.path.join(entity_dir, "content-engine", date.today().isoformat())
file_name = "linkedin_posts"

## Model

### Get posts from LinkedIn

In [None]:
def _read_first_post(tmp_df, date_format):
    """Return (title, published_date, post_url, published_on) from the first row of tmp_df."""
    row = tmp_df.iloc[0]
    published_date = row["PUBLISHED_DATE"]
    published_on = datetime.strptime(published_date, date_format).date()
    return row["TITLE"], published_date, row["POST_URL"], published_on


def get_posts(
    li_at,
    JSESSIONID,
    linkedin_url,
    limit=None,
    force_update=False,
):
    """Fetch the posts of a LinkedIn profile or company, newest first, one at a time.

    Posts are requested one by one and collected until a post published before
    `limit` is returned, the feed yields nothing more, or (company feeds only)
    a request fails.

    Args:
        li_at: LinkedIn `li_at` authentication cookie.
        JSESSIONID: LinkedIn `JSESSIONID` cookie.
        linkedin_url: Profile URL (contains "/in/") or company URL
            (contains "/company/").
        limit: Optional `datetime.date`; posts published before it are
            excluded and stop the retrieval. `None` means no date bound.
        force_update: Unused here; kept so existing callers keep working.

    Returns:
        A DataFrame with one row per post (empty if nothing was fetched),
        re-indexed from 0.

    Raises:
        ValueError: If `linkedin_url` is neither a profile nor a company URL.
    """
    # Validate up front: otherwise no branch below would ever run and the
    # loop would crash on an unbound variable.
    is_profile = "/in/" in linkedin_url
    if not is_profile and "/company/" not in linkedin_url:
        raise ValueError(
            f"Unsupported LinkedIn URL '{linkedin_url}': "
            "expected a profile ('/in/') or company ('/company/') URL."
        )

    # Connect once instead of on every iteration.
    client = linkedin.connect(li_at, JSESSIONID)

    frames = []
    start = 0
    pagination_token = None
    while True:
        if is_profile:
            # Requests from LinkedIn API
            tmp_df = client.profile.get_posts_feed(
                linkedin_url,
                pagination_token=pagination_token,
                limit=1,
                sleep=False,
            )
            if tmp_df is None or len(tmp_df) == 0:
                break  # Nothing more to read
            title, published_date, post_url, published_on = _read_first_post(
                tmp_df, "%Y-%m-%d %H:%M:%S%z"
            )
            pagination_token = tmp_df.iloc[0]["PAGINATION_TOKEN"]
        else:
            try:
                # Requests from LinkedIn API
                tmp_df = client.company.get_posts_feed(
                    linkedin_url,
                    start=start,
                    limit=1,
                    sleep=False,
                )
                if tmp_df is None or len(tmp_df) == 0:
                    break  # Nothing more to read
                title, published_date, post_url, published_on = _read_first_post(
                    tmp_df, "%Y-%m-%d %H:%M:%S"
                )
            except Exception as e:
                # Still best-effort (report, don't raise), but stop paginating:
                # carrying on would reuse the previous post's data and could
                # loop forever.
                print(e)
                break
            start += 1

        # Stop at the first post older than the limit (no bound when limit is None)
        if limit is not None and limit > published_on:
            break

        print(f"{len(frames) + 1} - ✅ '{title}' published on {published_date} ({post_url})")
        frames.append(tmp_df)

        # NOTE(review): a missing token is assumed to mean "last page" - confirm
        # against naas_drivers. Requesting again without a token would restart
        # from the first post.
        if is_profile and (pd.isna(pagination_token) or not pagination_token):
            break

    if not frames:
        return pd.DataFrame()
    # Concatenate once at the end rather than growing a DataFrame in the loop.
    return pd.concat(frames).reset_index(drop=True)

# Load posts from the pickle file saved under output_dir, if there is one
df_posts = pload(output_dir, file_name)

# Fetch from LinkedIn when nothing was loaded, when an update is forced,
# or when running in production
needs_refresh = df_posts is None or force_update or naas.is_production()
if needs_refresh:
    df_posts = get_posts(li_at, JSESSIONID, linkedin_url, limit=limit, force_update=force_update)

print("✍️ Posts:", len(df_posts))
df_posts.head(1)

## Output

### Save data

In [None]:
# Save the posts DataFrame as `file_name` in `output_dir`, the same location the
# Model step reads with pload. NOTE(review): pdump is defined outside this
# notebook section - presumably it pickles the object; confirm.
pdump(output_dir, df_posts, file_name)