In [47]:
# With this code you should be able to retrieve post data from any company page on LinkedIn.
# Remember to set the date range and edit your login information before sending requests to the API.
# The result is an .xlsx file (plus intermediate .json and .csv files) with, for each post:
# post id, company name, post URL, description (post text), shares, comments and likes.
# Feel free to clone it and use it as you wish.

In [48]:
# libraries
# !pip install scrape linkedin-api openpyxl

# importing

import csv
import json
import os

import openpyxl
import pandas as pd
import scrape as scr
from linkedin_api import Linkedin

In [49]:
# your login information, page you want to scrape and date range

# Credentials are taken from the LINKEDIN_LOGIN / LINKEDIN_PASSWORD environment
# variables when they are set, so real secrets never have to be saved inside the
# notebook. The "xxxxx" placeholders stay as the fallback and can still be
# edited by hand for a quick local run.
login_lkd = os.environ.get("LINKEDIN_LOGIN", "xxxxx")
password_lkd = os.environ.get("LINKEDIN_PASSWORD", "xxxxx")
company_user = "nike"  # company username (the public name used in the page URL)

# NOTE(review): `since` and `include` are not referenced by any cell visible in
# this section — confirm whether they are still needed before relying on them.
since = "2024-01-01"
include = ["urn", "time", "impressions", "reactions", "comments"]

In [None]:
# Authenticate with the LinkedIn credentials configured above.
# NOTE(review): constructing the client presumably performs the login over the
# network — confirm against the linkedin_api documentation.

api = Linkedin(login_lkd, password_lkd)

# Fetch the company profile for the configured company username.

profile = api.get_company(company_user)

# Print the profile details so you can find the company's id
# (it has to be copied by hand into `company_id` in the next cell).

print(profile)


In [53]:
# Company ID, copied by hand from the profile printed by the previous cell:
# it is the number sequence at the end of the first URL in that output.
company_id = "2029"

# fetch company posts (50 by default; more than that may cause ChunkedEncodingError)
def fetch_company_posts(api, company_id, max_results=50):
    """Fetch the most recent updates of a company page.

    Args:
        api: Client object exposing ``get_company_updates``.
        company_id: Identifier of the company page, passed through unchanged
            as the first positional argument.
        max_results: Upper bound on the number of posts requested. Defaults to
            50, the limit the original notebook used to avoid
            ``ChunkedEncodingError`` on larger requests.

    Returns:
        Whatever ``api.get_company_updates`` returns for the request.
    """
    return api.get_company_updates(company_id, max_results=max_results)

In [54]:
# process and extract posts information
_UPDATE_V2_KEY = 'com.linkedin.voyager.feed.render.UpdateV2'


def _dig(node, path, default):
    """Follow ``path`` through nested dicts, returning ``default`` on any gap.

    A level that is missing, ``None`` or otherwise not a dict ends the walk
    with ``default`` instead of raising ``AttributeError``. A final key that is
    present is returned exactly as stored, even when its value is ``None``.
    """
    for key in path[:-1]:
        node = node.get(key) if isinstance(node, dict) else None
    if not isinstance(node, dict):
        return default
    return node.get(path[-1], default)


def process_posts(posts):
    """Flatten raw company updates into one plain dict per post.

    Args:
        posts: Iterable of dicts as returned by ``fetch_company_posts``. Each
            one is read for ``id``, ``permalink`` and the nested
            ``value -> com.linkedin.voyager.feed.render.UpdateV2`` node.

    Returns:
        A list of dicts with the keys ``id``, ``url``, ``company_name``,
        ``text``, ``shares``, ``comments`` and ``likes``. Missing text fields
        fall back to ``'N/A'`` and missing counters to ``0``; ``id`` and
        ``url`` are ``None`` when absent.
    """
    processed_posts = []
    for post in posts:
        # Every field except id/permalink lives under the same UpdateV2 node,
        # so resolve it once per post instead of once per field.
        update = _dig(post, ('value', _UPDATE_V2_KEY), {})

        # company name
        # NOTE(review): the feed appears to deliver actor.name as a text object
        # ({'text': ...}) rather than a plain string — confirm against a live
        # response. Unwrap it when that is the case; strings pass through.
        actor_name = _dig(update, ('actor', 'name'), 'N/A')
        if isinstance(actor_name, dict):
            actor_name = actor_name.get('text', 'N/A')

        # store the data as the list
        processed_posts.append({
            'id': post.get('id'),
            'url': post.get('permalink'),
            'company_name': actor_name,
            # post text
            'text': _dig(update, ('commentary', 'text', 'text'), 'N/A'),
            # engagement metrics
            'shares': _dig(update, ('socialDetail', 'totalShares'), 0),
            'comments': _dig(
                update,
                ('socialDetail', 'totalSocialActivityCounts', 'numComments'),
                0,
            ),
            'likes': _dig(
                update,
                ('socialDetail', 'totalSocialActivityCounts', 'numLikes'),
                0,
            ),
        })

    return processed_posts


In [55]:
# saving data processed as JSON
def save_posts_to_json(posts, filename='linkedin_posts.json'):
    """Write ``posts`` to ``filename`` as UTF-8 JSON.

    The output is indented by four spaces and non-ASCII characters are written
    as-is rather than escaped. An existing file is overwritten.
    """
    with open(filename, mode='w', encoding='utf-8') as out_file:
        out_file.write(json.dumps(posts, ensure_ascii=False, indent=4))

# converting JSON to CSV
def json_to_csv(data, csv_file):
    """Write the processed post dicts in ``data`` to ``csv_file`` as UTF-8 CSV.

    A header row is written first, followed by one row per dict. The column
    order is fixed by the list below, not by the key order of the dicts.
    """
    columns = ['id', 'company_name', 'url', 'text', 'shares', 'comments', 'likes']
    # newline='' hands line-ending control to the csv module.
    with open(csv_file, 'w', newline='', encoding='utf-8') as handle:
        table = csv.DictWriter(handle, fieldnames=columns)
        table.writeheader()
        for record in data:
            table.writerow(record)

# now CSV to XLSX
def csv_to_xlsx(csv_file, xlsx_file):
    """Convert the CSV at ``csv_file`` into an Excel workbook at ``xlsx_file``.

    The sheet is written with openpyxl and without the DataFrame index column.
    """
    # By default pandas parses strings such as "N/A", "NA" or "null" as missing
    # values, which would blank out the 'N/A' placeholder this notebook writes
    # for posts without text (and any post text that happens to be one of those
    # words). Only genuinely empty fields are treated as missing here.
    df = pd.read_csv(csv_file, keep_default_na=False, na_values=[''])
    df.to_excel(xlsx_file, index=False, engine='openpyxl')

In [56]:
# now, search, process and save posts
def fetch_process_and_save_posts(api, company_id):
    """Run the whole pipeline: fetch posts, flatten them, export three files.

    The flattened posts are saved as JSON (under ``save_posts_to_json``'s
    default filename), then as ``linkedin_posts.csv``, and finally that CSV is
    converted into ``linkedin_posts.xlsx``. Nothing is returned.
    """
    csv_path = 'linkedin_posts.csv'
    xlsx_path = 'linkedin_posts.xlsx'

    flat_posts = process_posts(fetch_company_posts(api, company_id))

    save_posts_to_json(flat_posts)
    json_to_csv(flat_posts, csv_path)
    # The workbook is built from the CSV written on the previous line.
    csv_to_xlsx(csv_path, xlsx_path)

# Run the pipeline with the authenticated client and the company id set earlier.
fetch_process_and_save_posts(api=api, company_id=company_id)