<img width="8%" alt="LinkedIn.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/LinkedIn.png" style="border-radius: 15%">

# LinkedIn - Rank Direct Conversations by Messages Count
<a href="https://bit.ly/3JyWIk6">Give Feedback</a> | <a href="https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=LinkedIn+-+Rank+Direct+Conversations+by+Messages+Count:+Error+short+description">Bug report</a>

**Tags:** #linkedin #messages #analytics #dataframe #growth #sales

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Last update:** 2024-06-26 (Created: 2024-06-26)

**Description:** This notebook ranks your direct (one-to-one) LinkedIn conversations by number of messages exchanged. Download your LinkedIn data, extract all files from the ZIP archive, and use the `messages.csv` file as input.

**References:**
- [LinkedIn Download data](https://www.linkedin.com/mypreferences/d/download-my-data)

## Input

### Import libraries

In [None]:
import requests
import pandas as pd
from datetime import datetime

### Setup variables

- `linkedin_url`: Your LinkedIn profile URL
- `file_path`: Path to the messages.csv file
- `limit`: Maximum number of days since the last message for a conversation to be included

In [None]:
# Your own profile URL; the part after "/in/" is used to tell your messages apart from the other person's
linkedin_url = "https://www.linkedin.com/in/jeremyravenel"
# Path to the messages.csv file
file_path = "messages.csv"
# Conversations whose last message is more than this many days old are filtered out
limit = 30

## Model

### Get messages

In [None]:
# Load the export, keep only rows filed under "INBOX", newest DATE value first
df_conversations = (
    pd.read_csv(file_path)
    .loc[lambda frame: frame["FOLDER"] == "INBOX"]
    .sort_values(by="DATE", ascending=False)
    .reset_index(drop=True)
)

# Quick summary: number of rows kept and number of distinct conversation ids
print("Messages:", len(df_conversations))
print("Conversations:", len(pd.unique(df_conversations["CONVERSATION ID"])))

# Preview the first row
df_conversations.head(1)

## Output

### Rank Direct Conversations by Messages Count

In [None]:
def get_messages_counts(df_init, linkedin_url, limit):
    """Rank one-to-one conversations by number of messages.

    Args:
        df_init: DataFrame of messages with at least the columns
            "CONVERSATION ID", "SENDER PROFILE URL", "CONTENT" and "DATE".
            Rows may be in any order.
        linkedin_url: Profile URL of the account owner; the segment after
            "/in/" identifies which sender is "me".
        limit: Maximum number of days since the last message for a
            conversation to be kept.

    Returns:
        DataFrame sorted by COUNT (descending) with the columns
        "CONVERSATION ID", "COUNT", "COUNT_ME", "SENDER PROFILE URL" (the
        other participant), "COUNT_THEM", "LAST_MESSAGE_DATE" and
        "DAYS_SINCE_LAST_MESSAGE". Counts are numbers of non-null CONTENT
        values.

    Raises:
        IndexError: If ``linkedin_url`` does not contain "/in/".
    """
    # Init: public id is the path segment after "/in/", without any trailing
    # path, query string or fragment
    public_id = linkedin_url.split("/in/")[1].split("/")[0].split("?")[0].split("#")[0]
    df = df_init.copy()

    # Groupby conversation: number of distinct senders and number of messages
    df_output = (
        df.groupby(["CONVERSATION ID"], as_index=False)
        .agg({"SENDER PROFILE URL": "nunique", "CONTENT": "count"})
        .rename(columns={"CONTENT": "COUNT"})
        .sort_values(by="COUNT", ascending=False)
        .reset_index(drop=True)
    )

    # Keep only conversations (not group or one message without answer)
    is_direct = (df_output["SENDER PROFILE URL"] == 2) & (df_output["COUNT"] >= 2)
    df_output = (
        df_output[is_direct]
        .drop("SENDER PROFILE URL", axis=1)
        .reset_index(drop=True)
    )

    # Get details: messages per sender within each kept conversation
    df_total = (
        df[df["CONVERSATION ID"].isin(df_output["CONVERSATION ID"].unique())]
        .groupby(["CONVERSATION ID", "SENDER PROFILE URL"], as_index=False)
        .agg({"CONTENT": "count"})
    )

    # Compare the extracted public id exactly: a substring/regex match would
    # also flag contacts whose id merely contains ours (e.g. "john-doe-123"
    # when we are "john-doe") and would misread regex metacharacters
    sender_ids = df_total["SENDER PROFILE URL"].str.extract(
        r"/in/([^/?#]+)", expand=False
    )
    is_me = sender_ids.eq(public_id).fillna(False).astype(bool)

    df_me = (
        df_total[is_me]
        .drop("SENDER PROFILE URL", axis=1)
        .rename(columns={"CONTENT": "COUNT_ME"})
    )
    df_them = df_total[~is_me].rename(columns={"CONTENT": "COUNT_THEM"})
    df_output = pd.merge(df_output, df_me, how="left", on="CONVERSATION ID")
    df_output = pd.merge(df_output, df_them, how="left", on="CONVERSATION ID")

    # Get last messages: pick the most recent timestamp per conversation
    # explicitly instead of relying on the caller having sorted the rows
    df_last = df[["CONVERSATION ID", "DATE"]].copy()
    # Drop the trailing " UTC" marker only when it is actually present
    df_last["DATE"] = df_last["DATE"].str.replace(r"\s*UTC$", "", regex=True)
    df_last["_TS"] = pd.to_datetime(df_last["DATE"], utc=True)
    df_last = df_last.sort_values(by="_TS", ascending=False).drop_duplicates(
        "CONVERSATION ID"
    )
    # Timestamps are UTC, so measure the age against the current UTC time
    # rather than the machine's naive local time
    now_utc = pd.Timestamp.now(tz="UTC")
    df_last["DAYS_SINCE_LAST_MESSAGE"] = (now_utc - df_last["_TS"]).dt.days
    df_last = df_last.drop("_TS", axis=1)
    df_output = pd.merge(
        df_output, df_last, how="left", on="CONVERSATION ID"
    ).rename(columns={"DATE": "LAST_MESSAGE_DATE"})

    # Filter on limit
    df_output = df_output[df_output["DAYS_SINCE_LAST_MESSAGE"] <= limit]
    return df_output.reset_index(drop=True)

# Build the ranking from the INBOX messages using the profile URL and day limit set in "Setup variables"
df_messages_stat = get_messages_counts(df_conversations, linkedin_url, limit)
print(f"Conversations over the last {limit} days:", len(df_messages_stat))
# Last expression of the cell, so the notebook displays the resulting DataFrame
df_messages_stat