### Messenger Analysis Tools
This notebook provides analysis tools for Facebook Messenger data. To run it, define all the required variables in a `.env` file (use `.env.example` as a template) and install the packages listed in `requirements.txt`.


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from dotenv import load_dotenv

load_dotenv()

from message_analysis_tools import (
    build_message_df,
    get_message_counts,
    get_response_times_from_me,
    get_response_times_to_me,
    get_ghost_pct,
    get_comedian_rankings,
    get_professor_rankings
)

### Measure relationship strength from messages.
- We are going to start with a simple score of total number of messages between participants
- We will visualize "message imbalance" in conversations
- We will see how relationship strength varies over time 

In [None]:
## Load the messages (group chats excluded) and derive the per-date,
## per-conversation counts used by the cells below (columns read later:
## date, conversation_name, total, messages_from_me, messages_from_other).
## hide_names is currently False, so individuals' names are shown; switch it
## to True to hide them (e.g. before sharing the rendered notebook).
message_df = build_message_df(include_group_chats=False, hide_names=False)
daily_message_counts = get_message_counts(message_df)

In [None]:
## Top 10 conversations by total message count over the past 365 days.
date_last_year = (pd.Timestamp.now() - pd.Timedelta(days=365)).date()

# Keep only the rows dated on or after the cutoff, then total them per conversation.
is_recent = daily_message_counts['date'] >= date_last_year
last_year_message_counts = (
    daily_message_counts[is_recent]
    .groupby('conversation_name')['total']
    .sum()
)

top_message_counts = last_year_message_counts.sort_values(ascending=False).head(10)
display(top_message_counts)

In [None]:
## Over the whole history, what is the message imbalance of the top 20 people I have talked to?
# Drop the date column so it is not aggregated, sum the remaining columns per
# conversation, then keep the 20 conversations with the largest totals.
message_count_total = (
    daily_message_counts
    .drop(columns=['date'])
    .groupby('conversation_name')
    .sum()
)
message_count_total_top = message_count_total.sort_values(by='total', ascending=False).head(20)

# Grouped bar chart: one pair of bars per conversation (sent vs. received).
bar_width = 0.25
sent_counts = message_count_total_top['messages_from_me']
received_counts = message_count_total_top['messages_from_other']

# The "sent" bar is centred on the integer position, the "received" bar sits
# immediately to its right.
sent_positions = np.arange(len(message_count_total_top))
received_positions = sent_positions + bar_width

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(sent_positions, sent_counts, color='b', width=bar_width,
       edgecolor='grey', label='messages_from_me')
ax.bar(received_positions, received_counts, color='r', width=bar_width,
       edgecolor='grey', label='messages_from_other')

# Each pair spans [x - w/2, x + 3w/2], so its midpoint is x + w/2; placing the
# tick there centres the label under the pair rather than under the red bar.
ax.set_xticks(sent_positions + bar_width / 2)
ax.set_xticklabels(message_count_total_top.index, rotation=90)
ax.set_xlabel('conversation_name', fontweight='bold')
ax.set_ylabel('message count', fontweight='bold')
ax.set_title('Messages sent vs. received (top 20 conversations)')

ax.legend()
plt.show()

In [None]:
## How does the message total change over time for the 5 busiest conversations?
top5_names = message_count_total_top.head(5).index
top5_daily_counts = daily_message_counts[
    daily_message_counts['conversation_name'].isin(top5_names)
]

# Reshape to one row per date and one column per conversation, treat missing
# entries as 0 messages, then smooth with a mean over a 30-row rolling window
# (the first 29 rows are therefore NaN and are not drawn).
message_count_over_time = (
    top5_daily_counts
    .pivot(index='date', columns='conversation_name', values='total')
    .sort_index()
    .fillna(0)
    .rolling(30)
    .mean()
)

plt.figure(figsize=(10, 5))
plt.plot(message_count_over_time)
# Anchor the legend outside the axes so it does not cover the lines.
plt.legend(message_count_over_time.columns, bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

### Let's look at some more complex metrics from message data
- Response times
- Amount of laughter
- Questions

In [None]:
## Let's compare my response times to others with their response times to me
response_time_to_me_df = get_response_times_to_me(message_df)
response_time_from_me_df = get_response_times_from_me(message_df)

my_ghost_pct = round(get_ghost_pct(response_time_from_me_df), 3)
others_ghost_pct = round(get_ghost_pct(response_time_to_me_df), 3)

print(f"How frequently do I ghost others: {my_ghost_pct}%")
print(f"How frequently do others ghost me: {others_ghost_pct}%")

# Overlay the two distributions on a log scale.
fig, ax = plt.subplots()
for label, response_df in (
    ('to_me', response_time_to_me_df),
    ('from_me', response_time_from_me_df),
):
    minutes = response_df['response_time_in_min']
    # log(0) is -inf and log of a missing/negative value is NaN; hist() raises
    # when asked to bin non-finite data, so keep only finite, strictly
    # positive response times before taking the log.
    minutes = minutes[np.isfinite(minutes) & (minutes > 0)]
    ax.hist(np.log(minutes), bins=100, alpha=0.5, label=label)

ax.legend(loc='upper right')
ax.set_title('Response Time Distribution')
ax.set_xlabel('Log Response Time')
ax.set_ylabel('Number of responses')
plt.show()

In [None]:
## Who generated the most laughter in the last year? (top 10 of the comedian rankings)
# date_last_year is the 365-day cutoff computed in the "top 10 people" cell above.
last_year_message_df = message_df[message_df['date'] >= date_last_year]
comedian_rankings = get_comedian_rankings(last_year_message_df)
display(comedian_rankings.head(10))

In [None]:
## Who answers the most questions in the last year?
# Store the result under its own name. Assigning it back to
# `get_professor_rankings` would replace the imported function with its
# return value, so running this cell a second time would fail with
# "object is not callable".
professor_rankings = get_professor_rankings(message_df[message_df.date >= date_last_year])
display(professor_rankings.head(10))

## 