# Facebook Messenger Stats!

In [None]:
%matplotlib inline

import JSON_message_parser, os, stats
import datetime

# Create the folder the generated csv files are saved to.
# The previous hard-coded '.\\csv files\\' only denotes a sub-directory on
# Windows; on other systems the backslashes are ordinary filename characters,
# and because exists() and mkdir() were given different strings, re-running
# the cell there raised FileExistsError. os.path.join builds the path with the
# separator of the current platform instead.
# NOTE(review): presumably the stats module writes its csv output into this
# folder - confirm it builds the same path.
csv_dir = os.path.join('.', 'csv files')
os.makedirs(csv_dir, exist_ok=True)  # no error when the folder already exists

**Parse all your data into a dict to be manipulated**

This may take a while!

In [None]:
# Parse the exported data once; parse() yields the chat dict and the totals.
parsed = JSON_message_parser.parse()
chat_dict, totals = parsed

# Register the totals with the stats module.
stats.setTotals(totals)

# Who do you talk to most?

First, let's find your chats with the most messages!
`stats.getMostMessaged` takes two arguments: the dict of your chats and how many chats to display.

In [None]:
# How many chats to show: the top `num_chats` chats will be displayed.
num_chats = 10

# Table of the chats with the most messages.
df_most_messaged = stats.getMostMessaged(chat_dict, num_chats)

# Keep the 'chat' column; later cells pass it on to select the chats to analyse.
most_active_chats = df_most_messaged['chat']

# Last expression of the cell, so the notebook renders the table.
df_most_messaged

Now let's plot this data:

In [None]:
# Table to plot and the title shown on the figure.
data = df_most_messaged
plot_title = 'Most messaged users'

# Columns of `data` used for the axes; y may also be '% of total messages'.
x, y = 'chat', 'number of messages'

stats.plot(x, y, data, plot_title)

Of the people you talk to most, who sends the longest messages? (including all participants of top group chats)

Let's see who sends the most **words** per message:

In [None]:
# Average message length measured in words (typeLen='words'), restricted to
# the most active chats selected earlier.
df_lengthiest_sender = stats.getAverageMessageLength(
    chat_dict,
    len(most_active_chats),
    most_active_chats,
    typeLen='words',
)

stats.plot(
    'sender',
    'average message length',
    df_lengthiest_sender,
    'Average message length (words)',
)

# Last expression of the cell, so the notebook renders the table.
df_lengthiest_sender


What about most **characters** per message?

In [None]:
# Average message length measured in characters (typeLen='chars'), restricted
# to the most active chats selected earlier.
# Note: this rebinds df_lengthiest_sender, replacing the words-based table.
df_lengthiest_sender = stats.getAverageMessageLength(
    chat_dict,
    len(most_active_chats),
    most_active_chats,
    typeLen='chars',
)

stats.plot(
    'sender',
    'average message length',
    df_lengthiest_sender,
    'Longest average message length (characters)',
)

# Last expression of the cell, so the notebook renders the table.
df_lengthiest_sender

# Are your chats balanced?

Are your most active chats 50/50, or are you sending more messages than you're receiving?
Who's leaving you on 'read'?

In [None]:
# Sent/received balance for each of the most active chats.
df_imbalance = stats.chatImbalance(chat_dict, most_active_chats)

# Plot the '% of messages were received' column per chat.
stats.plot(
    'chat',
    '% of messages were received',
    df_imbalance,
    'Chat Imbalance',
)

# Last expression of the cell, so the notebook renders the table.
df_imbalance

# When are you procrastinating the most?

Now let's look at your most active times on Facebook Messenger: 

We can compare the most active time, minute, hour, day, month, or year using `stats.getMostActiveTime`.

However, the most active minute, day and time are usually boring and uniform.


In [None]:
# stats.getMostActiveTime takes: the chat dictionary, the number of items to
# display ('max' to display all) and the type of time.

# Most active hours, ordered by hour.
df_hour = stats.getMostActiveTime(chat_dict, 'max', "hour").sort_values('hour', ascending=True)

# Most active years, ordered by year.
df_year = stats.getMostActiveTime(chat_dict, 'max', "year").sort_values('year', ascending=True)

# Most active months. The 'month' column is mapped through a
# month name -> month number lookup so the rows can be put in calendar order.
df_month = stats.getMostActiveTime(chat_dict, 'max', "month")
months = {
    datetime.datetime(2000, number, 1).strftime("%B"): number
    for number in range(1, 13)
}
df_month["month_number"] = df_month["month"].map(months)
df_month = df_month.sort_values('month_number', ascending=True)

# One figure per time unit.
stats.plot('hour', 'number of messages', df_hour, 'Most active hour')
stats.plot('month', 'number of messages', df_month, 'Most active month')
stats.plot('year', 'number of messages', df_year, 'Most active year')

# What are you sending to people?

What are your most sent words? Let's find out:


In [None]:
# A sender's name restricts the count to that sender; 'ANY_SENDER' counts words from any sender.
sender = 'ANY_SENDER'
# Minimum length of the words to count - single letter words are boring!
chars = 6
# How many words to display (e.g. top_words = 10 displays the top 10 words).
top_words = 5

df_words = stats.getMostUsedWords(chat_dict, top_words, sender, chars)

# Last expression of the cell, so the notebook renders the table.
df_words



# How are you communicating?

See your breakdown between text, image, video and stickers!

In [None]:
# Use a chat's name to get the message types of that chat only,
# or 'ANY_CHAT' to cover any chat.
chat = 'ANY_CHAT'

df_type = stats.typesOfMessages(chat_dict, chat)

# Who are you in the most group chats with?

In [None]:
# How many people to show.
# Note: this rebinds the num_chats variable set in the "most messaged" cell.
num_chats = 10

# Table of the participants you share the most group chats with.
df_most_common = stats.inMostGroupChats(chat_dict, num_chats)

# Last expression of the cell, so the notebook renders the table.
df_most_common

Let's visualize this!

In [None]:
# Plot the number of shared group chats per participant.
stats.plot(
    'participant',
    'number of group chats',
    df_most_common,
    'Most common group chat participant',
)