See the README for an explanation of how this code runs and functions.

Contact michaeldezube at gmail dot com with questions.

In [None]:
import table_connector

import pandas as pd
from IPython.display import display
from IPython.display import HTML

In [None]:
# Load the message and address book dataframes.

messages_df = table_connector.get_message_df()
# Drop some columns that we don't use now, but may in the future.
# Reassign the result (rather than inplace=True) to match how the address
# book columns are dropped below.
messages_df = messages_df.drop(['version', 'is_emote', 'is_read', 'is_system_message',
                                'is_service_message', 'has_dd_results'],
                               axis=1)
# A parenthesized single-argument print behaves the same as a print statement
# under Python 2 and is also valid Python 3.
print('Loaded {0:,} messages.'.format(messages_df.shape[0]))
display(messages_df.head(1))


address_book_df = table_connector.get_address_book()
# Drop a column that we don't use now, but may in the future.
address_book_df = address_book_df.drop('property', axis=1)
print('Loaded {0:,} contacts.'.format(address_book_df.shape[0]))
display(address_book_df.head(1))

In [None]:
# Join the address book (it has the names) with the messages.

fully_merged_messages_df = table_connector.get_merged_message_df(messages_df, address_book_df)
# Drop a few columns we don't care about for now.
fully_merged_messages_df = fully_merged_messages_df.drop(['handle_id',
                                                          'country_messages_df',
                                                          'country_other_join_tbl',
                                                          'service_messages_df',
                                                          'service_other_join_tbl'],
                                                         axis=1)

# Rows whose merge_chat_with_address value is not 'both' are reported as
# messages whose phone number was not found in the address book.
print('Messages with phone numbers not found in address book: {0:,}'.format(
    fully_merged_messages_df[fully_merged_messages_df.merge_chat_with_address != 'both'].shape[0]))
# Format the whole message first and print the result. Calling .format() on
# the value of print(...) only works with the Python 2 print statement; the
# extra pair of parentheses keeps this correct under both Python 2 and 3.
print(('Messages loaded: {0:,} (this is larger than the length of the messages_df since certain '
       'message IDs were sent in group messages.)').format(fully_merged_messages_df.shape[0]))

# Drop the messages dataframe since we have a merged version as well.
del messages_df
# Drop some columns that we're no longer going to need.
fully_merged_messages_df = fully_merged_messages_df.drop(['merge_chat_with_address',
                                                          'merge_chat_with_address_and_messages'],
                                                         axis=1)

In [None]:
# Collapse the first name, last name and company columns of each table into a
# single "full_name" column. The helper runs in place, so nothing is reassigned.
for frame in (fully_merged_messages_df, address_book_df):
    table_connector.collapse_first_last_company_columns(frame)

# Preview one row of each table, messages first and then the address book.
for frame in (fully_merged_messages_df, address_book_df):
    display(frame.head(1))

# End Setting Up Tables

From here on, use `fully_merged_messages_df` and `address_book_df` for analysis.

<hr/>

In [None]:
# Visualize and output a word tree.

import json
import wordtree

CONTACT_NAME = 'Mom'  #  Freely change this value.
ROOT_WORD = 'feel'  #  Freely change this value, note emojis are supported.

# Select the conversation with the contact once, then split it by direction,
# instead of re-evaluating the full_name filter against the whole table for
# each of the three subsets.
filtered_texts = fully_merged_messages_df[fully_merged_messages_df.full_name == CONTACT_NAME]

# Parenthesized single-argument prints work under both Python 2 and 3.
filtered_texts_i_sent = filtered_texts[filtered_texts.is_from_me == 1]
print('I sent {0:,} texts to {1}'.format(filtered_texts_i_sent.shape[0], CONTACT_NAME))

filtered_texts_i_received = filtered_texts[filtered_texts.is_from_me == 0]
print('I received {0:,} texts from {1}'.format(filtered_texts_i_received.shape[0], CONTACT_NAME))

print('I exchanged {0:,} texts with {1}'.format(filtered_texts.shape[0], CONTACT_NAME))

def get_google_json_for_dataframe(df):
    """Return the `text` column of `df` as JSON in the format used for word trees.

    Args:
        df: A dataframe with a `text` column.

    Returns:
        A JSON string encoding a list whose first element is the header row
        [['Phrases']], followed by one single-element row per value in
        `df.text`, in the dataframe's row order.
    """
    # NOTE(review): Google's word tree examples use ['Phrases'] (one level less
    # nesting) as the header row. The original nesting is kept because
    # wordtree.get_word_tree_html is not visible from here -- confirm.
    rows = [[['Phrases']]]
    # Iterate the column's values directly: Series.iteritems() was removed in
    # pandas 2.0, and the index labels it also yielded were never used.
    rows.extend([text] for text in df.text)
    return json.dumps(rows)

# tree_type may be 'double', 'suffix', or 'prefix'; see
# https://developers.google.com/chart/interactive/docs/gallery/wordtree#configuration-options for details
received_texts_json = get_google_json_for_dataframe(filtered_texts_i_received)
word_tree_html = wordtree.get_word_tree_html(received_texts_json, ROOT_WORD,
                                             tree_type='double', lowercase=True)
# Keep the HTML object as the cell's final expression so the notebook renders it.
HTML(word_tree_html)

In [None]:
# Just a bit of fun to show you who you text the most.
# Note "nan" means the number was not found in your address book.

def get_message_counts(dataframe):
    """Count the rows of `dataframe` in total and by `is_from_me` value.

    Args:
        dataframe: A dataframe with an `is_from_me` column.

    Returns:
        A pandas Series with three entries: 'Texts sent' (rows where
        is_from_me == 1), 'Texts received' (rows where is_from_me == 0) and
        'Texts exchanged' (all rows).
    """
    sent_by_me = dataframe[dataframe.is_from_me == 1]
    sent_to_me = dataframe[dataframe.is_from_me == 0]
    counts = {'Texts sent': len(sent_by_me),
              'Texts received': len(sent_to_me),
              'Texts exchanged': len(dataframe)}
    return pd.Series(counts)
# Per-contact message counts, with the most-texted contacts first.
messages_grouped = (
    fully_merged_messages_df
    .groupby('full_name')
    .apply(get_message_counts)
    .sort_values(by='Texts exchanged', ascending=False)
)
messages_grouped.head(10)  # Change this value to see more rows

In [None]:
# A bit more fun.
%matplotlib inline
import matplotlib
matplotlib.style.use('ggplot')

messages_grouped.head(10).plot(figsize=(20,10), kind='bar')  # Change this value to see more rows