In [10]:
import datetime
import json
import os
import pickle
import random
import re
import textwrap
from pathlib import Path
from collections import OrderedDict

import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.optimize import curve_fit
from scipy.spatial import ConvexHull

import message_helpers as mh


In [11]:
with open("Hangouts.json", encoding="utf-8") as h:
    the_json = json.load(h)


```
sender_name                    Kimberly Ward
timestamp_ms                   1613776978603
content                     Good. Don't fit.
type                                 Generic
is_unsent                              False
users                                    NaN
source_convo         kimberlyward_wskykd6lkg
datetime          2021-02-20 10:22:58.603000
message_length                            16
platform                            Facebook
photos                                   NaN
reactions                                NaN
call_duration                            NaN
sticker                                  NaN
videos                                   NaN
share                                    NaN
ip                                       NaN
gifs                                     NaN
files                                    NaN
audio_files                              NaN
missed                                   NaN
clean_content               Good. Don't fit.```

In [12]:
messages = []


def process_segments(segs):
    text = []
    for segment in segs:
        if segment["type"] in ["TEXT", "LINE_BREAK", "LINK"]:
            text.append(segment["text"])
        else:
            print(segs)
    return " ".join(text)


def make_payload_for_standard_chat(event, participants):
    segs = event["chat_message"]["message_content"]["segment"]
    return {
        "sender_name": participants.get(event["sender_id"]["gaia_id"], "unknown"),
        "timestamp_ms": event["timestamp"],
        "content": process_segments(segs),
        # "sender_ID": event["sender_id"]["gaia_id"],
        "platform": "Hangouts",
        "type": "Generic",
        "is_unsent": False,
    }


def make_payload_for_attachment(event, participants):
    url = event["chat_message"]["message_content"]["attachment"][0].get("url", "")
    name = participants.get(event["sender_id"]["gaia_id"], "unknown")
    return {
        "sender_name": name,
        "timestamp_ms": event["timestamp"],
        "share": {"link": url},
        "content": url,
        # "sender_ID": event["sender_id"]["gaia_id"],
        "type": "Share",
        "is_unsent": False,
        "platform": "Hangouts",
    }


def make_payload_for_hangout_event(event, participants):

    return {
        "content": np.nan,
        # "sender_ID": event["sender_id"]["gaia_id"],
        "timestamp_ms": event["timestamp"],
        "sender_name": name,
        "platform": "Hangouts",
        "type": "Call",
        "event_id": "event_id",
        "is_unsent": False,
    }


for conversation in the_json["conversations"]:

    ev = conversation["events"]
    conv = conversation["conversation"]

    participants = {}
    for p in conv["conversation"]["participant_data"]:
        name = p.get("fallback_name", "unknown")
        pid = p["id"]["gaia_id"]
        participants[pid] = name
    participants

    for event in ev:
        try:
            if event.get("chat_message") and event["chat_message"][
                "message_content"
            ].get("segment"):
                payload = make_payload_for_standard_chat(event, participants)
                messages.append(payload)
            elif event.get("chat_message") and event["chat_message"][
                "message_content"
            ].get("attachment"):
                payload = make_payload_for_attachment(event, participants)
                messages.append(payload)
            elif event.get("hangout_event"):
                payload = make_payload_for_hangout_event(event, participants)
                messages.append(payload)
        except Exception as error:
            # print("error:\n", error, event, "\n")
            pass


In [13]:
df = pd.DataFrame(messages)

In [14]:
df.sample(10)


Unnamed: 0,sender_name,timestamp_ms,content,platform,type,is_unsent,event_id,share
110692,Irina Belova,1370927568109586,"yeah, it's really cool",Hangouts,Generic,False,,
141660,David Wilcox,1402969851201967,"uck yeah, that usb adapter you left on my desk...",Hangouts,Generic,False,,
74993,Ben Doherty,1467733694242068,I'll look it up,Hangouts,Generic,False,,
88426,Irina Belova,1435098414226746,So that's good feedback,Hangouts,Generic,False,,
34720,Ben Doherty,1472959827874176,I'd be up for some vigorous exercise if you're...,Hangouts,Generic,False,,
73116,Irina Belova,1471676918548514,Done,Hangouts,Generic,False,,
90962,Ben Doherty,1394510362748260,but they seem like a lot,Hangouts,Generic,False,,
78457,Ben Doherty,1463617230905522,"I think I'd do an arbour tree graph, the synta...",Hangouts,Generic,False,,
115003,Ben Doherty,1493425373808667,probably plenty,Hangouts,Generic,False,,
15369,unknown,1428356094465073,http://sourcejs.com/,Hangouts,Generic,False,,


In [None]:
df.sender_name.value_counts()

Ben Doherty                 58946
Irina Belova                37635
unknown                     15977
David Wilcox                 9482
Lucy Rimmer                  8615
Charles Ogilvie              6699
Alessandra Moschella         2613
Melanie Mury                 1673
Dan R                        1226
Andrew Burrow                 971
Barry Dineen                  769
Angela Woda                   477
Anthea Murray                 295
Jonathan Capparelli           255
Daniel Elias                  166
Annisa Rizal                   55
ronainniss@gmail.com           54
ben@notionparallax.co.uk       53
John Doherty                   51
Alison Griffiths               46
Julia Mintzer                  23
Alex Lee                       23
Maryam Alavi                   20
Judy Lee                       15
Tiara Dobbs                    15
Amanda Tenn                    12
Barnaby Bennett                12
dave pigram                     8
Mitchell Hinds                  5
M. Hank Haeusl