In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


In [16]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Author : Luca Corrieri
# JSON-messenger-exporter
# 2019 MIT License

import datetime
from enum import Enum

class MonthEN(Enum):
    '''
        English month names.
        Every member's value is a (display name, month number) pair,
        the month number running from 1 (January) to 12 (December).
    '''
    JAN = 'January', 1
    FEB = 'February', 2
    MAR = 'March', 3
    APR = 'April', 4
    MAY = 'May', 5
    JUN = 'June', 6
    JUL = 'July', 7
    AUG = 'August', 8
    SEP = 'September', 9
    OCT = 'October', 10
    NOV = 'November', 11
    DEC = 'December', 12

class MonthFR(Enum):
    '''
        French month names.
        Every member's value is a (display name, month number) pair,
        the month number running from 1 (Janvier) to 12 (Décembre).
    '''
    JAN = 'Janvier', 1
    FEB = 'Février', 2
    MAR = 'Mars', 3
    APR = 'Avril', 4
    MAY = 'Mai', 5
    JUN = 'Juin', 6
    JUL = 'Juillet', 7
    AUG = 'Août', 8
    SEP = 'Septembre', 9
    OCT = 'Octobre', 10
    NOV = 'Novembre', 11
    DEC = 'Décembre', 12

def dateFormat(s):
    '''
        Format the timestamp_ms into a readable date like this :
        'January 01 2019 at 00:42:00'
        The day is zero-padded and the timestamp is interpreted in the
        machine's local timezone.
        @param s: milliseconds from unix epoch
        @return: the formatted date string
    '''
    # Convert once; every field below is read from this single datetime
    messageDate = datetime.datetime.fromtimestamp(s / 1000.0)

    # Fallback label, used only if no enum member carries this month number
    messageMonthStr = 'ERROR '
    for month in MonthEN:
        if messageDate.month == month.value[1]:
            messageMonthStr = month.value[0] + ' '
            break

    return messageMonthStr + messageDate.strftime('%d %Y') + ' at ' + messageDate.strftime('%H:%M:%S')

def frenchDateFormat(s):
    '''
        Format the timestamp_ms into a readable date like this :
        '01 Janvier 2019 à 00:42:00'
        The day is zero-padded and the timestamp is interpreted in the
        machine's local timezone.
        @param s: milliseconds from unix epoch
        @return: the formatted date string
    '''
    # Convert once; every field below is read from this single datetime
    messageDate = datetime.datetime.fromtimestamp(s / 1000.0)

    # Fallback label, used only if no enum member carries this month number.
    # The trailing space keeps it separated from the year, as in dateFormat.
    messageMonthStr = 'ERROR '
    for month in MonthFR:
        if messageDate.month == month.value[1]:
            messageMonthStr = month.value[0] + ' '
            break

    # The accented separator is concatenated rather than passed through strftime
    return messageDate.strftime('%d ') + messageMonthStr + messageDate.strftime('%Y') + ' à ' + messageDate.strftime('%H:%M:%S')


In [54]:


import sys, getopt, json, time, os.path
from jinja2 import Environment, FileSystemLoader

# ------------------- Message and Conversation classes -------------------------

class Message:
    '''
        One message of a conversation: who sent it, what it carries and when.
    '''
    def __init__(self, sender, contentType, content, addContent, date):
        self.sender = sender
        # contentType is one of: text / photos / audio / gif / sticker / video
        # content is plain text or a media link
        # addContent is an optional extra text (videos sometimes come with one)
        self.contentType, self.content, self.addContent = contentType, content, addContent
        # date is already formatted for display
        self.date = date


class Conversation:
    '''
        A whole conversation: its title, its participants, its messages and
        the username of the person the export is made for.
    '''
    def __init__(self, title, participants, messages, username):
        self.title, self.participants = title, participants
        self.messages, self.username = messages, username

# ---------------------------- JSON file ---------------------------------------

def loadJSONFile(file):
    '''
        Returns the decoded content of a json file.
        Prints an error and exits the interpreter with status 42 when the
        file does not exist. A malformed file raises json.JSONDecodeError.
        @param file: path to the file
        @return: the decoded JSON value
    '''
    try:
        # Explicit encoding: JSON text is UTF-8, whereas open() would otherwise
        # use the platform's default encoding. A separate handle name also
        # avoids shadowing the `file` parameter.
        with open(file, encoding='utf-8') as jsonFile:
            return json.load(jsonFile)
    except FileNotFoundError:
        print("ERROR: You have to specify a correct path for input folder")
        sys.exit(42)


def _extractContent(message, inputfolder, stickers):
    '''
        Works out what a single raw message carries.
        @param message: one message dictionary from the JSON export
        @param inputfolder: folder the media paths are built from
        @param stickers: folder the sticker paths are built from ('' if none)
        @return: a (contentType, content, addContent) tuple
    '''
    addContent = "" # by default

    # A text message, unless the text is only the caption of a video
    if "content" in message and "videos" not in message:
        return "text", [encodingCorrection(message["content"])], addContent

    # Media lists, probed in priority order: (JSON key, reported content type)
    mediaKinds = (("photos", "photos"), ("audio_files", "audio"), ("gifs", "gif"), ("videos", "video"))
    for key, contentType in mediaKinds:
        if key in message:
            if key == "videos" and "content" in message: # videos sometimes come with a text content
                addContent = encodingCorrection(message["content"])
            content = [mediaManager(encodingCorrection(media["uri"]), key, inputfolder, stickers)
                       for media in message[key]]
            return contentType, content, addContent

    if "sticker" in message:
        stickerPath = mediaManager(encodingCorrection(message["sticker"]["uri"]), "sticker", inputfolder, stickers)
        return "sticker", [stickerPath], addContent

    # None of the six known kinds: report it explicitly instead of leaving
    # contentType unset (which crashed or silently reused the previous row's type)
    return "unknown", [], addContent


def buildMessageList(messages, language, inputfolder, stickers):
    '''
        Returns the messages as a pandas DataFrame, dates formatted in the chosen language.
        The frame has one row per message and the columns
        sender, contentType, content, addContent and date.
        contentType is one of text / photos / audio / gif / video / sticker,
        or "unknown" when the message carries none of those.
        @param messages: the list of message dictionaries
        @param language: the language (FR/EN)
        @param inputfolder: folder the media paths are built from
        @param stickers: folder the sticker paths are built from ('' if none)
        @raise Exception: when a message is processed with a language other than FR/EN
    '''
    formatters = {"FR": frenchDateFormat, "EN": dateFormat}
    columns = {"sender":[], "contentType":[], "content":[], "addContent":[], "date":[]}

    # Walk the list backwards so the rows come out sorted
    # (presumably the export lists the newest message first - TODO confirm)
    for message in reversed(messages):
        sender = encodingCorrection(message["sender_name"])
        contentType, content, addContent = _extractContent(message, inputfolder, stickers)

        if language not in formatters:
            raise Exception("Unknown language")
        date = formatters[language](message["timestamp_ms"])

        columns["sender"].append(sender)
        columns["contentType"].append(contentType)
        columns["content"].append(content)
        columns["addContent"].append(addContent)
        columns["date"].append(date)

    return pd.DataFrame(columns)


def mediaManager(path, contentType, inputfolder, stickers):
    '''
        Returns the correct path for a media file.
        Only the file name of `path` is kept; it is re-rooted under a
        sub-folder of `inputfolder`, or under `stickers` for a sticker.
        @param path: the media path as found in the JSON export
        @param contentType: photos / audio_files / gifs / videos / sticker
        @param inputfolder: folder holding the photos, audio, gifs and videos sub-folders
        @param stickers: folder holding the stickers ('' if none)
        @return: the normalized path, or '.' (the normalized empty path) for an
                 unknown contentType or a sticker without a stickers folder
    '''
    filename = os.path.basename(path)

    # contentType -> name of the sub-folder inside inputfolder
    subfolders = {"photos": "photos", "audio_files": "audio", "gifs": "gifs", "videos": "videos"}

    filepath = ""
    if contentType in subfolders:
        filepath = inputfolder + '/' + subfolders[contentType] + '/' + filename
    elif contentType == "sticker" and stickers != '':
        # join adds the separator only when the folder does not already end with
        # one; plain concatenation glued "folder" and "file" into "folderfile"
        filepath = os.path.join(stickers, filename)

    return os.path.normpath(filepath)

# ------------------------------- Program --------------------------------------

def helpDisplay():
    '''
        Prints the full command line help: usage line, then one line per option.
    '''
    helpLines = (
        "Basic usage: main.py -i <inputfolder> -o <htmlouputfile> [-s <stickerfolder>] -n <your_username> -l <FR/EN>",
        "",
        "Arguments:",
        "-i, --input <path>: the path to the folder containing your conversation (the JSON file must be named 'message_1.json')",
        "-o, --output <path>: the path to the HTML output file (created if it does not exist)",
        "-s, --stickers <path>: the path to the folder containing your stickers (optional)",
        "-n, --username <your_username>: your username in the conversation (ex: -n 'John Doe')",
        "-l, --lang <FR/EN>: the language to display dates and other elements",
        "-g, --log: save a log with the messages in [outputfile].log",
        "-h, --help: display this help",
        "",
    )
    for helpLine in helpLines:
        print(helpLine)


def wrongArguments():
    '''
        Prints the short usage reminder shown when the arguments cannot be parsed.
    '''
    usage = "Wrong arguments: main.py -i <inputfolder> -o <htmlouputfile> [-s <stickerfolder>] -n <your_username> -l <FR/EN>"
    hint = "Run main.py -h for more info"
    print(usage, hint, sep="\n")


def loadArguments(argv):
    '''
        Parses the command line arguments.
        Exits with status 2 (after printing the usage reminder) when argv is
        empty or cannot be parsed, and exits after printing the help on -h/--help.
        A -l/--lang value other than FR or EN is ignored, leaving 'ERROR'.
        @param argv: the list of arguments, program name excluded
        @return: the tuple (inputfolder, outputfile, username, language, saveLog, stickers)
    '''
    # Defaults returned for every option that is not given
    inputfolder = ''
    outputfile = ''
    username = 'NOBODY'
    language = 'ERROR'
    saveLog = False
    stickers = ''

    if len(argv) == 0:
        wrongArguments()
        sys.exit(2)

    try:
        opts, args = getopt.getopt(argv, "hi:o:n:l:gs:", ["help", "input=", "output=", "username=", "lang=", "log", "stickers="])
    except getopt.GetoptError:
        wrongArguments()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            helpDisplay()
            sys.exit()
        elif opt in ("-i", "--input"):
            inputfolder = arg
        elif opt in ("-o", "--output"):
            outputfile = arg
        elif opt in ("-n", "--username"):
            username = arg
        elif opt in ("-l", "--lang") and arg in ("FR", "EN"):
            language = arg
        elif opt in ("-g", "--log"):
            saveLog = True
        elif opt in ("-s", "--stickers"):
            # Must match the "stickers=" long option declared above; the former
            # misspelling made --stickers parse successfully but never apply
            stickers = arg

    return (inputfolder, outputfile, username, language, saveLog, stickers)


def encodingCorrection(string):
    '''
        Repairs a text whose UTF-8 bytes were decoded as latin1
        (for instance 'FÃ©vrier' becomes 'Février').
        A text that cannot be round-tripped that way is returned unchanged.
        @param string: the text to repair
        @return: the repaired text, or the original text when no repair applies
    '''
    try:
        return string.encode('latin1').decode('utf-8')
    except UnicodeError:
        # Either a character outside latin1 (encode failed) or bytes that are
        # not valid UTF-8 (decode failed): the text was not mis-decoded this way
        return string

# ------------------------------ Main ------------------------------------------




In [75]:
def loadJsonFiles(files):
    '''
        Loads several message files into a single DataFrame.
        Every file is loaded, converted with buildMessageList (English dates,
        no media folders) and tagged with its own conversation title in a
        "title" column. Row indexes are kept as produced for each file.
        @param files: the paths of the JSON files
        @return: the concatenated DataFrame, or an empty one when `files` is empty
    '''
    frames = []
    for file in files:
        jsonData = loadJSONFile(file)
        messages = buildMessageList(jsonData["messages"], "EN", '','')
        # Tag the rows while the file is at hand, so the title neither depends
        # on the last file nor fails when there is no file at all
        frames.append(messages.assign(title=encodingCorrection(jsonData["title"])))

    if not frames:
        return pd.DataFrame()

    # One concat instead of DataFrame.append in a loop (removed in pandas 2.0)
    return pd.concat(frames)

            

            

In [81]:
# The conversation export is split into message_1.json ... message_5.json
data = loadJsonFiles(["message_{}.json".format(part) for part in range(1, 6)])

In [73]:
# NOTE(review): no earlier cell of this notebook defines jsonData (presumably it was
# left over from a deleted cell), so it is loaded explicitly here.
# Assumes the conversation of interest is message_1.json - TODO confirm.
jsonData = loadJSONFile("message_1.json")

# Keep the participant dictionaries but with their name repaired; the former
# loop only rebound its loop variable and left the list untouched
participants = [dict(participant, name=encodingCorrection(participant["name"]))
                for participant in jsonData["participants"]]

title = encodingCorrection(jsonData["title"])
messages = buildMessageList(jsonData["messages"], "EN", '','')

conversation = Conversation(title, participants, messages, 'Tariq Massaoudi')

In [74]:
title

'Adil El Bay'

# Data visualization

In [87]:
# Bucket every message by calendar month: parse the formatted date strings,
# then truncate the resulting timestamps to monthly periods
data["month_year"] = (
    pd.to_datetime(data["date"])
    .dt
    .to_period(freq='M')
)

In [92]:
# Number of messages per month, as a two-column frame (month_year, sender)
group = (
    data.groupby("month_year")["sender"]
    .count()
    .reset_index()
)



In [101]:
# -*- coding: utf-8 -*-

# Run this app with `python app.py` and
# visit http://127.0.0.1:8050/ in your web browser.

import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objects as go

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Sample long-form data frame; nothing below in this cell reads it
df = pd.DataFrame(dict(
    Fruit=["Apples", "Oranges", "Bananas", "Apples", "Oranges", "Bananas"],
    Amount=[4, 1, 2, 2, 4, 5],
    City=["SF", "SF", "SF", "Montreal", "Montreal", "Montreal"],
))

# Messages per month, drawn as a smoothed line with a marker on every month.
# The periods are turned into strings before being handed to the x axis.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=group["month_year"].astype(str),
        y=group["sender"],
        mode='lines+markers',
        name='lines+markers',
        line=dict(shape='spline', smoothing=1.3),
    )
)

# Page layout: a heading, a line of text, then the graph
app.layout = html.Div([
    html.H1('Messenger Data Vis'),

    html.Div('''
        Dash: A web application framework for Python.
    '''),

    dcc.Graph(id='example-graph', figure=fig),
])

if __name__ == '__main__':
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/



INFO:__main__:Dash is running on http://127.0.0.1:8050/



 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off
