In [None]:
import json

# Build a {user_id: user_name} lookup from the export's user list.
# The encoding is passed explicitly so the read does not depend on the
# platform's default locale encoding.
with open('export/users.json', 'r', encoding='utf-8') as file:
    user_records = json.load(file)

# Every record must provide 'id' and 'name'; a later record with the same id
# replaces an earlier one.
users = {record['id']: record['name'] for record in user_records}

print(f'{len(users)} users found')

In [None]:
from glob import glob
import datetime
import re
import os
import json

# Flat list of every message dict read from the export; the channel loop in
# this cell appends to it and later cells read from it.
all_messages = []

def replace_user_id(match):
    """Return the user name for the id captured by a mention match.

    `match` is a regex match object whose first capture group holds the user
    id. Ids that are missing from the module-level `users` mapping yield the
    placeholder 'unknown_user'.
    """
    user_id = match.group(1)
    if user_id in users:
        return users[user_id]
    return 'unknown_user'

# Message subtypes singled out as ones to ignore.
# NOTE(review): nothing in the visible cells reads this list, so no message
# is currently filtered by subtype - confirm whether that is intended.
ignored_message_subtypes = [
    'channel_join', 'channel_leave', 'channel_purpose',
    'file_share', 'channel_archive', 'bot_message',
]

# A mention looks like <@USERID> or <@USERID|label>. The id group stops at
# '|' or '>' so that several mentions (or other <...> tokens such as links)
# on one line are matched one at a time instead of being swallowed by a
# single greedy match.
MENTION_PATTERN = re.compile(r'<@([^>|]+)(?:\|[^>]*)?>')

# Walk export/<channel>/<day>.json and collect every message, tagged with its
# channel and with mentions in the text replaced by user names.
# Directory and file listings are sorted so the order of all_messages is
# reproducible from run to run.
channel_dirs = sorted(glob('export/*/'))
for channel_dir in channel_dirs:
    # normpath drops the trailing separator left by the glob pattern, so
    # basename yields the channel folder name with either path separator.
    channel_name = os.path.basename(os.path.normpath(channel_dir))

    # Only *.json files are read; stray files in a channel folder are skipped.
    day_files = sorted(
        name for name in os.listdir(channel_dir) if name.endswith('.json')
    )
    for day_file in day_files:
        with open(os.path.join(channel_dir, day_file), 'r', encoding='utf-8') as file:
            day_messages = json.load(file)

        for message in day_messages:
            # The 'user' field is left exactly as exported; only mentions
            # inside the text are resolved through replace_user_id.
            raw_text = message.get('text')
            if raw_text is None:
                raw_text = ''
            message['text'] = MENTION_PATTERN.sub(replace_user_id, raw_text)
            message['channel'] = channel_name

            all_messages.append(message)

# Persist the collected messages as one JSON array.
# json.dump writes straight to the file handle, so the `json` module name is
# never rebound to a string (which would break any later json.* call made
# before the module is imported again).
with open('dump.json', 'w') as f:
    json.dump(all_messages, f)

print(f'Wrote {len(all_messages)} messages to dump.json.')

In [None]:
from collections import defaultdict
import re

# Inline emoji are written as :name: inside message text. The pattern is a
# raw string so that \S and \w reach the regex engine without triggering
# Python's invalid-escape-sequence warning.
INLINE_EMOJI_PATTERN = re.compile(r':(\S?\w+):')


def print_emoji_counts(counts):
    """Print one ':name:, count' line per key of `counts`, highest count first."""
    for name in sorted(counts, key=counts.get, reverse=True):
        print(f':{name}:, {counts[name]}')


emoji_reactions = defaultdict(int)
inline_emoji = defaultdict(int)

for message in all_messages:
    # One increment per reaction entry attached to a message.
    # NOTE(review): if reaction entries also carry their own total, it is not
    # used here - confirm which figure is wanted.
    if 'reactions' in message:
        for reaction in message['reactions']:
            emoji_reactions[reaction['name']] += 1

    message_text = message.get('text')
    if message_text is not None:
        for emoji in INLINE_EMOJI_PATTERN.findall(message_text):
            inline_emoji[emoji] += 1

print('EMOJI REACTIONS')
print_emoji_counts(emoji_reactions)

print('\n\n\nINLINE EMOJI')
print_emoji_counts(inline_emoji)

In [None]:
# Group messages by author name: {username: [message, ...]}, built in a
# single pass over all_messages. Every known user gets an entry, even when
# they wrote nothing.
user_messages = {username: [] for username in users.values()}

for msg in all_messages:
    # .get() so a message without a 'user' field is skipped rather than
    # raising KeyError.
    author = msg.get('user')
    # 'user' may hold a raw user id or an already-resolved name: ids are
    # mapped through `users`, anything else is taken as the name itself.
    username = users.get(author, author)
    if username in user_messages:
        user_messages[username].append(msg)

In [None]:
import operator

# Number of messages per user name.
user_messages_count = {user: len(msgs) for user, msgs in user_messages.items()}

# Ascending by count; walked backwards below so the busiest users come first.
sorted_messages = sorted(user_messages_count.items(), key=operator.itemgetter(1))

# Print 'user,count' rows until the first user with fewer than 100 messages.
for user, count in reversed(sorted_messages):
    if count < 100:
        break
    print(f'{user},{count}')

In [None]:
import markovify

# Train a Markov chain on everything one user wrote and print a few
# generated sentences of at most 140 characters.
text = '\n'.join(msg['text'] for msg in user_messages['kilian'])
text_model = markovify.Text(text)

for attempt in range(10):
    sentence = text_model.make_short_sentence(140)
    # make_short_sentence gives None when it could not build a sentence;
    # skip those instead of printing the literal 'None'.
    if sentence is not None:
        print(sentence)