In [250]:
%reload_ext autoreload
%autoreload 2

In [251]:
import os, sys
import re
import json
import glob
import datetime
from collections import Counter

import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from nltk.corpus import stopwords
# from wordcloud import WordCloud

In [252]:
# Add parent directory to path to import modules from src
rpath = os.path.abspath('..')
if rpath not in sys.path:
    sys.path.insert(0, rpath)

from src.loader import SlackDataLoader
import src.utils as utils

### Columns we can get from a slack message<br>

message_type, message_content, sender_id, time_sent, message_distribution, time_thread_start, reply_count, reply_user_count, time_thread_end, reply_users

From a single slack message, we can get <br>

1. The message<br>
2. Type (message, file, link, etc)<br>
3. The sender_id (assigned by slack)<br>
4. The time the message was sent<br>
5. The team (i don't know what that is now)<br>
6. The type of the message (broadcast message, inhouse, just messgae)<br>
7. The thread the message generated (from here we can go):<br>
    7.1 Text/content of the message<br>
    7.2 The thread time of the message<br>
    7.3 The thread count (reply count)<br>
    7.4 The number of user that reply the message (count of users that participated in the thread)<br>
    7.5 The time the last thread message was sent <br>
    7.6 The users that participated in the thread (their ids are stored as well)<br>

In [253]:
# combine all json file in all-weeks8-9
from src.utils  import slack_parser
from src.utils import parse_slack_reaction
from src.utils import get_community_participation

In [254]:
# from get_tagged_users import "/src/utils.py"
# from convert_2_timestamp import "/src/utils.py"

def get_tagged_users(df):
    """get all @ in the messages"""

    return df['msg_content'].map(lambda x: re.findall(r'@U\w+', x))


    
def map_userid_2_realname(user_profile: dict, comm_dict: dict, plot=False):
    """
    map slack_id to realnames
    user_profile: a dictionary that contains users info such as real_names
    comm_dict: a dictionary that contains slack_id and total_message sent by that slack_id
    """
    user_dict = {} # to store the id
    real_name = [] # to store the real name
    ac_comm_dict = {} # to store the mapping
    count = 0
    # collect all the real names
    for i in range(len(user_profile['profile'])):
        real_name.append(dict(user_profile['profile'])[i]['real_name'])

    # loop the slack ids
    for i in user_profile['id']:
        user_dict[i] = real_name[count]
        count += 1

    # to store mapping
    for i in comm_dict:
        if i in user_dict:
            ac_comm_dict[user_dict[i]] = comm_dict[i]

    ac_comm_dict = pd.DataFrame(data= zip(ac_comm_dict.keys(), ac_comm_dict.values()),
    columns=['LearnerName', '# of Msg sent in Threads']).sort_values(by='# of Msg sent in Threads', ascending=False)
    
    if plot:
        ac_comm_dict.plot.bar(figsize=(15, 7.5), x='LearnerName', y='# of Msg sent in Threads')
        plt.title('Student based on Message sent in thread', size=20)
        
    return ac_comm_dict

In [255]:
def get_top_20_user(data, channel='Random'):
    """get user with the highest number of message sent to any channel"""

    data['sender_name'].value_counts()[:20].plot.bar(figsize=(15, 7.5))
    plt.title(f'Top 20 Message Senders in #{channel} channels', size=15, fontweight='bold')
    plt.xlabel("Sender Name", size=18); plt.ylabel("Frequency", size=14);
    plt.xticks(size=12); plt.yticks(size=12);
    plt.show()

    data['sender_name'].value_counts()[-10:].plot.bar(figsize=(15, 7.5))
    plt.title(f'Bottom 10 Message Senders in #{channel} channels', size=15, fontweight='bold')
    plt.xlabel("Sender Name", size=18); plt.ylabel("Frequency", size=14);
    plt.xticks(size=12); plt.yticks(size=12);
    plt.show()

def draw_avg_reply_count(data, channel='Random'):
    """who commands many reply?"""

    data.groupby('sender_name')['reply_count'].mean().sort_values(ascending=False)[:20]\
        .plot(kind='bar', figsize=(15,7.5));
    plt.title(f'Average Number of reply count per Sender in #{channel}', size=20, fontweight='bold')
    plt.xlabel("Sender Name", size=18); plt.ylabel("Frequency", size=18);
    plt.xticks(size=14); plt.yticks(size=14);
    plt.show()

def draw_avg_reply_users_count(data, channel='Random'):
    """who commands many user reply?"""

    data.groupby('sender_name')['reply_users_count'].mean().sort_values(ascending=False)[:20].plot(kind='bar',
     figsize=(15,7.5));
    plt.title(f'Average Number of reply user count per Sender in #{channel}', size=20, fontweight='bold')
    plt.xlabel("Sender Name", size=18); plt.ylabel("Frequency", size=18);
    plt.xticks(size=14); plt.yticks(size=14);
    plt.show()

def draw_wordcloud(msg_content, week):    
    # word cloud visualization
    allWords = ' '.join([twts for twts in msg_content])
    wordCloud = WordCloud(background_color='#975429', width=500, height=300, random_state=21, max_words=500, mode='RGBA',
                            max_font_size=140, stopwords=stopwords.words('english')).generate(allWords)
    plt.figure(figsize=(15, 7.5))
    plt.imshow(wordCloud, interpolation="bilinear")
    plt.axis('off')
    plt.tight_layout()
    plt.title(f'WordCloud for {week}', size=30)
    plt.show()

def draw_user_reaction(data, channel='General'):
    data.groupby('sender_name')[['reply_count', 'reply_users_count']].sum()\
        .sort_values(by='reply_count',ascending=False)[:10].plot(kind='bar', figsize=(15, 7.5))
    plt.title(f'User with the most reaction in #{channel}', size=25);
    plt.xlabel("Sender Name", size=18); plt.ylabel("Frequency", size=18);
    plt.xticks(size=14); plt.yticks(size=14);
    plt.show()

**************************

In [256]:
from src.loader import SlackDataLoader

data_path = '../anonymized/'

loader = SlackDataLoader(data_path)

channels = loader.get_channels()
users = loader.get_users()



In [257]:
channels_df = pd.DataFrame(channels)
channels_df.head(3)

Unnamed: 0,id,name,created,creator,is_archived,is_general,members,topic,purpose,pins
0,C03T0APHX63,all-community-building,1660301317,U03TEPYRM2P,False,False,"[U03T89ACUUW, U03TEPYRM2P, U03TNP8Q8CT, U03TT5...","{'value': '', 'creator': '', 'last_set': 0}","{'value': '', 'creator': '', 'last_set': 0}",
1,C03T0AX4K6K,all-technical-support,1660301462,U03TEPYRM2P,False,False,"[U03T89ACUUW, U03TEPYRM2P, U03TNP8Q8CT, U03TT5...","{'value': '', 'creator': '', 'last_set': 0}","{'value': '', 'creator': '', 'last_set': 0}",
2,C03T89KDGA2,all-career-exercises,1660301361,U03TEPYRM2P,False,False,"[U03T89ACUUW, U03TEPYRM2P, U03TNP8Q8CT, U03TT5...","{'value': '', 'creator': '', 'last_set': 0}","{'value': '', 'creator': '', 'last_set': 0}","[{'id': '1663839365.770289', 'type': 'C', 'cre..."


In [258]:
channels_df.columns

Index(['id', 'name', 'created', 'creator', 'is_archived', 'is_general',
       'members', 'topic', 'purpose', 'pins'],
      dtype='object')

In [271]:
pth = '../anonymized/all-week1/'
msg = utils.get_community_participation(path=pth)
# msg_df = 
print(msg)

{'U03U1GHT39V': 3, 'U03UJGRN5E0': 21, 'U03UFV7TUTV': 3, 'U03U93GNNVB': 5, 'U03U1FNPEUX': 25, 'U03UG5VFN03': 2, 'U03UG0YHAUT': 1, 'U03UH397319': 35, 'U03T89ACUUW': 4, 'U03UD68RQH3': 5, 'U03UUR571A5': 13, 'U03V785NLSU': 2, 'U03UDBUL7CL': 1, 'U03UJGP0C68': 13, 'U03UVHCV6KB': 42, 'U03UYNR4TS4': 1, 'U03TEPYRM2P': 11, 'U03U9FWPNCE': 9, 'U03UUS0MZCZ': 1, 'U03UP7V9Q57': 1, 'U03U9EJR362': 10, 'U03UJN29Y4C': 28, 'U03V8LHPDME': 6, 'U03UG32J3PC': 14, 'U03UUMM7Y8H': 26, 'U03UG1Z21JP': 8, 'U03V6HMRPGQ': 47, 'U03UKL27B0R': 22, 'U03UKGSDGSG': 1, 'U03V1AM5TFA': 2, 'U03UG4Q7V42': 18, 'U03U1FQKEMV': 7, 'U03UG569P7U': 1, 'U03UHB8CXDY': 4, 'U03UJKJGRAQ': 5, 'U03UAKATQ22': 3, 'U03UR2LQ4DR': 1, 'U03U1J51VFZ': 2, 'U03UUP56MDF': 3, 'U03U9DB7REG': 5, 'U03UD63A8PP': 1, 'U03UFV7HFNF': 7, 'U03TT5KEYCF': 1, 'U03V61VGQG0': 2, 'U03UL5LSTG9': 3, 'U03UGB3T3MY': 1}


In [301]:
msg1 = loader.get_channel_messages("all-technical-support")
msg12 = utils.get_messages_dict(msg1)
print(msg1)
print(len(msg12.keys()))
# iterate over each key to see how much value it holds
# print(msg12["attachments"])
for key in msg12.keys():
    print((key))
    print(len(msg12[key]))

[{'type': 'message', 'subtype': 'channel_join', 'ts': '1660301462.271759', 'user': 'U03TEPYRM2P', 'text': '<@u03tepyrm2p> has joined the channel', 'blocks': None, 'attachments': None}, {'type': 'message', 'subtype': 'channel_join', 'ts': '1660301593.261919', 'user': 'U03T89ACUUW', 'text': '<@u03t89acuuw> has joined the channel', 'blocks': None, 'attachments': None}, {'type': 'message', 'subtype': 'channel_join', 'ts': '1660833152.721689', 'user': 'U03U9DB7REG', 'text': '<@u03u9db7reg> has joined the channel', 'blocks': None, 'attachments': None}, {'type': 'message', 'subtype': 'channel_join', 'ts': '1660833720.896569', 'user': 'U03UKL27B0R', 'text': '<@u03ukl27b0r> has joined the channel', 'blocks': None, 'attachments': None}, {'type': 'message', 'subtype': 'channel_join', 'ts': '1660838314.161079', 'user': 'U03UL5LSTG9', 'text': '<@u03ul5lstg9> has joined the channel', 'blocks': None, 'attachments': None}, {'type': 'message', 'subtype': 'channel_join', 'ts': '1660840086.464919', 'user

In [310]:
msg123 = utils.msgs_to_df(msg1)
msg123.head(3)

Unnamed: 0,msg_id,text,user,mentions,emojis,reactions,replies,replies_to,ts,links,link_count
0,be2fee88-87a5-4377-8893-7b2b51600634,which feature did you use to make the aggregat...,U03UVHCV6KB,[],[],,,,1661205808.184729,[],0
1,fae00ffb-ab43-4b84-a329-0bc349fcea40,<@u03uvhcv6kb> please ask this question in <#c...,U03U93GNNVB,[U03UVHCV6KB],[],,"[{'user': 'U03UVHCV6KB', 'ts': '1661242841.901...",,1661232016.288839,[],0
2,c8a12441-631a-481b-b745-32c0f5dfddb7,"done already, thanks",U03UVHCV6KB,[],[],,,1661242841.901149,1661242841.901149,[],0


In [316]:
# saving all messages into one df using the function provided in the utils file
msg = []
for channel in list(channels_df.name):
    channel_msg = loader.get_channel_messages(channel)
    msg.extend(channel_msg)
msg_df_from_utils = utils.msgs_to_df(msg)
msg_df_from_utils.head(3)
# len(msg_df_from_utils)

Unnamed: 0,msg_id,text,user,mentions,emojis,reactions,replies,replies_to,ts,links,link_count
0,64a66a92-177f-49ef-9e6c-5649d360c6ae,"hi all, looking forward to starting together, ...",U03U93GNNVB,[],[],"[{'name': '+1', 'users': ['U03UFV7TUTV', 'U03U...",,,1661094957.241139,[],0
1,dde9555e-5f8d-4d40-91c5-151f4e2e3dba,hello everyone. it's my hope that you are doin...,U03V1AM5TFA,[],[hugging_face],"[{'name': '+1', 'users': ['U03UFV7HFNF', 'U03U...",,,1661151244.337329,[],0
2,61220a48-08fa-41f0-90eb-b58d196edd2b,*community building session reminder!*:timer_c...,U03V1AM5TFA,[U03V1AM5TFA],[timer_clock],"[{'name': '+1', 'users': ['U03UG4Q7V42', 'U03U...","[{'user': 'U03UG0YHAUT', 'ts': '1661169461.106...",,1661169000.633059,[],0


In [334]:
msg_df_from_utils.columns

Index(['msg_id', 'text', 'user', 'mentions', 'emojis', 'reactions', 'replies',
       'replies_to', 'ts', 'links', 'link_count'],
      dtype='object')

In [318]:
# saving all messages into one df
msg = []
for channel in list(channels_df.name):
    channel_msg = loader.get_channel_messages(channel)
    msg.extend(channel_msg)
msg_df = pd.DataFrame(msg)
msg_df.head(3)

Unnamed: 0,type,subtype,ts,user,text,blocks,attachments,client_msg_id,team,user_team,...,hidden,bot_id,app_id,bot_profile,channel,room,no_notifications,permalink,bot_link,inviter
0,message,channel_join,1660301317.785879,U03TEPYRM2P,<@u03tepyrm2p> has joined the channel,,,,,,...,,,,,,,,,,
1,message,channel_join,1660301593.241889,U03T89ACUUW,<@u03t89acuuw> has joined the channel,,,,,,...,,,,,,,,,,
2,message,channel_join,1660833152.540199,U03U9DB7REG,<@u03u9db7reg> has joined the channel,,,,,,...,,,,,,,,,,


In [None]:
msg_test = []
for channel in list(channels_df.name):
    # Get the messages for the current channel
    channel_msg3 = loader.get_channel_messages(channel)
    msg_test.extend(list(channel_msg3.values()))
    # msg_test.extend(utils.get_messages_dict(channel_msg3))

# Create a DataFrame from the combined 'msg_list'
msg_df3 = pd.DataFrame(msg_test)
msg_df3

In [None]:
msg1_org = utils.get_messages_dict(msg)
msg1_org.keys()

dict_keys(['msg_id', 'text', 'attachments', 'user', 'mentions', 'emojis', 'reactions', 'replies', 'replies_to', 'ts', 'links', 'link_count'])

In [None]:
print(list(channels_df.name))

['all-community-building', 'all-technical-support', 'all-career-exercises', 'all-resources', 'random', 'all-ideas', 'all-week1', 'all-broadcast', 'tenx-bot', 'team-10', 'all-week2', 'week2-group', 'ab_test-group', 'week-2-group-8', 'dsa-sql', 'all-week3', 'week4-teamwork', 'study-group', 'happy-new-year-study-group', 'all-week4', 'batch6_week4_studygroup', 'all-week5', 'all-week6', 'all-week7', 'kafka_de', 'all-week8', 'all-week9', 'all-week10', 'week-11-group4', 'gokada-challenge-presentation', 'all-week11', 'adludios-challange', 'chang-w11', 'all-ml-week12', 'all-de-week12', 'all-week12', 'all-web3-week12', 'machine-learning', 'data-engineering']


In [None]:
msg_df.head(3)

Unnamed: 0,type,subtype,ts,user,text,blocks,attachments,client_msg_id,team,user_team,...,hidden,bot_id,app_id,bot_profile,channel,room,no_notifications,permalink,bot_link,inviter
0,message,channel_join,1660301317.785879,U03TEPYRM2P,<@u03tepyrm2p> has joined the channel,,,,,,...,,,,,,,,,,
1,message,channel_join,1660301593.241889,U03T89ACUUW,<@u03t89acuuw> has joined the channel,,,,,,...,,,,,,,,,,
2,message,channel_join,1660833152.540199,U03U9DB7REG,<@u03u9db7reg> has joined the channel,,,,,,...,,,,,,,,,,


In [None]:
msg_df.columns

Index(['type', 'subtype', 'ts', 'user', 'text', 'blocks', 'attachments',
       'client_msg_id', 'team', 'user_team', 'source_team', 'user_profile',
       'reactions', 'thread_ts', 'reply_count', 'reply_users_count',
       'latest_reply', 'reply_users', 'replies', 'is_locked', 'subscribed',
       'parent_user_id', 'edited', 'files', 'upload', 'display_as_bot', 'root',
       'last_read', 'x_files', 'hidden', 'bot_id', 'app_id', 'bot_profile',
       'channel', 'room', 'no_notifications', 'permalink', 'bot_link',
       'inviter'],
      dtype='object')

In [None]:
user_reply_counts = msg_df.groupby("user")["reply_count"].sum()
user_reply_counts

user
U03T89ACUUW     89.0
U03TEPYRM2P     28.0
U03TT5KEYCF     73.0
U03TX2VN6H5      0.0
U03U1FNPEUX    308.0
               ...  
U03V785NLSU    257.0
U03V8LHPDME     15.0
U03VAH809FC      0.0
U04718Y7SQ0      8.0
USLACKBOT       15.0
Name: reply_count, Length: 67, dtype: float64

In [None]:
users_mapped = loader.get_user_map()
print(users_mapped)

({'U03T89ACUUW': 'Rachel', 'U03TEPYRM2P': 'Bridget', 'U03TNP8Q8CT': 'Victor', 'U03TT5KEYCF': 'Brian', 'U03TX2VN6H5': 'Joanne', 'U03U1FNPEUX': 'Jennifer', 'U03U1FQKEMV': 'Kevin', 'U03U1GHT39V': 'Daniel', 'U03U1HAG9TR': 'Melanie', 'U03U1J51VFZ': 'Heidi', 'U03U4GULU3Y': 'Sara', 'U03U93GNNVB': 'Lawrence', 'U03U9DB7REG': 'Dawn', 'U03U9EJR362': 'Catherine', 'U03U9FWPNCE': 'Maria', 'U03UAKATQ22': 'Thomas', 'U03UCCRJME2': 'Nicole', 'U03UD4FEDHB': 'Michael', 'U03UD5B7C3X': 'Christopher', 'U03UD5K7HAR': 'Linda', 'U03UD63A8PP': 'Jonathan', 'U03UD68RQH3': 'James', 'U03UDA3R8S2': 'Michelle', 'U03UDBUL7CL': 'Luis', 'U03UDKKESB1': 'Melissa', 'U03UFT20ZJR': 'Austin', 'U03UFV7HFNF': 'Glenn', 'U03UFV7TUTV': 'Bryan', 'U03UG03HQ2F': 'Mary', 'U03UG0SFHGT': 'Robert', 'U03UG0YHAUT': 'David', 'U03UG1RTXAP': 'Teresa', 'U03UG1Z21JP': 'Tiffany', 'U03UG32J3PC': 'Zachary', 'U03UG4Q7V42': 'Ariel', 'U03UG569P7U': 'Karen', 'U03UG5VFN03': 'Susan', 'U03UGB3T3MY': 'Allison', 'U03UH397319': 'Monica', 'U03UH760JQK': 'Vero

## Insight Extraction

Below are some useful questions to answer. Feel free to explore to answer other interesting questions that may be of help to get insight about student's behaviour, need, and future performance 

In [None]:
def search_user_by_id(user_id):
    user_name = users_mapped[0].get(user_id, None)
    return user_name

### Which user has the highest number of reply counts?

In [None]:
# which user has the highest number of reply counts?
user_reply_counts = msg_df.groupby("user")["reply_count"].sum()
user_with_highest_replies = user_reply_counts.idxmax()
print("The User with the highest number of reply counts: ")
print(f'User ID: {user_with_highest_replies}')
search_user_by_id(user_with_highest_replies)

The User with the highest number of reply counts: 
User ID: U03V6HMRPGQ


'Lisa'

Who are the top and bottom 10 users by Reply count?

In [None]:
# Who are the top and bottom 10 users by Reply count?
top_users = user_reply_counts.sort_values(ascending=False).reset_index()
top_users["name"] = top_users["user"].apply(search_user_by_id)
print("Top 10 users by Reply counts")
top_users.head(10)

Top 10 users by Reply counts


Unnamed: 0,user,reply_count,name
0,U03V6HMRPGQ,727.0,Lisa
1,U03V1AM5TFA,614.0,Lynn
2,U03UH397319,453.0,Monica
3,U03UJKJGRAQ,443.0,Kenneth
4,U03UVHCV6KB,435.0,Gregory
5,U03UUR571A5,414.0,Kelsey
6,U03UJGP0C68,378.0,Tammy
7,U03UG32J3PC,363.0,Zachary
8,U03UJN29Y4C,355.0,Cynthia
9,U03UD68RQH3,341.0,James


In [None]:
print("Least (Bottom) 10 users by Reply counts")
top_users.tail(10)

Least (Bottom) 10 users by Reply counts


Unnamed: 0,user,reply_count,name
57,U03UYNR4TS4,1.0,Frances
58,U03UDBUL7CL,0.0,Luis
59,U03UDKKESB1,0.0,Melissa
60,U03UFT20ZJR,0.0,Austin
61,U03UH760JQK,0.0,Veronica
62,U03TX2VN6H5,0.0,Joanne
63,U03UCCRJME2,0.0,Nicole
64,U03VAH809FC,0.0,Stephanie
65,U03UK9CB71A,0.0,Clayton
66,U03UR2LQ4DR,0.0,Allen


Who are the top and bottom 10 users by Message count?

In [None]:
user_msg_counts = msg_df["user"].value_counts().reset_index()
user_msg_counts.columns = ["user", "message_count"]
user_msg_counts["name"] = user_msg_counts["user"].apply(search_user_by_id)
user_msg_counts_sorted = user_msg_counts.sort_values(by="message_count",ascending=False)
print("Top 10:")
user_msg_counts_sorted.head(10)

Top 10:


Unnamed: 0,user,message_count,name
0,U03V1AM5TFA,1554,Lynn
1,U03UUR571A5,1206,Kelsey
2,U03UVHCV6KB,1178,Gregory
3,U03UG32J3PC,1070,Zachary
4,U03UH397319,910,Monica
5,U03V6HMRPGQ,902,Lisa
6,U03UG4Q7V42,803,Ariel
7,U03UD68RQH3,706,James
8,U03UJGP0C68,675,Tammy
9,U03U1FNPEUX,575,Jennifer


In [None]:
print("Bottom 10:")
user_msg_counts_sorted.tail(10)

Bottom 10:


Unnamed: 0,user,message_count,name
57,U03UDKKESB1,13,Melissa
58,USLACKBOT,9,
59,U03UG03HQ2F,7,Mary
60,U03UH760JQK,6,Veronica
61,U03UCCRJME2,5,Nicole
62,U03UFT20ZJR,4,Austin
63,U03UK9CB71A,4,Clayton
64,U04718Y7SQ0,3,Phillip
65,U03UR2LQ4DR,3,Allen
66,U03VAH809FC,1,Stephanie


Who are the top and bottom 10 users by Mention?

In [341]:
mmm = msg_df_from_utils.groupby("user")["mentions"].sum().reset_index()
mmm

Unnamed: 0,user,mentions
0,U03T89ACUUW,"[U03U1FNPEUX, U03UJN29Y4C, U03UUN8M4RX, U03UFV..."
1,U03TEPYRM2P,"[U03UUR571A5, U03UJN29Y4C, U03UHB8CXDY, U03UKL..."
2,U03TT5KEYCF,"[U03UJGP0C68, U03U1GHT39V, U03UUMR26Q1, U03U4G..."
3,U03U1FNPEUX,"[U03UUMM7Y8H, U03UP7V9Q57, U03UJGP0C68, U03UKL..."
4,U03U1FQKEMV,"[U03UG32J3PC, U03UUR571A5]"
...,...,...
58,U03V6HMRPGQ,"[U03U1FNPEUX, U03UJKJGRAQ, U03UUR571A5, U03UG4..."
59,U03V785NLSU,"[U03V1AM5TFA, U03UJKJGRAQ, U03UVHCV6KB, U03V1A..."
60,U03V8LHPDME,"[U03UD68RQH3, U03UJN29Y4C, U03UG32J3PC, U03UVH..."
61,U04718Y7SQ0,[]


In [354]:
mention_counts = Counter(msg_df_from_utils['mentions'])

TypeError: unhashable type: 'list'

In [352]:
#the new approch
from collections import Counter
# mentions_list = [mention for mentions in msg_df_from_utils["mentions"] if mentions]
mention_list = [mentions for mentions in msg_df_from_utils['mentions'] if mentions]

print(mention_list)
mention_counts = Counter(mention_list)
# print(mention_counts)

[['U03V1AM5TFA'], ['U03V1AM5TFA'], ['U03UUMM7Y8H'], ['U03UJGP0C68'], ['U03V1AM5TFA'], ['U03UG0SFHGT', 'U03UUP56MDF'], ['U03UD68RQH3'], ['U03V1AM5TFA'], ['U03UFV7HFNF'], ['U03UD68RQH3'], ['U03UP7V9Q57'], ['U03UD4FEDHB'], ['U03V1AM5TFA'], ['U03UJN29Y4C'], ['U03UJN29Y4C'], ['U03U9FWPNCE'], ['U03V785NLSU'], ['U03V785NLSU'], ['U03V785NLSU'], ['U03V1AM5TFA'], ['U03V785NLSU'], ['U03V1AM5TFA'], ['U03V785NLSU'], ['U03V785NLSU'], ['U03V785NLSU'], ['U03V1AM5TFA'], ['U03V785NLSU'], ['U03V785NLSU'], ['U03UJN29Y4C'], ['U03UVHCV6KB'], ['U03UUR571A5'], ['U03UUS0MZCZ'], ['U03UJKJGRAQ'], ['U03UJGP0C68'], ['U03UVHCV6KB'], ['U03UVHCV6KB'], ['U03UUR571A5'], ['U03UVHCV6KB'], ['U03T89ACUUW'], ['U03V1AM5TFA'], ['U03V785NLSU'], ['U03V785NLSU'], ['U03V785NLSU'], ['U03V785NLSU'], ['U03V1AM5TFA'], ['U03V1AM5TFA'], ['U03V785NLSU'], ['U03V1AM5TFA'], ['U03U1FNPEUX'], ['U03U1FNPEUX'], ['U03UVHCV6KB'], ['U03UJN29Y4C'], ['U03UJN29Y4C'], ['U03V785NLSU'], ['U03U1FNPEUX'], ['U03UVHCV6KB'], ['U03U1FNPEUX'], ['U03U1FNPEUX']

TypeError: unhashable type: 'list'

### Visualize reply counts per user per channel

In [None]:
# Visualize reply counts per user per channel

In [None]:
# reply counts per user per channel
reply_counts_per_user_per_channel = msg_df.groupby(["user", "channel"])["reply_count"].sum().reset_index()
print(reply_counts_per_user_per_channel)

        user      channel  reply_count
0  USLACKBOT  C03T0APHX63          0.0
1  USLACKBOT  C049GV7JK4Y          0.0


In [None]:
# what is the time range of the day that most messages are sent?


In [None]:
# what kind of messages are replied faster than others?

In [None]:
# Relationship between # of messages and # of reactions

In [None]:
# Classify messages into different categories such as questions, answers, comments, etc.

In [None]:
# Which users got the most reactions?

In [None]:
# Model topics mentioned in the channel

In [None]:
# What are the topics that got the most reactions?

### Harder questions to look into

In [None]:
# Based on messages, reactions, references shared, and other relevant data such as classification of questions into techical question, comment, answer, aorder stu the python, statistics, and sql skill level of a user?