In [None]:
# main.ipynb
#
# Copyright 2022 Martin Pobaschnig
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: GPL-3.0-or-later

import random
from collections import defaultdict
from datetime import datetime, timezone
from enum import Enum
from typing import Dict, List, Optional, Tuple

import igraph as ig
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from cdhf.data import Data


In [None]:
# Create the cdhf Data object for the dataset file. The path is relative, so it is
# presumably resolved against the notebook's working directory — confirm before moving the notebook.
data: Data = Data("../../input/mmdata.json")

# NOTE(review): presumably parses the complete dataset into memory (the cells in this
# notebook read `data.teams` afterwards) — confirm against the cdhf documentation.
data.load_all()


In [None]:
def hist_plot(l: List[int],
              text_title: str,
              text_x_axis_title: str,
              text_y_axis_title: str,
              show: bool = False,
              nbins: Optional[int] = None):
    """Create a plotly express histogram of ``l`` with a box plot in the margin.

    Args:
        l: The values to bin.
        text_title: Figure title (horizontally centred).
        text_x_axis_title: Label of the x axis.
        text_y_axis_title: Label of the y axis.
        show: When true, display the figure before returning it.
        nbins: Bin count forwarded to ``px.histogram``. ``None`` (or any other
            falsy value such as ``0``) is not forwarded at all.

    Returns:
        The figure object produced by ``px.histogram``, after the layout update.
    """
    frame = pd.DataFrame({'data': l})

    histogram_options = dict(x="data",
                             marginal="box",
                             hover_data=frame.columns)
    # Only pass an explicit bin count when the caller supplied a truthy one.
    if nbins:
        histogram_options["nbins"] = nbins

    fig = px.histogram(frame, **histogram_options)

    layout_options = dict(title_x=0.5,
                          title_text=text_title,
                          xaxis_title=text_x_axis_title,
                          yaxis_title=text_y_axis_title,
                          font_size=18)
    fig.update_layout(**layout_options)

    if show:
        fig.show()

    return fig


def fns(l: List):
    """Return a rounded seven-value summary of the numbers in ``l``.

    The result is the tuple
    ``(minimum, lower fence, Q1, median, Q3, upper fence, maximum)``.
    The quartiles come from ``np.percentile``. The fences sit 1.5 * IQR beyond
    Q1 / Q3 and are clamped so they never fall outside the observed
    minimum / maximum. Every value is passed through the built-in ``round``.
    """
    lowest = np.min(l)
    first_quartile, median, third_quartile = np.percentile(l, [25, 50, 75])
    highest = np.max(l)

    whisker_reach = 1.5*(third_quartile - first_quartile)

    # A fence never extends past the data it describes.
    lower_fence = first_quartile - whisker_reach
    lower_fence = lowest if lower_fence < lowest else lower_fence
    upper_fence = third_quartile + whisker_reach
    upper_fence = highest if upper_fence > highest else upper_fence

    summary = (lowest, lower_fence, first_quartile, median,
               third_quartile, upper_fence, highest)
    return tuple(round(value) for value in summary)


In [None]:
# Number of people per team

# One entry per team: the number of members of that team.
l = [len(team.team_members) for team in data.teams]

plot_number_users_in_team = hist_plot(l,
                                      "",
                                      "Team Size",
                                      "Number of Teams")

# Print the numeric summary (min, fences, quartiles, max) next to the figure.
print(fns(l))
plot_number_users_in_team.show()


In [None]:
# Number of people per channel, over the channels of every team

# One entry per channel: the number of members of that channel.
l = [len(channel.channel_members)
     for team in data.teams
     for channel in team.channels]

# Print the numeric summary (min, fences, quartiles, max) next to the figure.
print(fns(l))
# Each bar counts the channels that fall into a size bucket, so the y axis is a
# number of channels (the x axis already carries the number of users).
plot_number_people_in_channels = hist_plot(l,
                                           "",
                                           "Channel Size",
                                           "Number of Channels")
plot_number_people_in_channels.show()


In [None]:
# Number of people joining and leaving channels, per calendar month (UTC)

# Months that are always present on the x axis, even when nothing happened in them.
FIRST_MONTH = (2018, 1)
LAST_MONTH = (2021, 12)


def _year_month(timestamp_ms) -> Tuple[int, int]:
    """Return the UTC (year, month) of a timestamp that is treated as milliseconds."""
    # fromtimestamp(..., tz=timezone.utc) replaces the deprecated utcfromtimestamp().
    moment = datetime.fromtimestamp(int(timestamp_ms / 1000), tz=timezone.utc)
    return (moment.year, moment.month)


# (year, month) -> number of join / leave events seen in that month.
joins_per_month = defaultdict(int)
leaves_per_month = defaultdict(int)

for team in data.teams:
    # NOTE(review): the counts below do not depend on channel order. The sort is
    # kept because it reorders team.channels in place and other cells may rely
    # on that order — confirm before removing it.
    team.channels.sort(reverse=True, key=lambda x: len(x.channel_members))

    for channel in team.channels:
        if channel.channel_member_history is None:
            continue

        for history_entry in channel.channel_member_history:
            joins_per_month[_year_month(history_entry.join_time)] += 1

            # An entry without a leave time only contributes a join.
            if history_entry.leave_time is not None:
                leaves_per_month[_year_month(history_entry.leave_time)] += 1

# Widen the default window when events fall outside of it, instead of failing
# with a KeyError on the first out-of-range month.
observed_months = list(joins_per_month) + list(leaves_per_month)
first_year, first_month = min([FIRST_MONTH] + observed_months)
last_year, last_month = max([LAST_MONTH] + observed_months)

# "year-month" (month not zero-padded) -> [people joining, people leaving],
# inserted in chronological order and without gaps.
z: Dict[str, List[int]] = {}
for year in range(first_year, last_year + 1):
    for month in range(1, 13):
        if (first_year, first_month) <= (year, month) <= (last_year, last_month):
            z[f"{year}-{month}"] = [joins_per_month.get((year, month), 0),
                                    leaves_per_month.get((year, month), 0)]

dataframe_dict = {
    'date': list(z.keys()),
    'people joining': tuple(joined for joined, _ in z.values()),
    'people leaving': tuple(left for _, left in z.values())
}

df = pd.DataFrame(dataframe_dict)

# One bar series per event type, sharing the month labels as x values.
join_data = go.Bar(x=df["date"].to_numpy(), y=df["people joining"].to_numpy(), name="People joining")
leave_data = go.Bar(x=df["date"].to_numpy(), y=df["people leaving"].to_numpy(), name="People leaving")

fig = go.Figure(data=[join_data, leave_data])
fig.update_layout(autosize=False,
                  title_x=0.5,
                  title_text="",
                  xaxis_title="Time",
                  yaxis_title="People",
                  font_size=18)
fig.show()


In [None]:
# How many channels each person is a member of

# user_id -> number of channel memberships, counted over every channel of every
# team. A user who appears in no channel never gets a key, so there is no 0 bucket.
d = defaultdict(int)

memberships = (member
               for team in data.teams
               for channel in team.channels
               for member in channel.channel_members)
for member in memberships:
    d[member.user_id] += 1

channels_per_user = list(d.values())

fig = hist_plot(channels_per_user,
                "",
                "Number of Channels",
                "Number of Users")

# Print the numeric summary (min, fences, quartiles, max) next to the figure.
print(fns(channels_per_user))
fig.show()


In [None]:
# How many teams each person is a member of

# user_id -> number of team memberships; only users that appear in at least one
# team get a key.
d = defaultdict(int)

all_team_members = (person
                    for team in data.teams
                    for person in team.team_members)
for person in all_team_members:
    d[person.user_id] += 1

teams_per_user = list(d.values())

fig = hist_plot(teams_per_user,
                "",
                "Teams",
                "People")

# Print the numeric summary (min, fences, quartiles, max) next to the figure.
print(fns(teams_per_user))
fig.show()