## LBC group management


In [205]:
import pandas as pd
import numpy as np
from collections import namedtuple

# A requirement bundles a target value with its allowed lower/upper bounds.
requirement = namedtuple('requirement', ['target', 'min', 'max'])

# Constraints for 'sc' groupings.
sc_group_size = requirement(target=8, min=6, max=9)
sc_singles_count = requirement(target=2, min=0, max=4)
sc_couple_count = requirement(target=3, min=3, max=4)

# Constraints for 'lb' groupings.
lb_group_size = requirement(target=6, min=5, max=7)
lb_singles_count = requirement(target=2, min=0, max=3)
lb_couple_count = requirement(target=3, min=2, max=3)

# Letters used to label the groups inside a single grouping column.
group_chars = list('ABCDEFGHIJKL')


### import historical groupings
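- the first column, `size`, is the head-count weight of each row; every other column is one grouping, of type either 'lb' or 'sc'
- rows are members
- a letter denotes membership in a group: rows sharing a letter within a column were in the same group; null means the member was not involved in that grouping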

In [206]:
# 29 grouping columns that alternate lb0, sc0, lb1, sc1, ..., lb14:
# even positions are 'lb', odd positions are 'sc', numbered by i // 2.
grouping_labels = [f'{"sc" if i % 2 else "lb"}{i // 2}' for i in range(29)]
column_names = ['size', *grouping_labels]

# header=0 together with names= replaces the file's own header row with column_names.
group_history = pd.read_csv("LBC_group_history.csv", header=0, names=column_names)

display(group_history.head(5))

Unnamed: 0,size,lb0,sc0,lb1,sc1,lb2,sc2,lb3,sc3,lb4,...,sc9,lb10,sc10,lb11,sc11,lb12,sc12,lb13,sc13,lb14
0,2,,,,,,,,,,...,B,,C,,,,,,A,
1,1,D,,,,,,,,,...,,,,,,,,,,
2,2,D,,F,,G,,,B,E,...,,,,,,,,,,
3,1,A,E,F,C,I,B,F,C,F,...,A,,A,,,,C,,D,
4,2,,E,E,,,A,,,C,...,,,,,,,,,E,


### measure group sizes
- total each group's size within every grouping by summing the `size` weights of its member rows; letters with no members in a grouping show as NaN


In [207]:
# Per-row weight taken from the first column of the history table ('size').
weights = group_history.iloc[:, 0].values

# Every column after the first is one grouping.
grouping_names = list(group_history.columns[1:])

# One row per grouping, one column per group letter: the summed weight of the
# rows carrying that letter (rows with another letter or null contribute 0).
size_rows = []
for grouping_name in grouping_names:
    memberships = group_history[grouping_name]
    size_rows.append([
        np.sum([weight if letter == group_char else 0
                for weight, letter in zip(weights, memberships)])
        for group_char in group_chars
    ])

# A total of 0 means the letter was not used in that grouping; show it as NaN.
group_sizes = pd.DataFrame(
    size_rows,
    columns=group_chars,
    index=grouping_names,
).replace(0, np.nan)

display(group_sizes.head(5))


Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L
lb0,6.0,4.0,4.0,6.0,6.0,,,,,,,
sc0,7.0,6.0,7.0,5.0,4.0,6.0,,,,,,
lb1,5.0,1.0,4.0,4.0,6.0,7.0,6.0,,,,,
sc1,7.0,8.0,4.0,5.0,,,,,,,,
lb2,5.0,4.0,3.0,5.0,6.0,4.0,6.0,6.0,3.0,,,


### create met_before lookup dict
- key: group history index (one per member row)
- value: set of group history indexes that member has shared a group with, including the member's own index


In [208]:
# Collect every historical group as the list of row indexes that shared a
# letter within one grouping column.
all_groups = []
# DataFrame.iteritems() was removed in pandas 2.0; items() is the long-standing
# equivalent and works on older versions too.
for col_name, grouping in group_history.items():
    # Skip nulls up front and visit each distinct label once, in order of first
    # appearance (iterating a plain set() gives a hash-dependent order).
    for group_char in grouping.dropna().unique().tolist():
        # Anything that is not a group letter is ignored, which also filters
        # out the numeric values of the 'size' column.
        if group_char in group_chars:
            all_groups.append(group_history.index[grouping == group_char].tolist())

# Seed every row index with itself, then add all co-members of each group.
met_before = {i: {i} for i in group_history.index}
for group in all_groups:
    for i in group:
        met_before[i].update(group)



### data sanity check
- expect 10-25 unique groups in each round
- groups around 4-8 in size
- max possible group memberships is 29
- 20-50% of members never active before


In [212]:
# Number of groups in each grouping: a letter counts as a group when its size
# is > 0 (NaN compares False, so unused letters are skipped).
group_counts = [
    sum(1 for size in grouping if size > 0)
    for _, grouping in group_sizes.iterrows()
]

# All group sizes as one array for the NaN-aware overall statistics.
all_sizes = group_sizes.values

# Size of each member's met_before set.
met_counts = [len(met_set) for met_set in met_before.values()]

summary_rows = {
    'number of groups': [
        np.mean(group_counts),
        np.min(group_counts),
        np.max(group_counts),
    ],
    'group size head count': [
        np.nanmean(all_sizes),
        np.nanmin(all_sizes),
        np.nanmax(all_sizes),
    ],
    'met counts': [
        np.mean(met_counts),
        np.min(met_counts),
        np.max(met_counts),
    ],
}

features = pd.DataFrame(
    list(summary_rows.values()),
    columns=['mean', 'min', 'max'],
    index=list(summary_rows.keys()),
)

display(features)

Unnamed: 0,mean,min,max
number of groups,4.448276,0.0,11.0
group size head count,5.906977,1.0,10.0
met counts,11.403226,3.0,46.0
